From 76e2582252e499824950f058a5c34ab65367d914 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 28 Jun 2018 19:34:40 -0700 Subject: [PATCH 0001/2269] f2fs: flush journal nat entries for nat_bits during unmount Let's flush journal nat entries for speed up in the next run. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 874b0a15ebe95..6f1db1942e542 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -2582,6 +2582,13 @@ void f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) nid_t set_idx = 0; LIST_HEAD(sets); + /* during unmount, let's flush nat_bits before checking dirty_nat_cnt */ + if (enabled_nat_bits(sbi, cpc)) { + down_write(&nm_i->nat_tree_lock); + remove_nats_in_journal(sbi); + up_write(&nm_i->nat_tree_lock); + } + if (!nm_i->dirty_nat_cnt) return; -- GitLab From 2a909bb2d8ac28fb960f1e69095bb9199eb73fa0 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 29 Jun 2018 18:55:12 -0700 Subject: [PATCH 0002/2269] f2fs: allow wrong configured dio to buffered write This fixes to support dio having unaligned buffers as buffered writes. xfs_io -f -d -c "pwrite 0 512" $testfile -> okay xfs_io -f -d -c "pwrite 1 512" $testfile -> EINVAL Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index a8a1558b7689c..78596e49d653c 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -2371,14 +2371,20 @@ unlock_out: static int check_direct_IO(struct inode *inode, struct iov_iter *iter, loff_t offset) { - unsigned blocksize_mask = inode->i_sb->s_blocksize - 1; - - if (offset & blocksize_mask) - return -EINVAL; - - if (iov_iter_alignment(iter) & blocksize_mask) - return -EINVAL; - + unsigned i_blkbits = READ_ONCE(inode->i_blkbits); + unsigned blkbits = i_blkbits; + unsigned blocksize_mask = (1 << blkbits) - 1; + unsigned long align = offset | iov_iter_alignment(iter); + struct block_device *bdev = inode->i_sb->s_bdev; + + if (align & blocksize_mask) { + if (bdev) + blkbits = blksize_bits(bdev_logical_block_size(bdev)); + blocksize_mask = (1 << blkbits) - 1; + if (align & blocksize_mask) + return -EINVAL; + return 1; + } return 0; } @@ -2396,7 +2402,7 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) err = check_direct_IO(inode, iter, offset); if (err) - return err; + return err < 0 ? err : 0; if (f2fs_force_buffered_io(inode, rw)) return 0; -- GitLab From 5aa4222bcc926f21786f4188efbc063024330797 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 6 Jul 2018 16:47:34 -0700 Subject: [PATCH 0003/2269] f2fs: do checkpoint in kill_sb When unmounting f2fs in force mode, we can get it stuck by io_schedule() by some pending IOs in meta_inode. io_schedule+0xd/0x30 wait_on_page_bit_common+0xc6/0x130 __filemap_fdatawait_range+0xbd/0x100 filemap_fdatawait_keep_errors+0x15/0x40 sync_inodes_sb+0x1cf/0x240 sync_filesystem+0x52/0x90 generic_shutdown_super+0x1d/0x110 kill_f2fs_super+0x28/0x80 [f2fs] deactivate_locked_super+0x35/0x60 cleanup_mnt+0x36/0x70 task_work_run+0x79/0xa0 exit_to_usermode_loop+0x62/0x70 do_syscall_64+0xdb/0xf0 entry_SYSCALL_64_after_hwframe+0x44/0xa9 0xffffffffffffffff Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 08886c9f62da8..055a5e51e3886 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -3089,9 +3089,19 @@ static struct dentry *f2fs_mount(struct file_system_type *fs_type, int flags, static void kill_f2fs_super(struct super_block *sb) { if (sb->s_root) { - set_sbi_flag(F2FS_SB(sb), SBI_IS_CLOSE); - f2fs_stop_gc_thread(F2FS_SB(sb)); - f2fs_stop_discard_thread(F2FS_SB(sb)); + struct f2fs_sb_info *sbi = F2FS_SB(sb); + + set_sbi_flag(sbi, SBI_IS_CLOSE); + f2fs_stop_gc_thread(sbi); + f2fs_stop_discard_thread(sbi); + + if (is_sbi_flag_set(sbi, SBI_IS_DIRTY) || + !is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG)) { + struct cp_control cpc = { + .reason = CP_UMOUNT, + }; + f2fs_write_checkpoint(sbi, &cpc); + } } kill_block_super(sb); } -- GitLab From 526c162ba9800180e7dd4774bb789e7fcb8ec421 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 11 Jul 2018 18:30:42 -0700 Subject: [PATCH 0004/2269] f2fs: keep meta pages in cp_error state It turns out losing meta pages in shutdown period makes f2fs very unstable so that I could see many unexpected error conditions. Let's keep meta pages for fault injection and sudden power-off tests. Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index d7a53c677452c..bc124b6216c23 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -242,11 +242,8 @@ static int __f2fs_write_meta_page(struct page *page, trace_f2fs_writepage(page, META); - if (unlikely(f2fs_cp_error(sbi))) { - dec_page_count(sbi, F2FS_DIRTY_META); - unlock_page(page); - return 0; - } + if (unlikely(f2fs_cp_error(sbi))) + goto redirty_out; if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) goto redirty_out; if (wbc->for_reclaim && page->index < GET_SUM_BLOCK(sbi, 0)) @@ -1129,6 +1126,9 @@ static void wait_on_all_pages_writeback(struct f2fs_sb_info *sbi) if (!get_pages(sbi, F2FS_WB_CP_DATA)) break; + if (unlikely(f2fs_cp_error(sbi))) + break; + io_schedule_timeout(5*HZ); } finish_wait(&sbi->cp_wait, &wait); @@ -1202,8 +1202,12 @@ static void commit_checkpoint(struct f2fs_sb_info *sbi, /* writeout cp pack 2 page */ err = __f2fs_write_meta_page(page, &wbc, FS_CP_META_IO); - f2fs_bug_on(sbi, err); + if (unlikely(err && f2fs_cp_error(sbi))) { + f2fs_put_page(page, 1); + return; + } + f2fs_bug_on(sbi, err); f2fs_put_page(page, 0); /* submit checkpoint (with barrier if NOBARRIER is not set) */ @@ -1229,7 +1233,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) while (get_pages(sbi, F2FS_DIRTY_META)) { f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO); if (unlikely(f2fs_cp_error(sbi))) - return -EIO; + break; } /* @@ -1309,7 +1313,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO); if (unlikely(f2fs_cp_error(sbi))) - return -EIO; + break; } } @@ -1350,9 +1354,6 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) /* wait for previous submitted meta pages writeback */ wait_on_all_pages_writeback(sbi); - if (unlikely(f2fs_cp_error(sbi))) - return -EIO; - /* flush all device cache */ err = f2fs_flush_device_cache(sbi); if (err) @@ -1364,9 +1365,6 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) f2fs_release_ino_entry(sbi, false); - if (unlikely(f2fs_cp_error(sbi))) - return -EIO; - clear_sbi_flag(sbi, SBI_IS_DIRTY); clear_sbi_flag(sbi, SBI_NEED_CP); __set_cp_next_pack(sbi); @@ -1381,7 +1379,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) f2fs_bug_on(sbi, get_pages(sbi, F2FS_DIRTY_DENTS)); - return 0; + return unlikely(f2fs_cp_error(sbi)) ? -EIO : 0; } /* -- GitLab From f87a488252498fda4891a5c8305b6e45e1290ed2 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 21 Jun 2018 13:46:23 -0700 Subject: [PATCH 0005/2269] f2fs: indicate shutdown f2fs to allow unmount successfully Once we shutdown f2fs, we have to flush stale pages in order to unmount the system. In order to make stable, we need to stop fault injection as well. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 1 + fs/f2fs/f2fs.h | 7 +++++++ fs/f2fs/file.c | 4 ++++ fs/f2fs/inode.c | 3 +++ fs/f2fs/node.c | 3 ++- fs/f2fs/super.c | 5 +---- 6 files changed, 18 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index bc124b6216c23..90a61e22eedd9 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -28,6 +28,7 @@ struct kmem_cache *f2fs_inode_entry_slab; void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io) { + f2fs_build_fault_attr(sbi, 0); set_ckpt_flags(sbi, CP_ERROR_FLAG); if (!end_io) f2fs_flush_merged_writes(sbi); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index b3569721249ec..95ea07b8e0e39 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1067,6 +1067,7 @@ enum { SBI_POR_DOING, /* recovery is doing or not */ SBI_NEED_SB_WRITE, /* need to recover superblock */ SBI_NEED_CP, /* need to checkpoint */ + SBI_IS_SHUTDOWN, /* shutdown by ioctl */ }; enum { @@ -3370,4 +3371,10 @@ static inline bool f2fs_force_buffered_io(struct inode *inode, int rw) F2FS_I_SB(inode)->s_ndevs); } +#ifdef CONFIG_F2FS_FAULT_INJECTION +extern void f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned int rate); +#else +#define f2fs_build_fault_attr(sbi, rate) do { } while (0) +#endif + #endif diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index d85d172161265..4dab0fd828257 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1889,6 +1889,7 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg) } if (sb) { f2fs_stop_checkpoint(sbi, false); + set_sbi_flag(sbi, SBI_IS_SHUTDOWN); thaw_bdev(sb->s_bdev, sb); } break; @@ -1898,13 +1899,16 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg) if (ret) goto out; f2fs_stop_checkpoint(sbi, false); + set_sbi_flag(sbi, SBI_IS_SHUTDOWN); break; case F2FS_GOING_DOWN_NOSYNC: f2fs_stop_checkpoint(sbi, false); + set_sbi_flag(sbi, SBI_IS_SHUTDOWN); break; case F2FS_GOING_DOWN_METAFLUSH: f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_META_IO); f2fs_stop_checkpoint(sbi, false); + set_sbi_flag(sbi, SBI_IS_SHUTDOWN); break; default: ret = -EINVAL; diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 30a777369d2b9..f9d1bccebcb65 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -159,6 +159,9 @@ bool f2fs_inode_chksum_verify(struct f2fs_sb_info *sbi, struct page *page) struct f2fs_inode *ri; __u32 provided, calculated; + if (unlikely(is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN))) + return true; + if (!f2fs_enable_inode_chksum(sbi, page) || PageDirty(page) || PageWriteback(page)) return true; diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 6f1db1942e542..828ece0e39efc 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1146,7 +1146,8 @@ static int read_node_page(struct page *page, int op_flags) f2fs_get_node_info(sbi, page->index, &ni); - if (unlikely(ni.blk_addr == NULL_ADDR)) { + if (unlikely(ni.blk_addr == NULL_ADDR) || + is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN)) { ClearPageUptodate(page); return -ENOENT; } diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 055a5e51e3886..3bf8846222fb4 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -57,8 +57,7 @@ char *fault_name[FAULT_MAX] = { [FAULT_CHECKPOINT] = "checkpoint error", }; -static void f2fs_build_fault_attr(struct f2fs_sb_info *sbi, - unsigned int rate) +void f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned int rate) { struct f2fs_fault_info *ffi = &F2FS_OPTION(sbi).fault_info; @@ -1379,9 +1378,7 @@ static void default_options(struct f2fs_sb_info *sbi) set_opt(sbi, POSIX_ACL); #endif -#ifdef CONFIG_F2FS_FAULT_INJECTION f2fs_build_fault_attr(sbi, 0); -#endif } #ifdef CONFIG_QUOTA -- GitLab From 77045a0a67f99df2ff800900e861bc1ed55efbd9 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Sun, 15 Jul 2018 09:58:08 +0900 Subject: [PATCH 0006/2269] f2fs: avoid potential deadlock in f2fs_sbi_store [ 155.018460] ====================================================== [ 155.021431] WARNING: possible circular locking dependency detected [ 155.024339] 4.18.0-rc3+ #5 Tainted: G OE [ 155.026879] ------------------------------------------------------ [ 155.029783] umount/2901 is trying to acquire lock: [ 155.032187] 00000000c4282f1f (kn->count#130){++++}, at: kernfs_remove+0x1f/0x30 [ 155.035439] [ 155.035439] but task is already holding lock: [ 155.038892] 0000000056e4307b (&type->s_umount_key#41){++++}, at: deactivate_super+0x33/0x50 [ 155.042602] [ 155.042602] which lock already depends on the new lock. [ 155.042602] [ 155.047465] [ 155.047465] the existing dependency chain (in reverse order) is: [ 155.051354] [ 155.051354] -> #1 (&type->s_umount_key#41){++++}: [ 155.054768] f2fs_sbi_store+0x61/0x460 [f2fs] [ 155.057083] kernfs_fop_write+0x113/0x1a0 [ 155.059277] __vfs_write+0x36/0x180 [ 155.061250] vfs_write+0xbe/0x1b0 [ 155.063179] ksys_write+0x55/0xc0 [ 155.065068] do_syscall_64+0x60/0x1b0 [ 155.067071] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 155.069529] [ 155.069529] -> #0 (kn->count#130){++++}: [ 155.072421] __kernfs_remove+0x26f/0x2e0 [ 155.074452] kernfs_remove+0x1f/0x30 [ 155.076342] kobject_del.part.5+0xe/0x40 [ 155.078354] f2fs_put_super+0x12d/0x290 [f2fs] [ 155.080500] generic_shutdown_super+0x6c/0x110 [ 155.082655] kill_block_super+0x21/0x50 [ 155.084634] kill_f2fs_super+0x9c/0xc0 [f2fs] [ 155.086726] deactivate_locked_super+0x3f/0x70 [ 155.088826] cleanup_mnt+0x3b/0x70 [ 155.090584] task_work_run+0x93/0xc0 [ 155.092367] exit_to_usermode_loop+0xf0/0x100 [ 155.094466] do_syscall_64+0x162/0x1b0 [ 155.096312] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 155.098603] [ 155.098603] other info that might help us debug this: [ 155.098603] [ 155.102418] Possible unsafe locking scenario: [ 155.102418] [ 155.105134] CPU0 CPU1 [ 155.107037] ---- ---- [ 155.108910] lock(&type->s_umount_key#41); [ 155.110674] lock(kn->count#130); [ 155.113010] lock(&type->s_umount_key#41); [ 155.115608] lock(kn->count#130); Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/sysfs.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 60c827eadd825..27ddf60e3362b 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -286,8 +286,10 @@ static ssize_t f2fs_sbi_store(struct f2fs_attr *a, bool gc_entry = (!strcmp(a->attr.name, "gc_urgent") || a->struct_type == GC_THREAD); - if (gc_entry) - down_read(&sbi->sb->s_umount); + if (gc_entry) { + if (!down_read_trylock(&sbi->sb->s_umount)) + return -EAGAIN; + } ret = __sbi_store(a, sbi, buf, count); if (gc_entry) up_read(&sbi->sb->s_umount); -- GitLab From 9f58f986286d7d38f299cd31e1b94ffbed45a52a Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 6 Jun 2018 23:55:01 +0800 Subject: [PATCH 0007/2269] f2fs: don't acquire orphan ino during recovery During orphan inode recovery, checkpoint should never succeed due to SBI_POR_DOING flag, so we don't need acquire orphan ino which only be used by checkpoint. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 90a61e22eedd9..4158e8012012f 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -570,12 +570,7 @@ static int recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) { struct inode *inode; struct node_info ni; - int err = f2fs_acquire_orphan_inode(sbi); - - if (err) - goto err_out; - - __add_ino_entry(sbi, ino, 0, ORPHAN_INO); + int err; inode = f2fs_iget_retry(sbi->sb, ino); if (IS_ERR(inode)) { @@ -605,7 +600,6 @@ static int recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) err = -EIO; goto err_out; } - __remove_ino_entry(sbi, ino, ORPHAN_INO); return 0; err_out: -- GitLab From 4bb15ac86b0033d0a11783685b8fb822b823ab66 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 6 Jun 2018 23:55:02 +0800 Subject: [PATCH 0008/2269] f2fs: move s_res{u,g}id initialization to default_options() Let default_options() initialize s_res{u,g}id with default value like other options. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 3bf8846222fb4..8ff3900a5b57e 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1354,6 +1354,8 @@ static void default_options(struct f2fs_sb_info *sbi) F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_DEFAULT; F2FS_OPTION(sbi).fsync_mode = FSYNC_MODE_POSIX; F2FS_OPTION(sbi).test_dummy_encryption = false; + F2FS_OPTION(sbi).s_resuid = make_kuid(&init_user_ns, F2FS_DEF_RESUID); + F2FS_OPTION(sbi).s_resgid = make_kgid(&init_user_ns, F2FS_DEF_RESGID); sbi->readdir_ra = 1; set_opt(sbi, BG_GC); @@ -2697,9 +2699,6 @@ try_onemore: sb->s_fs_info = sbi; sbi->raw_super = raw_super; - F2FS_OPTION(sbi).s_resuid = make_kuid(&init_user_ns, F2FS_DEF_RESUID); - F2FS_OPTION(sbi).s_resgid = make_kgid(&init_user_ns, F2FS_DEF_RESGID); - /* precompute checksum seed for metadata */ if (f2fs_sb_has_inode_chksum(sb)) sbi->s_chksum_seed = f2fs_chksum(sbi, ~0, raw_super->uuid, -- GitLab From 63d1ae286243714dc49988a2e7f146418c8ec607 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 11 Jun 2018 18:02:01 +0800 Subject: [PATCH 0009/2269] f2fs: relocate readdir_ra configure initialization readdir_ra is sysfs configuration instead of mount option, so it should not be initialized in default_options(), otherwise after remount, it can be reset to be enabled which may not as user wish, so let's move it to f2fs_tuning_parameters(). Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 8ff3900a5b57e..6bf2cdc30e172 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1356,7 +1356,6 @@ static void default_options(struct f2fs_sb_info *sbi) F2FS_OPTION(sbi).test_dummy_encryption = false; F2FS_OPTION(sbi).s_resuid = make_kuid(&init_user_ns, F2FS_DEF_RESUID); F2FS_OPTION(sbi).s_resgid = make_kgid(&init_user_ns, F2FS_DEF_RESGID); - sbi->readdir_ra = 1; set_opt(sbi, BG_GC); set_opt(sbi, INLINE_XATTR); @@ -2650,6 +2649,8 @@ static void f2fs_tuning_parameters(struct f2fs_sb_info *sbi) sm_i->dcc_info->discard_granularity = 1; sm_i->ipu_policy = 1 << F2FS_IPU_FORCE; } + + sbi->readdir_ra = 1; } static int f2fs_fill_super(struct super_block *sb, void *data, int silent) -- GitLab From 16124c619df5e985e75a8caea0a3109200503683 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 11 Jun 2018 18:02:02 +0800 Subject: [PATCH 0010/2269] f2fs: fix error path of fill_super In fill_super, if root inode's attribute is incorrect, we need to call f2fs_destroy_stats to release stats memory. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 6bf2cdc30e172..520b76a353ab8 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -2911,7 +2911,7 @@ try_onemore: if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) { iput(root); err = -EINVAL; - goto free_node_inode; + goto free_stats; } sb->s_root = d_make_root(root); /* allocate root dentry */ -- GitLab From 7b42f2b145594b42bb30d09cc12246e34023e8f9 Mon Sep 17 00:00:00 2001 From: Weichao Guo Date: Fri, 9 Mar 2018 23:10:21 +0800 Subject: [PATCH 0011/2269] f2fs: support in-memory inode checksum when checking consistency Enable in-memory inode checksum to protect metadata blocks from in-memory scribbles when checking consistency, which has no performance requirements. Signed-off-by: Weichao Guo Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/inode.c | 8 ++++++++ fs/f2fs/node.c | 10 +++++++++- fs/f2fs/node.h | 4 ++++ 3 files changed, 21 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index f9d1bccebcb65..2076225787d1d 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -162,8 +162,12 @@ bool f2fs_inode_chksum_verify(struct f2fs_sb_info *sbi, struct page *page) if (unlikely(is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN))) return true; +#ifdef CONFIG_F2FS_CHECK_FS + if (!f2fs_enable_inode_chksum(sbi, page)) +#else if (!f2fs_enable_inode_chksum(sbi, page) || PageDirty(page) || PageWriteback(page)) +#endif return true; ri = &F2FS_NODE(page)->i; @@ -477,6 +481,10 @@ void f2fs_update_inode(struct inode *inode, struct page *node_page) F2FS_I(inode)->i_disk_time[1] = inode->i_ctime; F2FS_I(inode)->i_disk_time[2] = inode->i_mtime; F2FS_I(inode)->i_disk_time[3] = F2FS_I(inode)->i_crtime; + +#ifdef CONFIG_F2FS_CHECK_FS + f2fs_inode_chksum_set(F2FS_I_SB(inode), node_page); +#endif } void f2fs_update_inode_page(struct inode *inode) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 828ece0e39efc..2803bddda3f64 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1141,8 +1141,12 @@ static int read_node_page(struct page *page, int op_flags) .encrypted_page = NULL, }; - if (PageUptodate(page)) + if (PageUptodate(page)) { +#ifdef CONFIG_F2FS_CHECK_FS + f2fs_bug_on(sbi, !f2fs_inode_chksum_verify(sbi, page)); +#endif return LOCKED_PAGE; + } f2fs_get_node_info(sbi, page->index, &ni); @@ -1775,6 +1779,10 @@ static int f2fs_set_node_page_dirty(struct page *page) if (!PageUptodate(page)) SetPageUptodate(page); +#ifdef CONFIG_F2FS_CHECK_FS + if (IS_INODE(page)) + f2fs_inode_chksum_set(F2FS_P_SB(page), page); +#endif if (!PageDirty(page)) { __set_page_dirty_nobuffers(page); inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_NODES); diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index b95e49e4a928a..8f34bdffde934 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -444,6 +444,10 @@ static inline void set_mark(struct page *page, int mark, int type) else flag &= ~(0x1 << type); rn->footer.flag = cpu_to_le32(flag); + +#ifdef CONFIG_F2FS_CHECK_FS + f2fs_inode_chksum_set(F2FS_P_SB(page), page); +#endif } #define set_dentry_mark(page, mark) set_mark(page, mark, DENT_BIT_SHIFT) #define set_fsync_mark(page, mark) set_mark(page, mark, FSYNC_BIT_SHIFT) -- GitLab From 130ad54771710d116b35800d008102e5df92857c Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 15 Jun 2018 14:45:57 +0800 Subject: [PATCH 0012/2269] f2fs: fix to propagate return value of scan_nat_page() As Anatoly Trosinenko reported in bugzilla: How to reproduce: 1. Compile the 73fcb1a370c76 version of the kernel using the config attached 2. Unpack and mount the attached filesystem image as F2FS 3. The kernel will BUG() on mount (BUGs are explicitly enabled in config) [ 2.233612] F2FS-fs (sda): Found nat_bits in checkpoint [ 2.248422] ------------[ cut here ]------------ [ 2.248857] kernel BUG at fs/f2fs/node.c:1967! [ 2.249760] invalid opcode: 0000 [#1] SMP NOPTI [ 2.250219] Modules linked in: [ 2.251848] CPU: 0 PID: 944 Comm: mount Not tainted 4.17.0-rc5+ #1 [ 2.252331] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014 [ 2.253305] RIP: 0010:build_free_nids+0x337/0x3f0 [ 2.253672] RSP: 0018:ffffae7fc0857c50 EFLAGS: 00000246 [ 2.254080] RAX: 00000000ffffffff RBX: 0000000000000123 RCX: 0000000000000001 [ 2.254638] RDX: ffff9aa7063d5c00 RSI: 0000000000000122 RDI: ffff9aa705852e00 [ 2.255190] RBP: ffff9aa705852e00 R08: 0000000000000001 R09: ffff9aa7059090c0 [ 2.255719] R10: 0000000000000000 R11: 0000000000000000 R12: ffff9aa705852e00 [ 2.256242] R13: ffff9aa7063ad000 R14: ffff9aa705919000 R15: 0000000000000123 [ 2.256809] FS: 00000000023078c0(0000) GS:ffff9aa707800000(0000) knlGS:0000000000000000 [ 2.258654] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 2.259153] CR2: 00000000005511ae CR3: 0000000005872000 CR4: 00000000000006f0 [ 2.259801] Call Trace: [ 2.260583] build_node_manager+0x5cd/0x600 [ 2.260963] f2fs_fill_super+0x66a/0x17c0 [ 2.261300] ? f2fs_commit_super+0xe0/0xe0 [ 2.261622] mount_bdev+0x16e/0x1a0 [ 2.261899] mount_fs+0x30/0x150 [ 2.262398] vfs_kern_mount.part.28+0x4f/0xf0 [ 2.262743] do_mount+0x5d0/0xc60 [ 2.263010] ? _copy_from_user+0x37/0x60 [ 2.263313] ? memdup_user+0x39/0x60 [ 2.263692] ksys_mount+0x7b/0xd0 [ 2.263960] __x64_sys_mount+0x1c/0x20 [ 2.264268] do_syscall_64+0x43/0xf0 [ 2.264560] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 2.265095] RIP: 0033:0x48d31a [ 2.265502] RSP: 002b:00007ffc6fe60a08 EFLAGS: 00000246 ORIG_RAX: 00000000000000a5 [ 2.266089] RAX: ffffffffffffffda RBX: 0000000000008000 RCX: 000000000048d31a [ 2.266607] RDX: 00007ffc6fe62fa5 RSI: 00007ffc6fe62f9d RDI: 00007ffc6fe62f94 [ 2.267130] RBP: 00000000023078a0 R08: 0000000000000000 R09: 0000000000000000 [ 2.267670] R10: 0000000000008000 R11: 0000000000000246 R12: 0000000000000000 [ 2.268192] R13: 0000000000000000 R14: 00007ffc6fe60c78 R15: 0000000000000000 [ 2.268767] Code: e8 5f c3 ff ff 83 c3 01 41 83 c7 01 81 fb c7 01 00 00 74 48 44 39 7d 04 76 42 48 63 c3 48 8d 04 c0 41 8b 44 06 05 83 f8 ff 75 c1 <0f> 0b 49 8b 45 50 48 8d b8 b0 00 00 00 e8 37 59 69 00 b9 01 00 [ 2.270434] RIP: build_free_nids+0x337/0x3f0 RSP: ffffae7fc0857c50 [ 2.271426] ---[ end trace ab20c06cd3c8fde4 ]--- During loading NAT entries, we will do sanity check, once the entry info is corrupted, it will cause BUG_ON directly to protect user data from being overwrited. In this case, it will be better to just return failure on mount() instead of panic, so that user can get hint from kmsg and try fsck for recovery immediately rather than after an abnormal reboot. https://bugzilla.kernel.org/show_bug.cgi?id=199769 Reported-by: Anatoly Trosinenko Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 2 +- fs/f2fs/node.c | 42 ++++++++++++++++++++++++++++++------------ 2 files changed, 31 insertions(+), 13 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 95ea07b8e0e39..0fc2cf0be1157 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2810,7 +2810,7 @@ int f2fs_fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode, int f2fs_sync_node_pages(struct f2fs_sb_info *sbi, struct writeback_control *wbc, bool do_balance, enum iostat_type io_type); -void f2fs_build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount); +int f2fs_build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount); bool f2fs_alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid); void f2fs_alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid); void f2fs_alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 2803bddda3f64..1a5c449c0758f 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1977,7 +1977,7 @@ static void remove_free_nid(struct f2fs_sb_info *sbi, nid_t nid) kmem_cache_free(free_nid_slab, i); } -static void scan_nat_page(struct f2fs_sb_info *sbi, +static int scan_nat_page(struct f2fs_sb_info *sbi, struct page *nat_page, nid_t start_nid) { struct f2fs_nm_info *nm_i = NM_I(sbi); @@ -1995,7 +1995,10 @@ static void scan_nat_page(struct f2fs_sb_info *sbi, break; blk_addr = le32_to_cpu(nat_blk->entries[i].block_addr); - f2fs_bug_on(sbi, blk_addr == NEW_ADDR); + + if (blk_addr == NEW_ADDR) + return -EINVAL; + if (blk_addr == NULL_ADDR) { add_free_nid(sbi, start_nid, true, true); } else { @@ -2004,6 +2007,8 @@ static void scan_nat_page(struct f2fs_sb_info *sbi, spin_unlock(&NM_I(sbi)->nid_list_lock); } } + + return 0; } static void scan_curseg_cache(struct f2fs_sb_info *sbi) @@ -2059,11 +2064,11 @@ out: up_read(&nm_i->nat_tree_lock); } -static void __f2fs_build_free_nids(struct f2fs_sb_info *sbi, +static int __f2fs_build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount) { struct f2fs_nm_info *nm_i = NM_I(sbi); - int i = 0; + int i = 0, ret; nid_t nid = nm_i->next_scan_nid; if (unlikely(nid >= nm_i->max_nid)) @@ -2071,17 +2076,17 @@ static void __f2fs_build_free_nids(struct f2fs_sb_info *sbi, /* Enough entries */ if (nm_i->nid_cnt[FREE_NID] >= NAT_ENTRY_PER_BLOCK) - return; + return 0; if (!sync && !f2fs_available_free_memory(sbi, FREE_NIDS)) - return; + return 0; if (!mount) { /* try to find free nids in free_nid_bitmap */ scan_free_nid_bits(sbi); if (nm_i->nid_cnt[FREE_NID] >= NAT_ENTRY_PER_BLOCK) - return; + return 0; } /* readahead nat pages to be scanned */ @@ -2095,8 +2100,16 @@ static void __f2fs_build_free_nids(struct f2fs_sb_info *sbi, nm_i->nat_block_bitmap)) { struct page *page = get_current_nat_page(sbi, nid); - scan_nat_page(sbi, page, nid); + ret = scan_nat_page(sbi, page, nid); f2fs_put_page(page, 1); + + if (ret) { + up_read(&nm_i->nat_tree_lock); + f2fs_bug_on(sbi, !mount); + f2fs_msg(sbi->sb, KERN_ERR, + "NAT is corrupt, run fsck to fix it"); + return -EINVAL; + } } nid += (NAT_ENTRY_PER_BLOCK - (nid % NAT_ENTRY_PER_BLOCK)); @@ -2117,13 +2130,19 @@ static void __f2fs_build_free_nids(struct f2fs_sb_info *sbi, f2fs_ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nm_i->next_scan_nid), nm_i->ra_nid_pages, META_NAT, false); + + return 0; } -void f2fs_build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount) +int f2fs_build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount) { + int ret; + mutex_lock(&NM_I(sbi)->build_lock); - __f2fs_build_free_nids(sbi, sync, mount); + ret = __f2fs_build_free_nids(sbi, sync, mount); mutex_unlock(&NM_I(sbi)->build_lock); + + return ret; } /* @@ -2817,8 +2836,7 @@ int f2fs_build_node_manager(struct f2fs_sb_info *sbi) /* load free nid status from nat_bits table */ load_free_nid_bitmap(sbi); - f2fs_build_free_nids(sbi, true, true); - return 0; + return f2fs_build_free_nids(sbi, true, true); } void f2fs_destroy_node_manager(struct f2fs_sb_info *sbi) -- GitLab From 0d71db20c3a31384762c0ea402b4e7d96a65ef2f Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 20 Jun 2018 21:27:21 -0700 Subject: [PATCH 0013/2269] f2fs: don't issue discard commands in online discard is on Actually, we don't need to issue discard commands, if discard is on, as mentioned in the comment. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 9efce174c51a9..6dc8828b4d871 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2469,23 +2469,24 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) if (err) goto out; - start_block = START_BLOCK(sbi, start_segno); - end_block = START_BLOCK(sbi, end_segno + 1); - - __init_discard_policy(sbi, &dpolicy, DPOLICY_FSTRIM, cpc.trim_minlen); - __issue_discard_cmd_range(sbi, &dpolicy, start_block, end_block); - /* * We filed discard candidates, but actually we don't need to wait for * all of them, since they'll be issued in idle time along with runtime * discard option. User configuration looks like using runtime discard * or periodic fstrim instead of it. */ - if (!test_opt(sbi, DISCARD)) { - trimmed = __wait_discard_cmd_range(sbi, &dpolicy, + if (test_opt(sbi, DISCARD)) + goto out; + + start_block = START_BLOCK(sbi, start_segno); + end_block = START_BLOCK(sbi, end_segno + 1); + + __init_discard_policy(sbi, &dpolicy, DPOLICY_FSTRIM, cpc.trim_minlen); + __issue_discard_cmd_range(sbi, &dpolicy, start_block, end_block); + + trimmed = __wait_discard_cmd_range(sbi, &dpolicy, start_block, end_block); - range->len = F2FS_BLK_TO_BYTES(trimmed); - } + range->len = F2FS_BLK_TO_BYTES(trimmed); out: return err; } -- GitLab From 2c077ed3d901d6ed3e468639fd16ef911a250003 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 20 Jun 2018 13:39:53 +0300 Subject: [PATCH 0014/2269] f2fs: Fix uninitialized return in f2fs_ioc_shutdown() "ret" can be uninitialized on the success path when "in == F2FS_GOING_DOWN_FULLSYNC". Fixes: 60b2b4ee2bc0 ("f2fs: Fix deadlock in shutdown ioctl") Signed-off-by: Dan Carpenter Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 4dab0fd828257..2081a17f7ff42 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1866,7 +1866,7 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg) struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct super_block *sb = sbi->sb; __u32 in; - int ret; + int ret = 0; if (!capable(CAP_SYS_ADMIN)) return -EPERM; -- GitLab From 4af3211804f8ccbfd1612523a3f1a1992b4592ed Mon Sep 17 00:00:00 2001 From: Yunlei He Date: Thu, 21 Jun 2018 14:49:06 +0800 Subject: [PATCH 0015/2269] f2fs: fix a hungtask problem caused by congestion_wait This patch fix hungtask problem which can be reproduced as follow: Thread 0~3: while true do touch /xxx/test/file_xxx done Thread 4 write a new checkpoint every three seconds. In the meantime, fio start 16 threads for randwrite. With my debug info, cycles num will exceed 1000 in function f2fs_sync_dirty_inodes, and most of cycle will be dropped into congestion_wait() and sleep more than 20ms. Cycles num reduced to 3 with this patch. Signed-off-by: Yunlei He Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 4158e8012012f..6a90570137f89 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -972,12 +972,10 @@ retry: iput(inode); /* We need to give cpu to another writers. */ - if (ino == cur_ino) { - congestion_wait(BLK_RW_ASYNC, HZ/50); + if (ino == cur_ino) cond_resched(); - } else { + else ino = cur_ino; - } } else { /* * We should submit bio, since it exists several -- GitLab From 7369fa7d5f18343fc628c2711a0904d2721ab39c Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 21 Jun 2018 11:29:43 -0700 Subject: [PATCH 0016/2269] f2fs: assign REQ_RAHEAD to bio for ->readpages As Jens reported, we'd better assign REQ_RAHEAD to bio by the fact that ->readpages is called only from read-ahead. In Documentation/filesystems/vfs.txt, readpages: called by the VM to read pages associated with the address_space object. This is essentially just a vector version of readpage. Instead of just one page, several pages are requested. readpages is only used for read-ahead, so read errors are ignored. If anything goes wrong, feel free to give up. Signed-off-by: Jens Axboe Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 78596e49d653c..571bafbf5c72f 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -534,7 +534,7 @@ out: } static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr, - unsigned nr_pages) + unsigned nr_pages, unsigned op_flag) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct bio *bio; @@ -546,7 +546,7 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr, return ERR_PTR(-ENOMEM); f2fs_target_device(sbi, blkaddr, bio); bio->bi_end_io = f2fs_read_end_io; - bio_set_op_attrs(bio, REQ_OP_READ, 0); + bio_set_op_attrs(bio, REQ_OP_READ, op_flag); if (f2fs_encrypted_file(inode)) post_read_steps |= 1 << STEP_DECRYPT; @@ -571,7 +571,7 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr, static int f2fs_submit_page_read(struct inode *inode, struct page *page, block_t blkaddr) { - struct bio *bio = f2fs_grab_read_bio(inode, blkaddr, 1); + struct bio *bio = f2fs_grab_read_bio(inode, blkaddr, 1, 0); if (IS_ERR(bio)) return PTR_ERR(bio); @@ -1421,10 +1421,15 @@ out: /* * This function was originally taken from fs/mpage.c, and customized for f2fs. * Major change was from block_size == page_size in f2fs by default. + * + * Note that the aops->readpages() function is ONLY used for read-ahead. If + * this function ever deviates from doing just read-ahead, it should either + * use ->readpage() or do the necessary surgery to decouple ->readpages() + * from read-ahead. */ static int f2fs_mpage_readpages(struct address_space *mapping, struct list_head *pages, struct page *page, - unsigned nr_pages) + unsigned nr_pages, bool is_readahead) { struct bio *bio = NULL; sector_t last_block_in_bio = 0; @@ -1514,7 +1519,8 @@ submit_and_realloc: bio = NULL; } if (bio == NULL) { - bio = f2fs_grab_read_bio(inode, block_nr, nr_pages); + bio = f2fs_grab_read_bio(inode, block_nr, nr_pages, + is_readahead ? REQ_RAHEAD : 0); if (IS_ERR(bio)) { bio = NULL; goto set_error_page; @@ -1558,7 +1564,7 @@ static int f2fs_read_data_page(struct file *file, struct page *page) if (f2fs_has_inline_data(inode)) ret = f2fs_read_inline_data(inode, page); if (ret == -EAGAIN) - ret = f2fs_mpage_readpages(page->mapping, NULL, page, 1); + ret = f2fs_mpage_readpages(page->mapping, NULL, page, 1, false); return ret; } @@ -1575,7 +1581,7 @@ static int f2fs_read_data_pages(struct file *file, if (f2fs_has_inline_data(inode)) return 0; - return f2fs_mpage_readpages(mapping, pages, NULL, nr_pages); + return f2fs_mpage_readpages(mapping, pages, NULL, nr_pages, true); } static int encrypt_one_page(struct f2fs_io_info *fio) -- GitLab From 49be0b6829f722f8d2975ee823205c60e6b16465 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 21 Jun 2018 22:38:28 +0800 Subject: [PATCH 0017/2269] f2fs: fix to wait on page writeback before updating page In error path of f2fs_move_rehashed_dirents, inode page could be writeback state, so we should wait on inode page writeback before updating it. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/inline.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 043830be5662f..9a245d2d5b7c6 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -477,6 +477,7 @@ static int f2fs_move_rehashed_dirents(struct inode *dir, struct page *ipage, return 0; recover: lock_page(ipage); + f2fs_wait_on_page_writeback(ipage, NODE, true); memcpy(inline_dentry, backup_dentry, MAX_INLINE_DATA(dir)); f2fs_i_depth_write(dir, 0); f2fs_i_size_write(dir, MAX_INLINE_DATA(dir)); -- GitLab From 98c7ff4c05a06cd81e432ee6b80d3c129cdfa19d Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 20 Jun 2018 10:02:19 +0200 Subject: [PATCH 0018/2269] f2fs: use timespec64 for inode timestamps The on-disk representation and the vfs both use 64-bit tv_sec values, so let's change the last missing piece in the middle. Signed-off-by: Arnd Bergmann Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 12 ++++++------ fs/f2fs/namei.c | 4 ++-- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 0fc2cf0be1157..ac5e86b11524a 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -670,8 +670,8 @@ struct f2fs_inode_info { int i_extra_isize; /* size of extra space located in i_addr */ kprojid_t i_projid; /* id for project quota */ int i_inline_xattr_size; /* inline xattr size */ - struct timespec i_crtime; /* inode creation time */ - struct timespec i_disk_time[4]; /* inode disk times */ + struct timespec64 i_crtime; /* inode creation time */ + struct timespec64 i_disk_time[4];/* inode disk times */ }; static inline void get_extent_info(struct extent_info *ext, @@ -2535,13 +2535,13 @@ static inline bool f2fs_skip_inode_update(struct inode *inode, int dsync) i_size_read(inode) & ~PAGE_MASK) return false; - if (!timespec_equal(F2FS_I(inode)->i_disk_time, &inode->i_atime)) + if (!timespec64_equal(F2FS_I(inode)->i_disk_time, &inode->i_atime)) return false; - if (!timespec_equal(F2FS_I(inode)->i_disk_time + 1, &inode->i_ctime)) + if (!timespec64_equal(F2FS_I(inode)->i_disk_time + 1, &inode->i_ctime)) return false; - if (!timespec_equal(F2FS_I(inode)->i_disk_time + 2, &inode->i_mtime)) + if (!timespec64_equal(F2FS_I(inode)->i_disk_time + 2, &inode->i_mtime)) return false; - if (!timespec_equal(F2FS_I(inode)->i_disk_time + 3, + if (!timespec64_equal(F2FS_I(inode)->i_disk_time + 3, &F2FS_I(inode)->i_crtime)) return false; diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 5d2d822092a29..2992219668a02 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -50,8 +50,8 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) inode->i_ino = ino; inode->i_blocks = 0; - inode->i_mtime = inode->i_atime = inode->i_ctime = - F2FS_I(inode)->i_crtime = current_time(inode); + inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode); + F2FS_I(inode)->i_crtime = inode->i_mtime; inode->i_generation = sbi->s_next_generation++; if (S_ISDIR(inode->i_mode)) -- GitLab From 77fa5e9aed03180bd3e77a343440ae2faa0cd625 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 5 Jun 2018 17:44:11 +0800 Subject: [PATCH 0019/2269] f2fs: introduce and spread verify_blkaddr This patch introduces verify_blkaddr to check meta/data block address with valid range to detect bug earlier. In addition, once we encounter an invalid blkaddr, notice user to run fsck to fix, and let the kernel panic. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 10 ++++++++-- fs/f2fs/data.c | 8 ++++---- fs/f2fs/f2fs.h | 33 +++++++++++++++++++++++++++++---- fs/f2fs/file.c | 9 +++++---- fs/f2fs/inode.c | 7 ++++--- fs/f2fs/node.c | 4 ++-- fs/f2fs/recovery.c | 6 +++--- fs/f2fs/segment.c | 4 ++-- fs/f2fs/segment.h | 8 +++----- 9 files changed, 60 insertions(+), 29 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 6a90570137f89..3a2eb094a6ee5 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -120,7 +120,7 @@ struct page *f2fs_get_tmp_page(struct f2fs_sb_info *sbi, pgoff_t index) return __get_meta_page(sbi, index, false); } -bool f2fs_is_valid_meta_blkaddr(struct f2fs_sb_info *sbi, +bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type) { switch (type) { @@ -141,10 +141,16 @@ bool f2fs_is_valid_meta_blkaddr(struct f2fs_sb_info *sbi, return false; break; case META_POR: + case DATA_GENERIC: if (unlikely(blkaddr >= MAX_BLKADDR(sbi) || blkaddr < MAIN_BLKADDR(sbi))) return false; break; + case META_GENERIC: + if (unlikely(blkaddr < SEG0_BLKADDR(sbi) || + blkaddr >= MAIN_BLKADDR(sbi))) + return false; + break; default: BUG(); } @@ -177,7 +183,7 @@ int f2fs_ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, blk_start_plug(&plug); for (; nrpages-- > 0; blkno++) { - if (!f2fs_is_valid_meta_blkaddr(sbi, blkno, type)) + if (!f2fs_is_valid_blkaddr(sbi, blkno, type)) goto out; switch (type) { diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 571bafbf5c72f..e57327665934b 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -485,7 +485,7 @@ next: spin_unlock(&io->io_lock); } - if (is_valid_blkaddr(fio->old_blkaddr)) + if (__is_valid_data_blkaddr(fio->old_blkaddr)) verify_block_addr(fio, fio->old_blkaddr); verify_block_addr(fio, fio->new_blkaddr); @@ -1045,7 +1045,7 @@ next_dnode: next_block: blkaddr = datablock_addr(dn.inode, dn.node_page, dn.ofs_in_node); - if (!is_valid_blkaddr(blkaddr)) { + if (!is_valid_data_blkaddr(sbi, blkaddr)) { if (create) { if (unlikely(f2fs_cp_error(sbi))) { err = -EIO; @@ -1700,7 +1700,7 @@ int f2fs_do_write_data_page(struct f2fs_io_info *fio) f2fs_lookup_extent_cache(inode, page->index, &ei)) { fio->old_blkaddr = ei.blk + page->index - ei.fofs; - if (is_valid_blkaddr(fio->old_blkaddr)) { + if (is_valid_data_blkaddr(fio->sbi, fio->old_blkaddr)) { ipu_force = true; fio->need_lock = LOCK_DONE; goto got_it; @@ -1727,7 +1727,7 @@ got_it: * If current allocation needs SSR, * it had better in-place writes for updated data. */ - if (ipu_force || (is_valid_blkaddr(fio->old_blkaddr) && + if (ipu_force || (is_valid_data_blkaddr(fio->sbi, fio->old_blkaddr) && need_inplace_update(fio))) { err = encrypt_one_page(fio); if (err) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index ac5e86b11524a..0a6c2a3106897 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -195,7 +195,7 @@ struct cp_control { }; /* - * For CP/NAT/SIT/SSA readahead + * indicate meta/data type */ enum { META_CP, @@ -203,6 +203,8 @@ enum { META_SIT, META_SSA, META_POR, + DATA_GENERIC, + META_GENERIC, }; /* for the list of ino */ @@ -2667,13 +2669,36 @@ static inline void f2fs_update_iostat(struct f2fs_sb_info *sbi, spin_unlock(&sbi->iostat_lock); } -static inline bool is_valid_blkaddr(block_t blkaddr) +bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi, + block_t blkaddr, int type); +void f2fs_msg(struct super_block *sb, const char *level, const char *fmt, ...); +static inline void verify_blkaddr(struct f2fs_sb_info *sbi, + block_t blkaddr, int type) +{ + if (!f2fs_is_valid_blkaddr(sbi, blkaddr, type)) { + f2fs_msg(sbi->sb, KERN_ERR, + "invalid blkaddr: %u, type: %d, run fsck to fix.", + blkaddr, type); + f2fs_bug_on(sbi, 1); + } +} + +static inline bool __is_valid_data_blkaddr(block_t blkaddr) { if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR) return false; return true; } +static inline bool is_valid_data_blkaddr(struct f2fs_sb_info *sbi, + block_t blkaddr) +{ + if (!__is_valid_data_blkaddr(blkaddr)) + return false; + verify_blkaddr(sbi, blkaddr, DATA_GENERIC); + return true; +} + /* * file.c */ @@ -2897,8 +2922,8 @@ void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io); struct page *f2fs_grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index); struct page *f2fs_get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index); struct page *f2fs_get_tmp_page(struct f2fs_sb_info *sbi, pgoff_t index); -bool f2fs_is_valid_meta_blkaddr(struct f2fs_sb_info *sbi, - block_t blkaddr, int type); +bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi, + block_t blkaddr, int type); int f2fs_ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, int type, bool sync); void f2fs_ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 2081a17f7ff42..94475b322e433 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -350,13 +350,13 @@ static pgoff_t __get_first_dirty_index(struct address_space *mapping, return pgofs; } -static bool __found_offset(block_t blkaddr, pgoff_t dirty, pgoff_t pgofs, - int whence) +static bool __found_offset(struct f2fs_sb_info *sbi, block_t blkaddr, + pgoff_t dirty, pgoff_t pgofs, int whence) { switch (whence) { case SEEK_DATA: if ((blkaddr == NEW_ADDR && dirty == pgofs) || - is_valid_blkaddr(blkaddr)) + is_valid_data_blkaddr(sbi, blkaddr)) return true; break; case SEEK_HOLE: @@ -420,7 +420,8 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence) blkaddr = datablock_addr(dn.inode, dn.node_page, dn.ofs_in_node); - if (__found_offset(blkaddr, dirty, pgofs, whence)) { + if (__found_offset(F2FS_I_SB(inode), blkaddr, dirty, + pgofs, whence)) { f2fs_put_dnode(&dn); goto found; } diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 2076225787d1d..e9cfcdbbe24cc 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -68,11 +68,12 @@ static void __get_inode_rdev(struct inode *inode, struct f2fs_inode *ri) } } -static bool __written_first_block(struct f2fs_inode *ri) +static bool __written_first_block(struct f2fs_sb_info *sbi, + struct f2fs_inode *ri) { block_t addr = le32_to_cpu(ri->i_addr[offset_in_addr(ri)]); - if (is_valid_blkaddr(addr)) + if (is_valid_data_blkaddr(sbi, addr)) return true; return false; } @@ -282,7 +283,7 @@ static int do_read_inode(struct inode *inode) /* get rdev by using inline_info */ __get_inode_rdev(inode, ri); - if (__written_first_block(ri)) + if (__written_first_block(sbi, ri)) set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN); if (!f2fs_need_inode_block_update(sbi, inode->i_ino)) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 1a5c449c0758f..0562a34d43550 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -371,7 +371,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, new_blkaddr == NULL_ADDR); f2fs_bug_on(sbi, nat_get_blkaddr(e) == NEW_ADDR && new_blkaddr == NEW_ADDR); - f2fs_bug_on(sbi, is_valid_blkaddr(nat_get_blkaddr(e)) && + f2fs_bug_on(sbi, is_valid_data_blkaddr(sbi, nat_get_blkaddr(e)) && new_blkaddr == NEW_ADDR); /* increment version no as node is removed */ @@ -382,7 +382,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, /* change address */ nat_set_blkaddr(e, new_blkaddr); - if (!is_valid_blkaddr(new_blkaddr)) + if (!is_valid_data_blkaddr(sbi, new_blkaddr)) set_nat_flag(e, IS_CHECKPOINTED, false); __set_nat_cache_dirty(nm_i, e); diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index daf81d416b898..184b34be635b5 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -252,7 +252,7 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head, while (1) { struct fsync_inode_entry *entry; - if (!f2fs_is_valid_meta_blkaddr(sbi, blkaddr, META_POR)) + if (!f2fs_is_valid_blkaddr(sbi, blkaddr, META_POR)) return 0; page = f2fs_get_tmp_page(sbi, blkaddr); @@ -507,7 +507,7 @@ retry_dn: } /* dest is valid block, try to recover from src to dest */ - if (f2fs_is_valid_meta_blkaddr(sbi, dest, META_POR)) { + if (f2fs_is_valid_blkaddr(sbi, dest, META_POR)) { if (src == NULL_ADDR) { err = f2fs_reserve_new_block(&dn); @@ -568,7 +568,7 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list, while (1) { struct fsync_inode_entry *entry; - if (!f2fs_is_valid_meta_blkaddr(sbi, blkaddr, META_POR)) + if (!f2fs_is_valid_blkaddr(sbi, blkaddr, META_POR)) break; f2fs_ra_meta_pages_cond(sbi, blkaddr); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 6dc8828b4d871..654091ec9cfe9 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1919,7 +1919,7 @@ bool f2fs_is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr) struct seg_entry *se; bool is_cp = false; - if (!is_valid_blkaddr(blkaddr)) + if (!is_valid_data_blkaddr(sbi, blkaddr)) return true; down_read(&sit_i->sentry_lock); @@ -2993,7 +2993,7 @@ void f2fs_wait_on_block_writeback(struct f2fs_sb_info *sbi, block_t blkaddr) { struct page *cpage; - if (!is_valid_blkaddr(blkaddr)) + if (!is_valid_data_blkaddr(sbi, blkaddr)) return; cpage = find_lock_page(META_MAPPING(sbi), blkaddr); diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index f18fc82fbe998..a7460da9af431 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -85,7 +85,7 @@ (GET_SEGOFF_FROM_SEG0(sbi, blk_addr) & ((sbi)->blocks_per_seg - 1)) #define GET_SEGNO(sbi, blk_addr) \ - ((!is_valid_blkaddr(blk_addr)) ? \ + ((!is_valid_data_blkaddr(sbi, blk_addr)) ? \ NULL_SEGNO : GET_L2R_SEGNO(FREE_I(sbi), \ GET_SEGNO_FROM_SEG0(sbi, blk_addr))) #define BLKS_PER_SEC(sbi) \ @@ -647,11 +647,9 @@ static inline void verify_block_addr(struct f2fs_io_info *fio, block_t blk_addr) if (PAGE_TYPE_OF_BIO(fio->type) == META && (!is_read_io(fio->op) || fio->is_meta)) - BUG_ON(blk_addr < SEG0_BLKADDR(sbi) || - blk_addr >= MAIN_BLKADDR(sbi)); + verify_blkaddr(sbi, blk_addr, META_GENERIC); else - BUG_ON(blk_addr < MAIN_BLKADDR(sbi) || - blk_addr >= MAX_BLKADDR(sbi)); + verify_blkaddr(sbi, blk_addr, DATA_GENERIC); } /* -- GitLab From 94b5dda77634d330de0e90b7f598842940e953d5 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 22 Jun 2018 16:06:59 +0800 Subject: [PATCH 0020/2269] f2fs: disable f2fs_check_rb_tree_consistence If there is millions of discard entries cached in rb tree, each sanity check of it can cause very long latency as held cmd_lock blocking other lock grabbers. In other aspect, we have enabled the check very long time, as we see, there is no such inconsistent condition caused by bugs. But still we do not choose to kill it directly, instead, adding an flag to disable the check now, if there is related code change, we can reuse it to detect bugs. Signed-off-by: Yunlei He Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 1 + fs/f2fs/segment.c | 10 +++++++--- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 0a6c2a3106897..181f48b707006 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -312,6 +312,7 @@ struct discard_cmd_control { atomic_t issing_discard; /* # of issing discard */ atomic_t discard_cmd_cnt; /* # of cached cmd count */ struct rb_root root; /* root of discard rb-tree */ + bool rbtree_check; /* config for consistence check */ }; /* for the list of fsync inodes, used only during recovery */ diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 654091ec9cfe9..d63d89287c53e 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1199,8 +1199,9 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi, mutex_lock(&dcc->cmd_lock); if (list_empty(pend_list)) goto next; - f2fs_bug_on(sbi, - !f2fs_check_rb_tree_consistence(sbi, &dcc->root)); + if (unlikely(dcc->rbtree_check)) + f2fs_bug_on(sbi, !f2fs_check_rb_tree_consistence(sbi, + &dcc->root)); blk_start_plug(&plug); list_for_each_entry_safe(dc, tmp, pend_list, list) { f2fs_bug_on(sbi, dc->state != D_PREP); @@ -1752,6 +1753,7 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi) dcc->max_discards = MAIN_SEGS(sbi) << sbi->log_blocks_per_seg; dcc->undiscard_blks = 0; dcc->root = RB_ROOT; + dcc->rbtree_check = false; init_waitqueue_head(&dcc->discard_wait_queue); SM_I(sbi)->dcc_info = dcc; @@ -2381,7 +2383,9 @@ next: issued = 0; mutex_lock(&dcc->cmd_lock); - f2fs_bug_on(sbi, !f2fs_check_rb_tree_consistence(sbi, &dcc->root)); + if (unlikely(dcc->rbtree_check)) + f2fs_bug_on(sbi, !f2fs_check_rb_tree_consistence(sbi, + &dcc->root)); dc = (struct discard_cmd *)f2fs_lookup_rb_tree_ret(&dcc->root, NULL, start, -- GitLab From caa74712a6d626e2fabe89aa38f85380b1962ec1 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 23 Jun 2018 00:12:36 +0800 Subject: [PATCH 0021/2269] f2fs: fix to do sanity check with secs_per_zone As Wen Xu reported in below link: https://bugzilla.kernel.org/show_bug.cgi?id=200183 - Overview Divide zero in reset_curseg() when mounting a crafted f2fs image - Reproduce - Kernel message [ 588.281510] divide error: 0000 [#1] SMP KASAN PTI [ 588.282701] CPU: 0 PID: 1293 Comm: mount Not tainted 4.18.0-rc1+ #4 [ 588.284000] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 [ 588.286178] RIP: 0010:reset_curseg+0x94/0x1a0 [ 588.298166] RSP: 0018:ffff8801e88d7940 EFLAGS: 00010246 [ 588.299360] RAX: 0000000000000014 RBX: ffff8801e1d46d00 RCX: ffffffffb88bf60b [ 588.300809] RDX: 0000000000000000 RSI: dffffc0000000000 RDI: ffff8801e1d46d64 [ 588.305272] R13: 0000000000000000 R14: 0000000000000014 R15: 0000000000000000 [ 588.306822] FS: 00007fad85008840(0000) GS:ffff8801f6e00000(0000) knlGS:0000000000000000 [ 588.308456] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 588.309623] CR2: 0000000001705078 CR3: 00000001f30f8000 CR4: 00000000000006f0 [ 588.311085] Call Trace: [ 588.311637] f2fs_build_segment_manager+0x103f/0x3410 [ 588.316136] ? f2fs_commit_super+0x1b0/0x1b0 [ 588.317031] ? set_blocksize+0x90/0x140 [ 588.319473] f2fs_mount+0x15/0x20 [ 588.320166] mount_fs+0x60/0x1a0 [ 588.320847] ? alloc_vfsmnt+0x309/0x360 [ 588.321647] vfs_kern_mount+0x6b/0x1a0 [ 588.322432] do_mount+0x34a/0x18c0 [ 588.323175] ? strndup_user+0x46/0x70 [ 588.323937] ? copy_mount_string+0x20/0x20 [ 588.324793] ? memcg_kmem_put_cache+0x1b/0xa0 [ 588.325702] ? kasan_check_write+0x14/0x20 [ 588.326562] ? _copy_from_user+0x6a/0x90 [ 588.327375] ? memdup_user+0x42/0x60 [ 588.328118] ksys_mount+0x83/0xd0 [ 588.328808] __x64_sys_mount+0x67/0x80 [ 588.329607] do_syscall_64+0x78/0x170 [ 588.330400] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 588.331461] RIP: 0033:0x7fad848e8b9a [ 588.336022] RSP: 002b:00007ffd7c5b6be8 EFLAGS: 00000206 ORIG_RAX: 00000000000000a5 [ 588.337547] RAX: ffffffffffffffda RBX: 00000000016f8030 RCX: 00007fad848e8b9a [ 588.338999] RDX: 00000000016f8210 RSI: 00000000016f9f30 RDI: 0000000001700ec0 [ 588.340442] RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000013 [ 588.341887] R10: 00000000c0ed0000 R11: 0000000000000206 R12: 0000000001700ec0 [ 588.343341] R13: 00000000016f8210 R14: 0000000000000000 R15: 0000000000000003 [ 588.354891] ---[ end trace 4ce02f25ff7d3df5 ]--- [ 588.355862] RIP: 0010:reset_curseg+0x94/0x1a0 [ 588.360742] RSP: 0018:ffff8801e88d7940 EFLAGS: 00010246 [ 588.361812] RAX: 0000000000000014 RBX: ffff8801e1d46d00 RCX: ffffffffb88bf60b [ 588.363485] RDX: 0000000000000000 RSI: dffffc0000000000 RDI: ffff8801e1d46d64 [ 588.365213] RBP: ffff8801e88d7968 R08: ffffed003c32266f R09: ffffed003c32266f [ 588.366661] R10: 0000000000000001 R11: ffffed003c32266e R12: ffff8801f0337700 [ 588.368110] R13: 0000000000000000 R14: 0000000000000014 R15: 0000000000000000 [ 588.370057] FS: 00007fad85008840(0000) GS:ffff8801f6e00000(0000) knlGS:0000000000000000 [ 588.372099] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 588.373291] CR2: 0000000001705078 CR3: 00000001f30f8000 CR4: 00000000000006f0 - Location https://elixir.bootlin.com/linux/latest/source/fs/f2fs/segment.c#L2147 curseg->zone = GET_ZONE_FROM_SEG(sbi, curseg->segno); If secs_per_zone is corrupted due to fuzzing test, it will cause divide zero operation when using GET_ZONE_FROM_SEG macro, so we should do more sanity check with secs_per_zone during mount to avoid this issue. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 520b76a353ab8..27e30a3d99bd7 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -2227,9 +2227,9 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi, return 1; } - if (secs_per_zone > total_sections) { + if (secs_per_zone > total_sections || !secs_per_zone) { f2fs_msg(sb, KERN_INFO, - "Wrong secs_per_zone (%u > %u)", + "Wrong secs_per_zone / total_sections (%u, %u)", secs_per_zone, total_sections); return 1; } -- GitLab From aec040ed6be3286770fe697dd2d9dfa0d372064f Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 23 Jun 2018 11:25:19 +0800 Subject: [PATCH 0022/2269] f2fs: fix to do sanity check with {sit,nat}_ver_bitmap_bytesize This patch adds to do sanity check with {sit,nat}_ver_bitmap_bytesize during mount, in order to avoid accessing across cache boundary with this abnormal bitmap size. - Overview buffer overrun in build_sit_info() when mounting a crafted f2fs image - Reproduce - Kernel message [ 548.580867] F2FS-fs (loop0): Invalid log blocks per segment (8201) [ 548.580877] F2FS-fs (loop0): Can't find valid F2FS filesystem in 1th superblock [ 548.584979] ================================================================== [ 548.586568] BUG: KASAN: use-after-free in kmemdup+0x36/0x50 [ 548.587715] Read of size 64 at addr ffff8801e9c265ff by task mount/1295 [ 548.589428] CPU: 1 PID: 1295 Comm: mount Not tainted 4.18.0-rc1+ #4 [ 548.589432] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 [ 548.589438] Call Trace: [ 548.589474] dump_stack+0x7b/0xb5 [ 548.589487] print_address_description+0x70/0x290 [ 548.589492] kasan_report+0x291/0x390 [ 548.589496] ? kmemdup+0x36/0x50 [ 548.589509] check_memory_region+0x139/0x190 [ 548.589514] memcpy+0x23/0x50 [ 548.589518] kmemdup+0x36/0x50 [ 548.589545] f2fs_build_segment_manager+0x8fa/0x3410 [ 548.589551] ? __asan_loadN+0xf/0x20 [ 548.589560] ? f2fs_sanity_check_ckpt+0x1be/0x240 [ 548.589566] ? f2fs_flush_sit_entries+0x10c0/0x10c0 [ 548.589587] ? __put_user_ns+0x40/0x40 [ 548.589604] ? find_next_bit+0x57/0x90 [ 548.589610] f2fs_fill_super+0x194b/0x2b40 [ 548.589617] ? f2fs_commit_super+0x1b0/0x1b0 [ 548.589637] ? set_blocksize+0x90/0x140 [ 548.589651] mount_bdev+0x1c5/0x210 [ 548.589655] ? f2fs_commit_super+0x1b0/0x1b0 [ 548.589667] f2fs_mount+0x15/0x20 [ 548.589672] mount_fs+0x60/0x1a0 [ 548.589683] ? alloc_vfsmnt+0x309/0x360 [ 548.589688] vfs_kern_mount+0x6b/0x1a0 [ 548.589699] do_mount+0x34a/0x18c0 [ 548.589710] ? lockref_put_or_lock+0xcf/0x160 [ 548.589716] ? copy_mount_string+0x20/0x20 [ 548.589728] ? memcg_kmem_put_cache+0x1b/0xa0 [ 548.589734] ? kasan_check_write+0x14/0x20 [ 548.589740] ? _copy_from_user+0x6a/0x90 [ 548.589744] ? memdup_user+0x42/0x60 [ 548.589750] ksys_mount+0x83/0xd0 [ 548.589755] __x64_sys_mount+0x67/0x80 [ 548.589781] do_syscall_64+0x78/0x170 [ 548.589797] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 548.589820] RIP: 0033:0x7f76fc331b9a [ 548.589821] Code: 48 8b 0d 01 c3 2b 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 49 89 ca b8 a5 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d ce c2 2b 00 f7 d8 64 89 01 48 [ 548.589880] RSP: 002b:00007ffd4f0a0e48 EFLAGS: 00000206 ORIG_RAX: 00000000000000a5 [ 548.589890] RAX: ffffffffffffffda RBX: 000000000146c030 RCX: 00007f76fc331b9a [ 548.589892] RDX: 000000000146c210 RSI: 000000000146df30 RDI: 0000000001474ec0 [ 548.589895] RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000013 [ 548.589897] R10: 00000000c0ed0000 R11: 0000000000000206 R12: 0000000001474ec0 [ 548.589900] R13: 000000000146c210 R14: 0000000000000000 R15: 0000000000000003 [ 548.590242] The buggy address belongs to the page: [ 548.591243] page:ffffea0007a70980 count:0 mapcount:0 mapping:0000000000000000 index:0x0 [ 548.592886] flags: 0x2ffff0000000000() [ 548.593665] raw: 02ffff0000000000 dead000000000100 dead000000000200 0000000000000000 [ 548.595258] raw: 0000000000000000 0000000000000000 00000000ffffffff 0000000000000000 [ 548.603713] page dumped because: kasan: bad access detected [ 548.605203] Memory state around the buggy address: [ 548.606198] ffff8801e9c26480: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff [ 548.607676] ffff8801e9c26500: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff [ 548.609157] >ffff8801e9c26580: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff [ 548.610629] ^ [ 548.612088] ffff8801e9c26600: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff [ 548.613674] ffff8801e9c26680: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff [ 548.615141] ================================================================== [ 548.616613] Disabling lock debugging due to kernel taint [ 548.622871] WARNING: CPU: 1 PID: 1295 at mm/page_alloc.c:4065 __alloc_pages_slowpath+0xe4a/0x1420 [ 548.622878] Modules linked in: snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hwdep snd_hda_core snd_pcm snd_timer snd mac_hid i2c_piix4 soundcore ib_iser rdma_cm iw_cm ib_cm ib_core iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx raid1 raid0 multipath linear 8139too crct10dif_pclmul crc32_pclmul qxl drm_kms_helper syscopyarea aesni_intel sysfillrect sysimgblt fb_sys_fops ttm drm aes_x86_64 crypto_simd cryptd 8139cp glue_helper mii pata_acpi floppy [ 548.623217] CPU: 1 PID: 1295 Comm: mount Tainted: G B 4.18.0-rc1+ #4 [ 548.623219] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 [ 548.623226] RIP: 0010:__alloc_pages_slowpath+0xe4a/0x1420 [ 548.623227] Code: ff ff 01 89 85 c8 fe ff ff e9 91 fc ff ff 41 89 c5 e9 5c fc ff ff 0f 0b 89 f8 25 ff ff f7 ff 89 85 8c fe ff ff e9 d5 f2 ff ff <0f> 0b e9 65 f2 ff ff 65 8b 05 38 81 d2 47 f6 c4 01 74 1c 65 48 8b [ 548.623281] RSP: 0018:ffff8801f28c7678 EFLAGS: 00010246 [ 548.623284] RAX: 0000000000000000 RBX: 00000000006040c0 RCX: ffffffffb82f73b7 [ 548.623287] RDX: 1ffff1003e518eeb RSI: 000000000000000c RDI: 0000000000000000 [ 548.623290] RBP: ffff8801f28c7880 R08: 0000000000000000 R09: ffffed0047fff2c5 [ 548.623292] R10: 0000000000000001 R11: ffffed0047fff2c4 R12: ffff8801e88de040 [ 548.623295] R13: 00000000006040c0 R14: 000000000000000c R15: ffff8801f28c7938 [ 548.623299] FS: 00007f76fca51840(0000) GS:ffff8801f6f00000(0000) knlGS:0000000000000000 [ 548.623302] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 548.623304] CR2: 00007f19b9171760 CR3: 00000001ed952000 CR4: 00000000000006e0 [ 548.623317] Call Trace: [ 548.623325] ? kasan_check_read+0x11/0x20 [ 548.623330] ? __zone_watermark_ok+0x92/0x240 [ 548.623336] ? get_page_from_freelist+0x1c3/0x1d90 [ 548.623347] ? _raw_spin_lock_irqsave+0x2a/0x60 [ 548.623353] ? warn_alloc+0x250/0x250 [ 548.623358] ? save_stack+0x46/0xd0 [ 548.623361] ? kasan_kmalloc+0xad/0xe0 [ 548.623366] ? __isolate_free_page+0x2a0/0x2a0 [ 548.623370] ? mount_fs+0x60/0x1a0 [ 548.623374] ? vfs_kern_mount+0x6b/0x1a0 [ 548.623378] ? do_mount+0x34a/0x18c0 [ 548.623383] ? ksys_mount+0x83/0xd0 [ 548.623387] ? __x64_sys_mount+0x67/0x80 [ 548.623391] ? do_syscall_64+0x78/0x170 [ 548.623396] ? entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 548.623401] __alloc_pages_nodemask+0x3c5/0x400 [ 548.623407] ? __alloc_pages_slowpath+0x1420/0x1420 [ 548.623412] ? __mutex_lock_slowpath+0x20/0x20 [ 548.623417] ? kvmalloc_node+0x31/0x80 [ 548.623424] alloc_pages_current+0x75/0x110 [ 548.623436] kmalloc_order+0x24/0x60 [ 548.623442] kmalloc_order_trace+0x24/0xb0 [ 548.623448] __kmalloc_track_caller+0x207/0x220 [ 548.623455] ? f2fs_build_node_manager+0x399/0xbb0 [ 548.623460] kmemdup+0x20/0x50 [ 548.623465] f2fs_build_node_manager+0x399/0xbb0 [ 548.623470] f2fs_fill_super+0x195e/0x2b40 [ 548.623477] ? f2fs_commit_super+0x1b0/0x1b0 [ 548.623481] ? set_blocksize+0x90/0x140 [ 548.623486] mount_bdev+0x1c5/0x210 [ 548.623489] ? f2fs_commit_super+0x1b0/0x1b0 [ 548.623495] f2fs_mount+0x15/0x20 [ 548.623498] mount_fs+0x60/0x1a0 [ 548.623503] ? alloc_vfsmnt+0x309/0x360 [ 548.623508] vfs_kern_mount+0x6b/0x1a0 [ 548.623513] do_mount+0x34a/0x18c0 [ 548.623518] ? lockref_put_or_lock+0xcf/0x160 [ 548.623523] ? copy_mount_string+0x20/0x20 [ 548.623528] ? memcg_kmem_put_cache+0x1b/0xa0 [ 548.623533] ? kasan_check_write+0x14/0x20 [ 548.623537] ? _copy_from_user+0x6a/0x90 [ 548.623542] ? memdup_user+0x42/0x60 [ 548.623547] ksys_mount+0x83/0xd0 [ 548.623552] __x64_sys_mount+0x67/0x80 [ 548.623557] do_syscall_64+0x78/0x170 [ 548.623562] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 548.623566] RIP: 0033:0x7f76fc331b9a [ 548.623567] Code: 48 8b 0d 01 c3 2b 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 49 89 ca b8 a5 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d ce c2 2b 00 f7 d8 64 89 01 48 [ 548.623632] RSP: 002b:00007ffd4f0a0e48 EFLAGS: 00000206 ORIG_RAX: 00000000000000a5 [ 548.623636] RAX: ffffffffffffffda RBX: 000000000146c030 RCX: 00007f76fc331b9a [ 548.623639] RDX: 000000000146c210 RSI: 000000000146df30 RDI: 0000000001474ec0 [ 548.623641] RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000013 [ 548.623643] R10: 00000000c0ed0000 R11: 0000000000000206 R12: 0000000001474ec0 [ 548.623646] R13: 000000000146c210 R14: 0000000000000000 R15: 0000000000000003 [ 548.623650] ---[ end trace 4ce02f25ff7d3df5 ]--- [ 548.623656] F2FS-fs (loop0): Failed to initialize F2FS node manager [ 548.627936] F2FS-fs (loop0): Invalid log blocks per segment (8201) [ 548.627940] F2FS-fs (loop0): Can't find valid F2FS filesystem in 1th superblock [ 548.635835] F2FS-fs (loop0): Failed to initialize F2FS node manager - Location https://elixir.bootlin.com/linux/v4.18-rc1/source/fs/f2fs/segment.c#L3578 sit_i->sit_bitmap = kmemdup(src_bitmap, bitmap_size, GFP_KERNEL); Buffer overrun happens when doing memcpy. I suspect there is missing (inconsistent) checks on bitmap_size. Reported by Wen Xu (wen.xu@gatech.edu) from SSLab, Gatech. Reported-by: Wen Xu Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 27e30a3d99bd7..9b903eca9d385 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -2280,12 +2280,17 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi) struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); unsigned int ovp_segments, reserved_segments; unsigned int main_segs, blocks_per_seg; + unsigned int sit_segs, nat_segs; + unsigned int sit_bitmap_size, nat_bitmap_size; + unsigned int log_blocks_per_seg; int i; total = le32_to_cpu(raw_super->segment_count); fsmeta = le32_to_cpu(raw_super->segment_count_ckpt); - fsmeta += le32_to_cpu(raw_super->segment_count_sit); - fsmeta += le32_to_cpu(raw_super->segment_count_nat); + sit_segs = le32_to_cpu(raw_super->segment_count_sit); + fsmeta += sit_segs; + nat_segs = le32_to_cpu(raw_super->segment_count_nat); + fsmeta += nat_segs; fsmeta += le32_to_cpu(ckpt->rsvd_segment_count); fsmeta += le32_to_cpu(raw_super->segment_count_ssa); @@ -2316,6 +2321,18 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi) return 1; } + sit_bitmap_size = le32_to_cpu(ckpt->sit_ver_bitmap_bytesize); + nat_bitmap_size = le32_to_cpu(ckpt->nat_ver_bitmap_bytesize); + log_blocks_per_seg = le32_to_cpu(raw_super->log_blocks_per_seg); + + if (sit_bitmap_size != ((sit_segs / 2) << log_blocks_per_seg) / 8 || + nat_bitmap_size != ((nat_segs / 2) << log_blocks_per_seg) / 8) { + f2fs_msg(sbi->sb, KERN_ERR, + "Wrong bitmap size: sit: %u, nat:%u", + sit_bitmap_size, nat_bitmap_size); + return 1; + } + if (unlikely(f2fs_cp_error(sbi))) { f2fs_msg(sbi->sb, KERN_ERR, "A bug case: need to run fsck"); return 1; -- GitLab From d69ffa5bdd2693f4baa9f8f120cbd57252f4a405 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 25 Jun 2018 20:33:24 +0800 Subject: [PATCH 0023/2269] f2fs: fix to correct return value of f2fs_trim_fs We should account trimmed block number from __wait_all_discard_cmd in __issue_discard_cmd_range, otherwise trimmed blocks returned by f2fs_trim_fs will be wrong, this patch fixes it. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index d63d89287c53e..47b6595a078c5 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1320,21 +1320,22 @@ next: return trimmed; } -static void __wait_all_discard_cmd(struct f2fs_sb_info *sbi, +static unsigned int __wait_all_discard_cmd(struct f2fs_sb_info *sbi, struct discard_policy *dpolicy) { struct discard_policy dp; + unsigned int discard_blks; - if (dpolicy) { - __wait_discard_cmd_range(sbi, dpolicy, 0, UINT_MAX); - return; - } + if (dpolicy) + return __wait_discard_cmd_range(sbi, dpolicy, 0, UINT_MAX); /* wait all */ __init_discard_policy(sbi, &dp, DPOLICY_FSTRIM, 1); - __wait_discard_cmd_range(sbi, &dp, 0, UINT_MAX); + discard_blks = __wait_discard_cmd_range(sbi, &dp, 0, UINT_MAX); __init_discard_policy(sbi, &dp, DPOLICY_UMOUNT, 1); - __wait_discard_cmd_range(sbi, &dp, 0, UINT_MAX); + discard_blks += __wait_discard_cmd_range(sbi, &dp, 0, UINT_MAX); + + return discard_blks; } /* This should be covered by global mutex, &sit_i->sentry_lock */ @@ -2368,7 +2369,7 @@ bool f2fs_exist_trim_candidates(struct f2fs_sb_info *sbi, return has_candidate; } -static void __issue_discard_cmd_range(struct f2fs_sb_info *sbi, +static unsigned int __issue_discard_cmd_range(struct f2fs_sb_info *sbi, struct discard_policy *dpolicy, unsigned int start, unsigned int end) { @@ -2378,6 +2379,7 @@ static void __issue_discard_cmd_range(struct f2fs_sb_info *sbi, struct discard_cmd *dc; struct blk_plug plug; int issued; + unsigned int trimmed = 0; next: issued = 0; @@ -2415,7 +2417,7 @@ next: blk_finish_plug(&plug); mutex_unlock(&dcc->cmd_lock); - __wait_all_discard_cmd(sbi, NULL); + trimmed += __wait_all_discard_cmd(sbi, NULL); congestion_wait(BLK_RW_ASYNC, HZ/50); goto next; } @@ -2429,6 +2431,8 @@ skip: blk_finish_plug(&plug); mutex_unlock(&dcc->cmd_lock); + + return trimmed; } int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) @@ -2486,9 +2490,10 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) end_block = START_BLOCK(sbi, end_segno + 1); __init_discard_policy(sbi, &dpolicy, DPOLICY_FSTRIM, cpc.trim_minlen); - __issue_discard_cmd_range(sbi, &dpolicy, start_block, end_block); + trimmed = __issue_discard_cmd_range(sbi, &dpolicy, + start_block, end_block); - trimmed = __wait_discard_cmd_range(sbi, &dpolicy, + trimmed += __wait_discard_cmd_range(sbi, &dpolicy, start_block, end_block); range->len = F2FS_BLK_TO_BYTES(trimmed); out: -- GitLab From 57453c6f9f6980d7621da6dacd9e9850da29c0b7 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 25 Jun 2018 23:29:49 +0800 Subject: [PATCH 0024/2269] f2fs: fix to do sanity check with extra_attr feature If FI_EXTRA_ATTR is set in inode by fuzzing, inode.i_addr[0] will be parsed as inode.i_extra_isize, then in __recover_inline_status, inline data address will beyond boundary of page, result in accessing invalid memory. So in this condition, during reading inode page, let's do sanity check with EXTRA_ATTR feature of fs and extra_attr bit of inode, if they're inconsistent, deny to load this inode. - Overview Out-of-bound access in f2fs_iget() when mounting a corrupted f2fs image - Reproduce The following message will be got in KASAN build of 4.18 upstream kernel. [ 819.392227] ================================================================== [ 819.393901] BUG: KASAN: slab-out-of-bounds in f2fs_iget+0x736/0x1530 [ 819.395329] Read of size 4 at addr ffff8801f099c968 by task mount/1292 [ 819.397079] CPU: 1 PID: 1292 Comm: mount Not tainted 4.18.0-rc1+ #4 [ 819.397082] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 [ 819.397088] Call Trace: [ 819.397124] dump_stack+0x7b/0xb5 [ 819.397154] print_address_description+0x70/0x290 [ 819.397159] kasan_report+0x291/0x390 [ 819.397163] ? f2fs_iget+0x736/0x1530 [ 819.397176] check_memory_region+0x139/0x190 [ 819.397182] __asan_loadN+0xf/0x20 [ 819.397185] f2fs_iget+0x736/0x1530 [ 819.397197] f2fs_fill_super+0x1b4f/0x2b40 [ 819.397202] ? f2fs_fill_super+0x1b4f/0x2b40 [ 819.397208] ? f2fs_commit_super+0x1b0/0x1b0 [ 819.397227] ? set_blocksize+0x90/0x140 [ 819.397241] mount_bdev+0x1c5/0x210 [ 819.397245] ? f2fs_commit_super+0x1b0/0x1b0 [ 819.397252] f2fs_mount+0x15/0x20 [ 819.397256] mount_fs+0x60/0x1a0 [ 819.397267] ? alloc_vfsmnt+0x309/0x360 [ 819.397272] vfs_kern_mount+0x6b/0x1a0 [ 819.397282] do_mount+0x34a/0x18c0 [ 819.397300] ? lockref_put_or_lock+0xcf/0x160 [ 819.397306] ? copy_mount_string+0x20/0x20 [ 819.397318] ? memcg_kmem_put_cache+0x1b/0xa0 [ 819.397324] ? kasan_check_write+0x14/0x20 [ 819.397334] ? _copy_from_user+0x6a/0x90 [ 819.397353] ? memdup_user+0x42/0x60 [ 819.397359] ksys_mount+0x83/0xd0 [ 819.397365] __x64_sys_mount+0x67/0x80 [ 819.397388] do_syscall_64+0x78/0x170 [ 819.397403] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 819.397422] RIP: 0033:0x7f54c667cb9a [ 819.397424] Code: 48 8b 0d 01 c3 2b 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 49 89 ca b8 a5 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d ce c2 2b 00 f7 d8 64 89 01 48 [ 819.397483] RSP: 002b:00007ffd8f46cd08 EFLAGS: 00000202 ORIG_RAX: 00000000000000a5 [ 819.397496] RAX: ffffffffffffffda RBX: 0000000000dfa030 RCX: 00007f54c667cb9a [ 819.397498] RDX: 0000000000dfa210 RSI: 0000000000dfbf30 RDI: 0000000000e02ec0 [ 819.397501] RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000013 [ 819.397503] R10: 00000000c0ed0000 R11: 0000000000000202 R12: 0000000000e02ec0 [ 819.397505] R13: 0000000000dfa210 R14: 0000000000000000 R15: 0000000000000003 [ 819.397866] Allocated by task 139: [ 819.398702] save_stack+0x46/0xd0 [ 819.398705] kasan_kmalloc+0xad/0xe0 [ 819.398709] kasan_slab_alloc+0x11/0x20 [ 819.398713] kmem_cache_alloc+0xd1/0x1e0 [ 819.398717] dup_fd+0x50/0x4c0 [ 819.398740] copy_process.part.37+0xbed/0x32e0 [ 819.398744] _do_fork+0x16e/0x590 [ 819.398748] __x64_sys_clone+0x69/0x80 [ 819.398752] do_syscall_64+0x78/0x170 [ 819.398756] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 819.399097] Freed by task 159: [ 819.399743] save_stack+0x46/0xd0 [ 819.399747] __kasan_slab_free+0x13c/0x1a0 [ 819.399750] kasan_slab_free+0xe/0x10 [ 819.399754] kmem_cache_free+0x89/0x1e0 [ 819.399757] put_files_struct+0x132/0x150 [ 819.399761] exit_files+0x62/0x70 [ 819.399766] do_exit+0x47b/0x1390 [ 819.399770] do_group_exit+0x86/0x130 [ 819.399774] __x64_sys_exit_group+0x2c/0x30 [ 819.399778] do_syscall_64+0x78/0x170 [ 819.399782] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 819.400115] The buggy address belongs to the object at ffff8801f099c680 which belongs to the cache files_cache of size 704 [ 819.403234] The buggy address is located 40 bytes to the right of 704-byte region [ffff8801f099c680, ffff8801f099c940) [ 819.405689] The buggy address belongs to the page: [ 819.406709] page:ffffea0007c26700 count:1 mapcount:0 mapping:ffff8801f69a3340 index:0xffff8801f099d380 compound_mapcount: 0 [ 819.408984] flags: 0x2ffff0000008100(slab|head) [ 819.409932] raw: 02ffff0000008100 ffffea00077fb600 0000000200000002 ffff8801f69a3340 [ 819.411514] raw: ffff8801f099d380 0000000080130000 00000001ffffffff 0000000000000000 [ 819.413073] page dumped because: kasan: bad access detected [ 819.414539] Memory state around the buggy address: [ 819.415521] ffff8801f099c800: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 819.416981] ffff8801f099c880: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 819.418454] >ffff8801f099c900: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc [ 819.419921] ^ [ 819.421265] ffff8801f099c980: fc fc fc fc fc fc fc fc fb fb fb fb fb fb fb fb [ 819.422745] ffff8801f099ca00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 819.424206] ================================================================== [ 819.425668] Disabling lock debugging due to kernel taint [ 819.457463] F2FS-fs (loop0): Mounted with checkpoint version = 3 The kernel still mounts the image. If you run the following program on the mounted folder mnt, (poc.c) static void activity(char *mpoint) { char *foo_bar_baz; int err; static int buf[8192]; memset(buf, 0, sizeof(buf)); err = asprintf(&foo_bar_baz, "%s/foo/bar/baz", mpoint); int fd = open(foo_bar_baz, O_RDONLY, 0); if (fd >= 0) { read(fd, (char *)buf, 11); close(fd); } } int main(int argc, char *argv[]) { activity(argv[1]); return 0; } You can get kernel crash: [ 819.457463] F2FS-fs (loop0): Mounted with checkpoint version = 3 [ 918.028501] BUG: unable to handle kernel paging request at ffffed0048000d82 [ 918.044020] PGD 23ffee067 P4D 23ffee067 PUD 23fbef067 PMD 0 [ 918.045207] Oops: 0000 [#1] SMP KASAN PTI [ 918.046048] CPU: 0 PID: 1309 Comm: poc Tainted: G B 4.18.0-rc1+ #4 [ 918.047573] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 [ 918.049552] RIP: 0010:check_memory_region+0x5e/0x190 [ 918.050565] Code: f8 49 c1 e8 03 49 89 db 49 c1 eb 03 4d 01 cb 4d 01 c1 4d 8d 63 01 4c 89 c8 4d 89 e2 4d 29 ca 49 83 fa 10 7f 3d 4d 85 d2 74 32 <41> 80 39 00 75 23 48 b8 01 00 00 00 00 fc ff df 4d 01 d1 49 01 c0 [ 918.054322] RSP: 0018:ffff8801e3a1f258 EFLAGS: 00010202 [ 918.055400] RAX: ffffed0048000d82 RBX: ffff880240006c11 RCX: ffffffffb8867d14 [ 918.056832] RDX: 0000000000000000 RSI: 0000000000000002 RDI: ffff880240006c10 [ 918.058253] RBP: ffff8801e3a1f268 R08: 1ffff10048000d82 R09: ffffed0048000d82 [ 918.059717] R10: 0000000000000001 R11: ffffed0048000d82 R12: ffffed0048000d83 [ 918.061159] R13: ffff8801e3a1f390 R14: 0000000000000000 R15: ffff880240006c08 [ 918.062614] FS: 00007fac9732c700(0000) GS:ffff8801f6e00000(0000) knlGS:0000000000000000 [ 918.064246] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 918.065412] CR2: ffffed0048000d82 CR3: 00000001df77a000 CR4: 00000000000006f0 [ 918.066882] Call Trace: [ 918.067410] __asan_loadN+0xf/0x20 [ 918.068149] f2fs_find_target_dentry+0xf4/0x270 [ 918.069083] ? __get_node_page+0x331/0x5b0 [ 918.069925] f2fs_find_in_inline_dir+0x24b/0x310 [ 918.070881] ? f2fs_recover_inline_data+0x4c0/0x4c0 [ 918.071905] ? unwind_next_frame.part.5+0x34f/0x490 [ 918.072901] ? unwind_dump+0x290/0x290 [ 918.073695] ? is_bpf_text_address+0xe/0x20 [ 918.074566] __f2fs_find_entry+0x599/0x670 [ 918.075408] ? kasan_unpoison_shadow+0x36/0x50 [ 918.076315] ? kasan_kmalloc+0xad/0xe0 [ 918.077100] ? memcg_kmem_put_cache+0x55/0xa0 [ 918.077998] ? f2fs_find_target_dentry+0x270/0x270 [ 918.079006] ? d_set_d_op+0x30/0x100 [ 918.079749] ? __d_lookup_rcu+0x69/0x2e0 [ 918.080556] ? __d_alloc+0x275/0x450 [ 918.081297] ? kasan_check_write+0x14/0x20 [ 918.082135] ? memset+0x31/0x40 [ 918.082820] ? fscrypt_setup_filename+0x1ec/0x4c0 [ 918.083782] ? d_alloc_parallel+0x5bb/0x8c0 [ 918.084640] f2fs_find_entry+0xe9/0x110 [ 918.085432] ? __f2fs_find_entry+0x670/0x670 [ 918.086308] ? kasan_check_write+0x14/0x20 [ 918.087163] f2fs_lookup+0x297/0x590 [ 918.087902] ? f2fs_link+0x2b0/0x2b0 [ 918.088646] ? legitimize_path.isra.29+0x61/0xa0 [ 918.089589] __lookup_slow+0x12e/0x240 [ 918.090371] ? may_delete+0x2b0/0x2b0 [ 918.091123] ? __nd_alloc_stack+0xa0/0xa0 [ 918.091944] lookup_slow+0x44/0x60 [ 918.092642] walk_component+0x3ee/0xa40 [ 918.093428] ? is_bpf_text_address+0xe/0x20 [ 918.094283] ? pick_link+0x3e0/0x3e0 [ 918.095047] ? in_group_p+0xa5/0xe0 [ 918.095771] ? generic_permission+0x53/0x1e0 [ 918.096666] ? security_inode_permission+0x1d/0x70 [ 918.097646] ? inode_permission+0x7a/0x1f0 [ 918.098497] link_path_walk+0x2a2/0x7b0 [ 918.099298] ? apparmor_capget+0x3d0/0x3d0 [ 918.100140] ? walk_component+0xa40/0xa40 [ 918.100958] ? path_init+0x2e6/0x580 [ 918.101695] path_openat+0x1bb/0x2160 [ 918.102471] ? __save_stack_trace+0x92/0x100 [ 918.103352] ? save_stack+0xb5/0xd0 [ 918.104070] ? vfs_unlink+0x250/0x250 [ 918.104822] ? save_stack+0x46/0xd0 [ 918.105538] ? kasan_slab_alloc+0x11/0x20 [ 918.106370] ? kmem_cache_alloc+0xd1/0x1e0 [ 918.107213] ? getname_flags+0x76/0x2c0 [ 918.107997] ? getname+0x12/0x20 [ 918.108677] ? do_sys_open+0x14b/0x2c0 [ 918.109450] ? __x64_sys_open+0x4c/0x60 [ 918.110255] ? do_syscall_64+0x78/0x170 [ 918.111083] ? entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 918.112148] ? entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 918.113204] ? f2fs_empty_inline_dir+0x1e0/0x1e0 [ 918.114150] ? timespec64_trunc+0x5c/0x90 [ 918.114993] ? wb_io_lists_depopulated+0x1a/0xc0 [ 918.115937] ? inode_io_list_move_locked+0x102/0x110 [ 918.116949] do_filp_open+0x12b/0x1d0 [ 918.117709] ? may_open_dev+0x50/0x50 [ 918.118475] ? kasan_kmalloc+0xad/0xe0 [ 918.119246] do_sys_open+0x17c/0x2c0 [ 918.119983] ? do_sys_open+0x17c/0x2c0 [ 918.120751] ? filp_open+0x60/0x60 [ 918.121463] ? task_work_run+0x4d/0xf0 [ 918.122237] __x64_sys_open+0x4c/0x60 [ 918.123001] do_syscall_64+0x78/0x170 [ 918.123759] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 918.124802] RIP: 0033:0x7fac96e3e040 [ 918.125537] Code: 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 83 3d 09 27 2d 00 00 75 10 b8 02 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 31 c3 48 83 ec 08 e8 7e e0 01 00 48 89 04 24 [ 918.129341] RSP: 002b:00007fff1b37f848 EFLAGS: 00000246 ORIG_RAX: 0000000000000002 [ 918.130870] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007fac96e3e040 [ 918.132295] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 000000000122d080 [ 918.133748] RBP: 00007fff1b37f9b0 R08: 00007fac9710bbd8 R09: 0000000000000001 [ 918.135209] R10: 000000000000069d R11: 0000000000000246 R12: 0000000000400c20 [ 918.136650] R13: 00007fff1b37fab0 R14: 0000000000000000 R15: 0000000000000000 [ 918.138093] Modules linked in: snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hwdep snd_hda_core snd_pcm snd_timer snd mac_hid i2c_piix4 soundcore ib_iser rdma_cm iw_cm ib_cm ib_core iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx raid1 raid0 multipath linear 8139too crct10dif_pclmul crc32_pclmul qxl drm_kms_helper syscopyarea aesni_intel sysfillrect sysimgblt fb_sys_fops ttm drm aes_x86_64 crypto_simd cryptd 8139cp glue_helper mii pata_acpi floppy [ 918.147924] CR2: ffffed0048000d82 [ 918.148619] ---[ end trace 4ce02f25ff7d3df5 ]--- [ 918.149563] RIP: 0010:check_memory_region+0x5e/0x190 [ 918.150576] Code: f8 49 c1 e8 03 49 89 db 49 c1 eb 03 4d 01 cb 4d 01 c1 4d 8d 63 01 4c 89 c8 4d 89 e2 4d 29 ca 49 83 fa 10 7f 3d 4d 85 d2 74 32 <41> 80 39 00 75 23 48 b8 01 00 00 00 00 fc ff df 4d 01 d1 49 01 c0 [ 918.154360] RSP: 0018:ffff8801e3a1f258 EFLAGS: 00010202 [ 918.155411] RAX: ffffed0048000d82 RBX: ffff880240006c11 RCX: ffffffffb8867d14 [ 918.156833] RDX: 0000000000000000 RSI: 0000000000000002 RDI: ffff880240006c10 [ 918.158257] RBP: ffff8801e3a1f268 R08: 1ffff10048000d82 R09: ffffed0048000d82 [ 918.159722] R10: 0000000000000001 R11: ffffed0048000d82 R12: ffffed0048000d83 [ 918.161149] R13: ffff8801e3a1f390 R14: 0000000000000000 R15: ffff880240006c08 [ 918.162587] FS: 00007fac9732c700(0000) GS:ffff8801f6e00000(0000) knlGS:0000000000000000 [ 918.164203] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 918.165356] CR2: ffffed0048000d82 CR3: 00000001df77a000 CR4: 00000000000006f0 Reported-by: Wen Xu Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/inode.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index e9cfcdbbe24cc..22810d30c0543 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -205,6 +205,16 @@ static bool sanity_check_inode(struct inode *inode) __func__, inode->i_ino); return false; } + + if (f2fs_has_extra_attr(inode) && + !f2fs_sb_has_extra_attr(sbi->sb)) { + set_sbi_flag(sbi, SBI_NEED_FSCK); + f2fs_msg(sbi->sb, KERN_WARNING, + "%s: inode (ino=%lx) is with extra_attr, " + "but extra_attr feature is off", + __func__, inode->i_ino); + return false; + } return true; } @@ -257,6 +267,11 @@ static int do_read_inode(struct inode *inode) get_inline_info(inode, ri); + if (!sanity_check_inode(inode)) { + f2fs_put_page(node_page, 1); + return -EINVAL; + } + fi->i_extra_isize = f2fs_has_extra_attr(inode) ? le16_to_cpu(ri->i_extra_isize) : 0; @@ -338,10 +353,6 @@ struct inode *f2fs_iget(struct super_block *sb, unsigned long ino) ret = do_read_inode(inode); if (ret) goto bad_inode; - if (!sanity_check_inode(inode)) { - ret = -EINVAL; - goto bad_inode; - } make_now: if (ino == F2FS_NODE_INO(sbi)) { inode->i_mapping->a_ops = &f2fs_node_aops; -- GitLab From 9bc2ba602ff89f54b8d4b82e97d6d81480edf9d6 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 27 Jun 2018 18:05:54 +0800 Subject: [PATCH 0025/2269] f2fs: fix to do sanity check with user_block_count This patch fixs to do sanity check with user_block_count. - Overview Divide zero in utilization when mount() a corrupted f2fs image - Reproduce (4.18 upstream kernel) - Kernel message [ 564.099503] F2FS-fs (loop0): invalid crc value [ 564.101991] divide error: 0000 [#1] SMP KASAN PTI [ 564.103103] CPU: 1 PID: 1298 Comm: f2fs_discard-7: Not tainted 4.18.0-rc1+ #4 [ 564.104584] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 [ 564.106624] RIP: 0010:issue_discard_thread+0x248/0x5c0 [ 564.107692] Code: ff ff 48 8b bd e8 fe ff ff 41 8b 9d 4c 04 00 00 e8 cd b8 ad ff 41 8b 85 50 04 00 00 31 d2 48 8d 04 80 48 8d 04 80 48 c1 e0 02 <48> f7 f3 83 f8 50 7e 16 41 c7 86 7c ff ff ff 01 00 00 00 41 c7 86 [ 564.111686] RSP: 0018:ffff8801f3117dc0 EFLAGS: 00010206 [ 564.112775] RAX: 0000000000000384 RBX: 0000000000000000 RCX: ffffffffb88c1e03 [ 564.114250] RDX: 0000000000000000 RSI: dffffc0000000000 RDI: ffff8801e3aa4850 [ 564.115706] RBP: ffff8801f3117f00 R08: 1ffffffff751a1d0 R09: fffffbfff751a1d0 [ 564.117177] R10: 0000000000000001 R11: fffffbfff751a1d0 R12: 00000000fffffffc [ 564.118634] R13: ffff8801e3aa4400 R14: ffff8801f3117ed8 R15: ffff8801e2050000 [ 564.120094] FS: 0000000000000000(0000) GS:ffff8801f6f00000(0000) knlGS:0000000000000000 [ 564.121748] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 564.122923] CR2: 000000000202b078 CR3: 00000001f11ac000 CR4: 00000000000006e0 [ 564.124383] Call Trace: [ 564.124924] ? __issue_discard_cmd+0x480/0x480 [ 564.125882] ? __sched_text_start+0x8/0x8 [ 564.126756] ? __kthread_parkme+0xcb/0x100 [ 564.127620] ? kthread_blkcg+0x70/0x70 [ 564.128412] kthread+0x180/0x1d0 [ 564.129105] ? __issue_discard_cmd+0x480/0x480 [ 564.130029] ? kthread_associate_blkcg+0x150/0x150 [ 564.131033] ret_from_fork+0x35/0x40 [ 564.131794] Modules linked in: snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hwdep snd_hda_core snd_pcm snd_timer snd mac_hid i2c_piix4 soundcore ib_iser rdma_cm iw_cm ib_cm ib_core iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx raid1 raid0 multipath linear 8139too crct10dif_pclmul crc32_pclmul qxl drm_kms_helper syscopyarea aesni_intel sysfillrect sysimgblt fb_sys_fops ttm drm aes_x86_64 crypto_simd cryptd 8139cp glue_helper mii pata_acpi floppy [ 564.141798] ---[ end trace 4ce02f25ff7d3df5 ]--- [ 564.142773] RIP: 0010:issue_discard_thread+0x248/0x5c0 [ 564.143885] Code: ff ff 48 8b bd e8 fe ff ff 41 8b 9d 4c 04 00 00 e8 cd b8 ad ff 41 8b 85 50 04 00 00 31 d2 48 8d 04 80 48 8d 04 80 48 c1 e0 02 <48> f7 f3 83 f8 50 7e 16 41 c7 86 7c ff ff ff 01 00 00 00 41 c7 86 [ 564.147776] RSP: 0018:ffff8801f3117dc0 EFLAGS: 00010206 [ 564.148856] RAX: 0000000000000384 RBX: 0000000000000000 RCX: ffffffffb88c1e03 [ 564.150424] RDX: 0000000000000000 RSI: dffffc0000000000 RDI: ffff8801e3aa4850 [ 564.151906] RBP: ffff8801f3117f00 R08: 1ffffffff751a1d0 R09: fffffbfff751a1d0 [ 564.153463] R10: 0000000000000001 R11: fffffbfff751a1d0 R12: 00000000fffffffc [ 564.154915] R13: ffff8801e3aa4400 R14: ffff8801f3117ed8 R15: ffff8801e2050000 [ 564.156405] FS: 0000000000000000(0000) GS:ffff8801f6f00000(0000) knlGS:0000000000000000 [ 564.158070] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 564.159279] CR2: 000000000202b078 CR3: 00000001f11ac000 CR4: 00000000000006e0 [ 564.161043] ================================================================== [ 564.162587] BUG: KASAN: stack-out-of-bounds in from_kuid_munged+0x1d/0x50 [ 564.163994] Read of size 4 at addr ffff8801f3117c84 by task f2fs_discard-7:/1298 [ 564.165852] CPU: 1 PID: 1298 Comm: f2fs_discard-7: Tainted: G D 4.18.0-rc1+ #4 [ 564.167593] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 [ 564.169522] Call Trace: [ 564.170057] dump_stack+0x7b/0xb5 [ 564.170778] print_address_description+0x70/0x290 [ 564.171765] kasan_report+0x291/0x390 [ 564.172540] ? from_kuid_munged+0x1d/0x50 [ 564.173408] __asan_load4+0x78/0x80 [ 564.174148] from_kuid_munged+0x1d/0x50 [ 564.174962] do_notify_parent+0x1f5/0x4f0 [ 564.175808] ? send_sigqueue+0x390/0x390 [ 564.176639] ? css_set_move_task+0x152/0x340 [ 564.184197] do_exit+0x1290/0x1390 [ 564.184950] ? __issue_discard_cmd+0x480/0x480 [ 564.185884] ? mm_update_next_owner+0x380/0x380 [ 564.186829] ? __sched_text_start+0x8/0x8 [ 564.187672] ? __kthread_parkme+0xcb/0x100 [ 564.188528] ? kthread_blkcg+0x70/0x70 [ 564.189333] ? kthread+0x180/0x1d0 [ 564.190052] ? __issue_discard_cmd+0x480/0x480 [ 564.190983] rewind_stack_do_exit+0x17/0x20 [ 564.192190] The buggy address belongs to the page: [ 564.193213] page:ffffea0007cc45c0 count:0 mapcount:0 mapping:0000000000000000 index:0x0 [ 564.194856] flags: 0x2ffff0000000000() [ 564.195644] raw: 02ffff0000000000 0000000000000000 dead000000000200 0000000000000000 [ 564.197247] raw: 0000000000000000 0000000000000000 00000000ffffffff 0000000000000000 [ 564.198826] page dumped because: kasan: bad access detected [ 564.200299] Memory state around the buggy address: [ 564.201306] ffff8801f3117b80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 564.202779] ffff8801f3117c00: 00 00 00 00 00 00 00 00 00 00 00 f3 f3 f3 f3 f3 [ 564.204252] >ffff8801f3117c80: f3 f3 f3 00 00 00 00 00 00 00 00 00 f1 f1 f1 f1 [ 564.205742] ^ [ 564.206424] ffff8801f3117d00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 564.207908] ffff8801f3117d80: f3 f3 f3 f3 f3 f3 f3 f3 00 00 00 00 00 00 00 00 [ 564.209389] ================================================================== [ 564.231795] F2FS-fs (loop0): Mounted with checkpoint version = 2 - Location https://elixir.bootlin.com/linux/v4.18-rc1/source/fs/f2fs/segment.h#L586 return div_u64((u64)valid_user_blocks(sbi) * 100, sbi->user_block_count); Missing checks on sbi->user_block_count. Reported-by: Wen Xu Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 9b903eca9d385..c4272522c36fb 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -2283,6 +2283,8 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi) unsigned int sit_segs, nat_segs; unsigned int sit_bitmap_size, nat_bitmap_size; unsigned int log_blocks_per_seg; + unsigned int segment_count_main; + block_t user_block_count; int i; total = le32_to_cpu(raw_super->segment_count); @@ -2307,6 +2309,16 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi) return 1; } + user_block_count = le64_to_cpu(ckpt->user_block_count); + segment_count_main = le32_to_cpu(raw_super->segment_count_main); + log_blocks_per_seg = le32_to_cpu(raw_super->log_blocks_per_seg); + if (!user_block_count || user_block_count >= + segment_count_main << log_blocks_per_seg) { + f2fs_msg(sbi->sb, KERN_ERR, + "Wrong user_block_count: %u", user_block_count); + return 1; + } + main_segs = le32_to_cpu(raw_super->segment_count_main); blocks_per_seg = sbi->blocks_per_seg; @@ -2323,7 +2335,6 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi) sit_bitmap_size = le32_to_cpu(ckpt->sit_ver_bitmap_bytesize); nat_bitmap_size = le32_to_cpu(ckpt->nat_ver_bitmap_bytesize); - log_blocks_per_seg = le32_to_cpu(raw_super->log_blocks_per_seg); if (sit_bitmap_size != ((sit_segs / 2) << log_blocks_per_seg) / 8 || nat_bitmap_size != ((nat_segs / 2) << log_blocks_per_seg) / 8) { -- GitLab From 63f7dcc1025b18a506c04276fb660ddbd39cbfce Mon Sep 17 00:00:00 2001 From: Yunlei He Date: Wed, 27 Jun 2018 14:46:21 +0800 Subject: [PATCH 0026/2269] f2fs: Allocate and stat mem used by free nid bitmap more accurately This patch used f2fs_bitmap_size macro to calculate mem used by free nid bitmap, and stat used mem including aligned part. Signed-off-by: Yunlei He Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/debug.c | 3 ++- fs/f2fs/node.c | 2 +- include/linux/f2fs_fs.h | 5 ----- 3 files changed, 3 insertions(+), 7 deletions(-) diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 2d65e77ae5cf9..214a968962a1d 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -215,7 +215,8 @@ static void update_mem_info(struct f2fs_sb_info *sbi) si->base_mem += sizeof(struct f2fs_nm_info); si->base_mem += __bitmap_size(sbi, NAT_BITMAP); si->base_mem += (NM_I(sbi)->nat_bits_blocks << F2FS_BLKSIZE_BITS); - si->base_mem += NM_I(sbi)->nat_blocks * NAT_ENTRY_BITMAP_SIZE; + si->base_mem += NM_I(sbi)->nat_blocks * + f2fs_bitmap_size(NAT_ENTRY_PER_BLOCK); si->base_mem += NM_I(sbi)->nat_blocks / 8; si->base_mem += NM_I(sbi)->nat_blocks * sizeof(unsigned short); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 0562a34d43550..b45354eed7c8e 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -2797,7 +2797,7 @@ static int init_free_nid_cache(struct f2fs_sb_info *sbi) for (i = 0; i < nm_i->nat_blocks; i++) { nm_i->free_nid_bitmap[i] = f2fs_kvzalloc(sbi, - NAT_ENTRY_BITMAP_SIZE_ALIGNED, GFP_KERNEL); + f2fs_bitmap_size(NAT_ENTRY_PER_BLOCK), GFP_KERNEL); if (!nm_i->free_nid_bitmap) return -ENOMEM; } diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index aa5db8b5521a5..f70f8ac9c4f44 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -304,11 +304,6 @@ struct f2fs_node { * For NAT entries */ #define NAT_ENTRY_PER_BLOCK (PAGE_SIZE / sizeof(struct f2fs_nat_entry)) -#define NAT_ENTRY_BITMAP_SIZE ((NAT_ENTRY_PER_BLOCK + 7) / 8) -#define NAT_ENTRY_BITMAP_SIZE_ALIGNED \ - ((NAT_ENTRY_BITMAP_SIZE + BITS_PER_LONG - 1) / \ - BITS_PER_LONG * BITS_PER_LONG) - struct f2fs_nat_entry { __u8 version; /* latest version of cached nat entry */ -- GitLab From a314f364010888c1b21774d25efe6b08f95fc2e0 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 29 Jun 2018 13:55:22 +0800 Subject: [PATCH 0027/2269] f2fs: fix to do sanity check with node footer and iblocks This patch adds to do sanity check with below fields of inode to avoid reported panic. - node footer - iblocks https://bugzilla.kernel.org/show_bug.cgi?id=200223 - Overview BUG() triggered in f2fs_truncate_inode_blocks() when un-mounting a mounted f2fs image after writing to it - Reproduce - POC (poc.c) static void activity(char *mpoint) { char *foo_bar_baz; int err; static int buf[8192]; memset(buf, 0, sizeof(buf)); err = asprintf(&foo_bar_baz, "%s/foo/bar/baz", mpoint); // open / write / read int fd = open(foo_bar_baz, O_RDWR | O_TRUNC, 0777); if (fd >= 0) { write(fd, (char *)buf, 517); write(fd, (char *)buf, sizeof(buf)); close(fd); } } int main(int argc, char *argv[]) { activity(argv[1]); return 0; } - Kernel meesage [ 552.479723] F2FS-fs (loop0): Mounted with checkpoint version = 2 [ 556.451891] ------------[ cut here ]------------ [ 556.451899] kernel BUG at fs/f2fs/node.c:987! [ 556.452920] invalid opcode: 0000 [#1] SMP KASAN PTI [ 556.453936] CPU: 1 PID: 1310 Comm: umount Not tainted 4.18.0-rc1+ #4 [ 556.455213] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 [ 556.457140] RIP: 0010:f2fs_truncate_inode_blocks+0x4a7/0x6f0 [ 556.458280] Code: e8 ae ea ff ff 41 89 c7 c1 e8 1f 84 c0 74 0a 41 83 ff fe 0f 85 35 ff ff ff 81 85 b0 fe ff ff fb 03 00 00 e9 f7 fd ff ff 0f 0b <0f> 0b e8 62 b7 9a 00 48 8b bd a0 fe ff ff e8 56 54 ae ff 48 8b b5 [ 556.462015] RSP: 0018:ffff8801f292f808 EFLAGS: 00010286 [ 556.463068] RAX: ffffed003e73242d RBX: ffff8801f292f958 RCX: ffffffffb88b81bc [ 556.464479] RDX: 0000000000000000 RSI: 0000000000000004 RDI: ffff8801f3992164 [ 556.465901] RBP: ffff8801f292f980 R08: ffffed003e73242d R09: ffffed003e73242d [ 556.467311] R10: 0000000000000001 R11: ffffed003e73242c R12: 00000000fffffc64 [ 556.468706] R13: ffff8801f3992000 R14: 0000000000000058 R15: 00000000ffff8801 [ 556.470117] FS: 00007f8029297840(0000) GS:ffff8801f6f00000(0000) knlGS:0000000000000000 [ 556.471702] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 556.472838] CR2: 000055f5f57305d8 CR3: 00000001f18b0000 CR4: 00000000000006e0 [ 556.474265] Call Trace: [ 556.474782] ? f2fs_alloc_nid_failed+0xf0/0xf0 [ 556.475686] ? truncate_nodes+0x980/0x980 [ 556.476516] ? pagecache_get_page+0x21f/0x2f0 [ 556.477412] ? __asan_loadN+0xf/0x20 [ 556.478153] ? __get_node_page+0x331/0x5b0 [ 556.478992] ? reweight_entity+0x1e6/0x3b0 [ 556.479826] f2fs_truncate_blocks+0x55e/0x740 [ 556.480709] ? f2fs_truncate_data_blocks+0x20/0x20 [ 556.481689] ? __radix_tree_lookup+0x34/0x160 [ 556.482630] ? radix_tree_lookup+0xd/0x10 [ 556.483445] f2fs_truncate+0xd4/0x1a0 [ 556.484206] f2fs_evict_inode+0x5ce/0x630 [ 556.485032] evict+0x16f/0x290 [ 556.485664] iput+0x280/0x300 [ 556.486300] dentry_unlink_inode+0x165/0x1e0 [ 556.487169] __dentry_kill+0x16a/0x260 [ 556.487936] dentry_kill+0x70/0x250 [ 556.488651] shrink_dentry_list+0x125/0x260 [ 556.489504] shrink_dcache_parent+0xc1/0x110 [ 556.490379] ? shrink_dcache_sb+0x200/0x200 [ 556.491231] ? bit_wait_timeout+0xc0/0xc0 [ 556.492047] do_one_tree+0x12/0x40 [ 556.492743] shrink_dcache_for_umount+0x3f/0xa0 [ 556.493656] generic_shutdown_super+0x43/0x1c0 [ 556.494561] kill_block_super+0x52/0x80 [ 556.495341] kill_f2fs_super+0x62/0x70 [ 556.496105] deactivate_locked_super+0x6f/0xa0 [ 556.497004] deactivate_super+0x5e/0x80 [ 556.497785] cleanup_mnt+0x61/0xa0 [ 556.498492] __cleanup_mnt+0x12/0x20 [ 556.499218] task_work_run+0xc8/0xf0 [ 556.499949] exit_to_usermode_loop+0x125/0x130 [ 556.500846] do_syscall_64+0x138/0x170 [ 556.501609] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 556.502659] RIP: 0033:0x7f8028b77487 [ 556.503384] Code: 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 31 f6 e9 09 00 00 00 66 0f 1f 84 00 00 00 00 00 b8 a6 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d e1 c9 2b 00 f7 d8 64 89 01 48 [ 556.507137] RSP: 002b:00007fff9f2e3598 EFLAGS: 00000246 ORIG_RAX: 00000000000000a6 [ 556.508637] RAX: 0000000000000000 RBX: 0000000000ebd030 RCX: 00007f8028b77487 [ 556.510069] RDX: 0000000000000001 RSI: 0000000000000000 RDI: 0000000000ec41e0 [ 556.511481] RBP: 0000000000ec41e0 R08: 0000000000000000 R09: 0000000000000014 [ 556.512892] R10: 00000000000006b2 R11: 0000000000000246 R12: 00007f802908083c [ 556.514320] R13: 0000000000000000 R14: 0000000000ebd210 R15: 00007fff9f2e3820 [ 556.515745] Modules linked in: snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hwdep snd_hda_core snd_pcm snd_timer snd mac_hid i2c_piix4 soundcore ib_iser rdma_cm iw_cm ib_cm ib_core iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx raid1 raid0 multipath linear 8139too crct10dif_pclmul crc32_pclmul qxl drm_kms_helper syscopyarea aesni_intel sysfillrect sysimgblt fb_sys_fops ttm drm aes_x86_64 crypto_simd cryptd 8139cp glue_helper mii pata_acpi floppy [ 556.529276] ---[ end trace 4ce02f25ff7d3df5 ]--- [ 556.530340] RIP: 0010:f2fs_truncate_inode_blocks+0x4a7/0x6f0 [ 556.531513] Code: e8 ae ea ff ff 41 89 c7 c1 e8 1f 84 c0 74 0a 41 83 ff fe 0f 85 35 ff ff ff 81 85 b0 fe ff ff fb 03 00 00 e9 f7 fd ff ff 0f 0b <0f> 0b e8 62 b7 9a 00 48 8b bd a0 fe ff ff e8 56 54 ae ff 48 8b b5 [ 556.535330] RSP: 0018:ffff8801f292f808 EFLAGS: 00010286 [ 556.536395] RAX: ffffed003e73242d RBX: ffff8801f292f958 RCX: ffffffffb88b81bc [ 556.537824] RDX: 0000000000000000 RSI: 0000000000000004 RDI: ffff8801f3992164 [ 556.539290] RBP: ffff8801f292f980 R08: ffffed003e73242d R09: ffffed003e73242d [ 556.540709] R10: 0000000000000001 R11: ffffed003e73242c R12: 00000000fffffc64 [ 556.542131] R13: ffff8801f3992000 R14: 0000000000000058 R15: 00000000ffff8801 [ 556.543579] FS: 00007f8029297840(0000) GS:ffff8801f6f00000(0000) knlGS:0000000000000000 [ 556.545180] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 556.546338] CR2: 000055f5f57305d8 CR3: 00000001f18b0000 CR4: 00000000000006e0 [ 556.547809] ================================================================== [ 556.549248] BUG: KASAN: stack-out-of-bounds in arch_tlb_gather_mmu+0x52/0x170 [ 556.550672] Write of size 8 at addr ffff8801f292fd10 by task umount/1310 [ 556.552338] CPU: 1 PID: 1310 Comm: umount Tainted: G D 4.18.0-rc1+ #4 [ 556.553886] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 [ 556.555756] Call Trace: [ 556.556264] dump_stack+0x7b/0xb5 [ 556.556944] print_address_description+0x70/0x290 [ 556.557903] kasan_report+0x291/0x390 [ 556.558649] ? arch_tlb_gather_mmu+0x52/0x170 [ 556.559537] __asan_store8+0x57/0x90 [ 556.560268] arch_tlb_gather_mmu+0x52/0x170 [ 556.561110] tlb_gather_mmu+0x12/0x40 [ 556.561862] exit_mmap+0x123/0x2a0 [ 556.562555] ? __ia32_sys_munmap+0x50/0x50 [ 556.563384] ? exit_aio+0x98/0x230 [ 556.564079] ? __x32_compat_sys_io_submit+0x260/0x260 [ 556.565099] ? taskstats_exit+0x1f4/0x640 [ 556.565925] ? kasan_check_read+0x11/0x20 [ 556.566739] ? mm_update_next_owner+0x322/0x380 [ 556.567652] mmput+0x8b/0x1d0 [ 556.568260] do_exit+0x43a/0x1390 [ 556.568937] ? mm_update_next_owner+0x380/0x380 [ 556.569855] ? deactivate_super+0x5e/0x80 [ 556.570668] ? cleanup_mnt+0x61/0xa0 [ 556.571395] ? __cleanup_mnt+0x12/0x20 [ 556.572156] ? task_work_run+0xc8/0xf0 [ 556.572917] ? exit_to_usermode_loop+0x125/0x130 [ 556.573861] rewind_stack_do_exit+0x17/0x20 [ 556.574707] RIP: 0033:0x7f8028b77487 [ 556.575428] Code: Bad RIP value. [ 556.576106] RSP: 002b:00007fff9f2e3598 EFLAGS: 00000246 ORIG_RAX: 00000000000000a6 [ 556.577599] RAX: 0000000000000000 RBX: 0000000000ebd030 RCX: 00007f8028b77487 [ 556.579020] RDX: 0000000000000001 RSI: 0000000000000000 RDI: 0000000000ec41e0 [ 556.580422] RBP: 0000000000ec41e0 R08: 0000000000000000 R09: 0000000000000014 [ 556.581833] R10: 00000000000006b2 R11: 0000000000000246 R12: 00007f802908083c [ 556.583252] R13: 0000000000000000 R14: 0000000000ebd210 R15: 00007fff9f2e3820 [ 556.584983] The buggy address belongs to the page: [ 556.585961] page:ffffea0007ca4bc0 count:0 mapcount:0 mapping:0000000000000000 index:0x0 [ 556.587540] flags: 0x2ffff0000000000() [ 556.588296] raw: 02ffff0000000000 0000000000000000 dead000000000200 0000000000000000 [ 556.589822] raw: 0000000000000000 0000000000000000 00000000ffffffff 0000000000000000 [ 556.591359] page dumped because: kasan: bad access detected [ 556.592786] Memory state around the buggy address: [ 556.593753] ffff8801f292fc00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 556.595191] ffff8801f292fc80: 00 00 00 00 00 00 00 00 f1 f1 f1 f1 00 00 00 00 [ 556.596613] >ffff8801f292fd00: 00 00 f3 00 00 00 00 f3 f3 00 00 00 00 f4 f4 f4 [ 556.598044] ^ [ 556.598797] ffff8801f292fd80: f3 f3 f3 f3 00 00 00 00 00 00 00 00 00 00 00 00 [ 556.600225] ffff8801f292fe00: 00 00 00 00 00 00 00 00 f1 f1 f1 f1 00 f4 f4 f4 [ 556.601647] ================================================================== - Location https://elixir.bootlin.com/linux/v4.18-rc1/source/fs/f2fs/node.c#L987 case NODE_DIND_BLOCK: err = truncate_nodes(&dn, nofs, offset[1], 3); cont = 0; break; default: BUG(); <--- } Reported-by Wen Xu Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/inode.c | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 22810d30c0543..f490393397d41 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -193,9 +193,30 @@ void f2fs_inode_chksum_set(struct f2fs_sb_info *sbi, struct page *page) ri->i_inode_checksum = cpu_to_le32(f2fs_inode_chksum(sbi, page)); } -static bool sanity_check_inode(struct inode *inode) +static bool sanity_check_inode(struct inode *inode, struct page *node_page) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + unsigned long long iblocks; + + iblocks = le64_to_cpu(F2FS_INODE(node_page)->i_blocks); + if (!iblocks) { + set_sbi_flag(sbi, SBI_NEED_FSCK); + f2fs_msg(sbi->sb, KERN_WARNING, + "%s: corrupted inode i_blocks i_ino=%lx iblocks=%llu, " + "run fsck to fix.", + __func__, inode->i_ino, iblocks); + return false; + } + + if (ino_of_node(node_page) != nid_of_node(node_page)) { + set_sbi_flag(sbi, SBI_NEED_FSCK); + f2fs_msg(sbi->sb, KERN_WARNING, + "%s: corrupted inode footer i_ino=%lx, ino,nid: " + "[%u, %u] run fsck to fix.", + __func__, inode->i_ino, + ino_of_node(node_page), nid_of_node(node_page)); + return false; + } if (f2fs_sb_has_flexible_inline_xattr(sbi->sb) && !f2fs_has_extra_attr(inode)) { @@ -267,7 +288,7 @@ static int do_read_inode(struct inode *inode) get_inline_info(inode, ri); - if (!sanity_check_inode(inode)) { + if (!sanity_check_inode(inode, node_page)) { f2fs_put_page(node_page, 1); return -EINVAL; } -- GitLab From 290cba1acc1b45d20278a51aaf128eedaeec1f0b Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 30 Jun 2018 18:13:40 +0800 Subject: [PATCH 0028/2269] f2fs: fix to do sanity check with reserved blkaddr of inline inode As Wen Xu reported in bugzilla, after image was injected with random data by fuzzing, inline inode would contain invalid reserved blkaddr, then during inline conversion, we will encounter illegal memory accessing reported by KASAN, the root cause of this is when writing out converted inline page, we will use invalid reserved blkaddr to update sit bitmap, result in accessing memory beyond sit bitmap boundary. In order to fix this issue, let's do sanity check with reserved block address of inline inode to avoid above condition. https://bugzilla.kernel.org/show_bug.cgi?id=200179 [ 1428.846352] BUG: KASAN: use-after-free in update_sit_entry+0x80/0x7f0 [ 1428.846618] Read of size 4 at addr ffff880194483540 by task a.out/2741 [ 1428.846855] CPU: 0 PID: 2741 Comm: a.out Tainted: G W 4.17.0+ #1 [ 1428.846858] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 [ 1428.846860] Call Trace: [ 1428.846868] dump_stack+0x71/0xab [ 1428.846875] print_address_description+0x6b/0x290 [ 1428.846881] kasan_report+0x28e/0x390 [ 1428.846888] ? update_sit_entry+0x80/0x7f0 [ 1428.846898] update_sit_entry+0x80/0x7f0 [ 1428.846906] f2fs_allocate_data_block+0x6db/0xc70 [ 1428.846914] ? f2fs_get_node_info+0x14f/0x590 [ 1428.846920] do_write_page+0xc8/0x150 [ 1428.846928] f2fs_outplace_write_data+0xfe/0x210 [ 1428.846935] ? f2fs_do_write_node_page+0x170/0x170 [ 1428.846941] ? radix_tree_tag_clear+0xff/0x130 [ 1428.846946] ? __mod_node_page_state+0x22/0xa0 [ 1428.846951] ? inc_zone_page_state+0x54/0x100 [ 1428.846956] ? __test_set_page_writeback+0x336/0x5d0 [ 1428.846964] f2fs_convert_inline_page+0x407/0x6d0 [ 1428.846971] ? f2fs_read_inline_data+0x3b0/0x3b0 [ 1428.846978] ? __get_node_page+0x335/0x6b0 [ 1428.846987] f2fs_convert_inline_inode+0x41b/0x500 [ 1428.846994] ? f2fs_convert_inline_page+0x6d0/0x6d0 [ 1428.847000] ? kasan_unpoison_shadow+0x31/0x40 [ 1428.847005] ? kasan_kmalloc+0xa6/0xd0 [ 1428.847024] f2fs_file_mmap+0x79/0xc0 [ 1428.847029] mmap_region+0x58b/0x880 [ 1428.847037] ? arch_get_unmapped_area+0x370/0x370 [ 1428.847042] do_mmap+0x55b/0x7a0 [ 1428.847048] vm_mmap_pgoff+0x16f/0x1c0 [ 1428.847055] ? vma_is_stack_for_current+0x50/0x50 [ 1428.847062] ? __fsnotify_update_child_dentry_flags.part.1+0x160/0x160 [ 1428.847068] ? do_sys_open+0x206/0x2a0 [ 1428.847073] ? __fget+0xb4/0x100 [ 1428.847079] ksys_mmap_pgoff+0x278/0x360 [ 1428.847085] ? find_mergeable_anon_vma+0x50/0x50 [ 1428.847091] do_syscall_64+0x73/0x160 [ 1428.847098] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 1428.847102] RIP: 0033:0x7fb1430766ba [ 1428.847103] Code: 89 f5 41 54 49 89 fc 55 53 74 35 49 63 e8 48 63 da 4d 89 f9 49 89 e8 4d 63 d6 48 89 da 4c 89 ee 4c 89 e7 b8 09 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 56 5b 5d 41 5c 41 5d 41 5e 41 5f c3 0f 1f 00 [ 1428.847162] RSP: 002b:00007ffc651d9388 EFLAGS: 00000246 ORIG_RAX: 0000000000000009 [ 1428.847167] RAX: ffffffffffffffda RBX: 0000000000000001 RCX: 00007fb1430766ba [ 1428.847170] RDX: 0000000000000001 RSI: 0000000000001000 RDI: 0000000000000000 [ 1428.847173] RBP: 0000000000000003 R08: 0000000000000003 R09: 0000000000000000 [ 1428.847176] R10: 0000000000008002 R11: 0000000000000246 R12: 0000000000000000 [ 1428.847179] R13: 0000000000001000 R14: 0000000000008002 R15: 0000000000000000 [ 1428.847252] Allocated by task 2683: [ 1428.847372] kasan_kmalloc+0xa6/0xd0 [ 1428.847380] kmem_cache_alloc+0xc8/0x1e0 [ 1428.847385] getname_flags+0x73/0x2b0 [ 1428.847390] user_path_at_empty+0x1d/0x40 [ 1428.847395] vfs_statx+0xc1/0x150 [ 1428.847401] __do_sys_newlstat+0x7e/0xd0 [ 1428.847405] do_syscall_64+0x73/0x160 [ 1428.847411] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 1428.847466] Freed by task 2683: [ 1428.847566] __kasan_slab_free+0x137/0x190 [ 1428.847571] kmem_cache_free+0x85/0x1e0 [ 1428.847575] filename_lookup+0x191/0x280 [ 1428.847580] vfs_statx+0xc1/0x150 [ 1428.847585] __do_sys_newlstat+0x7e/0xd0 [ 1428.847590] do_syscall_64+0x73/0x160 [ 1428.847596] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 1428.847648] The buggy address belongs to the object at ffff880194483300 which belongs to the cache names_cache of size 4096 [ 1428.847946] The buggy address is located 576 bytes inside of 4096-byte region [ffff880194483300, ffff880194484300) [ 1428.848234] The buggy address belongs to the page: [ 1428.848366] page:ffffea0006512000 count:1 mapcount:0 mapping:ffff8801f3586380 index:0x0 compound_mapcount: 0 [ 1428.848606] flags: 0x17fff8000008100(slab|head) [ 1428.848737] raw: 017fff8000008100 dead000000000100 dead000000000200 ffff8801f3586380 [ 1428.848931] raw: 0000000000000000 0000000000070007 00000001ffffffff 0000000000000000 [ 1428.849122] page dumped because: kasan: bad access detected [ 1428.849305] Memory state around the buggy address: [ 1428.849436] ffff880194483400: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 1428.849620] ffff880194483480: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 1428.849804] >ffff880194483500: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 1428.849985] ^ [ 1428.850120] ffff880194483580: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 1428.850303] ffff880194483600: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 1428.850498] ================================================================== Reported-by: Wen Xu Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/inline.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 9a245d2d5b7c6..2bcb2d36f024e 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -130,6 +130,16 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page) if (err) return err; + if (unlikely(dn->data_blkaddr != NEW_ADDR)) { + f2fs_put_dnode(dn); + set_sbi_flag(fio.sbi, SBI_NEED_FSCK); + f2fs_msg(fio.sbi->sb, KERN_WARNING, + "%s: corrupted inline inode ino=%lx, i_addr[0]:0x%x, " + "run fsck to fix.", + __func__, dn->inode->i_ino, dn->data_blkaddr); + return -EINVAL; + } + f2fs_bug_on(F2FS_P_SB(page), PageWriteback(page)); f2fs_do_read_inline_data(page, dn->inode_page); @@ -363,6 +373,17 @@ static int f2fs_move_inline_dirents(struct inode *dir, struct page *ipage, if (err) goto out; + if (unlikely(dn.data_blkaddr != NEW_ADDR)) { + f2fs_put_dnode(&dn); + set_sbi_flag(F2FS_P_SB(page), SBI_NEED_FSCK); + f2fs_msg(F2FS_P_SB(page)->sb, KERN_WARNING, + "%s: corrupted inline inode ino=%lx, i_addr[0]:0x%x, " + "run fsck to fix.", + __func__, dir->i_ino, dn.data_blkaddr); + err = -EINVAL; + goto out; + } + f2fs_wait_on_page_writeback(page, DATA, true); dentry_blk = page_address(page); -- GitLab From 0d6b75ec9265a3ff2c2ea2c935b2264271290939 Mon Sep 17 00:00:00 2001 From: Gao Xiang Date: Sat, 30 Jun 2018 23:57:03 +0800 Subject: [PATCH 0029/2269] f2fs: avoid the global name 'fault_name' Non-prefix global name 'fault_name' will pollute global namespace, fix it. Refer to: https://lists.01.org/pipermail/kbuild-all/2018-June/049660.html To: Jaegeuk Kim To: Chao Yu Cc: linux-f2fs-devel@lists.sourceforge.net Cc: linux-kernel@vger.kernel.org Reported-by: kbuild test robot Signed-off-by: Gao Xiang Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 4 ++-- fs/f2fs/super.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 181f48b707006..fd5ddce8210d5 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -66,7 +66,7 @@ struct f2fs_fault_info { unsigned int inject_type; }; -extern char *fault_name[FAULT_MAX]; +extern char *f2fs_fault_name[FAULT_MAX]; #define IS_FAULT_SET(fi, type) ((fi)->inject_type & (1 << (type))) #endif @@ -1284,7 +1284,7 @@ struct f2fs_sb_info { #ifdef CONFIG_F2FS_FAULT_INJECTION #define f2fs_show_injection_info(type) \ printk("%sF2FS-fs : inject %s in %s of %pF\n", \ - KERN_INFO, fault_name[type], \ + KERN_INFO, f2fs_fault_name[type], \ __func__, __builtin_return_address(0)) static inline bool time_to_inject(struct f2fs_sb_info *sbi, int type) { diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index c4272522c36fb..61b0feb60ecbc 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -41,7 +41,7 @@ static struct kmem_cache *f2fs_inode_cachep; #ifdef CONFIG_F2FS_FAULT_INJECTION -char *fault_name[FAULT_MAX] = { +char *f2fs_fault_name[FAULT_MAX] = { [FAULT_KMALLOC] = "kmalloc", [FAULT_KVMALLOC] = "kvmalloc", [FAULT_PAGE_ALLOC] = "page alloc", -- GitLab From 518400f3612c18c462b6647a470721e0e550076f Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sun, 1 Jul 2018 13:57:06 -0700 Subject: [PATCH 0030/2269] f2fs: Replace strncpy with memcpy gcc 8.1.0 complains: fs/f2fs/namei.c: In function 'f2fs_update_extension_list': fs/f2fs/namei.c:257:3: warning: 'strncpy' output truncated before terminating nul copying as many bytes from a string as its length fs/f2fs/namei.c:249:3: warning: 'strncpy' output truncated before terminating nul copying as many bytes from a string as its length Using strncpy() is indeed less than perfect since the length of data to be copied has already been determined with strlen(). Replace strncpy() with memcpy() to address the warning and optimize the code a little. Signed-off-by: Guenter Roeck Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/namei.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 2992219668a02..ce2e6af3054be 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -246,7 +246,7 @@ int f2fs_update_extension_list(struct f2fs_sb_info *sbi, const char *name, return -EINVAL; if (hot) { - strncpy(extlist[count], name, strlen(name)); + memcpy(extlist[count], name, strlen(name)); sbi->raw_super->hot_ext_count = hot_count + 1; } else { char buf[F2FS_MAX_EXTENSION][F2FS_EXTENSION_LEN]; @@ -254,7 +254,7 @@ int f2fs_update_extension_list(struct f2fs_sb_info *sbi, const char *name, memcpy(buf, &extlist[cold_count], F2FS_EXTENSION_LEN * hot_count); memset(extlist[cold_count], 0, F2FS_EXTENSION_LEN); - strncpy(extlist[cold_count], name, strlen(name)); + memcpy(extlist[cold_count], name, strlen(name)); memcpy(&extlist[cold_count + 1], buf, F2FS_EXTENSION_LEN * hot_count); sbi->raw_super->extension_count = cpu_to_le32(cold_count + 1); -- GitLab From 60f460025177b78ed898250e99250f3ed685fd78 Mon Sep 17 00:00:00 2001 From: Yunlei He Date: Mon, 2 Jul 2018 10:40:19 +0800 Subject: [PATCH 0031/2269] f2fs: check the right return value of memory alloc function This patch check the right return value of memory alloc function Signed-off-by: Yunlei He Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index b45354eed7c8e..ec91f78c2c99f 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -2798,7 +2798,7 @@ static int init_free_nid_cache(struct f2fs_sb_info *sbi) for (i = 0; i < nm_i->nat_blocks; i++) { nm_i->free_nid_bitmap[i] = f2fs_kvzalloc(sbi, f2fs_bitmap_size(NAT_ENTRY_PER_BLOCK), GFP_KERNEL); - if (!nm_i->free_nid_bitmap) + if (!nm_i->free_nid_bitmap[i]) return -ENOMEM; } -- GitLab From b46d535bdcfe0f0398c2db837f74ae98f24b961c Mon Sep 17 00:00:00 2001 From: Sahitya Tummala Date: Mon, 2 Jul 2018 11:37:40 +0530 Subject: [PATCH 0032/2269] f2fs: show the fsync_mode=nobarrier mount option This patch shows the fsync_mode=nobarrier mount option in f2fs_show_options(). Signed-off-by: Sahitya Tummala Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 61b0feb60ecbc..148020ef7fddd 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1342,6 +1342,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) seq_printf(seq, ",fsync_mode=%s", "posix"); else if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_STRICT) seq_printf(seq, ",fsync_mode=%s", "strict"); + else if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_NOBARRIER) + seq_printf(seq, ",fsync_mode=%s", "nobarrier"); return 0; } -- GitLab From 1918081740c3128b58fe87b0f33fd40d45a5ec61 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 4 Jul 2018 18:04:10 +0800 Subject: [PATCH 0033/2269] f2fs: try grabbing node page lock aggressively in sync scenario In synchronous scenario, like in checkpoint(), we are going to flush dirty node pages to device synchronously, we can easily failed writebacking node page due to trylock_page() failure, especially in condition of intensive lock competition, which can cause long latency of checkpoint(). So let's use lock_page() in synchronous scenario to avoid this issue. Signed-off-by: Yunlei He Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index ec91f78c2c99f..0311e12f26e67 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1638,7 +1638,9 @@ next_step: !is_cold_node(page))) continue; lock_node: - if (!trylock_page(page)) + if (wbc->sync_mode == WB_SYNC_ALL) + lock_page(page); + else if (!trylock_page(page)) continue; if (unlikely(page->mapping != NODE_MAPPING(sbi))) { -- GitLab From 71d96f553631b64f03b1172603597ba34a55c06a Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 4 Jul 2018 21:20:05 +0800 Subject: [PATCH 0034/2269] f2fs: fix to skip GC if type in SSA and SIT is inconsistent If segment type in SSA and SIT is inconsistent, we will encounter below BUG_ON during GC, to avoid this panic, let's just skip doing GC on such segment. The bug is triggered with image reported in below link: https://bugzilla.kernel.org/show_bug.cgi?id=200223 [ 388.060262] ------------[ cut here ]------------ [ 388.060268] kernel BUG at /home/y00370721/git/devf2fs/gc.c:989! [ 388.061172] invalid opcode: 0000 [#1] SMP [ 388.061773] Modules linked in: f2fs(O) bluetooth ecdh_generic xt_tcpudp iptable_filter ip_tables x_tables lp ttm drm_kms_helper drm intel_rapl sb_edac crct10dif_pclmul crc32_pclmul ghash_clmulni_intel pcbc aesni_intel fb_sys_fops ppdev aes_x86_64 syscopyarea crypto_simd sysfillrect parport_pc joydev sysimgblt glue_helper parport cryptd i2c_piix4 serio_raw mac_hid btrfs hid_generic usbhid hid raid6_pq psmouse pata_acpi floppy [ 388.064247] CPU: 7 PID: 4151 Comm: f2fs_gc-7:0 Tainted: G O 4.13.0-rc1+ #26 [ 388.065306] Hardware name: Xen HVM domU, BIOS 4.1.2_115-900.260_ 11/06/2015 [ 388.066058] task: ffff880201583b80 task.stack: ffffc90004d7c000 [ 388.069948] RIP: 0010:do_garbage_collect+0xcc8/0xcd0 [f2fs] [ 388.070766] RSP: 0018:ffffc90004d7fc68 EFLAGS: 00010202 [ 388.071783] RAX: ffff8801ed227000 RBX: 0000000000000001 RCX: ffffea0007b489c0 [ 388.072700] RDX: ffff880000000000 RSI: 0000000000000001 RDI: ffffea0007b489c0 [ 388.073607] RBP: ffffc90004d7fd58 R08: 0000000000000003 R09: ffffea0007b489dc [ 388.074619] R10: 0000000000000000 R11: 0052782ab317138d R12: 0000000000000018 [ 388.075625] R13: 0000000000000018 R14: ffff880211ceb000 R15: ffff880211ceb000 [ 388.076687] FS: 0000000000000000(0000) GS:ffff880214fc0000(0000) knlGS:0000000000000000 [ 388.083277] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 388.084536] CR2: 0000000000e18c60 CR3: 00000001ecf2e000 CR4: 00000000001406e0 [ 388.085748] Call Trace: [ 388.086690] ? find_next_bit+0xb/0x10 [ 388.088091] f2fs_gc+0x1a8/0x9d0 [f2fs] [ 388.088888] ? lock_timer_base+0x7d/0xa0 [ 388.090213] ? try_to_del_timer_sync+0x44/0x60 [ 388.091698] gc_thread_func+0x342/0x4b0 [f2fs] [ 388.092892] ? wait_woken+0x80/0x80 [ 388.094098] kthread+0x109/0x140 [ 388.095010] ? f2fs_gc+0x9d0/0x9d0 [f2fs] [ 388.096043] ? kthread_park+0x60/0x60 [ 388.097281] ret_from_fork+0x25/0x30 [ 388.098401] Code: ff ff 48 83 e8 01 48 89 44 24 58 e9 27 f8 ff ff 48 83 e8 01 e9 78 fc ff ff 48 8d 78 ff e9 17 fb ff ff 48 83 ef 01 e9 4d f4 ff ff <0f> 0b 66 0f 1f 44 00 00 0f 1f 44 00 00 55 48 89 e5 41 56 41 55 [ 388.100864] RIP: do_garbage_collect+0xcc8/0xcd0 [f2fs] RSP: ffffc90004d7fc68 [ 388.101810] ---[ end trace 81c73d6e6b7da61d ]--- Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 857dcc05df640..9be5b5007459b 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -986,7 +986,13 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, goto next; sum = page_address(sum_page); - f2fs_bug_on(sbi, type != GET_SUM_TYPE((&sum->footer))); + if (type != GET_SUM_TYPE((&sum->footer))) { + f2fs_msg(sbi->sb, KERN_ERR, "Inconsistent segment (%u) " + "type [%d, %d] in SSA and SIT", + segno, type, GET_SUM_TYPE((&sum->footer))); + set_sbi_flag(sbi, SBI_NEED_FSCK); + goto next; + } /* * this is to avoid deadlock: -- GitLab From 95a3d95a7ac1b3c7cb035a858d6dbc1f6be6e125 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 1 Aug 2018 19:13:44 +0800 Subject: [PATCH 0035/2269] f2fs: fix to do sanity check with block address in main area This patch add to do sanity check with below field: - cp_pack_total_block_count - blkaddr of data/node - extent info - Overview BUG() in verify_block_addr() when writing to a corrupted f2fs image - Reproduce (4.18 upstream kernel) - POC (poc.c) static void activity(char *mpoint) { char *foo_bar_baz; int err; static int buf[8192]; memset(buf, 0, sizeof(buf)); err = asprintf(&foo_bar_baz, "%s/foo/bar/baz", mpoint); int fd = open(foo_bar_baz, O_RDWR | O_TRUNC, 0777); if (fd >= 0) { write(fd, (char *)buf, sizeof(buf)); fdatasync(fd); close(fd); } } int main(int argc, char *argv[]) { activity(argv[1]); return 0; } - Kernel message [ 689.349473] F2FS-fs (loop0): Mounted with checkpoint version = 3 [ 699.728662] WARNING: CPU: 0 PID: 1309 at fs/f2fs/segment.c:2860 f2fs_inplace_write_data+0x232/0x240 [ 699.728670] Modules linked in: snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hwdep snd_hda_core snd_pcm snd_timer snd mac_hid i2c_piix4 soundcore ib_iser rdma_cm iw_cm ib_cm ib_core iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx raid1 raid0 multipath linear 8139too crct10dif_pclmul crc32_pclmul qxl drm_kms_helper syscopyarea aesni_intel sysfillrect sysimgblt fb_sys_fops ttm drm aes_x86_64 crypto_simd cryptd 8139cp glue_helper mii pata_acpi floppy [ 699.729056] CPU: 0 PID: 1309 Comm: a.out Not tainted 4.18.0-rc1+ #4 [ 699.729064] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 [ 699.729074] RIP: 0010:f2fs_inplace_write_data+0x232/0x240 [ 699.729076] Code: ff e9 cf fe ff ff 49 8d 7d 10 e8 39 45 ad ff 4d 8b 7d 10 be 04 00 00 00 49 8d 7f 48 e8 07 49 ad ff 45 8b 7f 48 e9 fb fe ff ff <0f> 0b f0 41 80 4d 48 04 e9 65 fe ff ff 90 66 66 66 66 90 55 48 8d [ 699.729130] RSP: 0018:ffff8801f43af568 EFLAGS: 00010202 [ 699.729139] RAX: 000000000000003f RBX: ffff8801f43af7b8 RCX: ffffffffb88c9113 [ 699.729142] RDX: 0000000000000003 RSI: dffffc0000000000 RDI: ffff8802024e5540 [ 699.729144] RBP: ffff8801f43af590 R08: 0000000000000009 R09: ffffffffffffffe8 [ 699.729147] R10: 0000000000000001 R11: ffffed0039b0596a R12: ffff8802024e5540 [ 699.729149] R13: ffff8801f0335500 R14: ffff8801e3e7a700 R15: ffff8801e1ee4450 [ 699.729154] FS: 00007f9bf97f5700(0000) GS:ffff8801f6e00000(0000) knlGS:0000000000000000 [ 699.729156] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 699.729159] CR2: 00007f9bf925d170 CR3: 00000001f0c34000 CR4: 00000000000006f0 [ 699.729171] Call Trace: [ 699.729192] f2fs_do_write_data_page+0x2e2/0xe00 [ 699.729203] ? f2fs_should_update_outplace+0xd0/0xd0 [ 699.729238] ? memcg_drain_all_list_lrus+0x280/0x280 [ 699.729269] ? __radix_tree_replace+0xa3/0x120 [ 699.729276] __write_data_page+0x5c7/0xe30 [ 699.729291] ? kasan_check_read+0x11/0x20 [ 699.729310] ? page_mapped+0x8a/0x110 [ 699.729321] ? page_mkclean+0xe9/0x160 [ 699.729327] ? f2fs_do_write_data_page+0xe00/0xe00 [ 699.729331] ? invalid_page_referenced_vma+0x130/0x130 [ 699.729345] ? clear_page_dirty_for_io+0x332/0x450 [ 699.729351] f2fs_write_cache_pages+0x4ca/0x860 [ 699.729358] ? __write_data_page+0xe30/0xe30 [ 699.729374] ? percpu_counter_add_batch+0x22/0xa0 [ 699.729380] ? kasan_check_write+0x14/0x20 [ 699.729391] ? _raw_spin_lock+0x17/0x40 [ 699.729403] ? f2fs_mark_inode_dirty_sync.part.18+0x16/0x30 [ 699.729413] ? iov_iter_advance+0x113/0x640 [ 699.729418] ? f2fs_write_end+0x133/0x2e0 [ 699.729423] ? balance_dirty_pages_ratelimited+0x239/0x640 [ 699.729428] f2fs_write_data_pages+0x329/0x520 [ 699.729433] ? generic_perform_write+0x250/0x320 [ 699.729438] ? f2fs_write_cache_pages+0x860/0x860 [ 699.729454] ? current_time+0x110/0x110 [ 699.729459] ? f2fs_preallocate_blocks+0x1ef/0x370 [ 699.729464] do_writepages+0x37/0xb0 [ 699.729468] ? f2fs_write_cache_pages+0x860/0x860 [ 699.729472] ? do_writepages+0x37/0xb0 [ 699.729478] __filemap_fdatawrite_range+0x19a/0x1f0 [ 699.729483] ? delete_from_page_cache_batch+0x4e0/0x4e0 [ 699.729496] ? __vfs_write+0x2b2/0x410 [ 699.729501] file_write_and_wait_range+0x66/0xb0 [ 699.729506] f2fs_do_sync_file+0x1f9/0xd90 [ 699.729511] ? truncate_partial_data_page+0x290/0x290 [ 699.729521] ? __sb_end_write+0x30/0x50 [ 699.729526] ? vfs_write+0x20f/0x260 [ 699.729530] f2fs_sync_file+0x9a/0xb0 [ 699.729534] ? f2fs_do_sync_file+0xd90/0xd90 [ 699.729548] vfs_fsync_range+0x68/0x100 [ 699.729554] ? __fget_light+0xc9/0xe0 [ 699.729558] do_fsync+0x3d/0x70 [ 699.729562] __x64_sys_fdatasync+0x24/0x30 [ 699.729585] do_syscall_64+0x78/0x170 [ 699.729595] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 699.729613] RIP: 0033:0x7f9bf930d800 [ 699.729615] Code: 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 83 3d 49 bf 2c 00 00 75 10 b8 4b 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 31 c3 48 83 ec 08 e8 be 78 01 00 48 89 04 24 [ 699.729668] RSP: 002b:00007ffee3606c68 EFLAGS: 00000246 ORIG_RAX: 000000000000004b [ 699.729673] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f9bf930d800 [ 699.729675] RDX: 0000000000008000 RSI: 00000000006010a0 RDI: 0000000000000003 [ 699.729678] RBP: 00007ffee3606ca0 R08: 0000000001503010 R09: 0000000000000000 [ 699.729680] R10: 00000000000002e8 R11: 0000000000000246 R12: 0000000000400610 [ 699.729683] R13: 00007ffee3606da0 R14: 0000000000000000 R15: 0000000000000000 [ 699.729687] ---[ end trace 4ce02f25ff7d3df5 ]--- [ 699.729782] ------------[ cut here ]------------ [ 699.729785] kernel BUG at fs/f2fs/segment.h:654! [ 699.731055] invalid opcode: 0000 [#1] SMP KASAN PTI [ 699.732104] CPU: 0 PID: 1309 Comm: a.out Tainted: G W 4.18.0-rc1+ #4 [ 699.733684] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 [ 699.735611] RIP: 0010:f2fs_submit_page_bio+0x29b/0x730 [ 699.736649] Code: 54 49 8d bd 18 04 00 00 e8 b2 59 af ff 41 8b 8d 18 04 00 00 8b 45 b8 41 d3 e6 44 01 f0 4c 8d 73 14 41 39 c7 0f 82 37 fe ff ff <0f> 0b 65 8b 05 2c 04 77 47 89 c0 48 0f a3 05 52 c1 d5 01 0f 92 c0 [ 699.740524] RSP: 0018:ffff8801f43af508 EFLAGS: 00010283 [ 699.741573] RAX: 0000000000000000 RBX: ffff8801f43af7b8 RCX: ffffffffb88a7cef [ 699.743006] RDX: 0000000000000007 RSI: dffffc0000000000 RDI: ffff8801e3e7a64c [ 699.744426] RBP: ffff8801f43af558 R08: ffffed003e066b55 R09: ffffed003e066b55 [ 699.745833] R10: 0000000000000001 R11: ffffed003e066b54 R12: ffffea0007876940 [ 699.747256] R13: ffff8801f0335500 R14: ffff8801e3e7a600 R15: 0000000000000001 [ 699.748683] FS: 00007f9bf97f5700(0000) GS:ffff8801f6e00000(0000) knlGS:0000000000000000 [ 699.750293] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 699.751462] CR2: 00007f9bf925d170 CR3: 00000001f0c34000 CR4: 00000000000006f0 [ 699.752874] Call Trace: [ 699.753386] ? f2fs_inplace_write_data+0x93/0x240 [ 699.754341] f2fs_inplace_write_data+0xd2/0x240 [ 699.755271] f2fs_do_write_data_page+0x2e2/0xe00 [ 699.756214] ? f2fs_should_update_outplace+0xd0/0xd0 [ 699.757215] ? memcg_drain_all_list_lrus+0x280/0x280 [ 699.758209] ? __radix_tree_replace+0xa3/0x120 [ 699.759164] __write_data_page+0x5c7/0xe30 [ 699.760002] ? kasan_check_read+0x11/0x20 [ 699.760823] ? page_mapped+0x8a/0x110 [ 699.761573] ? page_mkclean+0xe9/0x160 [ 699.762345] ? f2fs_do_write_data_page+0xe00/0xe00 [ 699.763332] ? invalid_page_referenced_vma+0x130/0x130 [ 699.764374] ? clear_page_dirty_for_io+0x332/0x450 [ 699.765347] f2fs_write_cache_pages+0x4ca/0x860 [ 699.766276] ? __write_data_page+0xe30/0xe30 [ 699.767161] ? percpu_counter_add_batch+0x22/0xa0 [ 699.768112] ? kasan_check_write+0x14/0x20 [ 699.768951] ? _raw_spin_lock+0x17/0x40 [ 699.769739] ? f2fs_mark_inode_dirty_sync.part.18+0x16/0x30 [ 699.770885] ? iov_iter_advance+0x113/0x640 [ 699.771743] ? f2fs_write_end+0x133/0x2e0 [ 699.772569] ? balance_dirty_pages_ratelimited+0x239/0x640 [ 699.773680] f2fs_write_data_pages+0x329/0x520 [ 699.774603] ? generic_perform_write+0x250/0x320 [ 699.775544] ? f2fs_write_cache_pages+0x860/0x860 [ 699.776510] ? current_time+0x110/0x110 [ 699.777299] ? f2fs_preallocate_blocks+0x1ef/0x370 [ 699.778279] do_writepages+0x37/0xb0 [ 699.779026] ? f2fs_write_cache_pages+0x860/0x860 [ 699.779978] ? do_writepages+0x37/0xb0 [ 699.780755] __filemap_fdatawrite_range+0x19a/0x1f0 [ 699.781746] ? delete_from_page_cache_batch+0x4e0/0x4e0 [ 699.782820] ? __vfs_write+0x2b2/0x410 [ 699.783597] file_write_and_wait_range+0x66/0xb0 [ 699.784540] f2fs_do_sync_file+0x1f9/0xd90 [ 699.785381] ? truncate_partial_data_page+0x290/0x290 [ 699.786415] ? __sb_end_write+0x30/0x50 [ 699.787204] ? vfs_write+0x20f/0x260 [ 699.787941] f2fs_sync_file+0x9a/0xb0 [ 699.788694] ? f2fs_do_sync_file+0xd90/0xd90 [ 699.789572] vfs_fsync_range+0x68/0x100 [ 699.790360] ? __fget_light+0xc9/0xe0 [ 699.791128] do_fsync+0x3d/0x70 [ 699.791779] __x64_sys_fdatasync+0x24/0x30 [ 699.792614] do_syscall_64+0x78/0x170 [ 699.793371] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 699.794406] RIP: 0033:0x7f9bf930d800 [ 699.795134] Code: 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 83 3d 49 bf 2c 00 00 75 10 b8 4b 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 31 c3 48 83 ec 08 e8 be 78 01 00 48 89 04 24 [ 699.798960] RSP: 002b:00007ffee3606c68 EFLAGS: 00000246 ORIG_RAX: 000000000000004b [ 699.800483] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f9bf930d800 [ 699.801923] RDX: 0000000000008000 RSI: 00000000006010a0 RDI: 0000000000000003 [ 699.803373] RBP: 00007ffee3606ca0 R08: 0000000001503010 R09: 0000000000000000 [ 699.804798] R10: 00000000000002e8 R11: 0000000000000246 R12: 0000000000400610 [ 699.806233] R13: 00007ffee3606da0 R14: 0000000000000000 R15: 0000000000000000 [ 699.807667] Modules linked in: snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hwdep snd_hda_core snd_pcm snd_timer snd mac_hid i2c_piix4 soundcore ib_iser rdma_cm iw_cm ib_cm ib_core iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx raid1 raid0 multipath linear 8139too crct10dif_pclmul crc32_pclmul qxl drm_kms_helper syscopyarea aesni_intel sysfillrect sysimgblt fb_sys_fops ttm drm aes_x86_64 crypto_simd cryptd 8139cp glue_helper mii pata_acpi floppy [ 699.817079] ---[ end trace 4ce02f25ff7d3df6 ]--- [ 699.818068] RIP: 0010:f2fs_submit_page_bio+0x29b/0x730 [ 699.819114] Code: 54 49 8d bd 18 04 00 00 e8 b2 59 af ff 41 8b 8d 18 04 00 00 8b 45 b8 41 d3 e6 44 01 f0 4c 8d 73 14 41 39 c7 0f 82 37 fe ff ff <0f> 0b 65 8b 05 2c 04 77 47 89 c0 48 0f a3 05 52 c1 d5 01 0f 92 c0 [ 699.822919] RSP: 0018:ffff8801f43af508 EFLAGS: 00010283 [ 699.823977] RAX: 0000000000000000 RBX: ffff8801f43af7b8 RCX: ffffffffb88a7cef [ 699.825436] RDX: 0000000000000007 RSI: dffffc0000000000 RDI: ffff8801e3e7a64c [ 699.826881] RBP: ffff8801f43af558 R08: ffffed003e066b55 R09: ffffed003e066b55 [ 699.828292] R10: 0000000000000001 R11: ffffed003e066b54 R12: ffffea0007876940 [ 699.829750] R13: ffff8801f0335500 R14: ffff8801e3e7a600 R15: 0000000000000001 [ 699.831192] FS: 00007f9bf97f5700(0000) GS:ffff8801f6e00000(0000) knlGS:0000000000000000 [ 699.832793] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 699.833981] CR2: 00007f9bf925d170 CR3: 00000001f0c34000 CR4: 00000000000006f0 [ 699.835556] ================================================================== [ 699.837029] BUG: KASAN: stack-out-of-bounds in update_stack_state+0x38c/0x3e0 [ 699.838462] Read of size 8 at addr ffff8801f43af970 by task a.out/1309 [ 699.840086] CPU: 0 PID: 1309 Comm: a.out Tainted: G D W 4.18.0-rc1+ #4 [ 699.841603] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 [ 699.843475] Call Trace: [ 699.843982] dump_stack+0x7b/0xb5 [ 699.844661] print_address_description+0x70/0x290 [ 699.845607] kasan_report+0x291/0x390 [ 699.846351] ? update_stack_state+0x38c/0x3e0 [ 699.853831] __asan_load8+0x54/0x90 [ 699.854569] update_stack_state+0x38c/0x3e0 [ 699.855428] ? __read_once_size_nocheck.constprop.7+0x20/0x20 [ 699.856601] ? __save_stack_trace+0x5e/0x100 [ 699.857476] unwind_next_frame.part.5+0x18e/0x490 [ 699.858448] ? unwind_dump+0x290/0x290 [ 699.859217] ? clear_page_dirty_for_io+0x332/0x450 [ 699.860185] __unwind_start+0x106/0x190 [ 699.860974] __save_stack_trace+0x5e/0x100 [ 699.861808] ? __save_stack_trace+0x5e/0x100 [ 699.862691] ? unlink_anon_vmas+0xba/0x2c0 [ 699.863525] save_stack_trace+0x1f/0x30 [ 699.864312] save_stack+0x46/0xd0 [ 699.864993] ? __alloc_pages_slowpath+0x1420/0x1420 [ 699.865990] ? flush_tlb_mm_range+0x15e/0x220 [ 699.866889] ? kasan_check_write+0x14/0x20 [ 699.867724] ? __dec_node_state+0x92/0xb0 [ 699.868543] ? lock_page_memcg+0x85/0xf0 [ 699.869350] ? unlock_page_memcg+0x16/0x80 [ 699.870185] ? page_remove_rmap+0x198/0x520 [ 699.871048] ? mark_page_accessed+0x133/0x200 [ 699.871930] ? _cond_resched+0x1a/0x50 [ 699.872700] ? unmap_page_range+0xcd4/0xe50 [ 699.873551] ? rb_next+0x58/0x80 [ 699.874217] ? rb_next+0x58/0x80 [ 699.874895] __kasan_slab_free+0x13c/0x1a0 [ 699.875734] ? unlink_anon_vmas+0xba/0x2c0 [ 699.876563] kasan_slab_free+0xe/0x10 [ 699.877315] kmem_cache_free+0x89/0x1e0 [ 699.878095] unlink_anon_vmas+0xba/0x2c0 [ 699.878913] free_pgtables+0x101/0x1b0 [ 699.879677] exit_mmap+0x146/0x2a0 [ 699.880378] ? __ia32_sys_munmap+0x50/0x50 [ 699.881214] ? kasan_check_read+0x11/0x20 [ 699.882052] ? mm_update_next_owner+0x322/0x380 [ 699.882985] mmput+0x8b/0x1d0 [ 699.883602] do_exit+0x43a/0x1390 [ 699.884288] ? mm_update_next_owner+0x380/0x380 [ 699.885212] ? f2fs_sync_file+0x9a/0xb0 [ 699.885995] ? f2fs_do_sync_file+0xd90/0xd90 [ 699.886877] ? vfs_fsync_range+0x68/0x100 [ 699.887694] ? __fget_light+0xc9/0xe0 [ 699.888442] ? do_fsync+0x3d/0x70 [ 699.889118] ? __x64_sys_fdatasync+0x24/0x30 [ 699.889996] rewind_stack_do_exit+0x17/0x20 [ 699.890860] RIP: 0033:0x7f9bf930d800 [ 699.891585] Code: Bad RIP value. [ 699.892268] RSP: 002b:00007ffee3606c68 EFLAGS: 00000246 ORIG_RAX: 000000000000004b [ 699.893781] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f9bf930d800 [ 699.895220] RDX: 0000000000008000 RSI: 00000000006010a0 RDI: 0000000000000003 [ 699.896643] RBP: 00007ffee3606ca0 R08: 0000000001503010 R09: 0000000000000000 [ 699.898069] R10: 00000000000002e8 R11: 0000000000000246 R12: 0000000000400610 [ 699.899505] R13: 00007ffee3606da0 R14: 0000000000000000 R15: 0000000000000000 [ 699.901241] The buggy address belongs to the page: [ 699.902215] page:ffffea0007d0ebc0 count:0 mapcount:0 mapping:0000000000000000 index:0x0 [ 699.903811] flags: 0x2ffff0000000000() [ 699.904585] raw: 02ffff0000000000 0000000000000000 ffffffff07d00101 0000000000000000 [ 699.906125] raw: 0000000000000000 0000000000240000 00000000ffffffff 0000000000000000 [ 699.907673] page dumped because: kasan: bad access detected [ 699.909108] Memory state around the buggy address: [ 699.910077] ffff8801f43af800: 00 f1 f1 f1 f1 00 f4 f4 f4 f3 f3 f3 f3 00 00 00 [ 699.911528] ffff8801f43af880: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 699.912953] >ffff8801f43af900: 00 00 00 00 00 00 00 00 f1 01 f4 f4 f4 f2 f2 f2 [ 699.914392] ^ [ 699.915758] ffff8801f43af980: f2 00 f4 f4 00 00 00 00 f2 00 00 00 00 00 00 00 [ 699.917193] ffff8801f43afa00: 00 00 00 00 00 00 00 00 00 f3 f3 f3 00 00 00 00 [ 699.918634] ================================================================== - Location https://elixir.bootlin.com/linux/v4.18-rc1/source/fs/f2fs/segment.h#L644 Reported-by Wen Xu Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 22 +++++++++++++++++++--- fs/f2fs/data.c | 33 +++++++++++++++++++++++++++------ fs/f2fs/f2fs.h | 3 +++ fs/f2fs/file.c | 12 ++++++++++++ fs/f2fs/inode.c | 17 +++++++++++++++++ fs/f2fs/node.c | 4 ++++ fs/f2fs/segment.h | 3 +-- 7 files changed, 83 insertions(+), 11 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 3a2eb094a6ee5..480205ed76790 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -86,8 +86,10 @@ repeat: fio.page = page; if (f2fs_submit_page_bio(&fio)) { - f2fs_put_page(page, 1); - goto repeat; + memset(page_address(page), 0, PAGE_SIZE); + f2fs_stop_checkpoint(sbi, false); + f2fs_bug_on(sbi, 1); + return page; } lock_page(page); @@ -143,8 +145,14 @@ bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi, case META_POR: case DATA_GENERIC: if (unlikely(blkaddr >= MAX_BLKADDR(sbi) || - blkaddr < MAIN_BLKADDR(sbi))) + blkaddr < MAIN_BLKADDR(sbi))) { + if (type == DATA_GENERIC) { + f2fs_msg(sbi->sb, KERN_WARNING, + "access invalid blkaddr:%u", blkaddr); + WARN_ON(1); + } return false; + } break; case META_GENERIC: if (unlikely(blkaddr < SEG0_BLKADDR(sbi) || @@ -771,6 +779,14 @@ static struct page *validate_checkpoint(struct f2fs_sb_info *sbi, &cp_page_1, version); if (err) goto invalid_cp1; + + if (le32_to_cpu(cp_block->cp_pack_total_block_count) > + sbi->blocks_per_seg) { + f2fs_msg(sbi->sb, KERN_WARNING, + "invalid cp_pack_total_block_count:%u", + le32_to_cpu(cp_block->cp_pack_total_block_count)); + goto invalid_cp1; + } pre_version = *version; cp_addr += le32_to_cpu(cp_block->cp_pack_total_block_count) - 1; diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index e57327665934b..589b5ea34f728 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -441,7 +441,10 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio) struct page *page = fio->encrypted_page ? fio->encrypted_page : fio->page; - verify_block_addr(fio, fio->new_blkaddr); + if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr, + __is_meta_io(fio) ? META_GENERIC : DATA_GENERIC)) + return -EFAULT; + trace_f2fs_submit_page_bio(page, fio); f2fs_trace_ios(fio, 0); @@ -1045,6 +1048,12 @@ next_dnode: next_block: blkaddr = datablock_addr(dn.inode, dn.node_page, dn.ofs_in_node); + if (__is_valid_data_blkaddr(blkaddr) && + !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC)) { + err = -EFAULT; + goto sync_out; + } + if (!is_valid_data_blkaddr(sbi, blkaddr)) { if (create) { if (unlikely(f2fs_cp_error(sbi))) { @@ -1500,6 +1509,10 @@ got_it: SetPageUptodate(page); goto confused; } + + if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), block_nr, + DATA_GENERIC)) + goto set_error_page; } else { zero_user_segment(page, 0, PAGE_SIZE); if (!PageUptodate(page)) @@ -1700,11 +1713,13 @@ int f2fs_do_write_data_page(struct f2fs_io_info *fio) f2fs_lookup_extent_cache(inode, page->index, &ei)) { fio->old_blkaddr = ei.blk + page->index - ei.fofs; - if (is_valid_data_blkaddr(fio->sbi, fio->old_blkaddr)) { - ipu_force = true; - fio->need_lock = LOCK_DONE; - goto got_it; - } + if (!f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr, + DATA_GENERIC)) + return -EFAULT; + + ipu_force = true; + fio->need_lock = LOCK_DONE; + goto got_it; } /* Deadlock due to between page->lock and f2fs_lock_op */ @@ -1723,6 +1738,12 @@ int f2fs_do_write_data_page(struct f2fs_io_info *fio) goto out_writepage; } got_it: + if (__is_valid_data_blkaddr(fio->old_blkaddr) && + !f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr, + DATA_GENERIC)) { + err = -EFAULT; + goto out_writepage; + } /* * If current allocation needs SSR, * it had better in-place writes for updated data. diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index fd5ddce8210d5..f11721e76ee34 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2670,6 +2670,9 @@ static inline void f2fs_update_iostat(struct f2fs_sb_info *sbi, spin_unlock(&sbi->iostat_lock); } +#define __is_meta_io(fio) (PAGE_TYPE_OF_BIO(fio->type) == META && \ + (!is_read_io(fio->op) || fio->is_meta)) + bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type); void f2fs_msg(struct super_block *sb, const char *level, const char *fmt, ...); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 94475b322e433..41c562feaa991 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -420,6 +420,13 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence) blkaddr = datablock_addr(dn.inode, dn.node_page, dn.ofs_in_node); + if (__is_valid_data_blkaddr(blkaddr) && + !f2fs_is_valid_blkaddr(F2FS_I_SB(inode), + blkaddr, DATA_GENERIC)) { + f2fs_put_dnode(&dn); + goto fail; + } + if (__found_offset(F2FS_I_SB(inode), blkaddr, dirty, pgofs, whence)) { f2fs_put_dnode(&dn); @@ -514,6 +521,11 @@ void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count) dn->data_blkaddr = NULL_ADDR; f2fs_set_data_blkaddr(dn); + + if (__is_valid_data_blkaddr(blkaddr) && + !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC)) + continue; + f2fs_invalidate_blocks(sbi, blkaddr); if (dn->ofs_in_node == 0 && IS_INODE(dn->node_page)) clear_inode_flag(dn->inode, FI_FIRST_BLOCK_WRITTEN); diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index f490393397d41..b52440f06fa58 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -236,6 +236,23 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page) __func__, inode->i_ino); return false; } + + if (F2FS_I(inode)->extent_tree) { + struct extent_info *ei = &F2FS_I(inode)->extent_tree->largest; + + if (ei->len && + (!f2fs_is_valid_blkaddr(sbi, ei->blk, DATA_GENERIC) || + !f2fs_is_valid_blkaddr(sbi, ei->blk + ei->len - 1, + DATA_GENERIC))) { + set_sbi_flag(sbi, SBI_NEED_FSCK); + f2fs_msg(sbi->sb, KERN_WARNING, + "%s: inode (ino=%lx) extent info [%u, %u, %u] " + "is incorrect, run fsck to fix", + __func__, inode->i_ino, + ei->blk, ei->fofs, ei->len); + return false; + } + } return true; } diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 0311e12f26e67..3f7af7608c83c 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1401,6 +1401,10 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted, return 0; } + if (__is_valid_data_blkaddr(ni.blk_addr) && + !f2fs_is_valid_blkaddr(sbi, ni.blk_addr, DATA_GENERIC)) + goto redirty_out; + if (atomic && !test_opt(sbi, NOBARRIER)) fio.op_flags |= REQ_PREFLUSH | REQ_FUA; diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index a7460da9af431..b5bd3287e1044 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -645,8 +645,7 @@ static inline void verify_block_addr(struct f2fs_io_info *fio, block_t blk_addr) { struct f2fs_sb_info *sbi = fio->sbi; - if (PAGE_TYPE_OF_BIO(fio->type) == META && - (!is_read_io(fio->op) || fio->is_meta)) + if (__is_meta_io(fio)) verify_blkaddr(sbi, blk_addr, META_GENERIC); else verify_blkaddr(sbi, blk_addr, DATA_GENERIC); -- GitLab From 0099e0e9b454267a35e3349a045d8424386ff8e5 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 5 Jul 2018 19:37:00 +0800 Subject: [PATCH 0036/2269] f2fs: fix to detect looped node chain correctly Below dmesg was printed when testing generic/388 of fstest: F2FS-fs (zram1): find_fsync_dnodes: detect looped node chain, blkaddr:526615, next:526616 F2FS-fs (zram1): Cannot recover all fsync data errno=-22 F2FS-fs (zram1): Mounted with checkpoint version = 22300d0e F2FS-fs (zram1): find_fsync_dnodes: detect looped node chain, blkaddr:526615, next:526616 F2FS-fs (zram1): Cannot recover all fsync data errno=-22 The reason is that we initialize free_blocks with free blocks of filesystem, so if filesystem is full, free_blocks can be zero, below condition will be true, so that, it will fail recovery. if (++loop_cnt >= free_blocks || blkaddr == next_blkaddr_of_node(page)) To fix this issue, initialize free_blocks with correct value which includes over-privision blocks. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/recovery.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 184b34be635b5..501767451e2b9 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -241,8 +241,8 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head, struct page *page = NULL; block_t blkaddr; unsigned int loop_cnt = 0; - unsigned int free_blocks = sbi->user_block_count - - valid_user_blocks(sbi); + unsigned int free_blocks = MAIN_SEGS(sbi) * sbi->blocks_per_seg - + valid_user_blocks(sbi); int err = 0; /* get node pages in the current segment */ -- GitLab From 90ed631c85515bc349fc38b81c518454d6550ea8 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 5 Jul 2018 14:24:11 +0800 Subject: [PATCH 0037/2269] f2fs: enable real-time discard by default f2fs is focused on flash based storage, so let's enable real-time discard by default, if user don't want to enable it, 'nodiscard' mount option should be used on mount. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 148020ef7fddd..55642d50b2277 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1367,12 +1367,12 @@ static void default_options(struct f2fs_sb_info *sbi) set_opt(sbi, NOHEAP); sbi->sb->s_flags |= MS_LAZYTIME; set_opt(sbi, FLUSH_MERGE); - if (f2fs_sb_has_blkzoned(sbi->sb)) { - set_opt_mode(sbi, F2FS_MOUNT_LFS); + if (blk_queue_discard(bdev_get_queue(sbi->sb->s_bdev))) set_opt(sbi, DISCARD); - } else { + if (f2fs_sb_has_blkzoned(sbi->sb)) + set_opt_mode(sbi, F2FS_MOUNT_LFS); + else set_opt_mode(sbi, F2FS_MOUNT_ADAPTIVE); - } #ifdef CONFIG_F2FS_FS_XATTR set_opt(sbi, XATTR_USER); -- GitLab From 62ba3ac8630b1cc94e8855a75e1f2c2e654f9ffb Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 6 Jul 2018 20:50:57 -0700 Subject: [PATCH 0038/2269] f2fs: fix defined but not used build warnings Fix build warnings in f2fs when CONFIG_PROC_FS is not enabled by marking the unused functions as __maybe_unused. ../fs/f2fs/sysfs.c:519:12: warning: 'segment_info_seq_show' defined but not used [-Wunused-function] ../fs/f2fs/sysfs.c:546:12: warning: 'segment_bits_seq_show' defined but not used [-Wunused-function] ../fs/f2fs/sysfs.c:570:12: warning: 'iostat_info_seq_show' defined but not used [-Wunused-function] Signed-off-by: Randy Dunlap Cc: Jaegeuk Kim Cc: Chao Yu Cc: linux-f2fs-devel@lists.sourceforge.net Signed-off-by: Jaegeuk Kim --- fs/f2fs/sysfs.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 27ddf60e3362b..47f24b922af05 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -9,6 +9,7 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ +#include #include #include #include @@ -518,7 +519,8 @@ static struct kobject f2fs_feat = { .kset = &f2fs_kset, }; -static int segment_info_seq_show(struct seq_file *seq, void *offset) +static int __maybe_unused segment_info_seq_show(struct seq_file *seq, + void *offset) { struct super_block *sb = seq->private; struct f2fs_sb_info *sbi = F2FS_SB(sb); @@ -545,7 +547,8 @@ static int segment_info_seq_show(struct seq_file *seq, void *offset) return 0; } -static int segment_bits_seq_show(struct seq_file *seq, void *offset) +static int __maybe_unused segment_bits_seq_show(struct seq_file *seq, + void *offset) { struct super_block *sb = seq->private; struct f2fs_sb_info *sbi = F2FS_SB(sb); @@ -569,7 +572,8 @@ static int segment_bits_seq_show(struct seq_file *seq, void *offset) return 0; } -static int iostat_info_seq_show(struct seq_file *seq, void *offset) +static int __maybe_unused iostat_info_seq_show(struct seq_file *seq, + void *offset) { struct super_block *sb = seq->private; struct f2fs_sb_info *sbi = F2FS_SB(sb); -- GitLab From 507bc6b940f8ca3079e51b239e7dca4a3db0acda Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sun, 8 Jul 2018 22:16:53 +0800 Subject: [PATCH 0039/2269] f2fs: detect bug_on in f2fs_wait_discard_bios Add bug_on to detect potential non-empty discard wait list. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 47b6595a078c5..199a77a9c4a93 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1388,6 +1388,8 @@ bool f2fs_wait_discard_bios(struct f2fs_sb_info *sbi) /* just to make sure there is no pending discard commands */ __wait_all_discard_cmd(sbi, NULL); + + f2fs_bug_on(sbi, atomic_read(&dcc->discard_cmd_cnt)); return dropped; } -- GitLab From 76e482e5473aefcd752f4ccc9e19cc0b9a6ed0fb Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sun, 8 Jul 2018 22:16:54 +0800 Subject: [PATCH 0040/2269] f2fs: clean up with IS_INODE() Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index b52440f06fa58..ccdf6abde5f76 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -122,7 +122,7 @@ static bool f2fs_enable_inode_chksum(struct f2fs_sb_info *sbi, struct page *page if (!f2fs_sb_has_inode_chksum(sbi->sb)) return false; - if (!RAW_IS_INODE(F2FS_NODE(page)) || !(ri->i_inline & F2FS_EXTRA_ATTR)) + if (!IS_INODE(page) || !(ri->i_inline & F2FS_EXTRA_ATTR)) return false; if (!F2FS_FITS_IN_INODE(ri, le16_to_cpu(ri->i_extra_isize), -- GitLab From 713ebba10b804e60df0659a185b7f0be24529c74 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sun, 8 Jul 2018 22:08:09 +0800 Subject: [PATCH 0041/2269] f2fs: stop issuing discard immediately if there is queued IO For background discard policy, even if there is queued user IO, still we will check max_requests times for next discard entry, it is unneeded, let's just stop this round submission immediately. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 199a77a9c4a93..478a2a87d491b 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1188,7 +1188,7 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi, struct list_head *pend_list; struct discard_cmd *dc, *tmp; struct blk_plug plug; - int i, iter = 0, issued = 0; + int i, issued = 0; bool io_interrupted = false; for (i = MAX_PLIST_NUM - 1; i >= 0; i--) { @@ -1209,20 +1209,19 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi, if (dpolicy->io_aware && i < dpolicy->io_aware_gran && !is_idle(sbi)) { io_interrupted = true; - goto skip; + break; } __submit_discard_cmd(sbi, dpolicy, dc); - issued++; -skip: - if (++iter >= dpolicy->max_requests) + + if (++issued >= dpolicy->max_requests) break; } blk_finish_plug(&plug); next: mutex_unlock(&dcc->cmd_lock); - if (iter >= dpolicy->max_requests) + if (issued >= dpolicy->max_requests || io_interrupted) break; } -- GitLab From d7acdc136c49d4eb499959cf7c122f938ea09215 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sun, 8 Jul 2018 22:11:01 +0800 Subject: [PATCH 0042/2269] f2fs: issue small discard by LBA order For small granularity discard which size is smaller than 64KB, if we issue those kind of discards orderly by size, their IOs will be spread into entire logical address, so that in FTL, L2P table will be updated randomly, result bad wear rate in the table. In this patch, we choose to issue small discard by LBA order, by this way, we can expect that L2P table updates from adjacent discard IOs can be merged in the cache, so it can reduce lifetime wearing of flash. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 2 ++ fs/f2fs/segment.c | 64 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 66 insertions(+) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index f11721e76ee34..3ab54ed6aa124 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -292,6 +292,7 @@ struct discard_policy { unsigned int io_aware_gran; /* minimum granularity discard not be aware of I/O */ bool io_aware; /* issue discard in idle time */ bool sync; /* submit discard with REQ_SYNC flag */ + bool ordered; /* issue discard by lba order */ unsigned int granularity; /* discard granularity */ }; @@ -308,6 +309,7 @@ struct discard_cmd_control { unsigned int max_discards; /* max. discards to be issued */ unsigned int discard_granularity; /* discard granularity */ unsigned int undiscard_blks; /* # of undiscard blocks */ + unsigned int next_pos; /* next discard position */ atomic_t issued_discard; /* # of issued discard */ atomic_t issing_discard; /* # of issing discard */ atomic_t discard_cmd_cnt; /* # of cached cmd count */ diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 478a2a87d491b..f5f04aabe338b 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -934,6 +934,7 @@ static void __init_discard_policy(struct f2fs_sb_info *sbi, /* common policy */ dpolicy->type = discard_type; dpolicy->sync = true; + dpolicy->ordered = false; dpolicy->granularity = granularity; dpolicy->max_requests = DEF_MAX_DISCARD_REQUEST; @@ -945,6 +946,7 @@ static void __init_discard_policy(struct f2fs_sb_info *sbi, dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME; dpolicy->io_aware = true; dpolicy->sync = false; + dpolicy->ordered = true; if (utilization(sbi) > DEF_DISCARD_URGENT_UTIL) { dpolicy->granularity = 1; dpolicy->max_interval = DEF_MIN_DISCARD_ISSUE_TIME; @@ -1181,6 +1183,63 @@ static int __queue_discard_cmd(struct f2fs_sb_info *sbi, return 0; } +static unsigned int __issue_discard_cmd_orderly(struct f2fs_sb_info *sbi, + struct discard_policy *dpolicy) +{ + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; + struct discard_cmd *prev_dc = NULL, *next_dc = NULL; + struct rb_node **insert_p = NULL, *insert_parent = NULL; + struct discard_cmd *dc; + struct blk_plug plug; + unsigned int pos = dcc->next_pos; + unsigned int issued = 0; + bool io_interrupted = false; + + mutex_lock(&dcc->cmd_lock); + dc = (struct discard_cmd *)f2fs_lookup_rb_tree_ret(&dcc->root, + NULL, pos, + (struct rb_entry **)&prev_dc, + (struct rb_entry **)&next_dc, + &insert_p, &insert_parent, true); + if (!dc) + dc = next_dc; + + blk_start_plug(&plug); + + while (dc) { + struct rb_node *node; + + if (dc->state != D_PREP) + goto next; + + if (dpolicy->io_aware && !is_idle(sbi)) { + io_interrupted = true; + break; + } + + dcc->next_pos = dc->lstart + dc->len; + __submit_discard_cmd(sbi, dpolicy, dc); + + if (++issued >= dpolicy->max_requests) + break; +next: + node = rb_next(&dc->rb_node); + dc = rb_entry_safe(node, struct discard_cmd, rb_node); + } + + blk_finish_plug(&plug); + + if (!dc) + dcc->next_pos = 0; + + mutex_unlock(&dcc->cmd_lock); + + if (!issued && io_interrupted) + issued = -1; + + return issued; +} + static int __issue_discard_cmd(struct f2fs_sb_info *sbi, struct discard_policy *dpolicy) { @@ -1194,6 +1253,10 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi, for (i = MAX_PLIST_NUM - 1; i >= 0; i--) { if (i + 1 < dpolicy->granularity) break; + + if (i < DEFAULT_DISCARD_GRANULARITY && dpolicy->ordered) + return __issue_discard_cmd_orderly(sbi, dpolicy); + pend_list = &dcc->pend_list[i]; mutex_lock(&dcc->cmd_lock); @@ -1754,6 +1817,7 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi) dcc->nr_discards = 0; dcc->max_discards = MAIN_SEGS(sbi) << sbi->log_blocks_per_seg; dcc->undiscard_blks = 0; + dcc->next_pos = 0; dcc->root = RB_ROOT; dcc->rbtree_check = false; -- GitLab From ef2544b77113d63f796a37d8b26cc2379e23a608 Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Mon, 9 Jul 2018 20:32:42 -0700 Subject: [PATCH 0043/2269] f2fs: Keep alloc_valid_block_count in sync If we attempt to request more blocks than we have room for, we try to instead request as much as we can, however, alloc_valid_block_count is not decremented to match the new value, allowing it to drift higher until the next checkpoint. This always decrements it when the requested amount cannot be fulfilled. Signed-off-by: Daniel Rosenberg Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 3ab54ed6aa124..593d52cf7c2db 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1687,18 +1687,20 @@ static inline int inc_valid_block_count(struct f2fs_sb_info *sbi, sbi->total_valid_block_count -= diff; if (!*count) { spin_unlock(&sbi->stat_lock); - percpu_counter_sub(&sbi->alloc_valid_block_count, diff); goto enospc; } } spin_unlock(&sbi->stat_lock); - if (unlikely(release)) + if (unlikely(release)) { + percpu_counter_sub(&sbi->alloc_valid_block_count, release); dquot_release_reservation_block(inode, release); + } f2fs_i_blocks_write(inode, *count, true, true); return 0; enospc: + percpu_counter_sub(&sbi->alloc_valid_block_count, release); dquot_release_reservation_block(inode, release); return -ENOSPC; } -- GitLab From 390638608a5bcd0b503094474812bc374a9f355f Mon Sep 17 00:00:00 2001 From: Yunlong Song Date: Thu, 12 Jul 2018 23:09:26 +0800 Subject: [PATCH 0044/2269] f2fs: do not set free of current section For the case when sbi->segs_per_sec > 1, take section:segment = 5 for example, if segment 1 is just used and allocate new segment 2, and the blocks of segment 1 is invalidated, at this time, the previous code will use __set_test_and_free to free the free_secmap and free_sections++, this is not correct since it is still a current section, so fix it. Signed-off-by: Yunlong Song Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index b5bd3287e1044..50495515f0a0d 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -448,6 +448,8 @@ static inline void __set_test_and_free(struct f2fs_sb_info *sbi, if (test_and_clear_bit(segno, free_i->free_segmap)) { free_i->free_segments++; + if (IS_CURSEC(sbi, secno)) + goto skip_free; next = find_next_bit(free_i->free_segmap, start_segno + sbi->segs_per_sec, start_segno); if (next >= start_segno + sbi->segs_per_sec) { @@ -455,6 +457,7 @@ static inline void __set_test_and_free(struct f2fs_sb_info *sbi, free_i->free_sections++; } } +skip_free: spin_unlock(&free_i->segmap_lock); } -- GitLab From e5d1b8b3f18c58c476b68118cc2584a34c241a9d Mon Sep 17 00:00:00 2001 From: Yunlong Song Date: Thu, 12 Jul 2018 23:09:28 +0800 Subject: [PATCH 0045/2269] f2fs: blk_finish_plug of submit_bio in lfs mode Expand the blk_finish_plug action from blkzoned to normal lfs mode, since plug will cause the out-of-order IO submission, which is not friendly to flash in lfs mode. Signed-off-by: Yunlong Song Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 589b5ea34f728..4d22d86847f4d 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -264,7 +264,7 @@ static inline void __submit_bio(struct f2fs_sb_info *sbi, if (type != DATA && type != NODE) goto submit_io; - if (f2fs_sb_has_blkzoned(sbi->sb) && current->plug) + if (test_opt(sbi, LFS) && current->plug) blk_finish_plug(current->plug); start = bio->bi_iter.bi_size >> F2FS_BLKSIZE_BITS; -- GitLab From 96de3df5a13d4cf79e30c08ecf2581f4e6e784b6 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sun, 8 Jul 2018 22:16:55 +0800 Subject: [PATCH 0046/2269] f2fs: fix to do sanity check with i_extra_isize If inode.i_extra_isize was fuzzed to an abnormal value, when calculating inline data size, the result will overflow, result in accessing invalid memory area when operating inline data. Let's do sanity check with i_extra_isize during inode loading for fixing. https://bugzilla.kernel.org/show_bug.cgi?id=200421 - Reproduce - POC (poc.c) #define _GNU_SOURCE #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static void activity(char *mpoint) { char *foo_bar_baz; char *foo_baz; char *xattr; int err; err = asprintf(&foo_bar_baz, "%s/foo/bar/baz", mpoint); err = asprintf(&foo_baz, "%s/foo/baz", mpoint); err = asprintf(&xattr, "%s/foo/bar/xattr", mpoint); rename(foo_bar_baz, foo_baz); char buf2[113]; memset(buf2, 0, sizeof(buf2)); listxattr(xattr, buf2, sizeof(buf2)); removexattr(xattr, "user.mime_type"); } int main(int argc, char *argv[]) { activity(argv[1]); return 0; } - Kernel message Umount the image will leave the following message [ 2910.995489] F2FS-fs (loop0): Mounted with checkpoint version = 2 [ 2918.416465] ================================================================== [ 2918.416807] BUG: KASAN: slab-out-of-bounds in f2fs_iget+0xcb9/0x1a80 [ 2918.417009] Read of size 4 at addr ffff88018efc2068 by task a.out/1229 [ 2918.417311] CPU: 1 PID: 1229 Comm: a.out Not tainted 4.17.0+ #1 [ 2918.417314] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 [ 2918.417323] Call Trace: [ 2918.417366] dump_stack+0x71/0xab [ 2918.417401] print_address_description+0x6b/0x290 [ 2918.417407] kasan_report+0x28e/0x390 [ 2918.417411] ? f2fs_iget+0xcb9/0x1a80 [ 2918.417415] f2fs_iget+0xcb9/0x1a80 [ 2918.417422] ? f2fs_lookup+0x2e7/0x580 [ 2918.417425] f2fs_lookup+0x2e7/0x580 [ 2918.417433] ? __recover_dot_dentries+0x400/0x400 [ 2918.417447] ? legitimize_path.isra.29+0x5a/0xa0 [ 2918.417453] __lookup_slow+0x11c/0x220 [ 2918.417457] ? may_delete+0x2a0/0x2a0 [ 2918.417475] ? deref_stack_reg+0xe0/0xe0 [ 2918.417479] ? __lookup_hash+0xb0/0xb0 [ 2918.417483] lookup_slow+0x3e/0x60 [ 2918.417488] walk_component+0x3ac/0x990 [ 2918.417492] ? generic_permission+0x51/0x1e0 [ 2918.417495] ? inode_permission+0x51/0x1d0 [ 2918.417499] ? pick_link+0x3e0/0x3e0 [ 2918.417502] ? link_path_walk+0x4b1/0x770 [ 2918.417513] ? _raw_spin_lock_irqsave+0x25/0x50 [ 2918.417518] ? walk_component+0x990/0x990 [ 2918.417522] ? path_init+0x2e6/0x580 [ 2918.417526] path_lookupat+0x13f/0x430 [ 2918.417531] ? trailing_symlink+0x3a0/0x3a0 [ 2918.417534] ? do_renameat2+0x270/0x7b0 [ 2918.417538] ? __kasan_slab_free+0x14c/0x190 [ 2918.417541] ? do_renameat2+0x270/0x7b0 [ 2918.417553] ? kmem_cache_free+0x85/0x1e0 [ 2918.417558] ? do_renameat2+0x270/0x7b0 [ 2918.417563] filename_lookup+0x13c/0x280 [ 2918.417567] ? filename_parentat+0x2b0/0x2b0 [ 2918.417572] ? kasan_unpoison_shadow+0x31/0x40 [ 2918.417575] ? kasan_kmalloc+0xa6/0xd0 [ 2918.417593] ? strncpy_from_user+0xaa/0x1c0 [ 2918.417598] ? getname_flags+0x101/0x2b0 [ 2918.417614] ? path_listxattr+0x87/0x110 [ 2918.417619] path_listxattr+0x87/0x110 [ 2918.417623] ? listxattr+0xc0/0xc0 [ 2918.417637] ? mm_fault_error+0x1b0/0x1b0 [ 2918.417654] do_syscall_64+0x73/0x160 [ 2918.417660] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 2918.417676] RIP: 0033:0x7f2f3a3480d7 [ 2918.417677] Code: f0 ff ff 73 01 c3 48 8b 0d be dd 2b 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 b8 c2 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 91 dd 2b 00 f7 d8 64 89 01 48 [ 2918.417732] RSP: 002b:00007fff4095b7d8 EFLAGS: 00000206 ORIG_RAX: 00000000000000c2 [ 2918.417744] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f2f3a3480d7 [ 2918.417746] RDX: 0000000000000071 RSI: 00007fff4095b810 RDI: 000000000126a0c0 [ 2918.417749] RBP: 00007fff4095b890 R08: 000000000126a010 R09: 0000000000000000 [ 2918.417751] R10: 00000000000001ab R11: 0000000000000206 R12: 00000000004005e0 [ 2918.417753] R13: 00007fff4095b990 R14: 0000000000000000 R15: 0000000000000000 [ 2918.417853] Allocated by task 329: [ 2918.418002] kasan_kmalloc+0xa6/0xd0 [ 2918.418007] kmem_cache_alloc+0xc8/0x1e0 [ 2918.418023] mempool_init_node+0x194/0x230 [ 2918.418027] mempool_init+0x12/0x20 [ 2918.418042] bioset_init+0x2bd/0x380 [ 2918.418052] blk_alloc_queue_node+0xe9/0x540 [ 2918.418075] dm_create+0x2c0/0x800 [ 2918.418080] dev_create+0xd2/0x530 [ 2918.418083] ctl_ioctl+0x2a3/0x5b0 [ 2918.418087] dm_ctl_ioctl+0xa/0x10 [ 2918.418092] do_vfs_ioctl+0x13e/0x8c0 [ 2918.418095] ksys_ioctl+0x66/0x70 [ 2918.418098] __x64_sys_ioctl+0x3d/0x50 [ 2918.418102] do_syscall_64+0x73/0x160 [ 2918.418106] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 2918.418204] Freed by task 0: [ 2918.418301] (stack is not available) [ 2918.418521] The buggy address belongs to the object at ffff88018efc0000 which belongs to the cache biovec-max of size 8192 [ 2918.418894] The buggy address is located 104 bytes to the right of 8192-byte region [ffff88018efc0000, ffff88018efc2000) [ 2918.419257] The buggy address belongs to the page: [ 2918.419431] page:ffffea00063bf000 count:1 mapcount:0 mapping:ffff8801f2242540 index:0x0 compound_mapcount: 0 [ 2918.419702] flags: 0x17fff8000008100(slab|head) [ 2918.419879] raw: 017fff8000008100 dead000000000100 dead000000000200 ffff8801f2242540 [ 2918.420101] raw: 0000000000000000 0000000000030003 00000001ffffffff 0000000000000000 [ 2918.420322] page dumped because: kasan: bad access detected [ 2918.420599] Memory state around the buggy address: [ 2918.420764] ffff88018efc1f00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 2918.420975] ffff88018efc1f80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 2918.421194] >ffff88018efc2000: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 2918.421406] ^ [ 2918.421627] ffff88018efc2080: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 2918.421838] ffff88018efc2100: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 2918.422046] ================================================================== [ 2918.422264] Disabling lock debugging due to kernel taint [ 2923.901641] BUG: unable to handle kernel paging request at ffff88018f0db000 [ 2923.901884] PGD 22226a067 P4D 22226a067 PUD 222273067 PMD 18e642063 PTE 800000018f0db061 [ 2923.902120] Oops: 0003 [#1] SMP KASAN PTI [ 2923.902274] CPU: 1 PID: 1231 Comm: umount Tainted: G B 4.17.0+ #1 [ 2923.902490] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 [ 2923.902761] RIP: 0010:__memset+0x24/0x30 [ 2923.902906] Code: 90 90 90 90 90 90 66 66 90 66 90 49 89 f9 48 89 d1 83 e2 07 48 c1 e9 03 40 0f b6 f6 48 b8 01 01 01 01 01 01 01 01 48 0f af c6 48 ab 89 d1 f3 aa 4c 89 c8 c3 90 49 89 f9 40 88 f0 48 89 d1 f3 [ 2923.903446] RSP: 0018:ffff88018ddf7ae0 EFLAGS: 00010206 [ 2923.903622] RAX: 0000000000000000 RBX: ffff8801d549d888 RCX: 1ffffffffffdaffb [ 2923.903833] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff88018f0daffc [ 2923.904062] RBP: ffff88018efc206c R08: 1ffff10031df840d R09: ffff88018efc206c [ 2923.904273] R10: ffffffffffffe1ee R11: ffffed0031df65fa R12: 0000000000000000 [ 2923.904485] R13: ffff8801d549dc98 R14: 00000000ffffc3db R15: ffffea00063bec80 [ 2923.904693] FS: 00007fa8b2f8a840(0000) GS:ffff8801f3b00000(0000) knlGS:0000000000000000 [ 2923.904937] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 2923.910080] CR2: ffff88018f0db000 CR3: 000000018f892000 CR4: 00000000000006e0 [ 2923.914930] Call Trace: [ 2923.919724] f2fs_truncate_inline_inode+0x114/0x170 [ 2923.924487] f2fs_truncate_blocks+0x11b/0x7c0 [ 2923.929178] ? f2fs_truncate_data_blocks+0x10/0x10 [ 2923.933834] ? dqget+0x670/0x670 [ 2923.938437] ? f2fs_destroy_extent_tree+0xd6/0x270 [ 2923.943107] ? __radix_tree_lookup+0x2f/0x150 [ 2923.947772] f2fs_truncate+0xd4/0x1a0 [ 2923.952491] f2fs_evict_inode+0x5ab/0x610 [ 2923.957204] evict+0x15f/0x280 [ 2923.961898] __dentry_kill+0x161/0x250 [ 2923.966634] shrink_dentry_list+0xf3/0x250 [ 2923.971897] shrink_dcache_parent+0xa9/0x100 [ 2923.976561] ? shrink_dcache_sb+0x1f0/0x1f0 [ 2923.981177] ? wait_for_completion+0x8a/0x210 [ 2923.985781] ? migrate_swap_stop+0x2d0/0x2d0 [ 2923.990332] do_one_tree+0xe/0x40 [ 2923.994735] shrink_dcache_for_umount+0x3a/0xa0 [ 2923.999077] generic_shutdown_super+0x3e/0x1c0 [ 2924.003350] kill_block_super+0x4b/0x70 [ 2924.007619] deactivate_locked_super+0x65/0x90 [ 2924.011812] cleanup_mnt+0x5c/0xa0 [ 2924.015995] task_work_run+0xce/0xf0 [ 2924.020174] exit_to_usermode_loop+0x115/0x120 [ 2924.024293] do_syscall_64+0x12f/0x160 [ 2924.028479] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 2924.032709] RIP: 0033:0x7fa8b2868487 [ 2924.036888] Code: 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 31 f6 e9 09 00 00 00 66 0f 1f 84 00 00 00 00 00 b8 a6 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d e1 c9 2b 00 f7 d8 64 89 01 48 [ 2924.045750] RSP: 002b:00007ffc39824d58 EFLAGS: 00000246 ORIG_RAX: 00000000000000a6 [ 2924.050190] RAX: 0000000000000000 RBX: 00000000008ea030 RCX: 00007fa8b2868487 [ 2924.054604] RDX: 0000000000000001 RSI: 0000000000000000 RDI: 00000000008f4360 [ 2924.058940] RBP: 00000000008f4360 R08: 0000000000000000 R09: 0000000000000014 [ 2924.063186] R10: 00000000000006b2 R11: 0000000000000246 R12: 00007fa8b2d7183c [ 2924.067418] R13: 0000000000000000 R14: 00000000008ea210 R15: 00007ffc39824fe0 [ 2924.071534] Modules linked in: snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hda_core snd_hwdep snd_pcm snd_timer joydev input_leds serio_raw snd soundcore mac_hid i2c_piix4 ib_iser rdma_cm iw_cm ib_cm ib_core configfs iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi btrfs zstd_decompress zstd_compress xxhash raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx xor raid6_pq libcrc32c raid1 raid0 multipath linear 8139too qxl ttm drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops drm crct10dif_pclmul crc32_pclmul ghash_clmulni_intel pcbc aesni_intel psmouse aes_x86_64 8139cp crypto_simd cryptd mii glue_helper pata_acpi floppy [ 2924.098044] CR2: ffff88018f0db000 [ 2924.102520] ---[ end trace a8e0d899985faf31 ]--- [ 2924.107012] RIP: 0010:__memset+0x24/0x30 [ 2924.111448] Code: 90 90 90 90 90 90 66 66 90 66 90 49 89 f9 48 89 d1 83 e2 07 48 c1 e9 03 40 0f b6 f6 48 b8 01 01 01 01 01 01 01 01 48 0f af c6 48 ab 89 d1 f3 aa 4c 89 c8 c3 90 49 89 f9 40 88 f0 48 89 d1 f3 [ 2924.120724] RSP: 0018:ffff88018ddf7ae0 EFLAGS: 00010206 [ 2924.125312] RAX: 0000000000000000 RBX: ffff8801d549d888 RCX: 1ffffffffffdaffb [ 2924.129931] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff88018f0daffc [ 2924.134537] RBP: ffff88018efc206c R08: 1ffff10031df840d R09: ffff88018efc206c [ 2924.139175] R10: ffffffffffffe1ee R11: ffffed0031df65fa R12: 0000000000000000 [ 2924.143825] R13: ffff8801d549dc98 R14: 00000000ffffc3db R15: ffffea00063bec80 [ 2924.148500] FS: 00007fa8b2f8a840(0000) GS:ffff8801f3b00000(0000) knlGS:0000000000000000 [ 2924.153247] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 2924.158003] CR2: ffff88018f0db000 CR3: 000000018f892000 CR4: 00000000000006e0 [ 2924.164641] BUG: Bad rss-counter state mm:00000000fa04621e idx:0 val:4 [ 2924.170007] BUG: Bad rss-counter tate mm:00000000fa04621e idx:1 val:2 - Location https://elixir.bootlin.com/linux/v4.18-rc3/source/fs/f2fs/inline.c#L78 memset(addr + from, 0, MAX_INLINE_DATA(inode) - from); Here the length can be negative. Reported-by Wen Xu Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/inode.c | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index ccdf6abde5f76..740988bc250de 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -196,6 +196,7 @@ void f2fs_inode_chksum_set(struct f2fs_sb_info *sbi, struct page *page) static bool sanity_check_inode(struct inode *inode, struct page *node_page) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct f2fs_inode_info *fi = F2FS_I(inode); unsigned long long iblocks; iblocks = le64_to_cpu(F2FS_INODE(node_page)->i_blocks); @@ -237,6 +238,17 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page) return false; } + if (fi->i_extra_isize > F2FS_TOTAL_EXTRA_ATTR_SIZE || + fi->i_extra_isize % sizeof(__le32)) { + set_sbi_flag(sbi, SBI_NEED_FSCK); + f2fs_msg(sbi->sb, KERN_WARNING, + "%s: inode (ino=%lx) has corrupted i_extra_isize: %d, " + "max: %zu", + __func__, inode->i_ino, fi->i_extra_isize, + F2FS_TOTAL_EXTRA_ATTR_SIZE); + return false; + } + if (F2FS_I(inode)->extent_tree) { struct extent_info *ei = &F2FS_I(inode)->extent_tree->largest; @@ -305,11 +317,6 @@ static int do_read_inode(struct inode *inode) get_inline_info(inode, ri); - if (!sanity_check_inode(inode, node_page)) { - f2fs_put_page(node_page, 1); - return -EINVAL; - } - fi->i_extra_isize = f2fs_has_extra_attr(inode) ? le16_to_cpu(ri->i_extra_isize) : 0; @@ -329,6 +336,11 @@ static int do_read_inode(struct inode *inode) fi->i_inline_xattr_size = 0; } + if (!sanity_check_inode(inode, node_page)) { + f2fs_put_page(node_page, 1); + return -EINVAL; + } + /* check data exist */ if (f2fs_has_inline_data(inode) && !f2fs_exist_data(inode)) __recover_inline_status(inode, node_page); -- GitLab From 629b633fd09c524c64a3a23d0cdad8d2accf32e5 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 17 Jul 2018 00:02:17 +0800 Subject: [PATCH 0047/2269] f2fs: fix to propagate error from __get_meta_page() If caller of __get_meta_page() can handle error, let's propagate error from __get_meta_page(). Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 55 ++++++++++++++++++++------- fs/f2fs/data.c | 24 ++++++++++-- fs/f2fs/f2fs.h | 8 +++- fs/f2fs/file.c | 7 +++- fs/f2fs/gc.c | 16 ++++++-- fs/f2fs/inline.c | 14 ++++++- fs/f2fs/inode.c | 12 +++++- fs/f2fs/node.c | 90 ++++++++++++++++++++++++++++++++++---------- fs/f2fs/recovery.c | 13 ++++++- fs/f2fs/segment.c | 37 +++++++++++++----- 10 files changed, 220 insertions(+), 56 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 480205ed76790..01733aeefbfae 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -71,6 +71,7 @@ static struct page *__get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index, .encrypted_page = NULL, .is_meta = is_meta, }; + int err; if (unlikely(!is_meta)) fio.op_flags &= ~REQ_META; @@ -85,11 +86,10 @@ repeat: fio.page = page; - if (f2fs_submit_page_bio(&fio)) { - memset(page_address(page), 0, PAGE_SIZE); - f2fs_stop_checkpoint(sbi, false); - f2fs_bug_on(sbi, 1); - return page; + err = f2fs_submit_page_bio(&fio); + if (err) { + f2fs_put_page(page, 1); + return ERR_PTR(err); } lock_page(page); @@ -98,14 +98,9 @@ repeat: goto repeat; } - /* - * if there is any IO error when accessing device, make our filesystem - * readonly and make sure do not write checkpoint with non-uptodate - * meta page. - */ if (unlikely(!PageUptodate(page))) { - memset(page_address(page), 0, PAGE_SIZE); - f2fs_stop_checkpoint(sbi, false); + f2fs_put_page(page, 1); + return ERR_PTR(-EIO); } out: return page; @@ -116,6 +111,25 @@ struct page *f2fs_get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index) return __get_meta_page(sbi, index, true); } +struct page *f2fs_get_meta_page_nofail(struct f2fs_sb_info *sbi, pgoff_t index) +{ + struct page *page; + int count = 0; + +retry: + page = __get_meta_page(sbi, index, true); + if (IS_ERR(page)) { + if (PTR_ERR(page) == -EIO && + ++count <= DEFAULT_RETRY_IO_COUNT) + goto retry; + + f2fs_stop_checkpoint(sbi, false); + f2fs_bug_on(sbi, 1); + } + + return page; +} + /* for POR only */ struct page *f2fs_get_tmp_page(struct f2fs_sb_info *sbi, pgoff_t index) { @@ -607,7 +621,9 @@ static int recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) /* truncate all the data during iput */ iput(inode); - f2fs_get_node_info(sbi, ino, &ni); + err = f2fs_get_node_info(sbi, ino, &ni); + if (err) + goto err_out; /* ENOMEM was fully retried in f2fs_evict_inode. */ if (ni.blk_addr != NULL_ADDR) { @@ -655,9 +671,15 @@ int f2fs_recover_orphan_inodes(struct f2fs_sb_info *sbi) f2fs_ra_meta_pages(sbi, start_blk, orphan_blocks, META_CP, true); for (i = 0; i < orphan_blocks; i++) { - struct page *page = f2fs_get_meta_page(sbi, start_blk + i); + struct page *page; struct f2fs_orphan_block *orphan_blk; + page = f2fs_get_meta_page(sbi, start_blk + i); + if (IS_ERR(page)) { + err = PTR_ERR(page); + goto out; + } + orphan_blk = (struct f2fs_orphan_block *)page_address(page); for (j = 0; j < le32_to_cpu(orphan_blk->entry_count); j++) { nid_t ino = le32_to_cpu(orphan_blk->ino[j]); @@ -748,6 +770,9 @@ static int get_checkpoint_version(struct f2fs_sb_info *sbi, block_t cp_addr, __u32 crc = 0; *cp_page = f2fs_get_meta_page(sbi, cp_addr); + if (IS_ERR(*cp_page)) + return PTR_ERR(*cp_page); + *cp_block = (struct f2fs_checkpoint *)page_address(*cp_page); crc_offset = le32_to_cpu((*cp_block)->checksum_offset); @@ -873,6 +898,8 @@ int f2fs_get_valid_checkpoint(struct f2fs_sb_info *sbi) unsigned char *ckpt = (unsigned char *)sbi->ckpt; cur_page = f2fs_get_meta_page(sbi, cp_blk_no + i); + if (IS_ERR(cur_page)) + goto free_fail_no_cp; sit_bitmap_ptr = page_address(cur_page); memcpy(ckpt + i * blk_size, sit_bitmap_ptr, blk_size); f2fs_put_page(cur_page, 1); diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 4d22d86847f4d..a9668991f3d56 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -879,6 +879,10 @@ static int __allocate_data_block(struct dnode_of_data *dn, int seg_type) if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC))) return -EPERM; + err = f2fs_get_node_info(sbi, dn->nid, &ni); + if (err) + return err; + dn->data_blkaddr = datablock_addr(dn->inode, dn->node_page, dn->ofs_in_node); if (dn->data_blkaddr == NEW_ADDR) @@ -888,7 +892,6 @@ static int __allocate_data_block(struct dnode_of_data *dn, int seg_type) return err; alloc: - f2fs_get_node_info(sbi, dn->nid, &ni); set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version); f2fs_allocate_data_block(sbi, NULL, dn->data_blkaddr, &dn->data_blkaddr, @@ -1291,7 +1294,11 @@ static int f2fs_xattr_fiemap(struct inode *inode, if (!page) return -ENOMEM; - f2fs_get_node_info(sbi, inode->i_ino, &ni); + err = f2fs_get_node_info(sbi, inode->i_ino, &ni); + if (err) { + f2fs_put_page(page, 1); + return err; + } phys = (__u64)blk_to_logical(inode, ni.blk_addr); offset = offsetof(struct f2fs_inode, i_addr) + @@ -1318,7 +1325,11 @@ static int f2fs_xattr_fiemap(struct inode *inode, if (!page) return -ENOMEM; - f2fs_get_node_info(sbi, xnid, &ni); + err = f2fs_get_node_info(sbi, xnid, &ni); + if (err) { + f2fs_put_page(page, 1); + return err; + } phys = (__u64)blk_to_logical(inode, ni.blk_addr); len = inode->i_sb->s_blocksize; @@ -1705,6 +1716,7 @@ int f2fs_do_write_data_page(struct f2fs_io_info *fio) struct inode *inode = page->mapping->host; struct dnode_of_data dn; struct extent_info ei = {0,0,0}; + struct node_info ni; bool ipu_force = false; int err = 0; @@ -1773,6 +1785,12 @@ got_it: fio->need_lock = LOCK_REQ; } + err = f2fs_get_node_info(fio->sbi, dn.nid, &ni); + if (err) + goto out_writepage; + + fio->version = ni.version; + err = encrypt_one_page(fio); if (err) goto out_writepage; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 593d52cf7c2db..804f289a8183a 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -514,6 +514,8 @@ enum { */ }; +#define DEFAULT_RETRY_IO_COUNT 8 /* maximum retry read IO count */ + #define F2FS_LINK_MAX 0xffffffff /* maximum link count per file */ #define MAX_DIR_RA_PAGES 4 /* maximum ra pages of dir */ @@ -1021,6 +1023,7 @@ struct f2fs_io_info { bool retry; /* need to reallocate block address */ enum iostat_type io_type; /* io type */ struct writeback_control *io_wbc; /* writeback control */ + unsigned char version; /* version of the node */ }; #define is_read_io(rw) ((rw) == READ) @@ -2824,7 +2827,7 @@ bool f2fs_available_free_memory(struct f2fs_sb_info *sbi, int type); int f2fs_need_dentry_mark(struct f2fs_sb_info *sbi, nid_t nid); bool f2fs_is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid); bool f2fs_need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino); -void f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid, +int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni); pgoff_t f2fs_get_next_page_offset(struct dnode_of_data *dn, pgoff_t pgofs); int f2fs_get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode); @@ -2851,7 +2854,7 @@ int f2fs_try_to_free_nids(struct f2fs_sb_info *sbi, int nr_shrink); void f2fs_recover_inline_xattr(struct inode *inode, struct page *page); int f2fs_recover_xattr_data(struct inode *inode, struct page *page); int f2fs_recover_inode_page(struct f2fs_sb_info *sbi, struct page *page); -void f2fs_restore_node_summary(struct f2fs_sb_info *sbi, +int f2fs_restore_node_summary(struct f2fs_sb_info *sbi, unsigned int segno, struct f2fs_summary_block *sum); void f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc); int f2fs_build_node_manager(struct f2fs_sb_info *sbi); @@ -2929,6 +2932,7 @@ enum rw_hint f2fs_io_type_to_rw_hint(struct f2fs_sb_info *sbi, void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io); struct page *f2fs_grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index); struct page *f2fs_get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index); +struct page *f2fs_get_meta_page_nofail(struct f2fs_sb_info *sbi, pgoff_t index); struct page *f2fs_get_tmp_page(struct f2fs_sb_info *sbi, pgoff_t index); bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 41c562feaa991..41f320bd2aa7f 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1067,7 +1067,12 @@ static int __clone_blkaddrs(struct inode *src_inode, struct inode *dst_inode, if (ret) return ret; - f2fs_get_node_info(sbi, dn.nid, &ni); + ret = f2fs_get_node_info(sbi, dn.nid, &ni); + if (ret) { + f2fs_put_dnode(&dn); + return ret; + } + ilen = min((pgoff_t) ADDRS_PER_PAGE(dn.node_page, dst_inode) - dn.ofs_in_node, len - i); diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 9be5b5007459b..bca09c5fd6e58 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -517,7 +517,11 @@ next_step: continue; } - f2fs_get_node_info(sbi, nid, &ni); + if (f2fs_get_node_info(sbi, nid, &ni)) { + f2fs_put_page(node_page, 1); + continue; + } + if (ni.blk_addr != start_addr + off) { f2fs_put_page(node_page, 1); continue; @@ -576,7 +580,10 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, if (IS_ERR(node_page)) return false; - f2fs_get_node_info(sbi, nid, dni); + if (f2fs_get_node_info(sbi, nid, dni)) { + f2fs_put_page(node_page, 1); + return false; + } if (sum->version != dni->version) { f2fs_msg(sbi->sb, KERN_WARNING, @@ -655,7 +662,10 @@ static void move_data_block(struct inode *inode, block_t bidx, */ f2fs_wait_on_page_writeback(page, DATA, true); - f2fs_get_node_info(fio.sbi, dn.nid, &ni); + err = f2fs_get_node_info(fio.sbi, dn.nid, &ni); + if (err) + goto put_out; + set_summary(&sum, dn.nid, dn.ofs_in_node, ni.version); /* read page */ diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 2bcb2d36f024e..115dc219344b1 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -121,6 +121,7 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page) .encrypted_page = NULL, .io_type = FS_DATA_IO, }; + struct node_info ni; int dirty, err; if (!f2fs_exist_data(dn->inode)) @@ -130,6 +131,14 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page) if (err) return err; + err = f2fs_get_node_info(fio.sbi, dn->nid, &ni); + if (err) { + f2fs_put_dnode(dn); + return err; + } + + fio.version = ni.version; + if (unlikely(dn->data_blkaddr != NEW_ADDR)) { f2fs_put_dnode(dn); set_sbi_flag(fio.sbi, SBI_NEED_FSCK); @@ -690,7 +699,10 @@ int f2fs_inline_data_fiemap(struct inode *inode, ilen = start + len; ilen -= start; - f2fs_get_node_info(F2FS_I_SB(inode), inode->i_ino, &ni); + err = f2fs_get_node_info(F2FS_I_SB(inode), inode->i_ino, &ni); + if (err) + goto out; + byteaddr = (__u64)ni.blk_addr << inode->i_sb->s_blocksize_bits; byteaddr += (char *)inline_data_addr(inode, ipage) - (char *)F2FS_INODE(ipage); diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 740988bc250de..35d49528b2c18 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -699,6 +699,7 @@ void f2fs_handle_failed_inode(struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct node_info ni; + int err; /* * clear nlink of inode in order to release resource of inode @@ -721,10 +722,16 @@ void f2fs_handle_failed_inode(struct inode *inode) * so we can prevent losing this orphan when encoutering checkpoint * and following suddenly power-off. */ - f2fs_get_node_info(sbi, inode->i_ino, &ni); + err = f2fs_get_node_info(sbi, inode->i_ino, &ni); + if (err) { + set_sbi_flag(sbi, SBI_NEED_FSCK); + f2fs_msg(sbi->sb, KERN_WARNING, + "May loss orphan inode, run fsck to fix."); + goto out; + } if (ni.blk_addr != NULL_ADDR) { - int err = f2fs_acquire_orphan_inode(sbi); + err = f2fs_acquire_orphan_inode(sbi); if (err) { set_sbi_flag(sbi, SBI_NEED_FSCK); f2fs_msg(sbi->sb, KERN_WARNING, @@ -737,6 +744,7 @@ void f2fs_handle_failed_inode(struct inode *inode) set_inode_flag(inode, FI_FREE_NID); } +out: f2fs_unlock_op(sbi); /* iput will drop the inode object */ diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 3f7af7608c83c..43acc5a4257d9 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -113,7 +113,7 @@ static void clear_node_page_dirty(struct page *page) static struct page *get_current_nat_page(struct f2fs_sb_info *sbi, nid_t nid) { pgoff_t index = current_nat_addr(sbi, nid); - return f2fs_get_meta_page(sbi, index); + return f2fs_get_meta_page_nofail(sbi, index); } static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid) @@ -419,7 +419,7 @@ int f2fs_try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink) /* * This function always returns success */ -void f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid, +int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni) { struct f2fs_nm_info *nm_i = NM_I(sbi); @@ -443,7 +443,7 @@ void f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid, ni->blk_addr = nat_get_blkaddr(e); ni->version = nat_get_version(e); up_read(&nm_i->nat_tree_lock); - return; + return 0; } memset(&ne, 0, sizeof(struct f2fs_nat_entry)); @@ -466,6 +466,9 @@ void f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid, up_read(&nm_i->nat_tree_lock); page = f2fs_get_meta_page(sbi, index); + if (IS_ERR(page)) + return PTR_ERR(page); + nat_blk = (struct f2fs_nat_block *)page_address(page); ne = nat_blk->entries[nid - start_nid]; node_info_from_raw_nat(ni, &ne); @@ -473,6 +476,7 @@ void f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid, cache: /* cache nat entry */ cache_nat_entry(sbi, nid, &ne); + return 0; } /* @@ -722,12 +726,15 @@ release_out: return err; } -static void truncate_node(struct dnode_of_data *dn) +static int truncate_node(struct dnode_of_data *dn) { struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); struct node_info ni; + int err; - f2fs_get_node_info(sbi, dn->nid, &ni); + err = f2fs_get_node_info(sbi, dn->nid, &ni); + if (err) + return err; /* Deallocate node address */ f2fs_invalidate_blocks(sbi, ni.blk_addr); @@ -750,11 +757,14 @@ static void truncate_node(struct dnode_of_data *dn) dn->node_page = NULL; trace_f2fs_truncate_node(dn->inode, dn->nid, ni.blk_addr); + + return 0; } static int truncate_dnode(struct dnode_of_data *dn) { struct page *page; + int err; if (dn->nid == 0) return 1; @@ -770,7 +780,10 @@ static int truncate_dnode(struct dnode_of_data *dn) dn->node_page = page; dn->ofs_in_node = 0; f2fs_truncate_data_blocks(dn); - truncate_node(dn); + err = truncate_node(dn); + if (err) + return err; + return 1; } @@ -835,7 +848,9 @@ static int truncate_nodes(struct dnode_of_data *dn, unsigned int nofs, if (!ofs) { /* remove current indirect node */ dn->node_page = page; - truncate_node(dn); + ret = truncate_node(dn); + if (ret) + goto out_err; freed++; } else { f2fs_put_page(page, 1); @@ -893,7 +908,9 @@ static int truncate_partial_nodes(struct dnode_of_data *dn, if (offset[idx + 1] == 0) { dn->node_page = pages[idx]; dn->nid = nid[idx]; - truncate_node(dn); + err = truncate_node(dn); + if (err) + goto fail; } else { f2fs_put_page(pages[idx], 1); } @@ -1014,6 +1031,7 @@ int f2fs_truncate_xattr_node(struct inode *inode) nid_t nid = F2FS_I(inode)->i_xattr_nid; struct dnode_of_data dn; struct page *npage; + int err; if (!nid) return 0; @@ -1022,10 +1040,15 @@ int f2fs_truncate_xattr_node(struct inode *inode) if (IS_ERR(npage)) return PTR_ERR(npage); + set_new_dnode(&dn, inode, NULL, npage, nid); + err = truncate_node(&dn); + if (err) { + f2fs_put_page(npage, 1); + return err; + } + f2fs_i_xnid_write(inode, 0); - set_new_dnode(&dn, inode, NULL, npage, nid); - truncate_node(&dn); return 0; } @@ -1059,7 +1082,11 @@ int f2fs_remove_inode_page(struct inode *inode) inode->i_blocks != 0 && inode->i_blocks != 8); /* will put inode & node pages */ - truncate_node(&dn); + err = truncate_node(&dn); + if (err) { + f2fs_put_dnode(&dn); + return err; + } return 0; } @@ -1092,7 +1119,11 @@ struct page *f2fs_new_node_page(struct dnode_of_data *dn, unsigned int ofs) goto fail; #ifdef CONFIG_F2FS_CHECK_FS - f2fs_get_node_info(sbi, dn->nid, &new_ni); + err = f2fs_get_node_info(sbi, dn->nid, &new_ni); + if (err) { + dec_valid_node_count(sbi, dn->inode, !ofs); + goto fail; + } f2fs_bug_on(sbi, new_ni.blk_addr != NULL_ADDR); #endif new_ni.nid = dn->nid; @@ -1140,6 +1171,7 @@ static int read_node_page(struct page *page, int op_flags) .page = page, .encrypted_page = NULL, }; + int err; if (PageUptodate(page)) { #ifdef CONFIG_F2FS_CHECK_FS @@ -1148,7 +1180,9 @@ static int read_node_page(struct page *page, int op_flags) return LOCKED_PAGE; } - f2fs_get_node_info(sbi, page->index, &ni); + err = f2fs_get_node_info(sbi, page->index, &ni); + if (err) + return err; if (unlikely(ni.blk_addr == NULL_ADDR) || is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN)) { @@ -1383,6 +1417,9 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted, nid = nid_of_node(page); f2fs_bug_on(sbi, page->index != nid); + if (f2fs_get_node_info(sbi, nid, &ni)) + goto redirty_out; + if (wbc->for_reclaim) { if (!down_read_trylock(&sbi->node_write)) goto redirty_out; @@ -1390,8 +1427,6 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted, down_read(&sbi->node_write); } - f2fs_get_node_info(sbi, nid, &ni); - /* This page is already truncated */ if (unlikely(ni.blk_addr == NULL_ADDR)) { ClearPageUptodate(page); @@ -2311,12 +2346,16 @@ int f2fs_recover_xattr_data(struct inode *inode, struct page *page) struct dnode_of_data dn; struct node_info ni; struct page *xpage; + int err; if (!prev_xnid) goto recover_xnid; /* 1: invalidate the previous xattr nid */ - f2fs_get_node_info(sbi, prev_xnid, &ni); + err = f2fs_get_node_info(sbi, prev_xnid, &ni); + if (err) + return err; + f2fs_invalidate_blocks(sbi, ni.blk_addr); dec_valid_node_count(sbi, inode, false); set_node_addr(sbi, &ni, NULL_ADDR, false); @@ -2351,8 +2390,11 @@ int f2fs_recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) nid_t ino = ino_of_node(page); struct node_info old_ni, new_ni; struct page *ipage; + int err; - f2fs_get_node_info(sbi, ino, &old_ni); + err = f2fs_get_node_info(sbi, ino, &old_ni); + if (err) + return err; if (unlikely(old_ni.blk_addr != NULL_ADDR)) return -EINVAL; @@ -2406,7 +2448,7 @@ retry: return 0; } -void f2fs_restore_node_summary(struct f2fs_sb_info *sbi, +int f2fs_restore_node_summary(struct f2fs_sb_info *sbi, unsigned int segno, struct f2fs_summary_block *sum) { struct f2fs_node *rn; @@ -2428,6 +2470,9 @@ void f2fs_restore_node_summary(struct f2fs_sb_info *sbi, for (idx = addr; idx < addr + nrpages; idx++) { struct page *page = f2fs_get_tmp_page(sbi, idx); + if (IS_ERR(page)) + return PTR_ERR(page); + rn = F2FS_NODE(page); sum_entry->nid = rn->footer.nid; sum_entry->version = 0; @@ -2439,6 +2484,7 @@ void f2fs_restore_node_summary(struct f2fs_sb_info *sbi, invalidate_mapping_pages(META_MAPPING(sbi), addr, addr + nrpages); } + return 0; } static void remove_nats_in_journal(struct f2fs_sb_info *sbi) @@ -2675,7 +2721,13 @@ static int __get_nat_bitmaps(struct f2fs_sb_info *sbi) nat_bits_addr = __start_cp_addr(sbi) + sbi->blocks_per_seg - nm_i->nat_bits_blocks; for (i = 0; i < nm_i->nat_bits_blocks; i++) { - struct page *page = f2fs_get_meta_page(sbi, nat_bits_addr++); + struct page *page; + + page = f2fs_get_meta_page(sbi, nat_bits_addr++); + if (IS_ERR(page)) { + disable_nat_bits(sbi, true); + return PTR_ERR(page); + } memcpy(nm_i->nat_bits + (i << F2FS_BLKSIZE_BITS), page_address(page), F2FS_BLKSIZE); diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 501767451e2b9..8c46958652784 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -256,6 +256,10 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head, return 0; page = f2fs_get_tmp_page(sbi, blkaddr); + if (IS_ERR(page)) { + err = PTR_ERR(page); + break; + } if (!is_recoverable_dnode(page)) break; @@ -471,7 +475,10 @@ retry_dn: f2fs_wait_on_page_writeback(dn.node_page, NODE, true); - f2fs_get_node_info(sbi, dn.nid, &ni); + err = f2fs_get_node_info(sbi, dn.nid, &ni); + if (err) + goto err; + f2fs_bug_on(sbi, ni.ino != ino_of_node(page)); f2fs_bug_on(sbi, ofs_of_node(dn.node_page) != ofs_of_node(page)); @@ -574,6 +581,10 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list, f2fs_ra_meta_pages_cond(sbi, blkaddr); page = f2fs_get_tmp_page(sbi, blkaddr); + if (IS_ERR(page)) { + err = PTR_ERR(page); + break; + } if (!is_recoverable_dnode(page)) { f2fs_put_page(page, 1); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index f5f04aabe338b..65dcde1d4fb83 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -250,7 +250,13 @@ retry: err = -EAGAIN; goto next; } - f2fs_get_node_info(sbi, dn.nid, &ni); + + err = f2fs_get_node_info(sbi, dn.nid, &ni); + if (err) { + f2fs_put_dnode(&dn); + return err; + } + if (cur->old_addr == NEW_ADDR) { f2fs_invalidate_blocks(sbi, dn.data_blkaddr); f2fs_update_data_blkaddr(&dn, NEW_ADDR); @@ -2051,7 +2057,7 @@ int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra) */ struct page *f2fs_get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno) { - return f2fs_get_meta_page(sbi, GET_SUM_BLOCK(sbi, segno)); + return f2fs_get_meta_page_nofail(sbi, GET_SUM_BLOCK(sbi, segno)); } void f2fs_update_meta_page(struct f2fs_sb_info *sbi, @@ -2911,11 +2917,9 @@ void f2fs_outplace_write_data(struct dnode_of_data *dn, { struct f2fs_sb_info *sbi = fio->sbi; struct f2fs_summary sum; - struct node_info ni; f2fs_bug_on(sbi, dn->data_blkaddr == NULL_ADDR); - f2fs_get_node_info(sbi, dn->nid, &ni); - set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version); + set_summary(&sum, dn->nid, dn->ofs_in_node, fio->version); do_write_page(&sum, fio); f2fs_update_data_blkaddr(dn, fio->new_blkaddr); @@ -3077,7 +3081,7 @@ void f2fs_wait_on_block_writeback(struct f2fs_sb_info *sbi, block_t blkaddr) } } -static void read_compacted_summaries(struct f2fs_sb_info *sbi) +static int read_compacted_summaries(struct f2fs_sb_info *sbi) { struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); struct curseg_info *seg_i; @@ -3089,6 +3093,8 @@ static void read_compacted_summaries(struct f2fs_sb_info *sbi) start = start_sum_block(sbi); page = f2fs_get_meta_page(sbi, start++); + if (IS_ERR(page)) + return PTR_ERR(page); kaddr = (unsigned char *)page_address(page); /* Step 1: restore nat cache */ @@ -3129,11 +3135,14 @@ static void read_compacted_summaries(struct f2fs_sb_info *sbi) page = NULL; page = f2fs_get_meta_page(sbi, start++); + if (IS_ERR(page)) + return PTR_ERR(page); kaddr = (unsigned char *)page_address(page); offset = 0; } } f2fs_put_page(page, 1); + return 0; } static int read_normal_summaries(struct f2fs_sb_info *sbi, int type) @@ -3145,6 +3154,7 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type) unsigned short blk_off; unsigned int segno = 0; block_t blk_addr = 0; + int err = 0; /* get segment number and block addr */ if (IS_DATASEG(type)) { @@ -3168,6 +3178,8 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type) } new = f2fs_get_meta_page(sbi, blk_addr); + if (IS_ERR(new)) + return PTR_ERR(new); sum = (struct f2fs_summary_block *)page_address(new); if (IS_NODESEG(type)) { @@ -3179,7 +3191,9 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type) ns->ofs_in_node = 0; } } else { - f2fs_restore_node_summary(sbi, segno, sum); + err = f2fs_restore_node_summary(sbi, segno, sum); + if (err) + goto out; } } @@ -3199,8 +3213,9 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type) curseg->alloc_type = ckpt->alloc_type[type]; curseg->next_blkoff = blk_off; mutex_unlock(&curseg->curseg_mutex); +out: f2fs_put_page(new, 1); - return 0; + return err; } static int restore_curseg_summaries(struct f2fs_sb_info *sbi) @@ -3218,7 +3233,9 @@ static int restore_curseg_summaries(struct f2fs_sb_info *sbi) META_CP, true); /* restore for compacted data summary */ - read_compacted_summaries(sbi); + err = read_compacted_summaries(sbi); + if (err) + return err; type = CURSEG_HOT_NODE; } @@ -3349,7 +3366,7 @@ int f2fs_lookup_journal_in_cursum(struct f2fs_journal *journal, int type, static struct page *get_current_sit_page(struct f2fs_sb_info *sbi, unsigned int segno) { - return f2fs_get_meta_page(sbi, current_sit_addr(sbi, segno)); + return f2fs_get_meta_page_nofail(sbi, current_sit_addr(sbi, segno)); } static struct page *get_next_sit_page(struct f2fs_sb_info *sbi, -- GitLab From dee7233be772367083ef1959897011e714bb0dab Mon Sep 17 00:00:00 2001 From: Hyunchul Lee Date: Thu, 19 Jul 2018 09:23:57 +0900 Subject: [PATCH 0048/2269] f2fs: avoid duplicated permission check for "trusted." xattrs Because xattr_permission already checks CAP_SYS_ADMIN capability, we don't need to check it. Signed-off-by: Hyunchul Lee Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/xattr.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index 708271871f945..4b34244dcc69e 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -37,9 +37,6 @@ static int f2fs_xattr_generic_get(const struct xattr_handler *handler, return -EOPNOTSUPP; break; case F2FS_XATTR_INDEX_TRUSTED: - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - break; case F2FS_XATTR_INDEX_SECURITY: break; default: @@ -62,9 +59,6 @@ static int f2fs_xattr_generic_set(const struct xattr_handler *handler, return -EOPNOTSUPP; break; case F2FS_XATTR_INDEX_TRUSTED: - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - break; case F2FS_XATTR_INDEX_SECURITY: break; default: -- GitLab From 4a19e8b2fdbc8916c74b13b70ef2d9db260e02dc Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 17 Jul 2018 20:41:45 +0800 Subject: [PATCH 0049/2269] f2fs: kill EXT_TREE_VEC_SIZE Since commit 201ef5e080c9 ("f2fs: improve shrink performance of extent nodes"), there is no user of EXT_TREE_VEC_SIZE, just kill it for cleanup. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 804f289a8183a..41c8fc67210dd 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -520,9 +520,6 @@ enum { #define MAX_DIR_RA_PAGES 4 /* maximum ra pages of dir */ -/* vector size for gang look-up from extent cache that consists of radix tree */ -#define EXT_TREE_VEC_SIZE 64 - /* for in-memory extent cache entry */ #define F2FS_MIN_EXTENT_LEN 64 /* minimum extent length */ -- GitLab From 058cbc41a5f03f9378ce272e7cad188751381863 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 17 Jul 2018 20:41:46 +0800 Subject: [PATCH 0050/2269] f2fs: clean up with get_current_nat_page Just cleanup, no logic change. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 43acc5a4257d9..6af146d30935a 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -112,25 +112,22 @@ static void clear_node_page_dirty(struct page *page) static struct page *get_current_nat_page(struct f2fs_sb_info *sbi, nid_t nid) { - pgoff_t index = current_nat_addr(sbi, nid); - return f2fs_get_meta_page_nofail(sbi, index); + return f2fs_get_meta_page_nofail(sbi, current_nat_addr(sbi, nid)); } static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid) { struct page *src_page; struct page *dst_page; - pgoff_t src_off; pgoff_t dst_off; void *src_addr; void *dst_addr; struct f2fs_nm_info *nm_i = NM_I(sbi); - src_off = current_nat_addr(sbi, nid); - dst_off = next_nat_addr(sbi, src_off); + dst_off = next_nat_addr(sbi, current_nat_addr(sbi, nid)); /* get current nat block page with lock */ - src_page = f2fs_get_meta_page(sbi, src_off); + src_page = get_current_nat_page(sbi, nid); dst_page = f2fs_grab_meta_page(sbi, dst_off); f2fs_bug_on(sbi, PageDirty(src_page)); -- GitLab From a5acdb94b4f02d2f22636d986d4a046c5dba1d97 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 17 Jul 2018 20:41:47 +0800 Subject: [PATCH 0051/2269] f2fs: clean up with f2fs_encrypted_inode() Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 41f320bd2aa7f..f45a94b2db6d7 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1615,7 +1615,7 @@ static int f2fs_ioc_getflags(struct file *filp, unsigned long arg) struct f2fs_inode_info *fi = F2FS_I(inode); unsigned int flags = fi->i_flags; - if (file_is_encrypt(inode)) + if (f2fs_encrypted_inode(inode)) flags |= F2FS_ENCRYPT_FL; if (f2fs_has_inline_data(inode) || f2fs_has_inline_dentry(inode)) flags |= F2FS_INLINE_DATA_FL; -- GitLab From bb1491c87e5cb05f7a0c6725b25e36b538bf3889 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 17 Jul 2018 20:41:48 +0800 Subject: [PATCH 0052/2269] f2fs: clean up with f2fs_is_{atomic,volatile}_file() Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 65dcde1d4fb83..0b6827dba25ea 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2720,8 +2720,8 @@ static int __get_segment_type_6(struct f2fs_io_info *fio) return CURSEG_COLD_DATA; if (file_is_hot(inode) || is_inode_flag_set(inode, FI_HOT_DATA) || - is_inode_flag_set(inode, FI_ATOMIC_FILE) || - is_inode_flag_set(inode, FI_VOLATILE_FILE)) + f2fs_is_atomic_file(inode) || + f2fs_is_volatile_file(inode)) return CURSEG_HOT_DATA; return f2fs_rw_hint_to_seg_type(inode->i_write_hint); } else { -- GitLab From a2f3b2f61acbfa34a20a8c9feadfbfedfae3000c Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 17 Jul 2018 20:41:49 +0800 Subject: [PATCH 0053/2269] f2fs: clean up ioctl interface naming Romve redundant prefix 'f2fs_' in the middle of f2fs_ioc_f2fs_write_checkpoint(). Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index f45a94b2db6d7..7816c0629b5e1 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -2129,7 +2129,7 @@ out: return ret; } -static int f2fs_ioc_f2fs_write_checkpoint(struct file *filp, unsigned long arg) +static int f2fs_ioc_write_checkpoint(struct file *filp, unsigned long arg) { struct inode *inode = file_inode(filp); struct f2fs_sb_info *sbi = F2FS_I_SB(inode); @@ -2910,7 +2910,7 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) case F2FS_IOC_GARBAGE_COLLECT_RANGE: return f2fs_ioc_gc_range(filp, arg); case F2FS_IOC_WRITE_CHECKPOINT: - return f2fs_ioc_f2fs_write_checkpoint(filp, arg); + return f2fs_ioc_write_checkpoint(filp, arg); case F2FS_IOC_DEFRAGMENT: return f2fs_ioc_defragment(filp, arg); case F2FS_IOC_MOVE_RANGE: -- GitLab From 29b4a3803b890f919c92908003d9aba682dba6bd Mon Sep 17 00:00:00 2001 From: Yunlei He Date: Thu, 19 Jul 2018 14:57:14 +0800 Subject: [PATCH 0054/2269] f2fs: fix wrong kernel message when recover fsync data on ro fs This patch fix wrong message info for recover fsync data on readonly fs. Signed-off-by: Yunlei He Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/recovery.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 8c46958652784..0a6e81879a1f8 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -639,7 +639,8 @@ int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only) #endif if (s_flags & MS_RDONLY) { - f2fs_msg(sbi->sb, KERN_INFO, "orphan cleanup on readonly fs"); + f2fs_msg(sbi->sb, KERN_INFO, + "recover fsync data on readonly fs"); sbi->sb->s_flags &= ~MS_RDONLY; } -- GitLab From 75c16e310e9dba614e475454049aa3795536d7ad Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 19 Jul 2018 23:57:54 +0800 Subject: [PATCH 0055/2269] f2fs: restrict setting up inode.i_advise In order to give advise to f2fs to recognize hot/cold file, it is possible that we can set specific bit in inode.i_advise through setxattr(), but there are several bits which are used internally, such as encrypt_bit, keep_size_bit, they should never be changed through setxattr(). So that this patch 1) adds FADVISE_MODIFIABLE_BITS to filter modifiable bits user given, 2) supports to clear {hot,cold}_file bits. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 2 ++ fs/f2fs/xattr.c | 12 +++++++++++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 41c8fc67210dd..080fc0b6c7649 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -605,6 +605,8 @@ enum { #define FADVISE_HOT_BIT 0x20 #define FADVISE_VERITY_BIT 0x40 /* reserved */ +#define FADVISE_MODIFIABLE_BITS (FADVISE_COLD_BIT | FADVISE_HOT_BIT) + #define file_is_cold(inode) is_file(inode, FADVISE_COLD_BIT) #define file_wrong_pino(inode) is_file(inode, FADVISE_LOST_PINO_BIT) #define file_set_cold(inode) set_file(inode, FADVISE_COLD_BIT) diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index 4b34244dcc69e..77a010e625f50 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -94,12 +94,22 @@ static int f2fs_xattr_advise_set(const struct xattr_handler *handler, const char *name, const void *value, size_t size, int flags) { + unsigned char old_advise = F2FS_I(inode)->i_advise; + unsigned char new_advise; + if (!inode_owner_or_capable(inode)) return -EPERM; if (value == NULL) return -EINVAL; - F2FS_I(inode)->i_advise |= *(char *)value; + new_advise = *(char *)value; + if (new_advise & ~FADVISE_MODIFIABLE_BITS) + return -EINVAL; + + new_advise = new_advise & FADVISE_MODIFIABLE_BITS; + new_advise |= old_advise & ~FADVISE_MODIFIABLE_BITS; + + F2FS_I(inode)->i_advise = new_advise; f2fs_mark_inode_dirty_sync(inode, true); return 0; } -- GitLab From ea1f9c29fc17f7585d2ec838d62ed4d3526367bb Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 27 Jul 2018 18:15:11 +0900 Subject: [PATCH 0056/2269] f2fs: don't allow any writes on aborted atomic writes In order to prevent abusing atomic writes by abnormal users, we've added a threshold, 20% over memory footprint, which disallows further atomic writes. Previously, however, SQLite doesn't know the files became normal, so that it could write stale data and commit on revoked normal database file. Once f2fs detects such the abnormal behavior, this patch tries to avoid further writes in write_begin(). Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 5 +++-- fs/f2fs/file.c | 7 ++++++- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index a9668991f3d56..ab7b700fae89f 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -2291,8 +2291,9 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, trace_f2fs_write_begin(inode, pos, len, flags); - if (f2fs_is_atomic_file(inode) && - !f2fs_available_free_memory(sbi, INMEM_PAGES)) { + if ((f2fs_is_atomic_file(inode) && + !f2fs_available_free_memory(sbi, INMEM_PAGES)) || + is_inode_flag_set(inode, FI_ATOMIC_REVOKE_REQUEST)) { err = -ENOMEM; drop_atomic = true; goto fail; diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 7816c0629b5e1..21002cb1df04f 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1708,8 +1708,11 @@ static int f2fs_ioc_start_atomic_write(struct file *filp) down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); - if (f2fs_is_atomic_file(inode)) + if (f2fs_is_atomic_file(inode)) { + if (is_inode_flag_set(inode, FI_ATOMIC_REVOKE_REQUEST)) + ret = -EINVAL; goto out; + } ret = f2fs_convert_inline_inode(inode); if (ret) @@ -1871,6 +1874,8 @@ static int f2fs_ioc_abort_volatile_write(struct file *filp) ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true); } + clear_inode_flag(inode, FI_ATOMIC_REVOKE_REQUEST); + inode_unlock(inode); mnt_drop_write_file(filp); -- GitLab From ff0d5192253735db775cf48ca19964138ea081c3 Mon Sep 17 00:00:00 2001 From: Yunlong Song Date: Thu, 19 Jul 2018 20:58:15 +0800 Subject: [PATCH 0057/2269] f2fs: issue discard align to section in LFS mode For the case when sbi->segs_per_sec > 1 with lfs mode, take section:segment = 5 for example, if the section prefree_map is ...previous section | current section (1 1 0 1 1) | next section..., then the start = x, end = x + 1, after start = start_segno + sbi->segs_per_sec, start = x + 5, then it will skip x + 3 and x + 4, but their bitmap is still set, which will cause duplicated f2fs_issue_discard of this same section in the next write_checkpoint: round 1: section bitmap : 1 1 1 1 1, all valid, prefree_map: 0 0 0 0 0 then rm data block NO.2, block NO.2 becomes invalid, prefree_map: 0 0 1 0 0 write_checkpoint: section bitmap: 1 1 0 1 1, prefree_map: 0 0 0 0 0, prefree of NO.2 is cleared, and no discard issued round 2: rm data block NO.0, NO.1, NO.3, NO.4 all invalid, but prefree bit of NO.2 is set and cleared in round 1, then prefree_map: 1 1 0 1 1 write_checkpoint: section bitmap: 0 0 0 0 0, prefree_map: 0 0 0 1 1, no valid blocks of this section, so discard issued, but this time prefree bit of NO.3 and NO.4 is skipped due to start = start_segno + sbi->segs_per_sec; round 3: write_checkpoint: section bitmap: 0 0 0 0 0, prefree_map: 0 0 0 1 1 -> 0 0 0 0 0, no valid blocks of this section, so discard issued, this time prefree bit of NO.3 and NO.4 is cleared, but the discard of this section is sent again... To fix this problem, we can align the start and end value to section boundary for fstrim and real-time discard operation, and decide to issue discard only when the whole section is invalid, which can issue discard aligned to section size as much as possible and avoid redundant discard. Signed-off-by: Yunlong Song Signed-off-by: Chao Yu Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 0b6827dba25ea..631e153457524 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1715,21 +1715,30 @@ void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi, unsigned int start = 0, end = -1; unsigned int secno, start_segno; bool force = (cpc->reason & CP_DISCARD); + bool need_align = test_opt(sbi, LFS) && sbi->segs_per_sec > 1; mutex_lock(&dirty_i->seglist_lock); while (1) { int i; + + if (need_align && end != -1) + end--; start = find_next_bit(prefree_map, MAIN_SEGS(sbi), end + 1); if (start >= MAIN_SEGS(sbi)) break; end = find_next_zero_bit(prefree_map, MAIN_SEGS(sbi), start + 1); - for (i = start; i < end; i++) - clear_bit(i, prefree_map); + if (need_align) { + start = rounddown(start, sbi->segs_per_sec); + end = roundup(end, sbi->segs_per_sec); + } - dirty_i->nr_dirty[PRE] -= end - start; + for (i = start; i < end; i++) { + if (test_and_clear_bit(i, prefree_map)) + dirty_i->nr_dirty[PRE]--; + } if (!test_opt(sbi, DISCARD)) continue; @@ -2516,6 +2525,7 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) struct discard_policy dpolicy; unsigned long long trimmed = 0; int err = 0; + bool need_align = test_opt(sbi, LFS) && sbi->segs_per_sec > 1; if (start >= MAX_BLKADDR(sbi) || range->len < sbi->blocksize) return -EINVAL; @@ -2533,6 +2543,10 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) start_segno = (start <= MAIN_BLKADDR(sbi)) ? 0 : GET_SEGNO(sbi, start); end_segno = (end >= MAX_BLKADDR(sbi)) ? MAIN_SEGS(sbi) - 1 : GET_SEGNO(sbi, end); + if (need_align) { + start_segno = rounddown(start_segno, sbi->segs_per_sec); + end_segno = roundup(end_segno + 1, sbi->segs_per_sec) - 1; + } cpc.reason = CP_DISCARD; cpc.trim_minlen = max_t(__u64, 1, F2FS_BYTES_TO_BLK(range->minlen)); -- GitLab From 6473a8be0e8e732c030de7bb3e2d021ddbea009c Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 25 Jul 2018 19:16:21 +0800 Subject: [PATCH 0058/2269] f2fs: let checkpoint flush dnode page of regular Fsyncer will wait on all dnode pages of regular writeback before flushing, if there are async dnode pages blocked by IO scheduler, it may decrease fsync's performance. In this patch, we choose to let f2fs_balance_fs_bg() to trigger checkpoint to flush these dnode pages of regular, so async IO of dnode page can be elimitnated, making fsyncer only need to wait for sync IO. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 8 +++++++- fs/f2fs/node.h | 5 +++++ fs/f2fs/segment.c | 4 +++- 3 files changed, 15 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 6af146d30935a..dee1dc93e2e62 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1410,6 +1410,10 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted, if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) goto redirty_out; + if (wbc->sync_mode == WB_SYNC_NONE && + IS_DNODE(page) && is_cold_node(page)) + goto redirty_out; + /* get old block addr of this node page */ nid = nid_of_node(page); f2fs_bug_on(sbi, page->index != nid); @@ -1727,10 +1731,12 @@ continue_unlock: } if (step < 2) { + if (wbc->sync_mode == WB_SYNC_NONE && step == 1) + goto out; step++; goto next_step; } - +out: if (nwritten) f2fs_submit_merged_write(sbi, NODE); diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index 8f34bdffde934..0f4db7a612547 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -135,6 +135,11 @@ static inline bool excess_cached_nats(struct f2fs_sb_info *sbi) return NM_I(sbi)->nat_cnt >= DEF_NAT_CACHE_THRESHOLD; } +static inline bool excess_dirty_nodes(struct f2fs_sb_info *sbi) +{ + return get_pages(sbi, F2FS_DIRTY_NODES) >= sbi->blocks_per_seg * 8; +} + enum mem_type { FREE_NIDS, /* indicates the free nid list */ NAT_ENTRIES, /* indicates the cached nat entry */ diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 631e153457524..3662e1f429b4f 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -509,7 +509,8 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi) else f2fs_build_free_nids(sbi, false, false); - if (!is_idle(sbi) && !excess_dirty_nats(sbi)) + if (!is_idle(sbi) && + (!excess_dirty_nats(sbi) && !excess_dirty_nodes(sbi))) return; /* checkpoint is the only way to shrink partial cached entries */ @@ -517,6 +518,7 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi) !f2fs_available_free_memory(sbi, INO_ENTRIES) || excess_prefree_segs(sbi) || excess_dirty_nats(sbi) || + excess_dirty_nodes(sbi) || f2fs_time_over(sbi, CP_TIME)) { if (test_opt(sbi, DATA_FLUSH)) { struct blk_plug plug; -- GitLab From 15027cf6278c311a3b415e2906ba6b8e53098826 Mon Sep 17 00:00:00 2001 From: Yunlong Song Date: Mon, 23 Jul 2018 22:10:22 +0800 Subject: [PATCH 0059/2269] f2fs: add proc entry to show victim_secmap bitmap This patch adds a new proc entry to show victim_secmap information in more detail, which is very helpful to know the get_victim candidate status clearly, and helpful to debug problems (e.g., some sections can not gc all of its blocks, since some blocks belong to atomic file, leaving victim_secmap with section bit setting, in extrem case, this will lead all bytes of victim_secmap setting with 0xff). Signed-off-by: Yunlong Song Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/sysfs.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 47f24b922af05..79e47e7d737c8 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -615,6 +615,28 @@ static int __maybe_unused iostat_info_seq_show(struct seq_file *seq, return 0; } +static int __maybe_unused victim_bits_seq_show(struct seq_file *seq, + void *offset) +{ + struct super_block *sb = seq->private; + struct f2fs_sb_info *sbi = F2FS_SB(sb); + struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); + int i; + + seq_puts(seq, "format: victim_secmap bitmaps\n"); + + for (i = 0; i < MAIN_SECS(sbi); i++) { + if ((i % 10) == 0) + seq_printf(seq, "%-10d", i); + seq_printf(seq, "%d", test_bit(i, dirty_i->victim_secmap) ? 1 : 0); + if ((i % 10) == 9 || i == (MAIN_SECS(sbi) - 1)) + seq_putc(seq, '\n'); + else + seq_putc(seq, ' '); + } + return 0; +} + #define F2FS_PROC_FILE_DEF(_name) \ static int _name##_open_fs(struct inode *inode, struct file *file) \ { \ @@ -631,6 +653,7 @@ static const struct file_operations f2fs_seq_##_name##_fops = { \ F2FS_PROC_FILE_DEF(segment_info); F2FS_PROC_FILE_DEF(segment_bits); F2FS_PROC_FILE_DEF(iostat_info); +F2FS_PROC_FILE_DEF(victim_bits); int __init f2fs_init_sysfs(void) { @@ -681,6 +704,8 @@ int f2fs_register_sysfs(struct f2fs_sb_info *sbi) &f2fs_seq_segment_bits_fops, sb); proc_create_data("iostat_info", S_IRUGO, sbi->s_proc, &f2fs_seq_iostat_info_fops, sb); + proc_create_data("victim_bits", S_IRUGO, sbi->s_proc, + &f2fs_seq_victim_bits_fops, sb); } return 0; } @@ -691,6 +716,7 @@ void f2fs_unregister_sysfs(struct f2fs_sb_info *sbi) remove_proc_entry("iostat_info", sbi->s_proc); remove_proc_entry("segment_info", sbi->s_proc); remove_proc_entry("segment_bits", sbi->s_proc); + remove_proc_entry("victim_bits", sbi->s_proc); remove_proc_entry(sbi->sb->s_id, f2fs_proc_root); } kobject_del(&sbi->s_kobj); -- GitLab From dfc3eea2b1aa5dcbc1eebb83704f95e7cfe0bbb4 Mon Sep 17 00:00:00 2001 From: Sheng Yong Date: Tue, 24 Jul 2018 20:17:53 +0800 Subject: [PATCH 0060/2269] f2fs: quota: decrease the lock granularity of statfs_project According to fs/quota/dquot.c, `dq_data_lock' protects mem_dqinfo structures and modifications of dquot pointers in the inode, and `dquot->dq_dqb_lock' protects data from dq_dqb. We should use dquot->dq_dqb_lock in statfs_project instead of dq_dat_lock. Signed-off-by: Sheng Yong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 55642d50b2277..57d54433af6a0 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1117,7 +1117,7 @@ static int f2fs_statfs_project(struct super_block *sb, dquot = dqget(sb, qid); if (IS_ERR(dquot)) return PTR_ERR(dquot); - spin_lock(&dq_data_lock); + spin_lock(&dquot->dq_dqb_lock); limit = (dquot->dq_dqb.dqb_bsoftlimit ? dquot->dq_dqb.dqb_bsoftlimit : @@ -1140,7 +1140,7 @@ static int f2fs_statfs_project(struct super_block *sb, (buf->f_files - dquot->dq_dqb.dqb_curinodes) : 0; } - spin_unlock(&dq_data_lock); + spin_unlock(&dquot->dq_dqb_lock); dqput(dquot); return 0; } -- GitLab From 010b8c6ad0df99f4368941ba6347689d23dde84e Mon Sep 17 00:00:00 2001 From: Sheng Yong Date: Thu, 26 Jul 2018 19:24:25 +0800 Subject: [PATCH 0061/2269] f2fs: quota: fix incorrect comments Signed-off-by: Sheng Yong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 5 ++++- fs/f2fs/super.c | 5 +---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 01733aeefbfae..f3fabf992727a 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -661,7 +661,10 @@ int f2fs_recover_orphan_inodes(struct f2fs_sb_info *sbi) /* Needed for iput() to work correctly and not trash data */ sbi->sb->s_flags |= MS_ACTIVE; - /* Turn on quotas so that they are updated correctly */ + /* + * Turn on quotas which were not enabled for read-only mounts if + * filesystem has quota feature, so that they are updated correctly. + */ quota_enabled = f2fs_enable_quota_files(sbi, s_flags & MS_RDONLY); #endif diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 57d54433af6a0..167a754595c3b 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -2955,10 +2955,7 @@ try_onemore: goto free_root_inode; #ifdef CONFIG_QUOTA - /* - * Turn on quotas which were not enabled for read-only mounts if - * filesystem has quota feature, so that they are updated correctly. - */ + /* Enable quota usage during mount */ if (f2fs_sb_has_quota_ino(sb) && !f2fs_readonly(sb)) { err = f2fs_enable_quotas(sb); if (err) { -- GitLab From 27d0a7575dc53c79b9e13b67f7842b331f54dea6 Mon Sep 17 00:00:00 2001 From: Sheng Yong Date: Tue, 24 Jul 2018 20:17:52 +0800 Subject: [PATCH 0062/2269] f2fs: quota: do not mount as RDWR without QUOTA if quota feature enabled If quota feature is enabled, quota is on by default. However, if CONFIG_QUOTA is not built in kernel, dquot entries will not get updated, which leads to quota inconsistency. Signed-off-by: Sheng Yong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 167a754595c3b..eb25db84ce6ff 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -346,12 +346,6 @@ static int f2fs_check_quota_options(struct f2fs_sb_info *sbi) "QUOTA feature is enabled, so ignore jquota_fmt"); F2FS_OPTION(sbi).s_jquota_fmt = 0; } - if (f2fs_sb_has_quota_ino(sbi->sb) && f2fs_readonly(sbi->sb)) { - f2fs_msg(sbi->sb, KERN_INFO, - "Filesystem with quota feature cannot be mounted RDWR " - "without CONFIG_QUOTA"); - return -1; - } return 0; } #endif @@ -774,6 +768,13 @@ static int parse_options(struct super_block *sb, char *options) #ifdef CONFIG_QUOTA if (f2fs_check_quota_options(sbi)) return -EINVAL; +#else + if (f2fs_sb_has_quota_ino(sbi->sb) && !f2fs_readonly(sbi->sb)) { + f2fs_msg(sbi->sb, KERN_INFO, + "Filesystem with quota feature cannot be mounted RDWR " + "without CONFIG_QUOTA"); + return -EINVAL; + } #endif if (F2FS_IO_SIZE_BITS(sbi) && !test_opt(sbi, LFS)) { -- GitLab From c28578ac1e862dede3a5ccb5ed13d1880bfcfeb1 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 26 Jul 2018 07:19:48 +0800 Subject: [PATCH 0063/2269] f2fs: fix to restrict mount condition when without CONFIG_QUOTA Like quota_ino feature, we need to reject mounting RDWR with image which enables project_quota feature when there is no CONFIG_QUOTA be set in kernel. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index eb25db84ce6ff..47ef6f0997da6 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -775,6 +775,12 @@ static int parse_options(struct super_block *sb, char *options) "without CONFIG_QUOTA"); return -EINVAL; } + if (f2fs_sb_has_project_quota(sbi->sb) && !f2fs_readonly(sbi->sb)) { + f2fs_msg(sb, KERN_ERR, + "Filesystem with project quota feature cannot be " + "mounted RDWR without CONFIG_QUOTA"); + return -EINVAL; + } #endif if (F2FS_IO_SIZE_BITS(sbi) && !test_opt(sbi, LFS)) { -- GitLab From 09bc4de4c933e93d0ca8f6ab748b22aec6e062b0 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 27 Jul 2018 18:15:13 +0800 Subject: [PATCH 0064/2269] f2fs: don't keep meta pages used for block migration For migration of encrypted inode's block, we load data of encrypted block into meta inode's page cache, after checkpoint, those all intermediate pages should be clean, and no one will read them again, so let's just release them for more memory. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index f3fabf992727a..3c3429a99475c 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -1408,6 +1408,14 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) commit_checkpoint(sbi, ckpt, start_blk); wait_on_all_pages_writeback(sbi); + /* + * invalidate intermediate page cache borrowed from meta inode + * which are used for migration of encrypted inode's blocks. + */ + if (f2fs_sb_has_encrypt(sbi->sb)) + invalidate_mapping_pages(META_MAPPING(sbi), + MAIN_BLKADDR(sbi), MAX_BLKADDR(sbi) - 1); + f2fs_release_ino_entry(sbi, false); clear_sbi_flag(sbi, SBI_IS_DIRTY); -- GitLab From 6fffb33eac34d62fe4ee0068e7f1f300fa9df764 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 27 Jul 2018 18:15:14 +0800 Subject: [PATCH 0065/2269] f2fs: fix to active page in lru list for read path If config CONFIG_F2FS_FAULT_INJECTION is on, for both read or write path we will call find_lock_page() to get the page, but for read path, it missed to passing FGP_ACCESSED to allocator to active the page in LRU list, result in being reclaimed in advance incorrectly, fix it. Reported-by: Xianrong Zhou Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 080fc0b6c7649..0b57642592826 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1965,8 +1965,13 @@ static inline struct page *f2fs_grab_cache_page(struct address_space *mapping, pgoff_t index, bool for_write) { #ifdef CONFIG_F2FS_FAULT_INJECTION - struct page *page = find_lock_page(mapping, index); + struct page *page; + if (!for_write) + page = find_get_page_flags(mapping, index, + FGP_LOCK | FGP_ACCESSED); + else + page = find_lock_page(mapping, index); if (page) return page; -- GitLab From 3a63e3d97b7893bb2a6f5779105c399a404d9dc7 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sun, 29 Jul 2018 12:16:59 +0800 Subject: [PATCH 0066/2269] f2fs: fix to clear PG_checked flag in set_page_dirty() PG_checked flag will be set on data page during GC, later, we can recognize such page by the flag and migrate page to cold segment. But previously, we don't clear this flag when invalidating data page, after page redirtying, we will write it into wrong log. Let's clear PG_checked flag in set_page_dirty() to avoid this. Signed-off-by: Weichao Guo Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index ab7b700fae89f..84f15bd371599 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -2542,6 +2542,10 @@ static int f2fs_set_data_page_dirty(struct page *page) if (!PageUptodate(page)) SetPageUptodate(page); + /* don't remain PG_checked flag which was set during GC */ + if (is_cold_data(page)) + clear_cold_data(page); + if (f2fs_is_atomic_file(inode) && !f2fs_is_commit_atomic_write(inode)) { if (!IS_ATOMIC_WRITTEN_PAGE(page)) { f2fs_register_inmem_page(inode, page); -- GitLab From 2e69e8e874f546cc18db79a13303477769690f4f Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 31 Jul 2018 09:09:01 -0700 Subject: [PATCH 0067/2269] f2fs: avoid f2fs_bug_on() in cp_error case There is a subtle race condition to invoke f2fs_bug_on() in shutdown tests. I've confirmed that the last checkpoint is preserved in consistent state, so it'd be fine to just return error at this moment. Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index dee1dc93e2e62..782524fd2beee 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1075,6 +1075,10 @@ int f2fs_remove_inode_page(struct inode *inode) f2fs_truncate_data_blocks_range(&dn, 1); /* 0 is possible, after f2fs_new_inode() has failed */ + if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) { + f2fs_put_dnode(&dn); + return -EIO; + } f2fs_bug_on(F2FS_I_SB(inode), inode->i_blocks != 0 && inode->i_blocks != 8); -- GitLab From 598224021f8d00de59945f9cdf244637354dd155 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 1 Aug 2018 19:16:11 +0800 Subject: [PATCH 0068/2269] f2fs: fix to do sanity check with cp_pack_start_sum After fuzzing, cp_pack_start_sum could be corrupted, so current log's summary info should be wrong due to loading incorrect summary block. Then, if segment's type in current log is exceeded NR_CURSEG_TYPE, it can lead accessing invalid dirty_i->dirty_segmap bitmap finally. Add sanity check for cp_pack_start_sum to fix this issue. https://bugzilla.kernel.org/show_bug.cgi?id=200419 - Reproduce - Kernel message (f2fs-dev w/ KASAN) [ 3117.578432] F2FS-fs (loop0): Invalid log blocks per segment (8) [ 3117.578445] F2FS-fs (loop0): Can't find valid F2FS filesystem in 2th superblock [ 3117.581364] F2FS-fs (loop0): invalid crc_offset: 30716 [ 3117.583564] WARNING: CPU: 1 PID: 1225 at fs/f2fs/checkpoint.c:90 __get_meta_page+0x448/0x4b0 [ 3117.583570] Modules linked in: snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hda_core snd_hwdep snd_pcm snd_timer joydev input_leds serio_raw snd soundcore mac_hid i2c_piix4 ib_iser rdma_cm iw_cm ib_cm ib_core configfs iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi btrfs zstd_decompress zstd_compress xxhash raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx xor raid6_pq libcrc32c raid1 raid0 multipath linear 8139too qxl ttm drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops drm crct10dif_pclmul crc32_pclmul ghash_clmulni_intel pcbc aesni_intel psmouse aes_x86_64 8139cp crypto_simd cryptd mii glue_helper pata_acpi floppy [ 3117.584014] CPU: 1 PID: 1225 Comm: mount Not tainted 4.17.0+ #1 [ 3117.584017] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 [ 3117.584022] RIP: 0010:__get_meta_page+0x448/0x4b0 [ 3117.584023] Code: 00 49 8d bc 24 84 00 00 00 e8 74 54 da ff 41 83 8c 24 84 00 00 00 08 4c 89 f6 4c 89 ef e8 c0 d9 95 00 48 89 ef e8 18 e3 00 00 <0f> 0b f0 80 4d 48 04 e9 0f fe ff ff 0f 0b 48 89 c7 48 89 04 24 e8 [ 3117.584072] RSP: 0018:ffff88018eb678c0 EFLAGS: 00010286 [ 3117.584082] RAX: ffff88018f0a6a78 RBX: ffffea0007a46600 RCX: ffffffff9314d1b2 [ 3117.584085] RDX: ffffffff00000001 RSI: 0000000000000000 RDI: ffff88018f0a6a98 [ 3117.584087] RBP: ffff88018ebe9980 R08: 0000000000000002 R09: 0000000000000001 [ 3117.584090] R10: 0000000000000001 R11: ffffed00326e4450 R12: ffff880193722200 [ 3117.584092] R13: ffff88018ebe9afc R14: 0000000000000206 R15: ffff88018eb67900 [ 3117.584096] FS: 00007f5694636840(0000) GS:ffff8801f3b00000(0000) knlGS:0000000000000000 [ 3117.584098] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 3117.584101] CR2: 00000000016f21b8 CR3: 0000000191c22000 CR4: 00000000000006e0 [ 3117.584112] Call Trace: [ 3117.584121] ? f2fs_set_meta_page_dirty+0x150/0x150 [ 3117.584127] ? f2fs_build_segment_manager+0xbf9/0x3190 [ 3117.584133] ? f2fs_npages_for_summary_flush+0x75/0x120 [ 3117.584145] f2fs_build_segment_manager+0xda8/0x3190 [ 3117.584151] ? f2fs_get_valid_checkpoint+0x298/0xa00 [ 3117.584156] ? f2fs_flush_sit_entries+0x10e0/0x10e0 [ 3117.584184] ? map_id_range_down+0x17c/0x1b0 [ 3117.584188] ? __put_user_ns+0x30/0x30 [ 3117.584206] ? find_next_bit+0x53/0x90 [ 3117.584237] ? cpumask_next+0x16/0x20 [ 3117.584249] f2fs_fill_super+0x1948/0x2b40 [ 3117.584258] ? f2fs_commit_super+0x1a0/0x1a0 [ 3117.584279] ? sget_userns+0x65e/0x690 [ 3117.584296] ? set_blocksize+0x88/0x130 [ 3117.584302] ? f2fs_commit_super+0x1a0/0x1a0 [ 3117.584305] mount_bdev+0x1c0/0x200 [ 3117.584310] mount_fs+0x5c/0x190 [ 3117.584320] vfs_kern_mount+0x64/0x190 [ 3117.584330] do_mount+0x2e4/0x1450 [ 3117.584343] ? lockref_put_return+0x130/0x130 [ 3117.584347] ? copy_mount_string+0x20/0x20 [ 3117.584357] ? kasan_unpoison_shadow+0x31/0x40 [ 3117.584362] ? kasan_kmalloc+0xa6/0xd0 [ 3117.584373] ? memcg_kmem_put_cache+0x16/0x90 [ 3117.584377] ? __kmalloc_track_caller+0x196/0x210 [ 3117.584383] ? _copy_from_user+0x61/0x90 [ 3117.584396] ? memdup_user+0x3e/0x60 [ 3117.584401] ksys_mount+0x7e/0xd0 [ 3117.584405] __x64_sys_mount+0x62/0x70 [ 3117.584427] do_syscall_64+0x73/0x160 [ 3117.584440] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 3117.584455] RIP: 0033:0x7f5693f14b9a [ 3117.584456] Code: 48 8b 0d 01 c3 2b 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 49 89 ca b8 a5 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d ce c2 2b 00 f7 d8 64 89 01 48 [ 3117.584505] RSP: 002b:00007fff27346488 EFLAGS: 00000206 ORIG_RAX: 00000000000000a5 [ 3117.584510] RAX: ffffffffffffffda RBX: 00000000016e2030 RCX: 00007f5693f14b9a [ 3117.584512] RDX: 00000000016e2210 RSI: 00000000016e3f30 RDI: 00000000016ee040 [ 3117.584514] RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000013 [ 3117.584516] R10: 00000000c0ed0000 R11: 0000000000000206 R12: 00000000016ee040 [ 3117.584519] R13: 00000000016e2210 R14: 0000000000000000 R15: 0000000000000003 [ 3117.584523] ---[ end trace a8e0d899985faf31 ]--- [ 3117.685663] F2FS-fs (loop0): f2fs_check_nid_range: out-of-range nid=2, run fsck to fix. [ 3117.685673] F2FS-fs (loop0): recover_data: ino = 2 (i_size: recover) recovered = 1, err = 0 [ 3117.685707] ================================================================== [ 3117.685955] BUG: KASAN: slab-out-of-bounds in __remove_dirty_segment+0xdd/0x1e0 [ 3117.686175] Read of size 8 at addr ffff88018f0a63d0 by task mount/1225 [ 3117.686477] CPU: 0 PID: 1225 Comm: mount Tainted: G W 4.17.0+ #1 [ 3117.686481] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 [ 3117.686483] Call Trace: [ 3117.686494] dump_stack+0x71/0xab [ 3117.686512] print_address_description+0x6b/0x290 [ 3117.686517] kasan_report+0x28e/0x390 [ 3117.686522] ? __remove_dirty_segment+0xdd/0x1e0 [ 3117.686527] __remove_dirty_segment+0xdd/0x1e0 [ 3117.686532] locate_dirty_segment+0x189/0x190 [ 3117.686538] f2fs_allocate_new_segments+0xa9/0xe0 [ 3117.686543] recover_data+0x703/0x2c20 [ 3117.686547] ? f2fs_recover_fsync_data+0x48f/0xd50 [ 3117.686553] ? ksys_mount+0x7e/0xd0 [ 3117.686564] ? policy_nodemask+0x1a/0x90 [ 3117.686567] ? policy_node+0x56/0x70 [ 3117.686571] ? add_fsync_inode+0xf0/0xf0 [ 3117.686592] ? blk_finish_plug+0x44/0x60 [ 3117.686597] ? f2fs_ra_meta_pages+0x38b/0x5e0 [ 3117.686602] ? find_inode_fast+0xac/0xc0 [ 3117.686606] ? f2fs_is_valid_blkaddr+0x320/0x320 [ 3117.686618] ? __radix_tree_lookup+0x150/0x150 [ 3117.686633] ? dqget+0x670/0x670 [ 3117.686648] ? pagecache_get_page+0x29/0x410 [ 3117.686656] ? kmem_cache_alloc+0x176/0x1e0 [ 3117.686660] ? f2fs_is_valid_blkaddr+0x11d/0x320 [ 3117.686664] f2fs_recover_fsync_data+0xc23/0xd50 [ 3117.686670] ? f2fs_space_for_roll_forward+0x60/0x60 [ 3117.686674] ? rb_insert_color+0x323/0x3d0 [ 3117.686678] ? f2fs_recover_orphan_inodes+0xa5/0x700 [ 3117.686683] ? proc_register+0x153/0x1d0 [ 3117.686686] ? f2fs_remove_orphan_inode+0x10/0x10 [ 3117.686695] ? f2fs_attr_store+0x50/0x50 [ 3117.686700] ? proc_create_single_data+0x52/0x60 [ 3117.686707] f2fs_fill_super+0x1d06/0x2b40 [ 3117.686728] ? f2fs_commit_super+0x1a0/0x1a0 [ 3117.686735] ? sget_userns+0x65e/0x690 [ 3117.686740] ? set_blocksize+0x88/0x130 [ 3117.686745] ? f2fs_commit_super+0x1a0/0x1a0 [ 3117.686748] mount_bdev+0x1c0/0x200 [ 3117.686753] mount_fs+0x5c/0x190 [ 3117.686758] vfs_kern_mount+0x64/0x190 [ 3117.686762] do_mount+0x2e4/0x1450 [ 3117.686769] ? lockref_put_return+0x130/0x130 [ 3117.686773] ? copy_mount_string+0x20/0x20 [ 3117.686777] ? kasan_unpoison_shadow+0x31/0x40 [ 3117.686780] ? kasan_kmalloc+0xa6/0xd0 [ 3117.686786] ? memcg_kmem_put_cache+0x16/0x90 [ 3117.686790] ? __kmalloc_track_caller+0x196/0x210 [ 3117.686795] ? _copy_from_user+0x61/0x90 [ 3117.686801] ? memdup_user+0x3e/0x60 [ 3117.686804] ksys_mount+0x7e/0xd0 [ 3117.686809] __x64_sys_mount+0x62/0x70 [ 3117.686816] do_syscall_64+0x73/0x160 [ 3117.686824] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 3117.686829] RIP: 0033:0x7f5693f14b9a [ 3117.686830] Code: 48 8b 0d 01 c3 2b 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 49 89 ca b8 a5 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d ce c2 2b 00 f7 d8 64 89 01 48 [ 3117.686887] RSP: 002b:00007fff27346488 EFLAGS: 00000206 ORIG_RAX: 00000000000000a5 [ 3117.686892] RAX: ffffffffffffffda RBX: 00000000016e2030 RCX: 00007f5693f14b9a [ 3117.686894] RDX: 00000000016e2210 RSI: 00000000016e3f30 RDI: 00000000016ee040 [ 3117.686896] RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000013 [ 3117.686899] R10: 00000000c0ed0000 R11: 0000000000000206 R12: 00000000016ee040 [ 3117.686901] R13: 00000000016e2210 R14: 0000000000000000 R15: 0000000000000003 [ 3117.687005] Allocated by task 1225: [ 3117.687152] kasan_kmalloc+0xa6/0xd0 [ 3117.687157] kmem_cache_alloc_trace+0xfd/0x200 [ 3117.687161] f2fs_build_segment_manager+0x2d09/0x3190 [ 3117.687165] f2fs_fill_super+0x1948/0x2b40 [ 3117.687168] mount_bdev+0x1c0/0x200 [ 3117.687171] mount_fs+0x5c/0x190 [ 3117.687174] vfs_kern_mount+0x64/0x190 [ 3117.687177] do_mount+0x2e4/0x1450 [ 3117.687180] ksys_mount+0x7e/0xd0 [ 3117.687182] __x64_sys_mount+0x62/0x70 [ 3117.687186] do_syscall_64+0x73/0x160 [ 3117.687190] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 3117.687285] Freed by task 19: [ 3117.687412] __kasan_slab_free+0x137/0x190 [ 3117.687416] kfree+0x8b/0x1b0 [ 3117.687460] ttm_bo_man_put_node+0x61/0x80 [ttm] [ 3117.687476] ttm_bo_cleanup_refs+0x15f/0x250 [ttm] [ 3117.687492] ttm_bo_delayed_delete+0x2f0/0x300 [ttm] [ 3117.687507] ttm_bo_delayed_workqueue+0x17/0x50 [ttm] [ 3117.687528] process_one_work+0x2f9/0x740 [ 3117.687531] worker_thread+0x78/0x6b0 [ 3117.687541] kthread+0x177/0x1c0 [ 3117.687545] ret_from_fork+0x35/0x40 [ 3117.687638] The buggy address belongs to the object at ffff88018f0a6300 which belongs to the cache kmalloc-192 of size 192 [ 3117.688014] The buggy address is located 16 bytes to the right of 192-byte region [ffff88018f0a6300, ffff88018f0a63c0) [ 3117.688382] The buggy address belongs to the page: [ 3117.688554] page:ffffea00063c2980 count:1 mapcount:0 mapping:ffff8801f3403180 index:0x0 [ 3117.688788] flags: 0x17fff8000000100(slab) [ 3117.688944] raw: 017fff8000000100 ffffea00063c2840 0000000e0000000e ffff8801f3403180 [ 3117.689166] raw: 0000000000000000 0000000080100010 00000001ffffffff 0000000000000000 [ 3117.689386] page dumped because: kasan: bad access detected [ 3117.689653] Memory state around the buggy address: [ 3117.689816] ffff88018f0a6280: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc [ 3117.690027] ffff88018f0a6300: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 3117.690239] >ffff88018f0a6380: 00 00 fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 3117.690448] ^ [ 3117.690644] ffff88018f0a6400: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 3117.690868] ffff88018f0a6480: 00 00 fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 3117.691077] ================================================================== [ 3117.691290] Disabling lock debugging due to kernel taint [ 3117.693893] BUG: unable to handle kernel NULL pointer dereference at 0000000000000000 [ 3117.694120] PGD 80000001f01bc067 P4D 80000001f01bc067 PUD 1d9638067 PMD 0 [ 3117.694338] Oops: 0002 [#1] SMP KASAN PTI [ 3117.694490] CPU: 1 PID: 1225 Comm: mount Tainted: G B W 4.17.0+ #1 [ 3117.694703] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 [ 3117.695073] RIP: 0010:__remove_dirty_segment+0xe2/0x1e0 [ 3117.695246] Code: c4 48 89 c7 e8 cf bb d7 ff 45 0f b6 24 24 41 83 e4 3f 44 88 64 24 07 41 83 e4 3f 4a 8d 7c e3 08 e8 b3 bc d7 ff 4a 8b 4c e3 08 4c 0f b3 29 0f 82 94 00 00 00 48 8d bd 20 04 00 00 e8 97 bb d7 [ 3117.695793] RSP: 0018:ffff88018eb67638 EFLAGS: 00010292 [ 3117.695969] RAX: 0000000000000000 RBX: ffff88018f0a6300 RCX: 0000000000000000 [ 3117.696182] RDX: 0000000000000000 RSI: 0000000000000297 RDI: 0000000000000297 [ 3117.696391] RBP: ffff88018ebe9980 R08: ffffed003e743ebb R09: ffffed003e743ebb [ 3117.696604] R10: 0000000000000001 R11: ffffed003e743eba R12: 0000000000000019 [ 3117.696813] R13: 0000000000000014 R14: 0000000000000320 R15: ffff88018ebe99e0 [ 3117.697032] FS: 00007f5694636840(0000) GS:ffff8801f3b00000(0000) knlGS:0000000000000000 [ 3117.697280] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 3117.702357] CR2: 00007fe89bb1a000 CR3: 0000000191c22000 CR4: 00000000000006e0 [ 3117.707235] Call Trace: [ 3117.712077] locate_dirty_segment+0x189/0x190 [ 3117.716891] f2fs_allocate_new_segments+0xa9/0xe0 [ 3117.721617] recover_data+0x703/0x2c20 [ 3117.726316] ? f2fs_recover_fsync_data+0x48f/0xd50 [ 3117.730957] ? ksys_mount+0x7e/0xd0 [ 3117.735573] ? policy_nodemask+0x1a/0x90 [ 3117.740198] ? policy_node+0x56/0x70 [ 3117.744829] ? add_fsync_inode+0xf0/0xf0 [ 3117.749487] ? blk_finish_plug+0x44/0x60 [ 3117.754152] ? f2fs_ra_meta_pages+0x38b/0x5e0 [ 3117.758831] ? find_inode_fast+0xac/0xc0 [ 3117.763448] ? f2fs_is_valid_blkaddr+0x320/0x320 [ 3117.768046] ? __radix_tree_lookup+0x150/0x150 [ 3117.772603] ? dqget+0x670/0x670 [ 3117.777159] ? pagecache_get_page+0x29/0x410 [ 3117.781648] ? kmem_cache_alloc+0x176/0x1e0 [ 3117.786067] ? f2fs_is_valid_blkaddr+0x11d/0x320 [ 3117.790476] f2fs_recover_fsync_data+0xc23/0xd50 [ 3117.794790] ? f2fs_space_for_roll_forward+0x60/0x60 [ 3117.799086] ? rb_insert_color+0x323/0x3d0 [ 3117.803304] ? f2fs_recover_orphan_inodes+0xa5/0x700 [ 3117.807563] ? proc_register+0x153/0x1d0 [ 3117.811766] ? f2fs_remove_orphan_inode+0x10/0x10 [ 3117.815947] ? f2fs_attr_store+0x50/0x50 [ 3117.820087] ? proc_create_single_data+0x52/0x60 [ 3117.824262] f2fs_fill_super+0x1d06/0x2b40 [ 3117.828367] ? f2fs_commit_super+0x1a0/0x1a0 [ 3117.832432] ? sget_userns+0x65e/0x690 [ 3117.836500] ? set_blocksize+0x88/0x130 [ 3117.840501] ? f2fs_commit_super+0x1a0/0x1a0 [ 3117.844420] mount_bdev+0x1c0/0x200 [ 3117.848275] mount_fs+0x5c/0x190 [ 3117.852053] vfs_kern_mount+0x64/0x190 [ 3117.855810] do_mount+0x2e4/0x1450 [ 3117.859441] ? lockref_put_return+0x130/0x130 [ 3117.862996] ? copy_mount_string+0x20/0x20 [ 3117.866417] ? kasan_unpoison_shadow+0x31/0x40 [ 3117.869719] ? kasan_kmalloc+0xa6/0xd0 [ 3117.872948] ? memcg_kmem_put_cache+0x16/0x90 [ 3117.876121] ? __kmalloc_track_caller+0x196/0x210 [ 3117.879333] ? _copy_from_user+0x61/0x90 [ 3117.882467] ? memdup_user+0x3e/0x60 [ 3117.885604] ksys_mount+0x7e/0xd0 [ 3117.888700] __x64_sys_mount+0x62/0x70 [ 3117.891742] do_syscall_64+0x73/0x160 [ 3117.894692] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 3117.897669] RIP: 0033:0x7f5693f14b9a [ 3117.900563] Code: 48 8b 0d 01 c3 2b 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 49 89 ca b8 a5 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d ce c2 2b 00 f7 d8 64 89 01 48 [ 3117.906922] RSP: 002b:00007fff27346488 EFLAGS: 00000206 ORIG_RAX: 00000000000000a5 [ 3117.910159] RAX: ffffffffffffffda RBX: 00000000016e2030 RCX: 00007f5693f14b9a [ 3117.913469] RDX: 00000000016e2210 RSI: 00000000016e3f30 RDI: 00000000016ee040 [ 3117.916764] RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000013 [ 3117.920071] R10: 00000000c0ed0000 R11: 0000000000000206 R12: 00000000016ee040 [ 3117.923393] R13: 00000000016e2210 R14: 0000000000000000 R15: 0000000000000003 [ 3117.926680] Modules linked in: snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hda_core snd_hwdep snd_pcm snd_timer joydev input_leds serio_raw snd soundcore mac_hid i2c_piix4 ib_iser rdma_cm iw_cm ib_cm ib_core configfs iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi btrfs zstd_decompress zstd_compress xxhash raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx xor raid6_pq libcrc32c raid1 raid0 multipath linear 8139too qxl ttm drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops drm crct10dif_pclmul crc32_pclmul ghash_clmulni_intel pcbc aesni_intel psmouse aes_x86_64 8139cp crypto_simd cryptd mii glue_helper pata_acpi floppy [ 3117.949979] CR2: 0000000000000000 [ 3117.954283] ---[ end trace a8e0d899985faf32 ]--- [ 3117.958575] RIP: 0010:__remove_dirty_segment+0xe2/0x1e0 [ 3117.962810] Code: c4 48 89 c7 e8 cf bb d7 ff 45 0f b6 24 24 41 83 e4 3f 44 88 64 24 07 41 83 e4 3f 4a 8d 7c e3 08 e8 b3 bc d7 ff 4a 8b 4c e3 08 4c 0f b3 29 0f 82 94 00 00 00 48 8d bd 20 04 00 00 e8 97 bb d7 [ 3117.971789] RSP: 0018:ffff88018eb67638 EFLAGS: 00010292 [ 3117.976333] RAX: 0000000000000000 RBX: ffff88018f0a6300 RCX: 0000000000000000 [ 3117.980926] RDX: 0000000000000000 RSI: 0000000000000297 RDI: 0000000000000297 [ 3117.985497] RBP: ffff88018ebe9980 R08: ffffed003e743ebb R09: ffffed003e743ebb [ 3117.990098] R10: 0000000000000001 R11: ffffed003e743eba R12: 0000000000000019 [ 3117.994761] R13: 0000000000000014 R14: 0000000000000320 R15: ffff88018ebe99e0 [ 3117.999392] FS: 00007f5694636840(0000) GS:ffff8801f3b00000(0000) knlGS:0000000000000000 [ 3118.004096] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 3118.008816] CR2: 00007fe89bb1a000 CR3: 0000000191c22000 CR4: 00000000000006e0 - Location https://elixir.bootlin.com/linux/v4.18-rc3/source/fs/f2fs/segment.c#L775 if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t])) dirty_i->nr_dirty[t]--; Here dirty_i->dirty_segmap[t] can be NULL which leads to crash in test_and_clear_bit() Reported-by Wen Xu Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 8 ++++---- fs/f2fs/super.c | 12 ++++++++++++ 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 3c3429a99475c..150e68467ca05 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -880,15 +880,15 @@ int f2fs_get_valid_checkpoint(struct f2fs_sb_info *sbi) cp_block = (struct f2fs_checkpoint *)page_address(cur_page); memcpy(sbi->ckpt, cp_block, blk_size); - /* Sanity checking of checkpoint */ - if (f2fs_sanity_check_ckpt(sbi)) - goto free_fail_no_cp; - if (cur_page == cp1) sbi->cur_cp_pack = 1; else sbi->cur_cp_pack = 2; + /* Sanity checking of checkpoint */ + if (f2fs_sanity_check_ckpt(sbi)) + goto free_fail_no_cp; + if (cp_blks <= 1) goto done; diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 47ef6f0997da6..939495ba5d04d 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -2293,6 +2293,7 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi) unsigned int sit_bitmap_size, nat_bitmap_size; unsigned int log_blocks_per_seg; unsigned int segment_count_main; + unsigned int cp_pack_start_sum, cp_payload; block_t user_block_count; int i; @@ -2353,6 +2354,17 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi) return 1; } + cp_pack_start_sum = __start_sum_addr(sbi); + cp_payload = __cp_payload(sbi); + if (cp_pack_start_sum < cp_payload + 1 || + cp_pack_start_sum > blocks_per_seg - 1 - + NR_CURSEG_TYPE) { + f2fs_msg(sbi->sb, KERN_ERR, + "Wrong cp_pack_start_sum: %u", + cp_pack_start_sum); + return 1; + } + if (unlikely(f2fs_cp_error(sbi))) { f2fs_msg(sbi->sb, KERN_ERR, "A bug case: need to run fsck"); return 1; -- GitLab From d8417ff1c369a85735c03d37bfb40052d3cde82b Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Wed, 1 Aug 2018 19:51:38 -0500 Subject: [PATCH 0069/2269] f2fs: use true and false for boolean values Return statements in functions returning bool should use true or false instead of an integer value. This issue was detected with the help of Coccinelle. Signed-off-by: Gustavo A. R. Silva Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 0b57642592826..86ae56f6762d1 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1335,7 +1335,7 @@ static inline bool is_idle(struct f2fs_sb_info *sbi) struct request_list *rl = &q->root_rl; if (rl->count[BLK_RW_SYNC] || rl->count[BLK_RW_ASYNC]) - return 0; + return false; return f2fs_time_over(sbi, REQ_TIME); } @@ -3401,7 +3401,7 @@ static inline bool f2fs_may_encrypt(struct inode *inode) return (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)); #else - return 0; + return false; #endif } -- GitLab From fb03bc6e74650aad0a412bdbc4c329a143a3e399 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 2 Aug 2018 23:03:19 +0800 Subject: [PATCH 0070/2269] f2fs: fix to avoid broken of dnode block list f2fs recovery flow is relying on dnode block link list, it means fsynced file recovery depends on previous dnode's persistence in the list, so during fsync() we should wait on all regular inode's dnode writebacked before issuing flush. By this way, we can avoid dnode block list being broken by out-of-order IO submission due to IO scheduler or driver. Sheng Yong helps to do the test with this patch: Target:/data (f2fs, -) 64MB / 32768KB / 4KB / 8 1 / PERSIST / Index Base: SEQ-RD(MB/s) SEQ-WR(MB/s) RND-RD(IOPS) RND-WR(IOPS) Insert(TPS) Update(TPS) Delete(TPS) 1 867.82 204.15 41440.03 41370.54 680.8 1025.94 1031.08 2 871.87 205.87 41370.3 40275.2 791.14 1065.84 1101.7 3 866.52 205.69 41795.67 40596.16 694.69 1037.16 1031.48 Avg 868.7366667 205.2366667 41535.33333 40747.3 722.21 1042.98 1054.753333 After: SEQ-RD(MB/s) SEQ-WR(MB/s) RND-RD(IOPS) RND-WR(IOPS) Insert(TPS) Update(TPS) Delete(TPS) 1 798.81 202.5 41143 40613.87 602.71 838.08 913.83 2 805.79 206.47 40297.2 41291.46 604.44 840.75 924.27 3 814.83 206.17 41209.57 40453.62 602.85 834.66 927.91 Avg 806.4766667 205.0466667 40883.25667 40786.31667 603.3333333 837.83 922.0033333 Patched/Original: 0.928332713 0.999074239 0.984300676 1.000957528 0.835398753 0.803303994 0.874141189 It looks like atomic write will suffer performance regression. I suspect that the criminal is that we forcing to wait all dnode being in storage cache before we issue PREFLUSH+FUA. BTW, will commit ("f2fs: don't need to wait for node writes for atomic write") cause the problem: we will lose data of last transaction after SPO, even if atomic write return no error: - atomic_open(); - write() P1, P2, P3; - atomic_commit(); - writeback data: P1, P2, P3; - writeback node: N1, N2, N3; <--- If N1, N2 is not writebacked, N3 with fsync_mark is writebacked, In SPOR, we won't find N3 since node chain is broken, turns out that losing last transaction. - preflush + fua; - power-cut If we don't wait dnode writeback for atomic_write: SEQ-RD(MB/s) SEQ-WR(MB/s) RND-RD(IOPS) RND-WR(IOPS) Insert(TPS) Update(TPS) Delete(TPS) 1 779.91 206.03 41621.5 40333.16 716.9 1038.21 1034.85 2 848.51 204.35 40082.44 39486.17 791.83 1119.96 1083.77 3 772.12 206.27 41335.25 41599.65 723.29 1055.07 971.92 Avg 800.18 205.55 41013.06333 40472.99333 744.0066667 1071.08 1030.18 Patched/Original: 0.92108464 1.001526693 0.987425886 0.993268102 1.030180511 1.026942031 0.976702294 SQLite's performance recovers. Jaegeuk: "Practically, I don't see db corruption becase of this. We can excuse to lose the last transaction." Finally, we decide to keep original implementation of atomic write interface sematics that we don't wait all dnode writeback before preflush+fua submission. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 8 ++- fs/f2fs/data.c | 2 + fs/f2fs/f2fs.h | 22 ++++++- fs/f2fs/file.c | 5 +- fs/f2fs/node.c | 144 +++++++++++++++++++++++++++++++++++-------- fs/f2fs/super.c | 6 ++ 6 files changed, 156 insertions(+), 31 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 150e68467ca05..f300f98e557a2 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -1161,7 +1161,7 @@ static void unblock_operations(struct f2fs_sb_info *sbi) f2fs_unlock_all(sbi); } -static void wait_on_all_pages_writeback(struct f2fs_sb_info *sbi) +void f2fs_wait_on_all_pages_writeback(struct f2fs_sb_info *sbi) { DEFINE_WAIT(wait); @@ -1397,7 +1397,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO); /* wait for previous submitted meta pages writeback */ - wait_on_all_pages_writeback(sbi); + f2fs_wait_on_all_pages_writeback(sbi); /* flush all device cache */ err = f2fs_flush_device_cache(sbi); @@ -1406,7 +1406,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) /* barrier and flush checkpoint cp pack 2 page if it can */ commit_checkpoint(sbi, ckpt, start_blk); - wait_on_all_pages_writeback(sbi); + f2fs_wait_on_all_pages_writeback(sbi); /* * invalidate intermediate page cache borrowed from meta inode @@ -1418,6 +1418,8 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) f2fs_release_ino_entry(sbi, false); + f2fs_reset_fsync_node_info(sbi); + clear_sbi_flag(sbi, SBI_IS_DIRTY); clear_sbi_flag(sbi, SBI_NEED_CP); __set_cp_next_pack(sbi); diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 84f15bd371599..796f5e5aa6f0c 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -177,6 +177,8 @@ static void f2fs_write_end_io(struct bio *bio) page->index != nid_of_node(page)); dec_page_count(sbi, type); + if (f2fs_in_warm_node_list(sbi, page)) + f2fs_del_fsync_node_entry(sbi, page); clear_cold_data(page); end_page_writeback(page); } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 86ae56f6762d1..7383cfa573d15 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -229,6 +229,12 @@ struct inode_entry { struct inode *inode; /* vfs inode pointer */ }; +struct fsync_node_entry { + struct list_head list; /* list head */ + struct page *page; /* warm node page pointer */ + unsigned int seq_id; /* sequence id */ +}; + /* for the bitmap indicate blocks to be discarded */ struct discard_entry { struct list_head list; /* list head */ @@ -1157,6 +1163,11 @@ struct f2fs_sb_info { struct inode_management im[MAX_INO_ENTRY]; /* manage inode cache */ + spinlock_t fsync_node_lock; /* for node entry lock */ + struct list_head fsync_node_list; /* node list head */ + unsigned int fsync_seg_id; /* sequence id */ + unsigned int fsync_node_num; /* number of node entries */ + /* for orphan inode, use 0'th array */ unsigned int max_orphans; /* max orphan inodes */ @@ -2828,6 +2839,10 @@ struct node_info; int f2fs_check_nid_range(struct f2fs_sb_info *sbi, nid_t nid); bool f2fs_available_free_memory(struct f2fs_sb_info *sbi, int type); +bool f2fs_in_warm_node_list(struct f2fs_sb_info *sbi, struct page *page); +void f2fs_init_fsync_node_info(struct f2fs_sb_info *sbi); +void f2fs_del_fsync_node_entry(struct f2fs_sb_info *sbi, struct page *page); +void f2fs_reset_fsync_node_info(struct f2fs_sb_info *sbi); int f2fs_need_dentry_mark(struct f2fs_sb_info *sbi, nid_t nid); bool f2fs_is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid); bool f2fs_need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino); @@ -2837,7 +2852,8 @@ pgoff_t f2fs_get_next_page_offset(struct dnode_of_data *dn, pgoff_t pgofs); int f2fs_get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode); int f2fs_truncate_inode_blocks(struct inode *inode, pgoff_t from); int f2fs_truncate_xattr_node(struct inode *inode); -int f2fs_wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, nid_t ino); +int f2fs_wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, + unsigned int seq_id); int f2fs_remove_inode_page(struct inode *inode); struct page *f2fs_new_inode_page(struct inode *inode); struct page *f2fs_new_node_page(struct dnode_of_data *dn, unsigned int ofs); @@ -2846,7 +2862,8 @@ struct page *f2fs_get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid); struct page *f2fs_get_node_page_ra(struct page *parent, int start); void f2fs_move_node_page(struct page *node_page, int gc_type); int f2fs_fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode, - struct writeback_control *wbc, bool atomic); + struct writeback_control *wbc, bool atomic, + unsigned int *seq_id); int f2fs_sync_node_pages(struct f2fs_sb_info *sbi, struct writeback_control *wbc, bool do_balance, enum iostat_type io_type); @@ -2963,6 +2980,7 @@ int f2fs_get_valid_checkpoint(struct f2fs_sb_info *sbi); void f2fs_update_dirty_page(struct inode *inode, struct page *page); void f2fs_remove_dirty_inode(struct inode *inode); int f2fs_sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type); +void f2fs_wait_on_all_pages_writeback(struct f2fs_sb_info *sbi); int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc); void f2fs_init_ino_entry_info(struct f2fs_sb_info *sbi); int __init f2fs_create_checkpoint_caches(void); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 21002cb1df04f..5bac5d9dd7530 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -213,6 +213,7 @@ static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end, .nr_to_write = LONG_MAX, .for_reclaim = 0, }; + unsigned int seq_id = 0; if (unlikely(f2fs_readonly(inode->i_sb))) return 0; @@ -275,7 +276,7 @@ go_write: } sync_nodes: atomic_inc(&sbi->wb_sync_req[NODE]); - ret = f2fs_fsync_node_pages(sbi, inode, &wbc, atomic); + ret = f2fs_fsync_node_pages(sbi, inode, &wbc, atomic, &seq_id); atomic_dec(&sbi->wb_sync_req[NODE]); if (ret) goto out; @@ -301,7 +302,7 @@ sync_nodes: * given fsync mark. */ if (!atomic) { - ret = f2fs_wait_on_node_pages_writeback(sbi, ino); + ret = f2fs_wait_on_node_pages_writeback(sbi, seq_id); if (ret) goto out; } diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 782524fd2beee..041e6433d0fec 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -28,6 +28,7 @@ static struct kmem_cache *nat_entry_slab; static struct kmem_cache *free_nid_slab; static struct kmem_cache *nat_entry_set_slab; +static struct kmem_cache *fsync_node_entry_slab; /* * Check whether the given nid is within node id range. @@ -264,6 +265,72 @@ static unsigned int __gang_lookup_nat_set(struct f2fs_nm_info *nm_i, start, nr); } +bool f2fs_in_warm_node_list(struct f2fs_sb_info *sbi, struct page *page) +{ + return NODE_MAPPING(sbi) == page->mapping && + IS_DNODE(page) && is_cold_node(page); +} + +void f2fs_init_fsync_node_info(struct f2fs_sb_info *sbi) +{ + spin_lock_init(&sbi->fsync_node_lock); + INIT_LIST_HEAD(&sbi->fsync_node_list); + sbi->fsync_seg_id = 0; + sbi->fsync_node_num = 0; +} + +static unsigned int f2fs_add_fsync_node_entry(struct f2fs_sb_info *sbi, + struct page *page) +{ + struct fsync_node_entry *fn; + unsigned long flags; + unsigned int seq_id; + + fn = f2fs_kmem_cache_alloc(fsync_node_entry_slab, GFP_NOFS); + + get_page(page); + fn->page = page; + INIT_LIST_HEAD(&fn->list); + + spin_lock_irqsave(&sbi->fsync_node_lock, flags); + list_add_tail(&fn->list, &sbi->fsync_node_list); + fn->seq_id = sbi->fsync_seg_id++; + seq_id = fn->seq_id; + sbi->fsync_node_num++; + spin_unlock_irqrestore(&sbi->fsync_node_lock, flags); + + return seq_id; +} + +void f2fs_del_fsync_node_entry(struct f2fs_sb_info *sbi, struct page *page) +{ + struct fsync_node_entry *fn; + unsigned long flags; + + spin_lock_irqsave(&sbi->fsync_node_lock, flags); + list_for_each_entry(fn, &sbi->fsync_node_list, list) { + if (fn->page == page) { + list_del(&fn->list); + sbi->fsync_node_num--; + spin_unlock_irqrestore(&sbi->fsync_node_lock, flags); + kmem_cache_free(fsync_node_entry_slab, fn); + put_page(page); + return; + } + } + spin_unlock_irqrestore(&sbi->fsync_node_lock, flags); + f2fs_bug_on(sbi, 1); +} + +void f2fs_reset_fsync_node_info(struct f2fs_sb_info *sbi) +{ + unsigned long flags; + + spin_lock_irqsave(&sbi->fsync_node_lock, flags); + sbi->fsync_seg_id = 0; + spin_unlock_irqrestore(&sbi->fsync_node_lock, flags); +} + int f2fs_need_dentry_mark(struct f2fs_sb_info *sbi, nid_t nid) { struct f2fs_nm_info *nm_i = NM_I(sbi); @@ -1388,7 +1455,7 @@ continue_unlock: static int __write_node_page(struct page *page, bool atomic, bool *submitted, struct writeback_control *wbc, bool do_balance, - enum iostat_type io_type) + enum iostat_type io_type, unsigned int *seq_id) { struct f2fs_sb_info *sbi = F2FS_P_SB(page); nid_t nid; @@ -1405,6 +1472,7 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted, .io_type = io_type, .io_wbc = wbc, }; + unsigned int seq; trace_f2fs_writepage(page, NODE); @@ -1450,6 +1518,13 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted, set_page_writeback(page); ClearPageError(page); + + if (f2fs_in_warm_node_list(sbi, page)) { + seq = f2fs_add_fsync_node_entry(sbi, page); + if (seq_id) + *seq_id = seq; + } + fio.old_blkaddr = ni.blk_addr; f2fs_do_write_node_page(nid, &fio); set_node_addr(sbi, &ni, fio.new_blkaddr, is_fsync_dnode(page)); @@ -1497,7 +1572,7 @@ void f2fs_move_node_page(struct page *node_page, int gc_type) goto out_page; if (__write_node_page(node_page, false, NULL, - &wbc, false, FS_GC_NODE_IO)) + &wbc, false, FS_GC_NODE_IO, NULL)) unlock_page(node_page); goto release_page; } else { @@ -1514,11 +1589,13 @@ release_page: static int f2fs_write_node_page(struct page *page, struct writeback_control *wbc) { - return __write_node_page(page, false, NULL, wbc, false, FS_NODE_IO); + return __write_node_page(page, false, NULL, wbc, false, + FS_NODE_IO, NULL); } int f2fs_fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode, - struct writeback_control *wbc, bool atomic) + struct writeback_control *wbc, bool atomic, + unsigned int *seq_id) { pgoff_t index; pgoff_t last_idx = ULONG_MAX; @@ -1599,7 +1676,7 @@ continue_unlock: ret = __write_node_page(page, atomic && page == last_page, &submitted, wbc, true, - FS_NODE_IO); + FS_NODE_IO, seq_id); if (ret) { unlock_page(page); f2fs_put_page(last_page, 0); @@ -1716,7 +1793,7 @@ continue_unlock: set_dentry_mark(page, 0); ret = __write_node_page(page, false, &submitted, - wbc, do_balance, io_type); + wbc, do_balance, io_type, NULL); if (ret) unlock_page(page); else if (submitted) @@ -1749,35 +1826,46 @@ out: return ret; } -int f2fs_wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, nid_t ino) +int f2fs_wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, + unsigned int seq_id) { - pgoff_t index = 0; - struct pagevec pvec; + struct fsync_node_entry *fn; + struct page *page; + struct list_head *head = &sbi->fsync_node_list; + unsigned long flags; + unsigned int cur_seq_id = 0; int ret2, ret = 0; - int nr_pages; - pagevec_init(&pvec, 0); + while (seq_id && cur_seq_id < seq_id) { + spin_lock_irqsave(&sbi->fsync_node_lock, flags); + if (list_empty(head)) { + spin_unlock_irqrestore(&sbi->fsync_node_lock, flags); + break; + } + fn = list_first_entry(head, struct fsync_node_entry, list); + if (fn->seq_id > seq_id) { + spin_unlock_irqrestore(&sbi->fsync_node_lock, flags); + break; + } + cur_seq_id = fn->seq_id; + page = fn->page; + get_page(page); + spin_unlock_irqrestore(&sbi->fsync_node_lock, flags); - while ((nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index, - PAGECACHE_TAG_WRITEBACK))) { - int i; + f2fs_wait_on_page_writeback(page, NODE, true); + if (TestClearPageError(page)) + ret = -EIO; - for (i = 0; i < nr_pages; i++) { - struct page *page = pvec.pages[i]; + put_page(page); - if (ino && ino_of_node(page) == ino) { - f2fs_wait_on_page_writeback(page, NODE, true); - if (TestClearPageError(page)) - ret = -EIO; - } - } - pagevec_release(&pvec); - cond_resched(); + if (ret) + break; } ret2 = filemap_check_errors(NODE_MAPPING(sbi)); if (!ret) ret = ret2; + return ret; } @@ -2992,8 +3080,15 @@ int __init f2fs_create_node_manager_caches(void) sizeof(struct nat_entry_set)); if (!nat_entry_set_slab) goto destroy_free_nid; + + fsync_node_entry_slab = f2fs_kmem_cache_create("fsync_node_entry", + sizeof(struct fsync_node_entry)); + if (!fsync_node_entry_slab) + goto destroy_nat_entry_set; return 0; +destroy_nat_entry_set: + kmem_cache_destroy(nat_entry_set_slab); destroy_free_nid: kmem_cache_destroy(free_nid_slab); destroy_nat_entry: @@ -3004,6 +3099,7 @@ fail: void f2fs_destroy_node_manager_caches(void) { + kmem_cache_destroy(fsync_node_entry_slab); kmem_cache_destroy(nat_entry_set_slab); kmem_cache_destroy(free_nid_slab); kmem_cache_destroy(nat_entry_slab); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 939495ba5d04d..3368f8c82699a 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1036,6 +1036,10 @@ static void f2fs_put_super(struct super_block *sb) /* our cp_error case, we can wait for any writeback page */ f2fs_flush_merged_writes(sbi); + f2fs_wait_on_all_pages_writeback(sbi); + + f2fs_bug_on(sbi, sbi->fsync_node_num); + iput(sbi->node_inode); iput(sbi->meta_inode); @@ -2911,6 +2915,8 @@ try_onemore: f2fs_init_ino_entry_info(sbi); + f2fs_init_fsync_node_info(sbi); + /* setup f2fs internal modules */ err = f2fs_build_segment_manager(sbi); if (err) { -- GitLab From b94caec4357e19ef667b8c24577ff7aad3f861b5 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 2 Aug 2018 22:59:12 +0800 Subject: [PATCH 0071/2269] f2fs: fix invalid memory access syzbot found the following crash on: HEAD commit: d9bd94c0bcaa Add linux-next specific files for 20180801 git tree: linux-next console output: https://syzkaller.appspot.com/x/log.txt?x=1001189c400000 kernel config: https://syzkaller.appspot.com/x/.config?x=cc8964ea4d04518c dashboard link: https://syzkaller.appspot.com/bug?extid=c966a82db0b14aa37e81 compiler: gcc (GCC) 8.0.1 20180413 (experimental) Unfortunately, I don't have any reproducer for this crash yet. IMPORTANT: if you fix the bug, please add the following tag to the commit: Reported-by: syzbot+c966a82db0b14aa37e81@syzkaller.appspotmail.com loop7: rw=12288, want=8200, limit=20 netlink: 65342 bytes leftover after parsing attributes in process `syz-executor4'. openvswitch: netlink: Message has 8 unknown bytes. kasan: CONFIG_KASAN_INLINE enabled kasan: GPF could be caused by NULL-ptr deref or user memory access general protection fault: 0000 [#1] SMP KASAN CPU: 1 PID: 7615 Comm: syz-executor7 Not tainted 4.18.0-rc7-next-20180801+ #29 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:__read_once_size include/linux/compiler.h:188 [inline] RIP: 0010:compound_head include/linux/page-flags.h:142 [inline] RIP: 0010:PageLocked include/linux/page-flags.h:272 [inline] RIP: 0010:f2fs_put_page fs/f2fs/f2fs.h:2011 [inline] RIP: 0010:validate_checkpoint+0x66d/0xec0 fs/f2fs/checkpoint.c:835 Code: e8 58 05 7f fe 4c 8d 6b 80 4d 8d 74 24 08 48 b8 00 00 00 00 00 fc ff df 4c 89 ea 48 c1 ea 03 c6 04 02 00 4c 89 f2 48 c1 ea 03 <80> 3c 02 00 0f 85 f4 06 00 00 4c 89 ea 4d 8b 7c 24 08 48 b8 00 00 RSP: 0018:ffff8801937cebe8 EFLAGS: 00010246 RAX: dffffc0000000000 RBX: ffff8801937cef30 RCX: ffffc90006035000 RDX: 0000000000000000 RSI: ffffffff82fd9658 RDI: 0000000000000005 RBP: ffff8801937cef58 R08: ffff8801ab254700 R09: fffff94000d9e026 R10: fffff94000d9e026 R11: ffffea0006cf0137 R12: fffffffffffffffb R13: ffff8801937ceeb0 R14: 0000000000000003 R15: ffff880193419b40 FS: 00007f36a61d5700(0000) GS:ffff8801db100000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fc04ff93000 CR3: 00000001d0562000 CR4: 00000000001426e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: f2fs_get_valid_checkpoint+0x436/0x1ec0 fs/f2fs/checkpoint.c:860 f2fs_fill_super+0x2d42/0x8110 fs/f2fs/super.c:2883 mount_bdev+0x314/0x3e0 fs/super.c:1344 f2fs_mount+0x3c/0x50 fs/f2fs/super.c:3133 legacy_get_tree+0x131/0x460 fs/fs_context.c:729 vfs_get_tree+0x1cb/0x5c0 fs/super.c:1743 do_new_mount fs/namespace.c:2603 [inline] do_mount+0x6f2/0x1e20 fs/namespace.c:2927 ksys_mount+0x12d/0x140 fs/namespace.c:3143 __do_sys_mount fs/namespace.c:3157 [inline] __se_sys_mount fs/namespace.c:3154 [inline] __x64_sys_mount+0xbe/0x150 fs/namespace.c:3154 do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x45943a Code: b8 a6 00 00 00 0f 05 48 3d 01 f0 ff ff 0f 83 bd 8a fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 49 89 ca b8 a5 00 00 00 0f 05 <48> 3d 01 f0 ff ff 0f 83 9a 8a fb ff c3 66 0f 1f 84 00 00 00 00 00 RSP: 002b:00007f36a61d4a88 EFLAGS: 00000206 ORIG_RAX: 00000000000000a5 RAX: ffffffffffffffda RBX: 00007f36a61d4b30 RCX: 000000000045943a RDX: 00007f36a61d4ad0 RSI: 0000000020000100 RDI: 00007f36a61d4af0 RBP: 0000000020000100 R08: 00007f36a61d4b30 R09: 00007f36a61d4ad0 R10: 0000000000000000 R11: 0000000000000206 R12: 0000000000000013 R13: 0000000000000000 R14: 00000000004c8ea0 R15: 0000000000000000 Modules linked in: Dumping ftrace buffer: (ftrace buffer empty) ---[ end trace bd8550c129352286 ]--- RIP: 0010:__read_once_size include/linux/compiler.h:188 [inline] RIP: 0010:compound_head include/linux/page-flags.h:142 [inline] RIP: 0010:PageLocked include/linux/page-flags.h:272 [inline] RIP: 0010:f2fs_put_page fs/f2fs/f2fs.h:2011 [inline] RIP: 0010:validate_checkpoint+0x66d/0xec0 fs/f2fs/checkpoint.c:835 Code: e8 58 05 7f fe 4c 8d 6b 80 4d 8d 74 24 08 48 b8 00 00 00 00 00 fc ff df 4c 89 ea 48 c1 ea 03 c6 04 02 00 4c 89 f2 48 c1 ea 03 <80> 3c 02 00 0f 85 f4 06 00 00 4c 89 ea 4d 8b 7c 24 08 48 b8 00 00 RSP: 0018:ffff8801937cebe8 EFLAGS: 00010246 RAX: dffffc0000000000 RBX: ffff8801937cef30 RCX: ffffc90006035000 RDX: 0000000000000000 RSI: ffffffff82fd9658 RDI: 0000000000000005 netlink: 65342 bytes leftover after parsing attributes in process `syz-executor4'. RBP: ffff8801937cef58 R08: ffff8801ab254700 R09: fffff94000d9e026 openvswitch: netlink: Message has 8 unknown bytes. R10: fffff94000d9e026 R11: ffffea0006cf0137 R12: fffffffffffffffb R13: ffff8801937ceeb0 R14: 0000000000000003 R15: ffff880193419b40 FS: 00007f36a61d5700(0000) GS:ffff8801db100000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fc04ff93000 CR3: 00000001d0562000 CR4: 00000000001426e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 In validate_checkpoint(), if we failed to call get_checkpoint_version(), we will pass returned invalid page pointer into f2fs_put_page, cause accessing invalid memory, this patch tries to handle error path correctly to fix this issue. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index f300f98e557a2..1b5f884175933 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -780,6 +780,7 @@ static int get_checkpoint_version(struct f2fs_sb_info *sbi, block_t cp_addr, crc_offset = le32_to_cpu((*cp_block)->checksum_offset); if (crc_offset > (blk_size - sizeof(__le32))) { + f2fs_put_page(*cp_page, 1); f2fs_msg(sbi->sb, KERN_WARNING, "invalid crc_offset: %zu", crc_offset); return -EINVAL; @@ -787,6 +788,7 @@ static int get_checkpoint_version(struct f2fs_sb_info *sbi, block_t cp_addr, crc = cur_cp_crc(*cp_block); if (!f2fs_crc_valid(sbi, crc, *cp_block, crc_offset)) { + f2fs_put_page(*cp_page, 1); f2fs_msg(sbi->sb, KERN_WARNING, "invalid crc value"); return -EINVAL; } @@ -806,14 +808,14 @@ static struct page *validate_checkpoint(struct f2fs_sb_info *sbi, err = get_checkpoint_version(sbi, cp_addr, &cp_block, &cp_page_1, version); if (err) - goto invalid_cp1; + return NULL; if (le32_to_cpu(cp_block->cp_pack_total_block_count) > sbi->blocks_per_seg) { f2fs_msg(sbi->sb, KERN_WARNING, "invalid cp_pack_total_block_count:%u", le32_to_cpu(cp_block->cp_pack_total_block_count)); - goto invalid_cp1; + goto invalid_cp; } pre_version = *version; @@ -821,7 +823,7 @@ static struct page *validate_checkpoint(struct f2fs_sb_info *sbi, err = get_checkpoint_version(sbi, cp_addr, &cp_block, &cp_page_2, version); if (err) - goto invalid_cp2; + goto invalid_cp; cur_version = *version; if (cur_version == pre_version) { @@ -829,9 +831,8 @@ static struct page *validate_checkpoint(struct f2fs_sb_info *sbi, f2fs_put_page(cp_page_2, 1); return cp_page_1; } -invalid_cp2: f2fs_put_page(cp_page_2, 1); -invalid_cp1: +invalid_cp: f2fs_put_page(cp_page_1, 1); return NULL; } -- GitLab From 7b098bebdbaff449631449398e8fb5e975d2087d Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 28 Jul 2018 18:37:58 +0800 Subject: [PATCH 0072/2269] f2fs: fix to reset i_gc_failures correctly Let's reset i_gc_failures to zero when we unset pinned state for file. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 5bac5d9dd7530..ce589374bbc21 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -2810,7 +2810,7 @@ static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg) if (!pin) { clear_inode_flag(inode, FI_PIN_FILE); - F2FS_I(inode)->i_gc_failures[GC_FAILURE_PIN] = 1; + f2fs_i_gc_failures_write(inode, 0); goto done; } -- GitLab From 42ff8417ac04a3a292d5ca7f2e09c6b2bf49a5f5 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 29 Jun 2018 00:19:25 +0800 Subject: [PATCH 0073/2269] f2fs: fix to do sanity check with inline flags https://bugzilla.kernel.org/show_bug.cgi?id=200221 - Overview BUG() in clear_inode() when mounting and un-mounting a corrupted f2fs image - Reproduce - Kernel message [ 538.601448] F2FS-fs (loop0): Invalid segment/section count (31, 24 x 1376257) [ 538.601458] F2FS-fs (loop0): Can't find valid F2FS filesystem in 2th superblock [ 538.724091] F2FS-fs (loop0): Try to recover 2th superblock, ret: 0 [ 538.724102] F2FS-fs (loop0): Mounted with checkpoint version = 2 [ 540.970834] ------------[ cut here ]------------ [ 540.970838] kernel BUG at fs/inode.c:512! [ 540.971750] invalid opcode: 0000 [#1] SMP KASAN PTI [ 540.972755] CPU: 1 PID: 1305 Comm: umount Not tainted 4.18.0-rc1+ #4 [ 540.974034] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 [ 540.982913] RIP: 0010:clear_inode+0xc0/0xd0 [ 540.983774] Code: 8d a3 30 01 00 00 4c 89 e7 e8 1c ec f8 ff 48 8b 83 30 01 00 00 49 39 c4 75 1a 48 c7 83 a0 00 00 00 60 00 00 00 5b 41 5c 5d c3 <0f> 0b 0f 0b 0f 0b 0f 0b 0f 0b 0f 0b 0f 1f 40 00 66 66 66 66 90 55 [ 540.987570] RSP: 0018:ffff8801e34a7b70 EFLAGS: 00010002 [ 540.988636] RAX: 0000000000000000 RBX: ffff8801e9b744e8 RCX: ffffffffb840eb3a [ 540.990063] RDX: dffffc0000000000 RSI: 0000000000000004 RDI: ffff8801e9b746b8 [ 540.991499] RBP: ffff8801e34a7b80 R08: ffffed003d36e8ce R09: ffffed003d36e8ce [ 540.992923] R10: 0000000000000001 R11: ffffed003d36e8cd R12: ffff8801e9b74668 [ 540.994360] R13: ffff8801e9b74760 R14: ffff8801e9b74528 R15: ffff8801e9b74530 [ 540.995786] FS: 00007f4662bdf840(0000) GS:ffff8801f6f00000(0000) knlGS:0000000000000000 [ 540.997403] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 540.998571] CR2: 000000000175c568 CR3: 00000001dcfe6000 CR4: 00000000000006e0 [ 541.000015] Call Trace: [ 541.000554] f2fs_evict_inode+0x253/0x630 [ 541.001381] evict+0x16f/0x290 [ 541.002015] iput+0x280/0x300 [ 541.002654] dentry_unlink_inode+0x165/0x1e0 [ 541.003528] __dentry_kill+0x16a/0x260 [ 541.004300] dentry_kill+0x70/0x250 [ 541.005018] dput+0x154/0x1d0 [ 541.005635] do_one_tree+0x34/0x40 [ 541.006354] shrink_dcache_for_umount+0x3f/0xa0 [ 541.007285] generic_shutdown_super+0x43/0x1c0 [ 541.008192] kill_block_super+0x52/0x80 [ 541.008978] kill_f2fs_super+0x62/0x70 [ 541.009750] deactivate_locked_super+0x6f/0xa0 [ 541.010664] deactivate_super+0x5e/0x80 [ 541.011450] cleanup_mnt+0x61/0xa0 [ 541.012151] __cleanup_mnt+0x12/0x20 [ 541.012893] task_work_run+0xc8/0xf0 [ 541.013635] exit_to_usermode_loop+0x125/0x130 [ 541.014555] do_syscall_64+0x138/0x170 [ 541.015340] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 541.016375] RIP: 0033:0x7f46624bf487 [ 541.017104] Code: 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 31 f6 e9 09 00 00 00 66 0f 1f 84 00 00 00 00 00 b8 a6 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d e1 c9 2b 00 f7 d8 64 89 01 48 [ 541.020923] RSP: 002b:00007fff5e12e9a8 EFLAGS: 00000246 ORIG_RAX: 00000000000000a6 [ 541.022452] RAX: 0000000000000000 RBX: 0000000001753030 RCX: 00007f46624bf487 [ 541.023885] RDX: 0000000000000001 RSI: 0000000000000000 RDI: 000000000175a1e0 [ 541.025318] RBP: 000000000175a1e0 R08: 0000000000000000 R09: 0000000000000014 [ 541.026755] R10: 00000000000006b2 R11: 0000000000000246 R12: 00007f46629c883c [ 541.028186] R13: 0000000000000000 R14: 0000000001753210 R15: 00007fff5e12ec30 [ 541.029626] Modules linked in: snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hwdep snd_hda_core snd_pcm snd_timer snd mac_hid i2c_piix4 soundcore ib_iser rdma_cm iw_cm ib_cm ib_core iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx raid1 raid0 multipath linear 8139too crct10dif_pclmul crc32_pclmul qxl drm_kms_helper syscopyarea aesni_intel sysfillrect sysimgblt fb_sys_fops ttm drm aes_x86_64 crypto_simd cryptd 8139cp glue_helper mii pata_acpi floppy [ 541.039445] ---[ end trace 4ce02f25ff7d3df5 ]--- [ 541.040392] RIP: 0010:clear_inode+0xc0/0xd0 [ 541.041240] Code: 8d a3 30 01 00 00 4c 89 e7 e8 1c ec f8 ff 48 8b 83 30 01 00 00 49 39 c4 75 1a 48 c7 83 a0 00 00 00 60 00 00 00 5b 41 5c 5d c3 <0f> 0b 0f 0b 0f 0b 0f 0b 0f 0b 0f 0b 0f 1f 40 00 66 66 66 66 90 55 [ 541.045042] RSP: 0018:ffff8801e34a7b70 EFLAGS: 00010002 [ 541.046099] RAX: 0000000000000000 RBX: ffff8801e9b744e8 RCX: ffffffffb840eb3a [ 541.047537] RDX: dffffc0000000000 RSI: 0000000000000004 RDI: ffff8801e9b746b8 [ 541.048965] RBP: ffff8801e34a7b80 R08: ffffed003d36e8ce R09: ffffed003d36e8ce [ 541.050402] R10: 0000000000000001 R11: ffffed003d36e8cd R12: ffff8801e9b74668 [ 541.051832] R13: ffff8801e9b74760 R14: ffff8801e9b74528 R15: ffff8801e9b74530 [ 541.053263] FS: 00007f4662bdf840(0000) GS:ffff8801f6f00000(0000) knlGS:0000000000000000 [ 541.054891] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 541.056039] CR2: 000000000175c568 CR3: 00000001dcfe6000 CR4: 00000000000006e0 [ 541.058506] ================================================================== [ 541.059991] BUG: KASAN: stack-out-of-bounds in update_stack_state+0x38c/0x3e0 [ 541.061513] Read of size 8 at addr ffff8801e34a7970 by task umount/1305 [ 541.063302] CPU: 1 PID: 1305 Comm: umount Tainted: G D 4.18.0-rc1+ #4 [ 541.064838] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 [ 541.066778] Call Trace: [ 541.067294] dump_stack+0x7b/0xb5 [ 541.067986] print_address_description+0x70/0x290 [ 541.068941] kasan_report+0x291/0x390 [ 541.069692] ? update_stack_state+0x38c/0x3e0 [ 541.070598] __asan_load8+0x54/0x90 [ 541.071315] update_stack_state+0x38c/0x3e0 [ 541.072172] ? __read_once_size_nocheck.constprop.7+0x20/0x20 [ 541.073340] ? vprintk_func+0x27/0x60 [ 541.074096] ? printk+0xa3/0xd3 [ 541.074762] ? __save_stack_trace+0x5e/0x100 [ 541.075634] unwind_next_frame.part.5+0x18e/0x490 [ 541.076594] ? unwind_dump+0x290/0x290 [ 541.077368] ? __show_regs+0x2c4/0x330 [ 541.078142] __unwind_start+0x106/0x190 [ 541.085422] __save_stack_trace+0x5e/0x100 [ 541.086268] ? __save_stack_trace+0x5e/0x100 [ 541.087161] ? unlink_anon_vmas+0xba/0x2c0 [ 541.087997] save_stack_trace+0x1f/0x30 [ 541.088782] save_stack+0x46/0xd0 [ 541.089475] ? __alloc_pages_slowpath+0x1420/0x1420 [ 541.090477] ? flush_tlb_mm_range+0x15e/0x220 [ 541.091364] ? __dec_node_state+0x24/0xb0 [ 541.092180] ? lock_page_memcg+0x85/0xf0 [ 541.092979] ? unlock_page_memcg+0x16/0x80 [ 541.093812] ? page_remove_rmap+0x198/0x520 [ 541.094674] ? mark_page_accessed+0x133/0x200 [ 541.095559] ? _cond_resched+0x1a/0x50 [ 541.096326] ? unmap_page_range+0xcd4/0xe50 [ 541.097179] ? rb_next+0x58/0x80 [ 541.097845] ? rb_next+0x58/0x80 [ 541.098518] __kasan_slab_free+0x13c/0x1a0 [ 541.099352] ? unlink_anon_vmas+0xba/0x2c0 [ 541.100184] kasan_slab_free+0xe/0x10 [ 541.100934] kmem_cache_free+0x89/0x1e0 [ 541.101724] unlink_anon_vmas+0xba/0x2c0 [ 541.102534] free_pgtables+0x101/0x1b0 [ 541.103299] exit_mmap+0x146/0x2a0 [ 541.103996] ? __ia32_sys_munmap+0x50/0x50 [ 541.104829] ? kasan_check_read+0x11/0x20 [ 541.105649] ? mm_update_next_owner+0x322/0x380 [ 541.106578] mmput+0x8b/0x1d0 [ 541.107191] do_exit+0x43a/0x1390 [ 541.107876] ? mm_update_next_owner+0x380/0x380 [ 541.108791] ? deactivate_super+0x5e/0x80 [ 541.109610] ? cleanup_mnt+0x61/0xa0 [ 541.110351] ? __cleanup_mnt+0x12/0x20 [ 541.111115] ? task_work_run+0xc8/0xf0 [ 541.111879] ? exit_to_usermode_loop+0x125/0x130 [ 541.112817] rewind_stack_do_exit+0x17/0x20 [ 541.113666] RIP: 0033:0x7f46624bf487 [ 541.114404] Code: Bad RIP value. [ 541.115094] RSP: 002b:00007fff5e12e9a8 EFLAGS: 00000246 ORIG_RAX: 00000000000000a6 [ 541.116605] RAX: 0000000000000000 RBX: 0000000001753030 RCX: 00007f46624bf487 [ 541.118034] RDX: 0000000000000001 RSI: 0000000000000000 RDI: 000000000175a1e0 [ 541.119472] RBP: 000000000175a1e0 R08: 0000000000000000 R09: 0000000000000014 [ 541.120890] R10: 00000000000006b2 R11: 0000000000000246 R12: 00007f46629c883c [ 541.122321] R13: 0000000000000000 R14: 0000000001753210 R15: 00007fff5e12ec30 [ 541.124061] The buggy address belongs to the page: [ 541.125042] page:ffffea00078d29c0 count:0 mapcount:0 mapping:0000000000000000 index:0x0 [ 541.126651] flags: 0x2ffff0000000000() [ 541.127418] raw: 02ffff0000000000 dead000000000100 dead000000000200 0000000000000000 [ 541.128963] raw: 0000000000000000 0000000000000000 00000000ffffffff 0000000000000000 [ 541.130516] page dumped because: kasan: bad access detected [ 541.131954] Memory state around the buggy address: [ 541.132924] ffff8801e34a7800: 00 f1 f1 f1 f1 00 f4 f4 f4 f3 f3 f3 f3 00 00 00 [ 541.134378] ffff8801e34a7880: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 541.135814] >ffff8801e34a7900: 00 00 00 00 00 00 00 00 00 00 00 00 00 f1 f1 f1 [ 541.137253] ^ [ 541.138637] ffff8801e34a7980: f1 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 541.140075] ffff8801e34a7a00: 00 00 00 00 00 00 00 00 f3 00 00 00 00 00 00 00 [ 541.141509] ================================================================== - Location https://elixir.bootlin.com/linux/v4.18-rc1/source/fs/inode.c#L512 BUG_ON(inode->i_data.nrpages); The root cause is root directory inode is corrupted, it has both inline_data and inline_dentry flag, and its nlink is zero, so in ->evict(), after dropping all page cache, it grabs page #0 for inline data truncation, result in panic in later clear_inode() where we will check inode->i_data.nrpages value. This patch adds inline flags check in sanity_check_inode, in addition, do sanity check with root inode's nlink. Reported-by Wen Xu Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/inode.c | 20 ++++++++++++++++++++ fs/f2fs/super.c | 3 ++- 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 35d49528b2c18..aa343a5cab445 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -265,6 +265,26 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page) return false; } } + + if (f2fs_has_inline_data(inode) && + (!S_ISREG(inode->i_mode) && !S_ISLNK(inode->i_mode))) { + set_sbi_flag(sbi, SBI_NEED_FSCK); + f2fs_msg(sbi->sb, KERN_WARNING, + "%s: inode (ino=%lx, mode=%u) should not have " + "inline_data, run fsck to fix", + __func__, inode->i_ino, inode->i_mode); + return false; + } + + if (f2fs_has_inline_dentry(inode) && !S_ISDIR(inode->i_mode)) { + set_sbi_flag(sbi, SBI_NEED_FSCK); + f2fs_msg(sbi->sb, KERN_WARNING, + "%s: inode (ino=%lx, mode=%u) should not have " + "inline_dentry, run fsck to fix", + __func__, inode->i_ino, inode->i_mode); + return false; + } + return true; } diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 3368f8c82699a..bc817b76db433 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -2963,7 +2963,8 @@ try_onemore: err = PTR_ERR(root); goto free_stats; } - if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) { + if (!S_ISDIR(root->i_mode) || !root->i_blocks || + !root->i_size || !root->i_nlink) { iput(root); err = -EINVAL; goto free_stats; -- GitLab From fe059de4f9a5c3ed4f89ee77b738a196c9bd6747 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 10 Jul 2018 23:01:45 +0800 Subject: [PATCH 0074/2269] f2fs: fix to do sanity check with block address in main area v2 This patch adds f2fs_is_valid_blkaddr() in below functions to do sanity check with block address to avoid pentential panic: - f2fs_grab_read_bio() - __written_first_block() https://bugzilla.kernel.org/show_bug.cgi?id=200465 - Reproduce - POC (poc.c) #define _GNU_SOURCE #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static void activity(char *mpoint) { char *xattr; int err; err = asprintf(&xattr, "%s/foo/bar/xattr", mpoint); char buf2[113]; memset(buf2, 0, sizeof(buf2)); listxattr(xattr, buf2, sizeof(buf2)); } int main(int argc, char *argv[]) { activity(argv[1]); return 0; } - kernel message [ 844.718738] F2FS-fs (loop0): Mounted with checkpoint version = 2 [ 846.430929] F2FS-fs (loop0): access invalid blkaddr:1024 [ 846.431058] WARNING: CPU: 1 PID: 1249 at fs/f2fs/checkpoint.c:154 f2fs_is_valid_blkaddr+0x10f/0x160 [ 846.431059] Modules linked in: snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hda_core snd_hwdep snd_pcm snd_timer snd input_leds joydev soundcore serio_raw i2c_piix4 mac_hid ib_iser rdma_cm iw_cm ib_cm ib_core configfs iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi autofs4 raid10 raid456 libcrc32c async_raid6_recov async_memcpy async_pq async_xor xor async_tx raid6_pq raid1 raid0 multipath linear qxl ttm crct10dif_pclmul crc32_pclmul drm_kms_helper ghash_clmulni_intel syscopyarea sysfillrect sysimgblt fb_sys_fops pcbc drm 8139too aesni_intel 8139cp floppy psmouse mii aes_x86_64 crypto_simd pata_acpi cryptd glue_helper [ 846.431310] CPU: 1 PID: 1249 Comm: a.out Not tainted 4.18.0-rc3+ #1 [ 846.431312] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 [ 846.431315] RIP: 0010:f2fs_is_valid_blkaddr+0x10f/0x160 [ 846.431316] Code: 00 eb ed 31 c0 83 fa 05 75 ae 48 83 ec 08 48 8b 3f 89 f1 48 c7 c2 fc 0b 0f 8b 48 c7 c6 8b d7 09 8b 88 44 24 07 e8 61 8b ff ff <0f> 0b 0f b6 44 24 07 48 83 c4 08 eb 81 4c 8b 47 10 8b 8f 38 04 00 [ 846.431347] RSP: 0018:ffff961c414a7bc0 EFLAGS: 00010282 [ 846.431349] RAX: 0000000000000000 RBX: ffffc5f787b8ea80 RCX: 0000000000000000 [ 846.431350] RDX: 0000000000000000 RSI: ffff89dfffd165d8 RDI: ffff89dfffd165d8 [ 846.431351] RBP: ffff961c414a7c20 R08: 0000000000000001 R09: 0000000000000248 [ 846.431353] R10: 0000000000000000 R11: 0000000000000248 R12: 0000000000000007 [ 846.431369] R13: ffff89dff5492800 R14: ffff89dfae3aa000 R15: ffff89dff4ff88d0 [ 846.431372] FS: 00007f882e2fb700(0000) GS:ffff89dfffd00000(0000) knlGS:0000000000000000 [ 846.431373] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 846.431374] CR2: 0000000001a88008 CR3: 00000001eb572000 CR4: 00000000000006e0 [ 846.431384] Call Trace: [ 846.431426] f2fs_iget+0x6f4/0xe70 [ 846.431430] ? f2fs_find_entry+0x71/0x90 [ 846.431432] f2fs_lookup+0x1aa/0x390 [ 846.431452] __lookup_slow+0x97/0x150 [ 846.431459] lookup_slow+0x35/0x50 [ 846.431462] walk_component+0x1c6/0x470 [ 846.431479] ? memcg_kmem_charge_memcg+0x70/0x90 [ 846.431488] ? page_add_file_rmap+0x13/0x200 [ 846.431491] path_lookupat+0x76/0x230 [ 846.431501] ? __alloc_pages_nodemask+0xfc/0x280 [ 846.431504] filename_lookup+0xb8/0x1a0 [ 846.431534] ? _cond_resched+0x16/0x40 [ 846.431541] ? kmem_cache_alloc+0x160/0x1d0 [ 846.431549] ? path_listxattr+0x41/0xa0 [ 846.431551] path_listxattr+0x41/0xa0 [ 846.431570] do_syscall_64+0x55/0x100 [ 846.431583] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 846.431607] RIP: 0033:0x7f882de1c0d7 [ 846.431607] Code: f0 ff ff 73 01 c3 48 8b 0d be dd 2b 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 b8 c2 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 91 dd 2b 00 f7 d8 64 89 01 48 [ 846.431639] RSP: 002b:00007ffe8e66c238 EFLAGS: 00000202 ORIG_RAX: 00000000000000c2 [ 846.431641] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f882de1c0d7 [ 846.431642] RDX: 0000000000000071 RSI: 00007ffe8e66c280 RDI: 0000000001a880c0 [ 846.431643] RBP: 00007ffe8e66c300 R08: 0000000001a88010 R09: 0000000000000000 [ 846.431645] R10: 00000000000001ab R11: 0000000000000202 R12: 0000000000400550 [ 846.431646] R13: 00007ffe8e66c400 R14: 0000000000000000 R15: 0000000000000000 [ 846.431648] ---[ end trace abca54df39d14f5c ]--- [ 846.431651] F2FS-fs (loop0): invalid blkaddr: 1024, type: 5, run fsck to fix. [ 846.431762] WARNING: CPU: 1 PID: 1249 at fs/f2fs/f2fs.h:2697 f2fs_iget+0xd17/0xe70 [ 846.431763] Modules linked in: snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hda_core snd_hwdep snd_pcm snd_timer snd input_leds joydev soundcore serio_raw i2c_piix4 mac_hid ib_iser rdma_cm iw_cm ib_cm ib_core configfs iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi autofs4 raid10 raid456 libcrc32c async_raid6_recov async_memcpy async_pq async_xor xor async_tx raid6_pq raid1 raid0 multipath linear qxl ttm crct10dif_pclmul crc32_pclmul drm_kms_helper ghash_clmulni_intel syscopyarea sysfillrect sysimgblt fb_sys_fops pcbc drm 8139too aesni_intel 8139cp floppy psmouse mii aes_x86_64 crypto_simd pata_acpi cryptd glue_helper [ 846.431797] CPU: 1 PID: 1249 Comm: a.out Tainted: G W 4.18.0-rc3+ #1 [ 846.431798] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 [ 846.431800] RIP: 0010:f2fs_iget+0xd17/0xe70 [ 846.431801] Code: ff ff 48 63 d8 e9 e1 f6 ff ff 48 8b 45 c8 41 b8 05 00 00 00 48 c7 c2 d8 e8 0e 8b 48 c7 c6 1d b0 0a 8b 48 8b 38 e8 f9 b4 00 00 <0f> 0b 48 8b 45 c8 f0 80 48 48 04 e9 d8 f9 ff ff 0f 0b 48 8b 43 18 [ 846.431832] RSP: 0018:ffff961c414a7bd0 EFLAGS: 00010282 [ 846.431834] RAX: 0000000000000000 RBX: ffffc5f787b8ea80 RCX: 0000000000000006 [ 846.431835] RDX: 0000000000000000 RSI: 0000000000000096 RDI: ffff89dfffd165d0 [ 846.431836] RBP: ffff961c414a7c20 R08: 0000000000000000 R09: 0000000000000273 [ 846.431837] R10: 0000000000000000 R11: ffff89dfad50ca60 R12: 0000000000000007 [ 846.431838] R13: ffff89dff5492800 R14: ffff89dfae3aa000 R15: ffff89dff4ff88d0 [ 846.431840] FS: 00007f882e2fb700(0000) GS:ffff89dfffd00000(0000) knlGS:0000000000000000 [ 846.431841] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 846.431842] CR2: 0000000001a88008 CR3: 00000001eb572000 CR4: 00000000000006e0 [ 846.431846] Call Trace: [ 846.431850] ? f2fs_find_entry+0x71/0x90 [ 846.431853] f2fs_lookup+0x1aa/0x390 [ 846.431856] __lookup_slow+0x97/0x150 [ 846.431858] lookup_slow+0x35/0x50 [ 846.431874] walk_component+0x1c6/0x470 [ 846.431878] ? memcg_kmem_charge_memcg+0x70/0x90 [ 846.431880] ? page_add_file_rmap+0x13/0x200 [ 846.431882] path_lookupat+0x76/0x230 [ 846.431884] ? __alloc_pages_nodemask+0xfc/0x280 [ 846.431886] filename_lookup+0xb8/0x1a0 [ 846.431890] ? _cond_resched+0x16/0x40 [ 846.431891] ? kmem_cache_alloc+0x160/0x1d0 [ 846.431894] ? path_listxattr+0x41/0xa0 [ 846.431896] path_listxattr+0x41/0xa0 [ 846.431898] do_syscall_64+0x55/0x100 [ 846.431901] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 846.431902] RIP: 0033:0x7f882de1c0d7 [ 846.431903] Code: f0 ff ff 73 01 c3 48 8b 0d be dd 2b 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 b8 c2 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 91 dd 2b 00 f7 d8 64 89 01 48 [ 846.431934] RSP: 002b:00007ffe8e66c238 EFLAGS: 00000202 ORIG_RAX: 00000000000000c2 [ 846.431936] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f882de1c0d7 [ 846.431937] RDX: 0000000000000071 RSI: 00007ffe8e66c280 RDI: 0000000001a880c0 [ 846.431939] RBP: 00007ffe8e66c300 R08: 0000000001a88010 R09: 0000000000000000 [ 846.431940] R10: 00000000000001ab R11: 0000000000000202 R12: 0000000000400550 [ 846.431941] R13: 00007ffe8e66c400 R14: 0000000000000000 R15: 0000000000000000 [ 846.431943] ---[ end trace abca54df39d14f5d ]--- [ 846.432033] F2FS-fs (loop0): access invalid blkaddr:1024 [ 846.432051] WARNING: CPU: 1 PID: 1249 at fs/f2fs/checkpoint.c:154 f2fs_is_valid_blkaddr+0x10f/0x160 [ 846.432051] Modules linked in: snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hda_core snd_hwdep snd_pcm snd_timer snd input_leds joydev soundcore serio_raw i2c_piix4 mac_hid ib_iser rdma_cm iw_cm ib_cm ib_core configfs iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi autofs4 raid10 raid456 libcrc32c async_raid6_recov async_memcpy async_pq async_xor xor async_tx raid6_pq raid1 raid0 multipath linear qxl ttm crct10dif_pclmul crc32_pclmul drm_kms_helper ghash_clmulni_intel syscopyarea sysfillrect sysimgblt fb_sys_fops pcbc drm 8139too aesni_intel 8139cp floppy psmouse mii aes_x86_64 crypto_simd pata_acpi cryptd glue_helper [ 846.432085] CPU: 1 PID: 1249 Comm: a.out Tainted: G W 4.18.0-rc3+ #1 [ 846.432086] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 [ 846.432089] RIP: 0010:f2fs_is_valid_blkaddr+0x10f/0x160 [ 846.432089] Code: 00 eb ed 31 c0 83 fa 05 75 ae 48 83 ec 08 48 8b 3f 89 f1 48 c7 c2 fc 0b 0f 8b 48 c7 c6 8b d7 09 8b 88 44 24 07 e8 61 8b ff ff <0f> 0b 0f b6 44 24 07 48 83 c4 08 eb 81 4c 8b 47 10 8b 8f 38 04 00 [ 846.432120] RSP: 0018:ffff961c414a7900 EFLAGS: 00010286 [ 846.432122] RAX: 0000000000000000 RBX: 0000000000000400 RCX: 0000000000000006 [ 846.432123] RDX: 0000000000000000 RSI: 0000000000000096 RDI: ffff89dfffd165d0 [ 846.432124] RBP: ffff89dff5492800 R08: 0000000000000001 R09: 000000000000029d [ 846.432125] R10: ffff961c414a7820 R11: 000000000000029d R12: 0000000000000400 [ 846.432126] R13: 0000000000000000 R14: ffff89dff4ff88d0 R15: 0000000000000000 [ 846.432128] FS: 00007f882e2fb700(0000) GS:ffff89dfffd00000(0000) knlGS:0000000000000000 [ 846.432130] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 846.432131] CR2: 0000000001a88008 CR3: 00000001eb572000 CR4: 00000000000006e0 [ 846.432135] Call Trace: [ 846.432151] f2fs_wait_on_block_writeback+0x20/0x110 [ 846.432158] f2fs_grab_read_bio+0xbc/0xe0 [ 846.432161] f2fs_submit_page_read+0x21/0x280 [ 846.432163] f2fs_get_read_data_page+0xb7/0x3c0 [ 846.432165] f2fs_get_lock_data_page+0x29/0x1e0 [ 846.432167] f2fs_get_new_data_page+0x148/0x550 [ 846.432170] f2fs_add_regular_entry+0x1d2/0x550 [ 846.432178] ? __switch_to+0x12f/0x460 [ 846.432181] f2fs_add_dentry+0x6a/0xd0 [ 846.432184] f2fs_do_add_link+0xe9/0x140 [ 846.432186] __recover_dot_dentries+0x260/0x280 [ 846.432189] f2fs_lookup+0x343/0x390 [ 846.432193] __lookup_slow+0x97/0x150 [ 846.432195] lookup_slow+0x35/0x50 [ 846.432208] walk_component+0x1c6/0x470 [ 846.432212] ? memcg_kmem_charge_memcg+0x70/0x90 [ 846.432215] ? page_add_file_rmap+0x13/0x200 [ 846.432217] path_lookupat+0x76/0x230 [ 846.432219] ? __alloc_pages_nodemask+0xfc/0x280 [ 846.432221] filename_lookup+0xb8/0x1a0 [ 846.432224] ? _cond_resched+0x16/0x40 [ 846.432226] ? kmem_cache_alloc+0x160/0x1d0 [ 846.432228] ? path_listxattr+0x41/0xa0 [ 846.432230] path_listxattr+0x41/0xa0 [ 846.432233] do_syscall_64+0x55/0x100 [ 846.432235] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 846.432237] RIP: 0033:0x7f882de1c0d7 [ 846.432237] Code: f0 ff ff 73 01 c3 48 8b 0d be dd 2b 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 b8 c2 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 91 dd 2b 00 f7 d8 64 89 01 48 [ 846.432269] RSP: 002b:00007ffe8e66c238 EFLAGS: 00000202 ORIG_RAX: 00000000000000c2 [ 846.432271] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f882de1c0d7 [ 846.432272] RDX: 0000000000000071 RSI: 00007ffe8e66c280 RDI: 0000000001a880c0 [ 846.432273] RBP: 00007ffe8e66c300 R08: 0000000001a88010 R09: 0000000000000000 [ 846.432274] R10: 00000000000001ab R11: 0000000000000202 R12: 0000000000400550 [ 846.432275] R13: 00007ffe8e66c400 R14: 0000000000000000 R15: 0000000000000000 [ 846.432277] ---[ end trace abca54df39d14f5e ]--- [ 846.432279] F2FS-fs (loop0): invalid blkaddr: 1024, type: 5, run fsck to fix. [ 846.432376] WARNING: CPU: 1 PID: 1249 at fs/f2fs/f2fs.h:2697 f2fs_wait_on_block_writeback+0xb1/0x110 [ 846.432376] Modules linked in: snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hda_core snd_hwdep snd_pcm snd_timer snd input_leds joydev soundcore serio_raw i2c_piix4 mac_hid ib_iser rdma_cm iw_cm ib_cm ib_core configfs iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi autofs4 raid10 raid456 libcrc32c async_raid6_recov async_memcpy async_pq async_xor xor async_tx raid6_pq raid1 raid0 multipath linear qxl ttm crct10dif_pclmul crc32_pclmul drm_kms_helper ghash_clmulni_intel syscopyarea sysfillrect sysimgblt fb_sys_fops pcbc drm 8139too aesni_intel 8139cp floppy psmouse mii aes_x86_64 crypto_simd pata_acpi cryptd glue_helper [ 846.432410] CPU: 1 PID: 1249 Comm: a.out Tainted: G W 4.18.0-rc3+ #1 [ 846.432411] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 [ 846.432413] RIP: 0010:f2fs_wait_on_block_writeback+0xb1/0x110 [ 846.432414] Code: 66 90 f0 ff 4b 34 74 59 5b 5d c3 48 8b 7d 00 41 b8 05 00 00 00 89 d9 48 c7 c2 d8 e8 0e 8b 48 c7 c6 1d b0 0a 8b e8 df bc fd ff <0f> 0b f0 80 4d 48 04 e9 67 ff ff ff 48 8b 03 48 c1 e8 37 83 e0 07 [ 846.432445] RSP: 0018:ffff961c414a7910 EFLAGS: 00010286 [ 846.432447] RAX: 0000000000000000 RBX: 0000000000000400 RCX: 0000000000000006 [ 846.432448] RDX: 0000000000000000 RSI: 0000000000000092 RDI: ffff89dfffd165d0 [ 846.432449] RBP: ffff89dff5492800 R08: 0000000000000000 R09: 00000000000002d1 [ 846.432450] R10: ffff961c414a7820 R11: ffff89dfad50cf80 R12: 0000000000000400 [ 846.432451] R13: 0000000000000000 R14: ffff89dff4ff88d0 R15: 0000000000000000 [ 846.432453] FS: 00007f882e2fb700(0000) GS:ffff89dfffd00000(0000) knlGS:0000000000000000 [ 846.432454] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 846.432455] CR2: 0000000001a88008 CR3: 00000001eb572000 CR4: 00000000000006e0 [ 846.432459] Call Trace: [ 846.432463] f2fs_grab_read_bio+0xbc/0xe0 [ 846.432464] f2fs_submit_page_read+0x21/0x280 [ 846.432466] f2fs_get_read_data_page+0xb7/0x3c0 [ 846.432468] f2fs_get_lock_data_page+0x29/0x1e0 [ 846.432470] f2fs_get_new_data_page+0x148/0x550 [ 846.432473] f2fs_add_regular_entry+0x1d2/0x550 [ 846.432475] ? __switch_to+0x12f/0x460 [ 846.432477] f2fs_add_dentry+0x6a/0xd0 [ 846.432480] f2fs_do_add_link+0xe9/0x140 [ 846.432483] __recover_dot_dentries+0x260/0x280 [ 846.432485] f2fs_lookup+0x343/0x390 [ 846.432488] __lookup_slow+0x97/0x150 [ 846.432490] lookup_slow+0x35/0x50 [ 846.432505] walk_component+0x1c6/0x470 [ 846.432509] ? memcg_kmem_charge_memcg+0x70/0x90 [ 846.432511] ? page_add_file_rmap+0x13/0x200 [ 846.432513] path_lookupat+0x76/0x230 [ 846.432515] ? __alloc_pages_nodemask+0xfc/0x280 [ 846.432517] filename_lookup+0xb8/0x1a0 [ 846.432520] ? _cond_resched+0x16/0x40 [ 846.432522] ? kmem_cache_alloc+0x160/0x1d0 [ 846.432525] ? path_listxattr+0x41/0xa0 [ 846.432526] path_listxattr+0x41/0xa0 [ 846.432529] do_syscall_64+0x55/0x100 [ 846.432531] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 846.432533] RIP: 0033:0x7f882de1c0d7 [ 846.432533] Code: f0 ff ff 73 01 c3 48 8b 0d be dd 2b 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 b8 c2 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 91 dd 2b 00 f7 d8 64 89 01 48 [ 846.432565] RSP: 002b:00007ffe8e66c238 EFLAGS: 00000202 ORIG_RAX: 00000000000000c2 [ 846.432567] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f882de1c0d7 [ 846.432568] RDX: 0000000000000071 RSI: 00007ffe8e66c280 RDI: 0000000001a880c0 [ 846.432569] RBP: 00007ffe8e66c300 R08: 0000000001a88010 R09: 0000000000000000 [ 846.432570] R10: 00000000000001ab R11: 0000000000000202 R12: 0000000000400550 [ 846.432571] R13: 00007ffe8e66c400 R14: 0000000000000000 R15: 0000000000000000 [ 846.432573] ---[ end trace abca54df39d14f5f ]--- [ 846.434280] BUG: unable to handle kernel NULL pointer dereference at 0000000000000008 [ 846.434424] PGD 80000001ebd3a067 P4D 80000001ebd3a067 PUD 1eb1ae067 PMD 0 [ 846.434551] Oops: 0000 [#1] SMP PTI [ 846.434697] CPU: 0 PID: 44 Comm: kworker/u5:0 Tainted: G W 4.18.0-rc3+ #1 [ 846.434805] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 [ 846.435000] Workqueue: fscrypt_read_queue decrypt_work [ 846.435174] RIP: 0010:fscrypt_do_page_crypto+0x6e/0x2d0 [ 846.435351] Code: 00 65 48 8b 04 25 28 00 00 00 48 89 84 24 88 00 00 00 31 c0 e8 43 c2 e0 ff 49 8b 86 48 02 00 00 85 ed c7 44 24 70 00 00 00 00 <48> 8b 58 08 0f 84 14 02 00 00 48 8b 78 10 48 8b 0c 24 48 c7 84 24 [ 846.435696] RSP: 0018:ffff961c40f9bd60 EFLAGS: 00010206 [ 846.435870] RAX: 0000000000000000 RBX: ffffc5f787719b80 RCX: ffffc5f787719b80 [ 846.436051] RDX: ffffffff8b9f4b88 RSI: ffffffff8b0ae622 RDI: ffff961c40f9bdb8 [ 846.436261] RBP: 0000000000001000 R08: ffffc5f787719b80 R09: 0000000000001000 [ 846.436433] R10: 0000000000000018 R11: fefefefefefefeff R12: ffffc5f787719b80 [ 846.436562] R13: ffffc5f787719b80 R14: ffff89dff4ff88d0 R15: 0ffff89dfaddee60 [ 846.436658] FS: 0000000000000000(0000) GS:ffff89dfffc00000(0000) knlGS:0000000000000000 [ 846.436758] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 846.436898] CR2: 0000000000000008 CR3: 00000001eddd0000 CR4: 00000000000006f0 [ 846.437001] Call Trace: [ 846.437181] ? check_preempt_wakeup+0xf2/0x230 [ 846.437276] ? check_preempt_curr+0x7c/0x90 [ 846.437370] fscrypt_decrypt_page+0x48/0x4d [ 846.437466] __fscrypt_decrypt_bio+0x5b/0x90 [ 846.437542] decrypt_work+0x12/0x20 [ 846.437651] process_one_work+0x15e/0x3d0 [ 846.437740] worker_thread+0x4c/0x440 [ 846.437848] kthread+0xf8/0x130 [ 846.437938] ? rescuer_thread+0x350/0x350 [ 846.438022] ? kthread_associate_blkcg+0x90/0x90 [ 846.438117] ret_from_fork+0x35/0x40 [ 846.438201] Modules linked in: snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hda_core snd_hwdep snd_pcm snd_timer snd input_leds joydev soundcore serio_raw i2c_piix4 mac_hid ib_iser rdma_cm iw_cm ib_cm ib_core configfs iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi autofs4 raid10 raid456 libcrc32c async_raid6_recov async_memcpy async_pq async_xor xor async_tx raid6_pq raid1 raid0 multipath linear qxl ttm crct10dif_pclmul crc32_pclmul drm_kms_helper ghash_clmulni_intel syscopyarea sysfillrect sysimgblt fb_sys_fops pcbc drm 8139too aesni_intel 8139cp floppy psmouse mii aes_x86_64 crypto_simd pata_acpi cryptd glue_helper [ 846.438653] CR2: 0000000000000008 [ 846.438713] ---[ end trace abca54df39d14f60 ]--- [ 846.438796] RIP: 0010:fscrypt_do_page_crypto+0x6e/0x2d0 [ 846.438844] Code: 00 65 48 8b 04 25 28 00 00 00 48 89 84 24 88 00 00 00 31 c0 e8 43 c2 e0 ff 49 8b 86 48 02 00 00 85 ed c7 44 24 70 00 00 00 00 <48> 8b 58 08 0f 84 14 02 00 00 48 8b 78 10 48 8b 0c 24 48 c7 84 24 [ 846.439084] RSP: 0018:ffff961c40f9bd60 EFLAGS: 00010206 [ 846.439176] RAX: 0000000000000000 RBX: ffffc5f787719b80 RCX: ffffc5f787719b80 [ 846.440927] RDX: ffffffff8b9f4b88 RSI: ffffffff8b0ae622 RDI: ffff961c40f9bdb8 [ 846.442083] RBP: 0000000000001000 R08: ffffc5f787719b80 R09: 0000000000001000 [ 846.443284] R10: 0000000000000018 R11: fefefefefefefeff R12: ffffc5f787719b80 [ 846.444448] R13: ffffc5f787719b80 R14: ffff89dff4ff88d0 R15: 0ffff89dfaddee60 [ 846.445558] FS: 0000000000000000(0000) GS:ffff89dfffc00000(0000) knlGS:0000000000000000 [ 846.446687] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 846.447796] CR2: 0000000000000008 CR3: 00000001eddd0000 CR4: 00000000000006f0 - Location https://elixir.bootlin.com/linux/v4.18-rc4/source/fs/crypto/crypto.c#L149 struct crypto_skcipher *tfm = ci->ci_ctfm; Here ci can be NULL Note that this issue maybe require CONFIG_F2FS_FS_ENCRYPTION=y to reproduce. Reported-by Wen Xu Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 3 +++ fs/f2fs/inode.c | 18 +++++++++++++----- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 796f5e5aa6f0c..22dd6b9e8484d 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -546,6 +546,9 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr, struct bio_post_read_ctx *ctx; unsigned int post_read_steps = 0; + if (!f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC)) + return ERR_PTR(-EFAULT); + bio = f2fs_bio_alloc(sbi, min_t(int, nr_pages, BIO_MAX_PAGES), false); if (!bio) return ERR_PTR(-ENOMEM); diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index aa343a5cab445..fc2c98b9e255b 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -68,14 +68,16 @@ static void __get_inode_rdev(struct inode *inode, struct f2fs_inode *ri) } } -static bool __written_first_block(struct f2fs_sb_info *sbi, +static int __written_first_block(struct f2fs_sb_info *sbi, struct f2fs_inode *ri) { block_t addr = le32_to_cpu(ri->i_addr[offset_in_addr(ri)]); - if (is_valid_data_blkaddr(sbi, addr)) - return true; - return false; + if (!__is_valid_data_blkaddr(addr)) + return 1; + if (!f2fs_is_valid_blkaddr(sbi, addr, DATA_GENERIC)) + return -EFAULT; + return 0; } static void __set_inode_rdev(struct inode *inode, struct f2fs_inode *ri) @@ -295,6 +297,7 @@ static int do_read_inode(struct inode *inode) struct page *node_page; struct f2fs_inode *ri; projid_t i_projid; + int err; /* Check if ino is within scope */ if (f2fs_check_nid_range(sbi, inode->i_ino)) @@ -368,7 +371,12 @@ static int do_read_inode(struct inode *inode) /* get rdev by using inline_info */ __get_inode_rdev(inode, ri); - if (__written_first_block(sbi, ri)) + err = __written_first_block(sbi, ri); + if (err < 0) { + f2fs_put_page(node_page, 1); + return err; + } + if (!err) set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN); if (!f2fs_need_inode_block_update(sbi, inode->i_ino)) -- GitLab From 4b93c602278fd58fcc4d74355d378debccec98c9 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sun, 5 Aug 2018 23:02:22 +0800 Subject: [PATCH 0075/2269] f2fs: avoid race between zero_range and background GC Thread A Background GC - f2fs_zero_range - truncate_pagecache_range - gc_data_segment - get_read_data_page - move_data_page - set_page_dirty - set_cold_data - f2fs_do_zero_range - dn->data_blkaddr = NEW_ADDR; - f2fs_set_data_blkaddr Actually, we don't need to set dirty & checked flag on the page, since all valid data in the page should be zeroed by zero_range(). Use i_gc_rwsem[WRITE] to avoid such race condition. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index ce589374bbc21..6feabc9197b81 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1314,8 +1314,6 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len, if (ret) goto out_sem; - truncate_pagecache_range(inode, offset, offset + len - 1); - pg_start = ((unsigned long long) offset) >> PAGE_SHIFT; pg_end = ((unsigned long long) offset + len) >> PAGE_SHIFT; @@ -1345,12 +1343,19 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len, unsigned int end_offset; pgoff_t end; + down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + + truncate_pagecache_range(inode, + (loff_t)index << PAGE_SHIFT, + ((loff_t)pg_end << PAGE_SHIFT) - 1); + f2fs_lock_op(sbi); set_new_dnode(&dn, inode, NULL, NULL, 0); ret = f2fs_get_dnode_of_data(&dn, index, ALLOC_NODE); if (ret) { f2fs_unlock_op(sbi); + up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); goto out; } @@ -1359,7 +1364,9 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len, ret = f2fs_do_zero_range(&dn, index, end); f2fs_put_dnode(&dn); + f2fs_unlock_op(sbi); + up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); f2fs_balance_fs(sbi, dn.node_changed); -- GitLab From e2827ff5b8cab0f0aa9c6342114e19159d2de2ab Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sun, 5 Aug 2018 23:04:25 +0800 Subject: [PATCH 0076/2269] f2fs: fix avoid race between truncate and background GC Thread A Background GC - f2fs_setattr isize to 0 - truncate_setsize - gc_data_segment - f2fs_get_read_data_page page #0 - set_page_dirty - set_cold_data - f2fs_truncate - f2fs_setattr isize to 4k - read 4k <--- hit data in cached page #0 Above race condition can cause read out invalid data in a truncated page, fix it by i_gc_rwsem[WRITE] lock. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 4 ++++ fs/f2fs/file.c | 37 +++++++++++++++++++++++-------------- 2 files changed, 27 insertions(+), 14 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 22dd6b9e8484d..87ffc7a882171 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -2199,8 +2199,12 @@ static void f2fs_write_failed(struct address_space *mapping, loff_t to) if (to > i_size) { down_write(&F2FS_I(inode)->i_mmap_sem); + down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + truncate_pagecache(inode, i_size); f2fs_truncate_blocks(inode, i_size, true); + + up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); up_write(&F2FS_I(inode)->i_mmap_sem); } } diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 6feabc9197b81..c4197ade9c7c6 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -796,22 +796,26 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr) } if (attr->ia_valid & ATTR_SIZE) { - if (attr->ia_size <= i_size_read(inode)) { - down_write(&F2FS_I(inode)->i_mmap_sem); - truncate_setsize(inode, attr->ia_size); + bool to_smaller = (attr->ia_size <= i_size_read(inode)); + + down_write(&F2FS_I(inode)->i_mmap_sem); + down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + + truncate_setsize(inode, attr->ia_size); + + if (to_smaller) err = f2fs_truncate(inode); - up_write(&F2FS_I(inode)->i_mmap_sem); - if (err) - return err; - } else { - /* - * do not trim all blocks after i_size if target size is - * larger than i_size. - */ - down_write(&F2FS_I(inode)->i_mmap_sem); - truncate_setsize(inode, attr->ia_size); - up_write(&F2FS_I(inode)->i_mmap_sem); + /* + * do not trim all blocks after i_size if target size is + * larger than i_size. + */ + up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + up_write(&F2FS_I(inode)->i_mmap_sem); + if (err) + return err; + + if (!to_smaller) { /* should convert inline inode here */ if (!f2fs_may_inline_data(inode)) { err = f2fs_convert_inline_inode(inode); @@ -958,13 +962,18 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len) blk_start = (loff_t)pg_start << PAGE_SHIFT; blk_end = (loff_t)pg_end << PAGE_SHIFT; + down_write(&F2FS_I(inode)->i_mmap_sem); + down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + truncate_inode_pages_range(mapping, blk_start, blk_end - 1); f2fs_lock_op(sbi); ret = f2fs_truncate_hole(inode, pg_start, pg_end); f2fs_unlock_op(sbi); + + up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); up_write(&F2FS_I(inode)->i_mmap_sem); } } -- GitLab From e3f12d2a3b02ed0fc875007b1a8c5d68201868ff Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sun, 5 Aug 2018 23:08:59 +0800 Subject: [PATCH 0077/2269] f2fs: refresh recent accessed nat entry in lru list Introduce nat_list_lock to protect nm_i->nat_entries list, and manage it as a LRU list, refresh location for therein recent accessed entries in the list. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 1 + fs/f2fs/node.c | 46 ++++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 43 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 7383cfa573d15..0dea0e594a523 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -781,6 +781,7 @@ struct f2fs_nm_info { struct radix_tree_root nat_set_root;/* root of the nat set cache */ struct rw_semaphore nat_tree_lock; /* protect nat_tree_lock */ struct list_head nat_entries; /* cached nat entry list (clean) */ + spinlock_t nat_list_lock; /* protect clean nat entry list */ unsigned int nat_cnt; /* the # of cached nat entries */ unsigned int dirty_nat_cnt; /* total num of nat entries in set */ unsigned int nat_blocks; /* # of nat blocks */ diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 041e6433d0fec..7d920bce89e86 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -174,14 +174,30 @@ static struct nat_entry *__init_nat_entry(struct f2fs_nm_info *nm_i, if (raw_ne) node_info_from_raw_nat(&ne->ni, raw_ne); + + spin_lock(&nm_i->nat_list_lock); list_add_tail(&ne->list, &nm_i->nat_entries); + spin_unlock(&nm_i->nat_list_lock); + nm_i->nat_cnt++; return ne; } static struct nat_entry *__lookup_nat_cache(struct f2fs_nm_info *nm_i, nid_t n) { - return radix_tree_lookup(&nm_i->nat_root, n); + struct nat_entry *ne; + + ne = radix_tree_lookup(&nm_i->nat_root, n); + + /* for recent accessed nat entry, move it to tail of lru list */ + if (ne && !get_nat_flag(ne, IS_DIRTY)) { + spin_lock(&nm_i->nat_list_lock); + if (!list_empty(&ne->list)) + list_move_tail(&ne->list, &nm_i->nat_entries); + spin_unlock(&nm_i->nat_list_lock); + } + + return ne; } static unsigned int __gang_lookup_nat_cache(struct f2fs_nm_info *nm_i, @@ -192,7 +208,6 @@ static unsigned int __gang_lookup_nat_cache(struct f2fs_nm_info *nm_i, static void __del_from_nat_cache(struct f2fs_nm_info *nm_i, struct nat_entry *e) { - list_del(&e->list); radix_tree_delete(&nm_i->nat_root, nat_get_nid(e)); nm_i->nat_cnt--; __free_nat_entry(e); @@ -243,16 +258,21 @@ static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i, nm_i->dirty_nat_cnt++; set_nat_flag(ne, IS_DIRTY, true); refresh_list: + spin_lock(&nm_i->nat_list_lock); if (new_ne) list_del_init(&ne->list); else list_move_tail(&ne->list, &head->entry_list); + spin_unlock(&nm_i->nat_list_lock); } static void __clear_nat_cache_dirty(struct f2fs_nm_info *nm_i, struct nat_entry_set *set, struct nat_entry *ne) { + spin_lock(&nm_i->nat_list_lock); list_move_tail(&ne->list, &nm_i->nat_entries); + spin_unlock(&nm_i->nat_list_lock); + set_nat_flag(ne, IS_DIRTY, false); set->entry_cnt--; nm_i->dirty_nat_cnt--; @@ -469,13 +489,25 @@ int f2fs_try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink) if (!down_write_trylock(&nm_i->nat_tree_lock)) return 0; - while (nr_shrink && !list_empty(&nm_i->nat_entries)) { + spin_lock(&nm_i->nat_list_lock); + while (nr_shrink) { struct nat_entry *ne; + + if (list_empty(&nm_i->nat_entries)) + break; + ne = list_first_entry(&nm_i->nat_entries, struct nat_entry, list); + list_del(&ne->list); + spin_unlock(&nm_i->nat_list_lock); + __del_from_nat_cache(nm_i, ne); nr_shrink--; + + spin_lock(&nm_i->nat_list_lock); } + spin_unlock(&nm_i->nat_list_lock); + up_write(&nm_i->nat_tree_lock); return nr - nr_shrink; } @@ -2906,6 +2938,7 @@ static int init_node_manager(struct f2fs_sb_info *sbi) INIT_RADIX_TREE(&nm_i->nat_root, GFP_NOIO); INIT_RADIX_TREE(&nm_i->nat_set_root, GFP_NOIO); INIT_LIST_HEAD(&nm_i->nat_entries); + spin_lock_init(&nm_i->nat_list_lock); mutex_init(&nm_i->build_lock); spin_lock_init(&nm_i->nid_list_lock); @@ -3024,8 +3057,13 @@ void f2fs_destroy_node_manager(struct f2fs_sb_info *sbi) unsigned idx; nid = nat_get_nid(natvec[found - 1]) + 1; - for (idx = 0; idx < found; idx++) + for (idx = 0; idx < found; idx++) { + spin_lock(&nm_i->nat_list_lock); + list_del(&natvec[idx]->list); + spin_unlock(&nm_i->nat_list_lock); + __del_from_nat_cache(nm_i, natvec[idx]); + } } f2fs_bug_on(sbi, nm_i->nat_cnt); -- GitLab From 85a3281d2ac7fd659cd8a726a9a750f655b5d3c2 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sun, 5 Aug 2018 23:09:00 +0800 Subject: [PATCH 0078/2269] f2fs: fix incorrect range->len in f2fs_trim_fs() generic/260 reported below error: [+] Default length with start set (should succeed) [+] Length beyond the end of fs (should succeed) [+] Length beyond the end of fs with start set (should succeed) +./tests/generic/260: line 94: [: 18446744073709551615: integer expression expected +./tests/generic/260: line 104: [: 18446744073709551615: integer expression expected Test done ... In f2fs_trim_fs(), if there is no discard being trimmed, we need to correct range->len before return. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 3662e1f429b4f..eada91dae08ac 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2582,8 +2582,9 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) trimmed += __wait_discard_cmd_range(sbi, &dpolicy, start_block, end_block); - range->len = F2FS_BLK_TO_BYTES(trimmed); out: + if (!err) + range->len = F2FS_BLK_TO_BYTES(trimmed); return err; } -- GitLab From d89f355377080a2ec464f264eecf8ca5bb567f89 Mon Sep 17 00:00:00 2001 From: Sheng Yong Date: Sun, 5 Aug 2018 12:45:35 +0800 Subject: [PATCH 0079/2269] f2fs: wake up gc thread immediately when gc_urgent is set Fixes: 5b0e95398e2b ("f2fs: introduce sbi->gc_mode to determine the policy") Signed-off-by: Sheng Yong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/sysfs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 79e47e7d737c8..d4c196552888c 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -253,6 +253,7 @@ out: if (t >= 1) { sbi->gc_mode = GC_URGENT; if (sbi->gc_thread) { + sbi->gc_thread->gc_wake = 1; wake_up_interruptible_all( &sbi->gc_thread->gc_wait_queue_head); wake_up_discard_thread(sbi, true); -- GitLab From 4ac6304174f396ab83474fd7eb1f1a6772ba02bf Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 6 Aug 2018 22:43:50 +0800 Subject: [PATCH 0080/2269] f2fs: split discard command in prior to block layer Some devices has small max_{hw,}discard_sectors, so that in __blkdev_issue_discard(), one big size discard bio can be split into multiple small size discard bios, result in heavy load in IO scheduler and device, which can hang other sync IO for long time. Now, f2fs is trying to control discard commands more elaboratively, in order to make less conflict in between discard IO and user IO to enhance application's performance, so in this patch, we will split discard bio in f2fs in prior to in block layer to reduce issuing multiple discard bios in a short time. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 23 +++---- fs/f2fs/segment.c | 148 +++++++++++++++++++++++++++++++++++----------- 2 files changed, 127 insertions(+), 44 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 0dea0e594a523..02885b6f83dc7 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -179,7 +179,6 @@ enum { #define MAX_DISCARD_BLOCKS(sbi) BLKS_PER_SEC(sbi) #define DEF_MAX_DISCARD_REQUEST 8 /* issue 8 discards per round */ -#define DEF_MAX_DISCARD_LEN 512 /* Max. 2MB per discard */ #define DEF_MIN_DISCARD_ISSUE_TIME 50 /* 50 ms, if exists */ #define DEF_MID_DISCARD_ISSUE_TIME 500 /* 500 ms, if device busy */ #define DEF_MAX_DISCARD_ISSUE_TIME 60000 /* 60 s, if no candidates */ @@ -251,9 +250,10 @@ struct discard_entry { (MAX_PLIST_NUM - 1) : (blk_num - 1)) enum { - D_PREP, - D_SUBMIT, - D_DONE, + D_PREP, /* initial */ + D_PARTIAL, /* partially submitted */ + D_SUBMIT, /* all submitted */ + D_DONE, /* finished */ }; struct discard_info { @@ -278,7 +278,10 @@ struct discard_cmd { struct block_device *bdev; /* bdev */ unsigned short ref; /* reference count */ unsigned char state; /* state */ + unsigned char issuing; /* issuing discard */ int error; /* bio error */ + spinlock_t lock; /* for state/bio_ref updating */ + unsigned short bio_ref; /* bio reference count */ }; enum { @@ -711,22 +714,22 @@ static inline void set_extent_info(struct extent_info *ei, unsigned int fofs, } static inline bool __is_discard_mergeable(struct discard_info *back, - struct discard_info *front) + struct discard_info *front, unsigned int max_len) { return (back->lstart + back->len == front->lstart) && - (back->len + front->len < DEF_MAX_DISCARD_LEN); + (back->len + front->len <= max_len); } static inline bool __is_discard_back_mergeable(struct discard_info *cur, - struct discard_info *back) + struct discard_info *back, unsigned int max_len) { - return __is_discard_mergeable(back, cur); + return __is_discard_mergeable(back, cur, max_len); } static inline bool __is_discard_front_mergeable(struct discard_info *cur, - struct discard_info *front) + struct discard_info *front, unsigned int max_len) { - return __is_discard_mergeable(cur, front); + return __is_discard_mergeable(cur, front, max_len); } static inline bool __is_extent_mergeable(struct extent_info *back, diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index eada91dae08ac..540d7d6161ba8 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -839,9 +839,12 @@ static struct discard_cmd *__create_discard_cmd(struct f2fs_sb_info *sbi, dc->len = len; dc->ref = 0; dc->state = D_PREP; + dc->issuing = 0; dc->error = 0; init_completion(&dc->wait); list_add_tail(&dc->list, pend_list); + spin_lock_init(&dc->lock); + dc->bio_ref = 0; atomic_inc(&dcc->discard_cmd_cnt); dcc->undiscard_blks += len; @@ -868,7 +871,7 @@ static void __detach_discard_cmd(struct discard_cmd_control *dcc, struct discard_cmd *dc) { if (dc->state == D_DONE) - atomic_dec(&dcc->issing_discard); + atomic_sub(dc->issuing, &dcc->issing_discard); list_del(&dc->list); rb_erase(&dc->rb_node, &dcc->root); @@ -883,9 +886,17 @@ static void __remove_discard_cmd(struct f2fs_sb_info *sbi, struct discard_cmd *dc) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; + unsigned long flags; trace_f2fs_remove_discard(dc->bdev, dc->start, dc->len); + spin_lock_irqsave(&dc->lock, flags); + if (dc->bio_ref) { + spin_unlock_irqrestore(&dc->lock, flags); + return; + } + spin_unlock_irqrestore(&dc->lock, flags); + f2fs_bug_on(sbi, dc->ref); if (dc->error == -EOPNOTSUPP) @@ -901,10 +912,17 @@ static void __remove_discard_cmd(struct f2fs_sb_info *sbi, static void f2fs_submit_discard_endio(struct bio *bio) { struct discard_cmd *dc = (struct discard_cmd *)bio->bi_private; + unsigned long flags; dc->error = blk_status_to_errno(bio->bi_status); - dc->state = D_DONE; - complete_all(&dc->wait); + + spin_lock_irqsave(&dc->lock, flags); + dc->bio_ref--; + if (!dc->bio_ref && dc->state == D_SUBMIT) { + dc->state = D_DONE; + complete_all(&dc->wait); + } + spin_unlock_irqrestore(&dc->lock, flags); bio_put(bio); } @@ -972,17 +990,25 @@ static void __init_discard_policy(struct f2fs_sb_info *sbi, } } - +static void __update_discard_tree_range(struct f2fs_sb_info *sbi, + struct block_device *bdev, block_t lstart, + block_t start, block_t len); /* this function is copied from blkdev_issue_discard from block/blk-lib.c */ static void __submit_discard_cmd(struct f2fs_sb_info *sbi, struct discard_policy *dpolicy, - struct discard_cmd *dc) + struct discard_cmd *dc, + unsigned int *issued) { + struct block_device *bdev = dc->bdev; + struct request_queue *q = bdev_get_queue(bdev); + unsigned int max_discard_blocks = + SECTOR_TO_BLOCK(q->limits.max_discard_sectors); struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; struct list_head *wait_list = (dpolicy->type == DPOLICY_FSTRIM) ? &(dcc->fstrim_list) : &(dcc->wait_list); - struct bio *bio = NULL; int flag = dpolicy->sync ? REQ_SYNC : 0; + block_t lstart, start, len, total_len; + int err = 0; if (dc->state != D_PREP) return; @@ -990,30 +1016,81 @@ static void __submit_discard_cmd(struct f2fs_sb_info *sbi, if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) return; - trace_f2fs_issue_discard(dc->bdev, dc->start, dc->len); - - dc->error = __blkdev_issue_discard(dc->bdev, - SECTOR_FROM_BLOCK(dc->start), - SECTOR_FROM_BLOCK(dc->len), - GFP_NOFS, 0, &bio); - if (!dc->error) { - /* should keep before submission to avoid D_DONE right away */ - dc->state = D_SUBMIT; - atomic_inc(&dcc->issued_discard); - atomic_inc(&dcc->issing_discard); - if (bio) { + trace_f2fs_issue_discard(bdev, dc->start, dc->len); + + lstart = dc->lstart; + start = dc->start; + len = dc->len; + total_len = len; + + dc->len = 0; + + while (total_len && *issued < dpolicy->max_requests && !err) { + struct bio *bio = NULL; + unsigned long flags; + bool last = true; + + if (len > max_discard_blocks) { + len = max_discard_blocks; + last = false; + } + + (*issued)++; + if (*issued == dpolicy->max_requests) + last = true; + + dc->len += len; + + err = __blkdev_issue_discard(bdev, + SECTOR_FROM_BLOCK(start), + SECTOR_FROM_BLOCK(len), + GFP_NOFS, 0, &bio); + if (!err && bio) { + /* + * should keep before submission to avoid D_DONE + * right away + */ + spin_lock_irqsave(&dc->lock, flags); + if (last) + dc->state = D_SUBMIT; + else + dc->state = D_PARTIAL; + dc->bio_ref++; + spin_unlock_irqrestore(&dc->lock, flags); + + atomic_inc(&dcc->issing_discard); + dc->issuing++; + list_move_tail(&dc->list, wait_list); + + /* sanity check on discard range */ + __check_sit_bitmap(sbi, start, start + len); + bio->bi_private = dc; bio->bi_end_io = f2fs_submit_discard_endio; bio->bi_opf |= flag; submit_bio(bio); - list_move_tail(&dc->list, wait_list); - __check_sit_bitmap(sbi, dc->start, dc->start + dc->len); + + atomic_inc(&dcc->issued_discard); f2fs_update_iostat(sbi, FS_DISCARD, 1); + } else { + spin_lock_irqsave(&dc->lock, flags); + if (dc->state == D_PARTIAL) + dc->state = D_SUBMIT; + spin_unlock_irqrestore(&dc->lock, flags); + + __remove_discard_cmd(sbi, dc); + err = -EIO; } - } else { - __remove_discard_cmd(sbi, dc); + + lstart += len; + start += len; + total_len -= len; + len = total_len; } + + if (len) + __update_discard_tree_range(sbi, bdev, lstart, start, len); } static struct discard_cmd *__insert_discard_tree(struct f2fs_sb_info *sbi, @@ -1094,10 +1171,11 @@ static void __update_discard_tree_range(struct f2fs_sb_info *sbi, struct discard_cmd *dc; struct discard_info di = {0}; struct rb_node **insert_p = NULL, *insert_parent = NULL; + struct request_queue *q = bdev_get_queue(bdev); + unsigned int max_discard_blocks = + SECTOR_TO_BLOCK(q->limits.max_discard_sectors); block_t end = lstart + len; - mutex_lock(&dcc->cmd_lock); - dc = (struct discard_cmd *)f2fs_lookup_rb_tree_ret(&dcc->root, NULL, lstart, (struct rb_entry **)&prev_dc, @@ -1137,7 +1215,8 @@ static void __update_discard_tree_range(struct f2fs_sb_info *sbi, if (prev_dc && prev_dc->state == D_PREP && prev_dc->bdev == bdev && - __is_discard_back_mergeable(&di, &prev_dc->di)) { + __is_discard_back_mergeable(&di, &prev_dc->di, + max_discard_blocks)) { prev_dc->di.len += di.len; dcc->undiscard_blks += di.len; __relocate_discard_cmd(dcc, prev_dc); @@ -1148,7 +1227,8 @@ static void __update_discard_tree_range(struct f2fs_sb_info *sbi, if (next_dc && next_dc->state == D_PREP && next_dc->bdev == bdev && - __is_discard_front_mergeable(&di, &next_dc->di)) { + __is_discard_front_mergeable(&di, &next_dc->di, + max_discard_blocks)) { next_dc->di.lstart = di.lstart; next_dc->di.len += di.len; next_dc->di.start = di.start; @@ -1171,8 +1251,6 @@ static void __update_discard_tree_range(struct f2fs_sb_info *sbi, node = rb_next(&prev_dc->rb_node); next_dc = rb_entry_safe(node, struct discard_cmd, rb_node); } - - mutex_unlock(&dcc->cmd_lock); } static int __queue_discard_cmd(struct f2fs_sb_info *sbi, @@ -1187,7 +1265,9 @@ static int __queue_discard_cmd(struct f2fs_sb_info *sbi, blkstart -= FDEV(devi).start_blk; } + mutex_lock(&SM_I(sbi)->dcc_info->cmd_lock); __update_discard_tree_range(sbi, bdev, lblkstart, blkstart, blklen); + mutex_unlock(&SM_I(sbi)->dcc_info->cmd_lock); return 0; } @@ -1226,9 +1306,9 @@ static unsigned int __issue_discard_cmd_orderly(struct f2fs_sb_info *sbi, } dcc->next_pos = dc->lstart + dc->len; - __submit_discard_cmd(sbi, dpolicy, dc); + __submit_discard_cmd(sbi, dpolicy, dc, &issued); - if (++issued >= dpolicy->max_requests) + if (issued >= dpolicy->max_requests) break; next: node = rb_next(&dc->rb_node); @@ -1283,9 +1363,9 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi, break; } - __submit_discard_cmd(sbi, dpolicy, dc); + __submit_discard_cmd(sbi, dpolicy, dc, &issued); - if (++issued >= dpolicy->max_requests) + if (issued >= dpolicy->max_requests) break; } blk_finish_plug(&plug); @@ -2492,9 +2572,9 @@ next: goto skip; } - __submit_discard_cmd(sbi, dpolicy, dc); + __submit_discard_cmd(sbi, dpolicy, dc, &issued); - if (++issued >= dpolicy->max_requests) { + if (issued >= dpolicy->max_requests) { start = dc->lstart + dc->len; blk_finish_plug(&plug); -- GitLab From a7b17e5ec81660f82d6ad65844b27f4bebc6e200 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 6 Aug 2018 20:30:18 +0800 Subject: [PATCH 0081/2269] f2fs: support discard submission error injection This patch adds to support discard submission error injection for testing error handling of __submit_discard_cmd(). Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 1 + fs/f2fs/segment.c | 8 ++++++++ fs/f2fs/super.c | 1 + 3 files changed, 10 insertions(+) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 02885b6f83dc7..dc0fa6839ae18 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -57,6 +57,7 @@ enum { FAULT_TRUNCATE, FAULT_IO, FAULT_CHECKPOINT, + FAULT_DISCARD, FAULT_MAX, }; diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 540d7d6161ba8..f24e659463e94 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1041,10 +1041,18 @@ static void __submit_discard_cmd(struct f2fs_sb_info *sbi, dc->len += len; +#ifdef CONFIG_F2FS_FAULT_INJECTION + if (time_to_inject(sbi, FAULT_DISCARD)) { + f2fs_show_injection_info(FAULT_DISCARD); + err = -EIO; + goto submit; + } +#endif err = __blkdev_issue_discard(bdev, SECTOR_FROM_BLOCK(start), SECTOR_FROM_BLOCK(len), GFP_NOFS, 0, &bio); +submit: if (!err && bio) { /* * should keep before submission to avoid D_DONE diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index bc817b76db433..946d067f4b0cf 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -55,6 +55,7 @@ char *f2fs_fault_name[FAULT_MAX] = { [FAULT_TRUNCATE] = "truncate fail", [FAULT_IO] = "IO error", [FAULT_CHECKPOINT] = "checkpoint error", + [FAULT_DISCARD] = "discard error", }; void f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned int rate) -- GitLab From a85c255ffc7a624731b162b82cd0a4c35c48479e Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 8 Aug 2018 10:14:55 +0800 Subject: [PATCH 0082/2269] f2fs: fix use-after-free of dicard command entry As Dan Carpenter reported: The patch 20ee4382322c: "f2fs: issue small discard by LBA order" from Jul 8, 2018, leads to the following Smatch warning: fs/f2fs/segment.c:1277 __issue_discard_cmd_orderly() warn: 'dc' was already freed. See also: fs/f2fs/segment.c:2550 __issue_discard_cmd_range() warn: 'dc' was already freed. In order to fix this issue, let's get error from __submit_discard_cmd(), and release current discard command after we referenced next one. Reported-by: Dan Carpenter Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 79 +++++++++++++++++++++++++++-------------------- 1 file changed, 45 insertions(+), 34 deletions(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index f24e659463e94..6b932e669c576 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -994,7 +994,7 @@ static void __update_discard_tree_range(struct f2fs_sb_info *sbi, struct block_device *bdev, block_t lstart, block_t start, block_t len); /* this function is copied from blkdev_issue_discard from block/blk-lib.c */ -static void __submit_discard_cmd(struct f2fs_sb_info *sbi, +static int __submit_discard_cmd(struct f2fs_sb_info *sbi, struct discard_policy *dpolicy, struct discard_cmd *dc, unsigned int *issued) @@ -1011,10 +1011,10 @@ static void __submit_discard_cmd(struct f2fs_sb_info *sbi, int err = 0; if (dc->state != D_PREP) - return; + return 0; if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) - return; + return 0; trace_f2fs_issue_discard(bdev, dc->start, dc->len); @@ -1053,43 +1053,44 @@ static void __submit_discard_cmd(struct f2fs_sb_info *sbi, SECTOR_FROM_BLOCK(len), GFP_NOFS, 0, &bio); submit: - if (!err && bio) { - /* - * should keep before submission to avoid D_DONE - * right away - */ + if (err) { spin_lock_irqsave(&dc->lock, flags); - if (last) + if (dc->state == D_PARTIAL) dc->state = D_SUBMIT; - else - dc->state = D_PARTIAL; - dc->bio_ref++; spin_unlock_irqrestore(&dc->lock, flags); - atomic_inc(&dcc->issing_discard); - dc->issuing++; - list_move_tail(&dc->list, wait_list); + break; + } - /* sanity check on discard range */ - __check_sit_bitmap(sbi, start, start + len); + f2fs_bug_on(sbi, !bio); - bio->bi_private = dc; - bio->bi_end_io = f2fs_submit_discard_endio; - bio->bi_opf |= flag; - submit_bio(bio); + /* + * should keep before submission to avoid D_DONE + * right away + */ + spin_lock_irqsave(&dc->lock, flags); + if (last) + dc->state = D_SUBMIT; + else + dc->state = D_PARTIAL; + dc->bio_ref++; + spin_unlock_irqrestore(&dc->lock, flags); - atomic_inc(&dcc->issued_discard); + atomic_inc(&dcc->issing_discard); + dc->issuing++; + list_move_tail(&dc->list, wait_list); - f2fs_update_iostat(sbi, FS_DISCARD, 1); - } else { - spin_lock_irqsave(&dc->lock, flags); - if (dc->state == D_PARTIAL) - dc->state = D_SUBMIT; - spin_unlock_irqrestore(&dc->lock, flags); + /* sanity check on discard range */ + __check_sit_bitmap(sbi, start, start + len); - __remove_discard_cmd(sbi, dc); - err = -EIO; - } + bio->bi_private = dc; + bio->bi_end_io = f2fs_submit_discard_endio; + bio->bi_opf |= flag; + submit_bio(bio); + + atomic_inc(&dcc->issued_discard); + + f2fs_update_iostat(sbi, FS_DISCARD, 1); lstart += len; start += len; @@ -1097,8 +1098,9 @@ submit: len = total_len; } - if (len) + if (!err && len) __update_discard_tree_range(sbi, bdev, lstart, start, len); + return err; } static struct discard_cmd *__insert_discard_tree(struct f2fs_sb_info *sbi, @@ -1304,6 +1306,7 @@ static unsigned int __issue_discard_cmd_orderly(struct f2fs_sb_info *sbi, while (dc) { struct rb_node *node; + int err = 0; if (dc->state != D_PREP) goto next; @@ -1314,12 +1317,14 @@ static unsigned int __issue_discard_cmd_orderly(struct f2fs_sb_info *sbi, } dcc->next_pos = dc->lstart + dc->len; - __submit_discard_cmd(sbi, dpolicy, dc, &issued); + err = __submit_discard_cmd(sbi, dpolicy, dc, &issued); if (issued >= dpolicy->max_requests) break; next: node = rb_next(&dc->rb_node); + if (err) + __remove_discard_cmd(sbi, dc); dc = rb_entry_safe(node, struct discard_cmd, rb_node); } @@ -2571,6 +2576,7 @@ next: while (dc && dc->lstart <= end) { struct rb_node *node; + int err = 0; if (dc->len < dpolicy->granularity) goto skip; @@ -2580,11 +2586,14 @@ next: goto skip; } - __submit_discard_cmd(sbi, dpolicy, dc, &issued); + err = __submit_discard_cmd(sbi, dpolicy, dc, &issued); if (issued >= dpolicy->max_requests) { start = dc->lstart + dc->len; + if (err) + __remove_discard_cmd(sbi, dc); + blk_finish_plug(&plug); mutex_unlock(&dcc->cmd_lock); trimmed += __wait_all_discard_cmd(sbi, NULL); @@ -2593,6 +2602,8 @@ next: } skip: node = rb_next(&dc->rb_node); + if (err) + __remove_discard_cmd(sbi, dc); dc = rb_entry_safe(node, struct discard_cmd, rb_node); if (fatal_signal_pending(current)) -- GitLab From 423720ed829717eec0975d1a5f606227d2ebe192 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 8 Aug 2018 17:36:29 +0800 Subject: [PATCH 0083/2269] f2fs: fix to return success when trimming meta area generic/251 --- tests/generic/251.out 2016-05-03 20:20:11.381899000 +0800 QA output created by 251 Running the test: done. +fstrim: /mnt/scratch_f2fs: FITRIM ioctl failed: Invalid argument +fstrim: /mnt/scratch_f2fs: FITRIM ioctl failed: Invalid argument +fstrim: /mnt/scratch_f2fs: FITRIM ioctl failed: Invalid argument +fstrim: /mnt/scratch_f2fs: FITRIM ioctl failed: Invalid argument +fstrim: /mnt/scratch_f2fs: FITRIM ioctl failed: Invalid argument ... Ran: generic/251 Failures: generic/251 The reason is coverage of fstrim locates in meta area, previously we just return -EINVAL for such case, making generic/251 failed, to fix this problem, let's relieve restriction to return success with no block discarded. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 6b932e669c576..63fc647f9ac2a 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2631,8 +2631,8 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) if (start >= MAX_BLKADDR(sbi) || range->len < sbi->blocksize) return -EINVAL; - if (end <= MAIN_BLKADDR(sbi)) - return -EINVAL; + if (end < MAIN_BLKADDR(sbi)) + goto out; if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) { f2fs_msg(sbi->sb, KERN_WARNING, -- GitLab From 9ab7c130145000bcb98bf055107da4ee3e676d52 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 8 Aug 2018 17:36:41 +0800 Subject: [PATCH 0084/2269] f2fs: support fault_type mount option Previously, once fault injection is on, by default, all kind of faults will be injected to f2fs, if we want to trigger single or specified combined type during the test, we need to configure sysfs entry, it will be a little inconvenient to integrate sysfs configuring into testsuit, such as xfstest. So this patch introduces a new mount option 'fault_type' to assist old option 'fault_injection', with these two mount options, we can specify any fault rate/type at mount-time. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- Documentation/filesystems/f2fs.txt | 18 ++++++++++++++++ fs/f2fs/checkpoint.c | 2 +- fs/f2fs/f2fs.h | 7 ++++-- fs/f2fs/super.c | 34 ++++++++++++++++++++++++------ 4 files changed, 51 insertions(+), 10 deletions(-) diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt index 37d8698ca2d65..68a8d2688c7a3 100644 --- a/Documentation/filesystems/f2fs.txt +++ b/Documentation/filesystems/f2fs.txt @@ -157,6 +157,24 @@ data_flush Enable data flushing before checkpoint in order to persist data of regular and symlink. fault_injection=%d Enable fault injection in all supported types with specified injection rate. +fault_type=%d Support configuring fault injection type, should be + enabled with fault_injection option, fault type value + is shown below, it supports single or combined type. + Type_Name Type_Value + FAULT_KMALLOC 0x000000001 + FAULT_KVMALLOC 0x000000002 + FAULT_PAGE_ALLOC 0x000000004 + FAULT_PAGE_GET 0x000000008 + FAULT_ALLOC_BIO 0x000000010 + FAULT_ALLOC_NID 0x000000020 + FAULT_ORPHAN 0x000000040 + FAULT_BLOCK 0x000000080 + FAULT_DIR_DEPTH 0x000000100 + FAULT_EVICT_INODE 0x000000200 + FAULT_TRUNCATE 0x000000400 + FAULT_IO 0x000000800 + FAULT_CHECKPOINT 0x000001000 + FAULT_DISCARD 0x000002000 mode=%s Control block allocation mode which supports "adaptive" and "lfs". In "lfs" mode, there should be no random writes towards main area. diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 1b5f884175933..24e889a4293f0 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -28,7 +28,7 @@ struct kmem_cache *f2fs_inode_entry_slab; void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io) { - f2fs_build_fault_attr(sbi, 0); + f2fs_build_fault_attr(sbi, 0, 0); set_ckpt_flags(sbi, CP_ERROR_FLAG); if (!end_io) f2fs_flush_merged_writes(sbi); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index dc0fa6839ae18..5ae0d9ed99524 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -61,6 +61,8 @@ enum { FAULT_MAX, }; +#define F2FS_ALL_FAULT_TYPE ((1 << FAULT_MAX) - 1) + struct f2fs_fault_info { atomic_t inject_ops; unsigned int inject_rate; @@ -3436,9 +3438,10 @@ static inline bool f2fs_force_buffered_io(struct inode *inode, int rw) } #ifdef CONFIG_F2FS_FAULT_INJECTION -extern void f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned int rate); +extern void f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned int rate, + unsigned int type); #else -#define f2fs_build_fault_attr(sbi, rate) do { } while (0) +#define f2fs_build_fault_attr(sbi, rate, type) do { } while (0) #endif #endif diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 946d067f4b0cf..5c18a954b8005 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -58,17 +58,21 @@ char *f2fs_fault_name[FAULT_MAX] = { [FAULT_DISCARD] = "discard error", }; -void f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned int rate) +void f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned int rate, + unsigned int type) { struct f2fs_fault_info *ffi = &F2FS_OPTION(sbi).fault_info; if (rate) { atomic_set(&ffi->inject_ops, 0); ffi->inject_rate = rate; - ffi->inject_type = (1 << FAULT_MAX) - 1; - } else { - memset(ffi, 0, sizeof(struct f2fs_fault_info)); } + + if (type) + ffi->inject_type = type; + + if (!rate && !type) + memset(ffi, 0, sizeof(struct f2fs_fault_info)); } #endif @@ -113,6 +117,7 @@ enum { Opt_mode, Opt_io_size_bits, Opt_fault_injection, + Opt_fault_type, Opt_lazytime, Opt_nolazytime, Opt_quota, @@ -170,6 +175,7 @@ static match_table_t f2fs_tokens = { {Opt_mode, "mode=%s"}, {Opt_io_size_bits, "io_bits=%u"}, {Opt_fault_injection, "fault_injection=%u"}, + {Opt_fault_type, "fault_type=%u"}, {Opt_lazytime, "lazytime"}, {Opt_nolazytime, "nolazytime"}, {Opt_quota, "quota"}, @@ -600,7 +606,18 @@ static int parse_options(struct super_block *sb, char *options) if (args->from && match_int(args, &arg)) return -EINVAL; #ifdef CONFIG_F2FS_FAULT_INJECTION - f2fs_build_fault_attr(sbi, arg); + f2fs_build_fault_attr(sbi, arg, F2FS_ALL_FAULT_TYPE); + set_opt(sbi, FAULT_INJECTION); +#else + f2fs_msg(sb, KERN_INFO, + "FAULT_INJECTION was not selected"); +#endif + break; + case Opt_fault_type: + if (args->from && match_int(args, &arg)) + return -EINVAL; +#ifdef CONFIG_F2FS_FAULT_INJECTION + f2fs_build_fault_attr(sbi, 0, arg); set_opt(sbi, FAULT_INJECTION); #else f2fs_msg(sb, KERN_INFO, @@ -1321,9 +1338,12 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) if (F2FS_IO_SIZE_BITS(sbi)) seq_printf(seq, ",io_size=%uKB", F2FS_IO_SIZE_KB(sbi)); #ifdef CONFIG_F2FS_FAULT_INJECTION - if (test_opt(sbi, FAULT_INJECTION)) + if (test_opt(sbi, FAULT_INJECTION)) { seq_printf(seq, ",fault_injection=%u", F2FS_OPTION(sbi).fault_info.inject_rate); + seq_printf(seq, ",fault_type=%u", + F2FS_OPTION(sbi).fault_info.inject_type); + } #endif #ifdef CONFIG_QUOTA if (test_opt(sbi, QUOTA)) @@ -1393,7 +1413,7 @@ static void default_options(struct f2fs_sb_info *sbi) set_opt(sbi, POSIX_ACL); #endif - f2fs_build_fault_attr(sbi, 0); + f2fs_build_fault_attr(sbi, 0, 0); } #ifdef CONFIG_QUOTA -- GitLab From 9698814d8fec0f798ff1449c4d8af79f36d7c98d Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 13 Aug 2018 23:38:06 +0200 Subject: [PATCH 0085/2269] f2fs: rework fault injection handling to avoid a warning When CONFIG_F2FS_FAULT_INJECTION is disabled, we get a warning about an unused label: fs/f2fs/segment.c: In function '__submit_discard_cmd': fs/f2fs/segment.c:1059:1: error: label 'submit' defined but not used [-Werror=unused-label] This could be fixed by adding another #ifdef around it, but the more reliable way of doing this seems to be to remove the other #ifdefs where that is easily possible. By defining time_to_inject() as a trivial stub, most of the checks for CONFIG_F2FS_FAULT_INJECTION can go away. This also leads to nicer formatting of the code. Signed-off-by: Arnd Bergmann Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 3 +-- fs/f2fs/data.c | 2 -- fs/f2fs/dir.c | 3 +-- fs/f2fs/f2fs.h | 50 ++++++++++++++++++++++---------------------- fs/f2fs/file.c | 3 +-- fs/f2fs/gc.c | 2 -- fs/f2fs/inode.c | 3 +-- fs/f2fs/node.c | 3 +-- fs/f2fs/recovery.c | 5 ++--- fs/f2fs/segment.c | 4 ---- 10 files changed, 32 insertions(+), 46 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 24e889a4293f0..6b24eb4814780 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -555,13 +555,12 @@ int f2fs_acquire_orphan_inode(struct f2fs_sb_info *sbi) spin_lock(&im->ino_lock); -#ifdef CONFIG_F2FS_FAULT_INJECTION if (time_to_inject(sbi, FAULT_ORPHAN)) { spin_unlock(&im->ino_lock); f2fs_show_injection_info(FAULT_ORPHAN); return -ENOSPC; } -#endif + if (unlikely(im->ino_num >= sbi->max_orphans)) err = -ENOSPC; else diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 87ffc7a882171..eaf017e1166c6 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -126,12 +126,10 @@ static bool f2fs_bio_post_read_required(struct bio *bio) static void f2fs_read_end_io(struct bio *bio) { -#ifdef CONFIG_F2FS_FAULT_INJECTION if (time_to_inject(F2FS_P_SB(bio->bi_io_vec->bv_page), FAULT_IO)) { f2fs_show_injection_info(FAULT_IO); bio->bi_status = BLK_STS_IOERR; } -#endif if (f2fs_bio_post_read_required(bio)) { struct bio_post_read_ctx *ctx = bio->bi_private; diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 7f955c4e86a45..ecc3a4e2be96d 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -517,12 +517,11 @@ int f2fs_add_regular_entry(struct inode *dir, const struct qstr *new_name, } start: -#ifdef CONFIG_F2FS_FAULT_INJECTION if (time_to_inject(F2FS_I_SB(dir), FAULT_DIR_DEPTH)) { f2fs_show_injection_info(FAULT_DIR_DEPTH); return -ENOSPC; } -#endif + if (unlikely(current_depth == MAX_DIR_HASH_DEPTH)) return -ENOSPC; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 5ae0d9ed99524..f6d7d0f9d3bec 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -42,7 +42,6 @@ } while (0) #endif -#ifdef CONFIG_F2FS_FAULT_INJECTION enum { FAULT_KMALLOC, FAULT_KVMALLOC, @@ -61,6 +60,7 @@ enum { FAULT_MAX, }; +#ifdef CONFIG_F2FS_FAULT_INJECTION #define F2FS_ALL_FAULT_TYPE ((1 << FAULT_MAX) - 1) struct f2fs_fault_info { @@ -1325,6 +1325,12 @@ static inline bool time_to_inject(struct f2fs_sb_info *sbi, int type) } return false; } +#else +#define f2fs_show_injection_info(type) do { } while (0) +static inline bool time_to_inject(struct f2fs_sb_info *sbi, int type) +{ + return false; +} #endif /* For write statistics. Suppose sector size is 512 bytes, @@ -1677,13 +1683,12 @@ static inline int inc_valid_block_count(struct f2fs_sb_info *sbi, if (ret) return ret; -#ifdef CONFIG_F2FS_FAULT_INJECTION if (time_to_inject(sbi, FAULT_BLOCK)) { f2fs_show_injection_info(FAULT_BLOCK); release = *count; goto enospc; } -#endif + /* * let's increase this in prior to actual block count change in order * for f2fs_sync_file to avoid data races when deciding checkpoint. @@ -1892,12 +1897,10 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi, return ret; } -#ifdef CONFIG_F2FS_FAULT_INJECTION if (time_to_inject(sbi, FAULT_BLOCK)) { f2fs_show_injection_info(FAULT_BLOCK); goto enospc; } -#endif spin_lock(&sbi->stat_lock); @@ -1982,22 +1985,23 @@ static inline s64 valid_inode_count(struct f2fs_sb_info *sbi) static inline struct page *f2fs_grab_cache_page(struct address_space *mapping, pgoff_t index, bool for_write) { -#ifdef CONFIG_F2FS_FAULT_INJECTION struct page *page; - if (!for_write) - page = find_get_page_flags(mapping, index, - FGP_LOCK | FGP_ACCESSED); - else - page = find_lock_page(mapping, index); - if (page) - return page; + if (IS_ENABLED(CONFIG_F2FS_FAULT_INJECTION)) { + if (!for_write) + page = find_get_page_flags(mapping, index, + FGP_LOCK | FGP_ACCESSED); + else + page = find_lock_page(mapping, index); + if (page) + return page; - if (time_to_inject(F2FS_M_SB(mapping), FAULT_PAGE_ALLOC)) { - f2fs_show_injection_info(FAULT_PAGE_ALLOC); - return NULL; + if (time_to_inject(F2FS_M_SB(mapping), FAULT_PAGE_ALLOC)) { + f2fs_show_injection_info(FAULT_PAGE_ALLOC); + return NULL; + } } -#endif + if (!for_write) return grab_cache_page(mapping, index); return grab_cache_page_write_begin(mapping, index, AOP_FLAG_NOFS); @@ -2007,12 +2011,11 @@ static inline struct page *f2fs_pagecache_get_page( struct address_space *mapping, pgoff_t index, int fgp_flags, gfp_t gfp_mask) { -#ifdef CONFIG_F2FS_FAULT_INJECTION if (time_to_inject(F2FS_M_SB(mapping), FAULT_PAGE_GET)) { f2fs_show_injection_info(FAULT_PAGE_GET); return NULL; } -#endif + return pagecache_get_page(mapping, index, fgp_flags, gfp_mask); } @@ -2077,12 +2080,11 @@ static inline struct bio *f2fs_bio_alloc(struct f2fs_sb_info *sbi, bio = bio_alloc(GFP_NOIO | __GFP_NOFAIL, npages); return bio; } -#ifdef CONFIG_F2FS_FAULT_INJECTION if (time_to_inject(sbi, FAULT_ALLOC_BIO)) { f2fs_show_injection_info(FAULT_ALLOC_BIO); return NULL; } -#endif + return bio_alloc(GFP_KERNEL, npages); } @@ -2617,12 +2619,11 @@ static inline bool f2fs_may_extent_tree(struct inode *inode) static inline void *f2fs_kmalloc(struct f2fs_sb_info *sbi, size_t size, gfp_t flags) { -#ifdef CONFIG_F2FS_FAULT_INJECTION if (time_to_inject(sbi, FAULT_KMALLOC)) { f2fs_show_injection_info(FAULT_KMALLOC); return NULL; } -#endif + return kmalloc(size, flags); } @@ -2635,12 +2636,11 @@ static inline void *f2fs_kzalloc(struct f2fs_sb_info *sbi, static inline void *f2fs_kvmalloc(struct f2fs_sb_info *sbi, size_t size, gfp_t flags) { -#ifdef CONFIG_F2FS_FAULT_INJECTION if (time_to_inject(sbi, FAULT_KVMALLOC)) { f2fs_show_injection_info(FAULT_KVMALLOC); return NULL; } -#endif + return kvmalloc(size, flags); } diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index c4197ade9c7c6..e3a0b291f37da 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -668,12 +668,11 @@ int f2fs_truncate(struct inode *inode) trace_f2fs_truncate(inode); -#ifdef CONFIG_F2FS_FAULT_INJECTION if (time_to_inject(F2FS_I_SB(inode), FAULT_TRUNCATE)) { f2fs_show_injection_info(FAULT_TRUNCATE); return -EIO; } -#endif + /* we should check inline_data size */ if (!f2fs_may_inline_data(inode)) { err = f2fs_convert_inline_inode(inode); diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index bca09c5fd6e58..2ae0983bab4a6 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -53,12 +53,10 @@ static int gc_thread_func(void *data) continue; } -#ifdef CONFIG_F2FS_FAULT_INJECTION if (time_to_inject(sbi, FAULT_CHECKPOINT)) { f2fs_show_injection_info(FAULT_CHECKPOINT); f2fs_stop_checkpoint(sbi, false); } -#endif if (!sb_start_write_trylock(sbi->sb)) continue; diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index fc2c98b9e255b..6908896a19502 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -659,12 +659,11 @@ retry: if (F2FS_HAS_BLOCKS(inode)) err = f2fs_truncate(inode); -#ifdef CONFIG_F2FS_FAULT_INJECTION if (time_to_inject(sbi, FAULT_EVICT_INODE)) { f2fs_show_injection_info(FAULT_EVICT_INODE); err = -EIO; } -#endif + if (!err) { f2fs_lock_op(sbi); err = f2fs_remove_inode_page(inode); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 7d920bce89e86..f213a53526aaf 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -2323,12 +2323,11 @@ bool f2fs_alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid) struct f2fs_nm_info *nm_i = NM_I(sbi); struct free_nid *i = NULL; retry: -#ifdef CONFIG_F2FS_FAULT_INJECTION if (time_to_inject(sbi, FAULT_ALLOC_NID)) { f2fs_show_injection_info(FAULT_ALLOC_NID); return false; } -#endif + spin_lock(&nm_i->nid_list_lock); if (unlikely(nm_i->available_nids == 0)) { diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 0a6e81879a1f8..501bb0fdda1b3 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -518,10 +518,9 @@ retry_dn: if (src == NULL_ADDR) { err = f2fs_reserve_new_block(&dn); -#ifdef CONFIG_F2FS_FAULT_INJECTION - while (err) + while (err && + IS_ENABLED(CONFIG_F2FS_FAULT_INJECTION)) err = f2fs_reserve_new_block(&dn); -#endif /* We should not get -ENOSPC */ f2fs_bug_on(sbi, err); if (err) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 63fc647f9ac2a..b136e39e1e9ec 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -470,12 +470,10 @@ int f2fs_commit_inmem_pages(struct inode *inode) */ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need) { -#ifdef CONFIG_F2FS_FAULT_INJECTION if (time_to_inject(sbi, FAULT_CHECKPOINT)) { f2fs_show_injection_info(FAULT_CHECKPOINT); f2fs_stop_checkpoint(sbi, false); } -#endif /* balance_fs_bg is able to be pending */ if (need && excess_cached_nats(sbi)) @@ -1041,13 +1039,11 @@ static int __submit_discard_cmd(struct f2fs_sb_info *sbi, dc->len += len; -#ifdef CONFIG_F2FS_FAULT_INJECTION if (time_to_inject(sbi, FAULT_DISCARD)) { f2fs_show_injection_info(FAULT_DISCARD); err = -EIO; goto submit; } -#endif err = __blkdev_issue_discard(bdev, SECTOR_FROM_BLOCK(start), SECTOR_FROM_BLOCK(len), -- GitLab From 3cc9792fa1ea67bbfdaee28737e37f9045976b76 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 11 Aug 2018 23:42:09 +0800 Subject: [PATCH 0086/2269] f2fs: fix to skip verifying block address for non-regular inode generic/184 1s ... [failed, exit status 1]- output mismatch --- tests/generic/184.out 2015-01-11 16:52:27.643681072 +0800 QA output created by 184 - silence is golden +rm: cannot remove '/mnt/f2fs/null': Bad address +mknod: '/mnt/f2fs/null': Bad address +chmod: cannot access '/mnt/f2fs/null': Bad address +./tests/generic/184: line 36: /mnt/f2fs/null: Bad address ... F2FS-fs (zram0): access invalid blkaddr:259 EIP: f2fs_is_valid_blkaddr+0x14b/0x1b0 [f2fs] f2fs_iget+0x927/0x1010 [f2fs] f2fs_lookup+0x26e/0x630 [f2fs] __lookup_slow+0xb3/0x140 lookup_slow+0x31/0x50 walk_component+0x185/0x1f0 path_lookupat+0x51/0x190 filename_lookup+0x7f/0x140 user_path_at_empty+0x36/0x40 vfs_statx+0x61/0xc0 __do_sys_stat64+0x29/0x40 sys_stat64+0x13/0x20 do_fast_syscall_32+0xaa/0x22c entry_SYSENTER_32+0x53/0x86 In f2fs_iget(), we will check inode's first block address, if it is valid, we will set FI_FIRST_BLOCK_WRITTEN flag in inode. But we should only do this for regular inode, otherwise, like special inode, i_addr[0] is used for storing device info instead of block address, it will fail checking flow obviously. So for non-regular inode, let's skip verifying address and setting flag. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/inode.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 6908896a19502..959df2249875c 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -371,13 +371,15 @@ static int do_read_inode(struct inode *inode) /* get rdev by using inline_info */ __get_inode_rdev(inode, ri); - err = __written_first_block(sbi, ri); - if (err < 0) { - f2fs_put_page(node_page, 1); - return err; + if (S_ISREG(inode->i_mode)) { + err = __written_first_block(sbi, ri); + if (err < 0) { + f2fs_put_page(node_page, 1); + return err; + } + if (!err) + set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN); } - if (!err) - set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN); if (!f2fs_need_inode_block_update(sbi, inode->i_ino)) fi->last_disk_size = inode->i_size; -- GitLab From 54f3a3a8c3e9b51d968b83fa8aa1932b59f1ffc1 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 9 Aug 2018 17:53:34 -0700 Subject: [PATCH 0087/2269] f2fs: fix performance issue observed with multi-thread sequential read This reverts the commit - "b93f771 - f2fs: remove writepages lock" to fix the drop in sequential read throughput. Test: ./tiotest -t 32 -d /data/tio_tmp -f 32 -b 524288 -k 1 -k 3 -L device: UFS Before - read throughput: 185 MB/s total read requests: 85177 (of these ~80000 are 4KB size requests). total write requests: 2546 (of these ~2208 requests are written in 512KB). After - read throughput: 758 MB/s total read requests: 2417 (of these ~2042 are 512KB reads). total write requests: 2701 (of these ~2034 requests are written in 512KB). Signed-off-by: Sahitya Tummala Signed-off-by: Jaegeuk Kim --- Documentation/ABI/testing/sysfs-fs-f2fs | 8 ++++++++ fs/f2fs/data.c | 21 +++++++++++++++++++++ fs/f2fs/f2fs.h | 2 ++ fs/f2fs/segment.c | 1 + fs/f2fs/super.c | 1 + fs/f2fs/sysfs.c | 2 ++ 6 files changed, 35 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index 372b88f4e7062..41b8cb3b45256 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -51,6 +51,14 @@ Description: Controls the dirty page count condition for the in-place-update policies. +What: /sys/fs/f2fs//min_seq_blocks +Date: August 2018 +Contact: "Jaegeuk Kim" +Description: + Controls the dirty page count condition for batched sequential + writes in ->writepages. + + What: /sys/fs/f2fs//min_hot_blocks Date: March 2017 Contact: "Jaegeuk Kim" diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index eaf017e1166c6..aad411471f855 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -2122,6 +2122,18 @@ continue_unlock: return ret; } +static inline bool __should_serialize_io(struct inode *inode, + struct writeback_control *wbc) +{ + if (!S_ISREG(inode->i_mode)) + return false; + if (wbc->sync_mode != WB_SYNC_ALL) + return true; + if (get_dirty_pages(inode) >= SM_I(F2FS_I_SB(inode))->min_seq_blocks) + return true; + return false; +} + static int __f2fs_write_data_pages(struct address_space *mapping, struct writeback_control *wbc, enum iostat_type io_type) @@ -2130,6 +2142,7 @@ static int __f2fs_write_data_pages(struct address_space *mapping, struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct blk_plug plug; int ret; + bool locked = false; /* deal with chardevs and other special file */ if (!mapping->a_ops->writepage) @@ -2160,10 +2173,18 @@ static int __f2fs_write_data_pages(struct address_space *mapping, else if (atomic_read(&sbi->wb_sync_req[DATA])) goto skip_write; + if (__should_serialize_io(inode, wbc)) { + mutex_lock(&sbi->writepages); + locked = true; + } + blk_start_plug(&plug); ret = f2fs_write_cache_pages(mapping, wbc, io_type); blk_finish_plug(&plug); + if (locked) + mutex_unlock(&sbi->writepages); + if (wbc->sync_mode == WB_SYNC_ALL) atomic_dec(&sbi->wb_sync_req[DATA]); /* diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index f6d7d0f9d3bec..1b142b062ce1d 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -914,6 +914,7 @@ struct f2fs_sm_info { unsigned int ipu_policy; /* in-place-update policy */ unsigned int min_ipu_util; /* in-place-update threshold */ unsigned int min_fsync_blocks; /* threshold for fsync */ + unsigned int min_seq_blocks; /* threshold for sequential blocks */ unsigned int min_hot_blocks; /* threshold for hot block allocation */ unsigned int min_ssr_sections; /* threshold to trigger SSR allocation */ @@ -1134,6 +1135,7 @@ struct f2fs_sb_info { struct rw_semaphore sb_lock; /* lock for raw super block */ int valid_super_block; /* valid super block no */ unsigned long s_flag; /* flags for sbi */ + struct mutex writepages; /* mutex for writepages() */ #ifdef CONFIG_BLK_DEV_ZONED unsigned int blocks_per_blkz; /* F2FS blocks per zone */ diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index b136e39e1e9ec..20650e25117b2 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -4127,6 +4127,7 @@ int f2fs_build_segment_manager(struct f2fs_sb_info *sbi) sm_info->ipu_policy = 1 << F2FS_IPU_FSYNC; sm_info->min_ipu_util = DEF_MIN_IPU_UTIL; sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS; + sm_info->min_seq_blocks = sbi->blocks_per_seg * sbi->segs_per_sec; sm_info->min_hot_blocks = DEF_MIN_HOT_BLOCKS; sm_info->min_ssr_sections = reserved_sections(sbi); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 5c18a954b8005..1c8226dfd4f3f 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -2842,6 +2842,7 @@ try_onemore: /* init f2fs-specific super block info */ sbi->valid_super_block = valid_super_block; mutex_init(&sbi->gc_mutex); + mutex_init(&sbi->writepages); mutex_init(&sbi->cp_mutex); init_rwsem(&sbi->node_write); init_rwsem(&sbi->node_change); diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index d4c196552888c..30fd016afeb37 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -397,6 +397,7 @@ F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, batched_trim_sections, trim_sections); F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy); F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util); F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_fsync_blocks, min_fsync_blocks); +F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_seq_blocks, min_seq_blocks); F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_hot_blocks, min_hot_blocks); F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ssr_sections, min_ssr_sections); F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh); @@ -449,6 +450,7 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(ipu_policy), ATTR_LIST(min_ipu_util), ATTR_LIST(min_fsync_blocks), + ATTR_LIST(min_seq_blocks), ATTR_LIST(min_hot_blocks), ATTR_LIST(min_ssr_sections), ATTR_LIST(max_victim_search), -- GitLab From dba8dac99310288025c74f34340b87f124fdca9a Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 25 Jul 2018 12:11:56 +0900 Subject: [PATCH 0088/2269] f2fs: avoid fi->i_gc_rwsem[WRITE] lock in f2fs_gc The f2fs_gc() called by f2fs_balance_fs() requires to be called outside of fi->i_gc_rwsem[WRITE], since f2fs_gc() can try to grab it in a loop. If it hits the miximum retrials in GC, let's give a chance to release gc_mutex for a short time in order not to go into live lock in the worst case. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 4 +- fs/f2fs/f2fs.h | 1 + fs/f2fs/file.c | 119 ++++++++++++++++++++++++---------------------- fs/f2fs/gc.c | 26 +++++++--- fs/f2fs/segment.c | 6 ++- fs/f2fs/segment.h | 2 +- 6 files changed, 91 insertions(+), 67 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index aad411471f855..350c1228c7072 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -2217,14 +2217,14 @@ static void f2fs_write_failed(struct address_space *mapping, loff_t to) loff_t i_size = i_size_read(inode); if (to > i_size) { - down_write(&F2FS_I(inode)->i_mmap_sem); down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + down_write(&F2FS_I(inode)->i_mmap_sem); truncate_pagecache(inode, i_size); f2fs_truncate_blocks(inode, i_size, true); - up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); up_write(&F2FS_I(inode)->i_mmap_sem); + up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); } } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 1b142b062ce1d..1f7bfe4787aaf 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1244,6 +1244,7 @@ struct f2fs_sb_info { unsigned int gc_mode; /* current GC state */ /* for skip statistic */ unsigned long long skipped_atomic_files[2]; /* FG_GC and BG_GC */ + unsigned long long skipped_gc_rwsem; /* FG_GC only */ /* threshold for gc trials on pinned files */ u64 gc_pin_file_threshold; diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index e3a0b291f37da..1d46de58f9d4b 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -797,8 +797,8 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr) if (attr->ia_valid & ATTR_SIZE) { bool to_smaller = (attr->ia_size <= i_size_read(inode)); - down_write(&F2FS_I(inode)->i_mmap_sem); down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + down_write(&F2FS_I(inode)->i_mmap_sem); truncate_setsize(inode, attr->ia_size); @@ -808,8 +808,8 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr) * do not trim all blocks after i_size if target size is * larger than i_size. */ - up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); up_write(&F2FS_I(inode)->i_mmap_sem); + up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); if (err) return err; @@ -962,8 +962,8 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len) blk_start = (loff_t)pg_start << PAGE_SHIFT; blk_end = (loff_t)pg_end << PAGE_SHIFT; - down_write(&F2FS_I(inode)->i_mmap_sem); down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + down_write(&F2FS_I(inode)->i_mmap_sem); truncate_inode_pages_range(mapping, blk_start, blk_end - 1); @@ -972,8 +972,8 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len) ret = f2fs_truncate_hole(inode, pg_start, pg_end); f2fs_unlock_op(sbi); - up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); up_write(&F2FS_I(inode)->i_mmap_sem); + up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); } } @@ -1188,25 +1188,33 @@ roll_back: return ret; } -static int f2fs_do_collapse(struct inode *inode, pgoff_t start, pgoff_t end) +static int f2fs_do_collapse(struct inode *inode, loff_t offset, loff_t len) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); pgoff_t nrpages = (i_size_read(inode) + PAGE_SIZE - 1) / PAGE_SIZE; + pgoff_t start = offset >> PAGE_SHIFT; + pgoff_t end = (offset + len) >> PAGE_SHIFT; int ret; f2fs_balance_fs(sbi, true); - f2fs_lock_op(sbi); - f2fs_drop_extent_tree(inode); + /* avoid gc operation during block exchange */ + down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + down_write(&F2FS_I(inode)->i_mmap_sem); + f2fs_lock_op(sbi); + f2fs_drop_extent_tree(inode); + truncate_pagecache(inode, offset); ret = __exchange_data_block(inode, inode, end, start, nrpages - end, true); f2fs_unlock_op(sbi); + + up_write(&F2FS_I(inode)->i_mmap_sem); + up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); return ret; } static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len) { - pgoff_t pg_start, pg_end; loff_t new_size; int ret; @@ -1221,25 +1229,17 @@ static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len) if (ret) return ret; - pg_start = offset >> PAGE_SHIFT; - pg_end = (offset + len) >> PAGE_SHIFT; - - /* avoid gc operation during block exchange */ - down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); - - down_write(&F2FS_I(inode)->i_mmap_sem); /* write out all dirty pages from offset */ ret = filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX); if (ret) - goto out_unlock; - - truncate_pagecache(inode, offset); + return ret; - ret = f2fs_do_collapse(inode, pg_start, pg_end); + ret = f2fs_do_collapse(inode, offset, len); if (ret) - goto out_unlock; + return ret; /* write out all moved pages, if possible */ + down_write(&F2FS_I(inode)->i_mmap_sem); filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX); truncate_pagecache(inode, offset); @@ -1247,11 +1247,9 @@ static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len) truncate_pagecache(inode, new_size); ret = f2fs_truncate_blocks(inode, new_size, true); + up_write(&F2FS_I(inode)->i_mmap_sem); if (!ret) f2fs_i_size_write(inode, new_size); -out_unlock: - up_write(&F2FS_I(inode)->i_mmap_sem); - up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); return ret; } @@ -1317,10 +1315,9 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len, if (ret) return ret; - down_write(&F2FS_I(inode)->i_mmap_sem); ret = filemap_write_and_wait_range(mapping, offset, offset + len - 1); if (ret) - goto out_sem; + return ret; pg_start = ((unsigned long long) offset) >> PAGE_SHIFT; pg_end = ((unsigned long long) offset + len) >> PAGE_SHIFT; @@ -1332,7 +1329,7 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len, ret = fill_zero(inode, pg_start, off_start, off_end - off_start); if (ret) - goto out_sem; + return ret; new_size = max_t(loff_t, new_size, offset + len); } else { @@ -1340,7 +1337,7 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len, ret = fill_zero(inode, pg_start++, off_start, PAGE_SIZE - off_start); if (ret) - goto out_sem; + return ret; new_size = max_t(loff_t, new_size, (loff_t)pg_start << PAGE_SHIFT); @@ -1352,6 +1349,7 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len, pgoff_t end; down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + down_write(&F2FS_I(inode)->i_mmap_sem); truncate_pagecache_range(inode, (loff_t)index << PAGE_SHIFT, @@ -1363,6 +1361,7 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len, ret = f2fs_get_dnode_of_data(&dn, index, ALLOC_NODE); if (ret) { f2fs_unlock_op(sbi); + up_write(&F2FS_I(inode)->i_mmap_sem); up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); goto out; } @@ -1374,6 +1373,7 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len, f2fs_put_dnode(&dn); f2fs_unlock_op(sbi); + up_write(&F2FS_I(inode)->i_mmap_sem); up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); f2fs_balance_fs(sbi, dn.node_changed); @@ -1402,9 +1402,6 @@ out: else f2fs_i_size_write(inode, new_size); } -out_sem: - up_write(&F2FS_I(inode)->i_mmap_sem); - return ret; } @@ -1433,26 +1430,27 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len) f2fs_balance_fs(sbi, true); - /* avoid gc operation during block exchange */ - down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); - down_write(&F2FS_I(inode)->i_mmap_sem); ret = f2fs_truncate_blocks(inode, i_size_read(inode), true); + up_write(&F2FS_I(inode)->i_mmap_sem); if (ret) - goto out; + return ret; /* write out all dirty pages from offset */ ret = filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX); if (ret) - goto out; - - truncate_pagecache(inode, offset); + return ret; pg_start = offset >> PAGE_SHIFT; pg_end = (offset + len) >> PAGE_SHIFT; delta = pg_end - pg_start; idx = (i_size_read(inode) + PAGE_SIZE - 1) / PAGE_SIZE; + /* avoid gc operation during block exchange */ + down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + down_write(&F2FS_I(inode)->i_mmap_sem); + truncate_pagecache(inode, offset); + while (!ret && idx > pg_start) { nr = idx - pg_start; if (nr > delta) @@ -1466,16 +1464,17 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len) idx + delta, nr, false); f2fs_unlock_op(sbi); } + up_write(&F2FS_I(inode)->i_mmap_sem); + up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); /* write out all moved pages, if possible */ + down_write(&F2FS_I(inode)->i_mmap_sem); filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX); truncate_pagecache(inode, offset); + up_write(&F2FS_I(inode)->i_mmap_sem); if (!ret) f2fs_i_size_write(inode, new_size); -out: - up_write(&F2FS_I(inode)->i_mmap_sem); - up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); return ret; } @@ -1722,8 +1721,6 @@ static int f2fs_ioc_start_atomic_write(struct file *filp) inode_lock(inode); - down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); - if (f2fs_is_atomic_file(inode)) { if (is_inode_flag_set(inode, FI_ATOMIC_REVOKE_REQUEST)) ret = -EINVAL; @@ -1734,6 +1731,8 @@ static int f2fs_ioc_start_atomic_write(struct file *filp) if (ret) goto out; + down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + if (!get_dirty_pages(inode)) goto skip_flush; @@ -1741,18 +1740,20 @@ static int f2fs_ioc_start_atomic_write(struct file *filp) "Unexpected flush for atomic writes: ino=%lu, npages=%u", inode->i_ino, get_dirty_pages(inode)); ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX); - if (ret) + if (ret) { + up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); goto out; + } skip_flush: set_inode_flag(inode, FI_ATOMIC_FILE); clear_inode_flag(inode, FI_ATOMIC_REVOKE_REQUEST); - f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); + up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); F2FS_I(inode)->inmem_task = current; stat_inc_atomic_write(inode); stat_update_max_atomic_write(inode); out: - up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); inode_unlock(inode); mnt_drop_write_file(filp); return ret; @@ -1770,9 +1771,9 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp) if (ret) return ret; - inode_lock(inode); + f2fs_balance_fs(F2FS_I_SB(inode), true); - down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + inode_lock(inode); if (f2fs_is_volatile_file(inode)) { ret = -EINVAL; @@ -1798,7 +1799,6 @@ err_out: clear_inode_flag(inode, FI_ATOMIC_REVOKE_REQUEST); ret = -EINVAL; } - up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); inode_unlock(inode); mnt_drop_write_file(filp); return ret; @@ -2394,15 +2394,10 @@ static int f2fs_move_file_range(struct file *file_in, loff_t pos_in, } inode_lock(src); - down_write(&F2FS_I(src)->i_gc_rwsem[WRITE]); if (src != dst) { ret = -EBUSY; if (!inode_trylock(dst)) goto out; - if (!down_write_trylock(&F2FS_I(dst)->i_gc_rwsem[WRITE])) { - inode_unlock(dst); - goto out; - } } ret = -EINVAL; @@ -2447,6 +2442,14 @@ static int f2fs_move_file_range(struct file *file_in, loff_t pos_in, goto out_unlock; f2fs_balance_fs(sbi, true); + + down_write(&F2FS_I(src)->i_gc_rwsem[WRITE]); + if (src != dst) { + ret = -EBUSY; + if (!down_write_trylock(&F2FS_I(dst)->i_gc_rwsem[WRITE])) + goto out_src; + } + f2fs_lock_op(sbi); ret = __exchange_data_block(src, dst, pos_in >> F2FS_BLKSIZE_BITS, pos_out >> F2FS_BLKSIZE_BITS, @@ -2459,13 +2462,15 @@ static int f2fs_move_file_range(struct file *file_in, loff_t pos_in, f2fs_i_size_write(dst, dst_osize); } f2fs_unlock_op(sbi); -out_unlock: - if (src != dst) { + + if (src != dst) up_write(&F2FS_I(dst)->i_gc_rwsem[WRITE]); +out_src: + up_write(&F2FS_I(src)->i_gc_rwsem[WRITE]); +out_unlock: + if (src != dst) inode_unlock(dst); - } out: - up_write(&F2FS_I(src)->i_gc_rwsem[WRITE]); inode_unlock(src); return ret; } diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 2ae0983bab4a6..aa533a161e298 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -882,6 +882,7 @@ next_step: if (!down_write_trylock( &F2FS_I(inode)->i_gc_rwsem[WRITE])) { iput(inode); + sbi->skipped_gc_rwsem++; continue; } @@ -911,6 +912,7 @@ next_step: continue; if (!down_write_trylock( &fi->i_gc_rwsem[WRITE])) { + sbi->skipped_gc_rwsem++; up_write(&fi->i_gc_rwsem[READ]); continue; } @@ -1048,6 +1050,7 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, .iroot = RADIX_TREE_INIT(GFP_NOFS), }; unsigned long long last_skipped = sbi->skipped_atomic_files[FG_GC]; + unsigned long long first_skipped; unsigned int skipped_round = 0, round = 0; trace_f2fs_gc_begin(sbi->sb, sync, background, @@ -1060,6 +1063,8 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, prefree_segments(sbi)); cpc.reason = __get_cp_reason(sbi); + sbi->skipped_gc_rwsem = 0; + first_skipped = last_skipped; gc_more: if (unlikely(!(sbi->sb->s_flags & MS_ACTIVE))) { ret = -EINVAL; @@ -1101,7 +1106,8 @@ gc_more: total_freed += seg_freed; if (gc_type == FG_GC) { - if (sbi->skipped_atomic_files[FG_GC] > last_skipped) + if (sbi->skipped_atomic_files[FG_GC] > last_skipped || + sbi->skipped_gc_rwsem) skipped_round++; last_skipped = sbi->skipped_atomic_files[FG_GC]; round++; @@ -1110,15 +1116,23 @@ gc_more: if (gc_type == FG_GC) sbi->cur_victim_sec = NULL_SEGNO; - if (!sync) { - if (has_not_enough_free_secs(sbi, sec_freed, 0)) { - if (skipped_round > MAX_SKIP_ATOMIC_COUNT && - skipped_round * 2 >= round) - f2fs_drop_inmem_pages_all(sbi, true); + if (sync) + goto stop; + + if (has_not_enough_free_secs(sbi, sec_freed, 0)) { + if (skipped_round <= MAX_SKIP_GC_COUNT || + skipped_round * 2 < round) { segno = NULL_SEGNO; goto gc_more; } + if (first_skipped < last_skipped && + (last_skipped - first_skipped) > + sbi->skipped_gc_rwsem) { + f2fs_drop_inmem_pages_all(sbi, true); + segno = NULL_SEGNO; + goto gc_more; + } if (gc_type == FG_GC) ret = f2fs_write_checkpoint(sbi, &cpc); } diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 20650e25117b2..7dcfe38e70cc1 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -445,8 +445,10 @@ int f2fs_commit_inmem_pages(struct inode *inode) int err; f2fs_balance_fs(sbi, true); - f2fs_lock_op(sbi); + down_write(&fi->i_gc_rwsem[WRITE]); + + f2fs_lock_op(sbi); set_inode_flag(inode, FI_ATOMIC_COMMIT); mutex_lock(&fi->inmem_lock); @@ -461,6 +463,8 @@ int f2fs_commit_inmem_pages(struct inode *inode) clear_inode_flag(inode, FI_ATOMIC_COMMIT); f2fs_unlock_op(sbi); + up_write(&fi->i_gc_rwsem[WRITE]); + return err; } diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 50495515f0a0d..b3d9e317ff0c1 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -215,7 +215,7 @@ struct segment_allocation { #define IS_DUMMY_WRITTEN_PAGE(page) \ (page_private(page) == (unsigned long)DUMMY_WRITTEN_PAGE) -#define MAX_SKIP_ATOMIC_COUNT 16 +#define MAX_SKIP_GC_COUNT 16 struct inmem_pages { struct list_head list; -- GitLab From 9f892496fc0b02dde2e532a6db91cc296ecb5d09 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 14 Aug 2018 22:37:25 +0800 Subject: [PATCH 0089/2269] f2fs: readahead encrypted block during GC During GC, for each encrypted block, we will read block synchronously into meta page, and then submit it into current cold data log area. So this block read model with 4k granularity can make poor performance, like migrating non-encrypted block, let's readahead encrypted block as well to improve migration performance. To implement this, we choose meta page that its index is old block address of the encrypted block, and readahead ciphertext into this page, later, if readaheaded page is still updated, we will load its data into target meta page, and submit the write IO. Note that for OPU, truncation, deletion, we need to invalid meta page after we invalid old block address, to make sure we won't load invalid data from target meta page during encrypted block migration. for ((i = 0; i < 1000; i++)) do { xfs_io -f /mnt/f2fs/dir/$i -c "pwrite 0 128k" -c "fsync"; } done for ((i = 0; i < 1000; i+=2)) do { rm /mnt/f2fs/dir/$i; } done ret = ioctl(fd, F2FS_IOC_GARBAGE_COLLECT, 0); Before: gc-6549 [001] d..1 214682.212797: block_rq_insert: 8,32 RA 32768 () 786400 + 64 [gc] gc-6549 [001] d..1 214682.212802: block_unplug: [gc] 1 gc-6549 [001] .... 214682.213892: block_bio_queue: 8,32 R 67494144 + 8 [gc] gc-6549 [001] .... 214682.213899: block_getrq: 8,32 R 67494144 + 8 [gc] gc-6549 [001] .... 214682.213902: block_plug: [gc] gc-6549 [001] d..1 214682.213905: block_rq_insert: 8,32 R 4096 () 67494144 + 8 [gc] gc-6549 [001] d..1 214682.213908: block_unplug: [gc] 1 gc-6549 [001] .... 214682.226405: block_bio_queue: 8,32 R 67494152 + 8 [gc] gc-6549 [001] .... 214682.226412: block_getrq: 8,32 R 67494152 + 8 [gc] gc-6549 [001] .... 214682.226414: block_plug: [gc] gc-6549 [001] d..1 214682.226417: block_rq_insert: 8,32 R 4096 () 67494152 + 8 [gc] gc-6549 [001] d..1 214682.226420: block_unplug: [gc] 1 gc-6549 [001] .... 214682.226904: block_bio_queue: 8,32 R 67494160 + 8 [gc] gc-6549 [001] .... 214682.226910: block_getrq: 8,32 R 67494160 + 8 [gc] gc-6549 [001] .... 214682.226911: block_plug: [gc] gc-6549 [001] d..1 214682.226914: block_rq_insert: 8,32 R 4096 () 67494160 + 8 [gc] gc-6549 [001] d..1 214682.226916: block_unplug: [gc] 1 After: gc-5678 [003] .... 214327.025906: block_bio_queue: 8,32 R 67493824 + 8 [gc] gc-5678 [003] .... 214327.025908: block_bio_backmerge: 8,32 R 67493824 + 8 [gc] gc-5678 [003] .... 214327.025915: block_bio_queue: 8,32 R 67493832 + 8 [gc] gc-5678 [003] .... 214327.025917: block_bio_backmerge: 8,32 R 67493832 + 8 [gc] gc-5678 [003] .... 214327.025923: block_bio_queue: 8,32 R 67493840 + 8 [gc] gc-5678 [003] .... 214327.025925: block_bio_backmerge: 8,32 R 67493840 + 8 [gc] gc-5678 [003] .... 214327.025932: block_bio_queue: 8,32 R 67493848 + 8 [gc] gc-5678 [003] .... 214327.025934: block_bio_backmerge: 8,32 R 67493848 + 8 [gc] gc-5678 [003] .... 214327.025941: block_bio_queue: 8,32 R 67493856 + 8 [gc] gc-5678 [003] .... 214327.025943: block_bio_backmerge: 8,32 R 67493856 + 8 [gc] gc-5678 [003] .... 214327.025953: block_bio_queue: 8,32 R 67493864 + 8 [gc] gc-5678 [003] .... 214327.025955: block_bio_backmerge: 8,32 R 67493864 + 8 [gc] gc-5678 [003] .... 214327.025962: block_bio_queue: 8,32 R 67493872 + 8 [gc] gc-5678 [003] .... 214327.025964: block_bio_backmerge: 8,32 R 67493872 + 8 [gc] gc-5678 [003] .... 214327.025970: block_bio_queue: 8,32 R 67493880 + 8 [gc] gc-5678 [003] .... 214327.025972: block_bio_backmerge: 8,32 R 67493880 + 8 [gc] gc-5678 [003] .... 214327.026000: block_bio_queue: 8,32 WS 34123776 + 2048 [gc] gc-5678 [003] .... 214327.026019: block_getrq: 8,32 WS 34123776 + 2048 [gc] gc-5678 [003] d..1 214327.026021: block_rq_insert: 8,32 R 131072 () 67493632 + 256 [gc] gc-5678 [003] d..1 214327.026023: block_unplug: [gc] 1 gc-5678 [003] d..1 214327.026026: block_rq_issue: 8,32 R 131072 () 67493632 + 256 [gc] gc-5678 [003] .... 214327.026046: block_plug: [gc] Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 35 ++++++++++----- fs/f2fs/gc.c | 111 +++++++++++++++++++++++++++++++++++++++++----- fs/f2fs/segment.c | 10 ++++- 3 files changed, 134 insertions(+), 22 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 350c1228c7072..1a3414514cff5 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -875,6 +875,7 @@ static int __allocate_data_block(struct dnode_of_data *dn, int seg_type) struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); struct f2fs_summary sum; struct node_info ni; + block_t old_blkaddr; pgoff_t fofs; blkcnt_t count = 1; int err; @@ -896,9 +897,12 @@ static int __allocate_data_block(struct dnode_of_data *dn, int seg_type) alloc: set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version); - - f2fs_allocate_data_block(sbi, NULL, dn->data_blkaddr, &dn->data_blkaddr, + old_blkaddr = dn->data_blkaddr; + f2fs_allocate_data_block(sbi, NULL, old_blkaddr, &dn->data_blkaddr, &sum, seg_type, NULL, false); + if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO) + invalidate_mapping_pages(META_MAPPING(sbi), + old_blkaddr, old_blkaddr); f2fs_set_data_blkaddr(dn); /* update i_size */ @@ -1614,6 +1618,7 @@ static int f2fs_read_data_pages(struct file *file, static int encrypt_one_page(struct f2fs_io_info *fio) { struct inode *inode = fio->page->mapping->host; + struct page *mpage; gfp_t gfp_flags = GFP_NOFS; if (!f2fs_encrypted_file(inode)) @@ -1625,17 +1630,25 @@ static int encrypt_one_page(struct f2fs_io_info *fio) retry_encrypt: fio->encrypted_page = fscrypt_encrypt_page(inode, fio->page, PAGE_SIZE, 0, fio->page->index, gfp_flags); - if (!IS_ERR(fio->encrypted_page)) - return 0; + if (IS_ERR(fio->encrypted_page)) { + /* flush pending IOs and wait for a while in the ENOMEM case */ + if (PTR_ERR(fio->encrypted_page) == -ENOMEM) { + f2fs_flush_merged_writes(fio->sbi); + congestion_wait(BLK_RW_ASYNC, HZ/50); + gfp_flags |= __GFP_NOFAIL; + goto retry_encrypt; + } + return PTR_ERR(fio->encrypted_page); + } - /* flush pending IOs and wait for a while in the ENOMEM case */ - if (PTR_ERR(fio->encrypted_page) == -ENOMEM) { - f2fs_flush_merged_writes(fio->sbi); - congestion_wait(BLK_RW_ASYNC, HZ/50); - gfp_flags |= __GFP_NOFAIL; - goto retry_encrypt; + mpage = find_lock_page(META_MAPPING(fio->sbi), fio->old_blkaddr); + if (mpage) { + if (PageUptodate(mpage)) + memcpy(page_address(mpage), + page_address(fio->encrypted_page), PAGE_SIZE); + f2fs_put_page(mpage, 1); } - return PTR_ERR(fio->encrypted_page); + return 0; } static inline bool check_inplace_update_policy(struct inode *inode, diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index aa533a161e298..c6322ef9b5fc9 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -599,6 +599,72 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, return true; } +static int ra_data_block(struct inode *inode, pgoff_t index) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct address_space *mapping = inode->i_mapping; + struct dnode_of_data dn; + struct page *page; + struct extent_info ei = {0, 0, 0}; + struct f2fs_io_info fio = { + .sbi = sbi, + .ino = inode->i_ino, + .type = DATA, + .temp = COLD, + .op = REQ_OP_READ, + .op_flags = 0, + .encrypted_page = NULL, + .in_list = false, + .retry = false, + }; + int err; + + page = f2fs_grab_cache_page(mapping, index, true); + if (!page) + return -ENOMEM; + + if (f2fs_lookup_extent_cache(inode, index, &ei)) { + dn.data_blkaddr = ei.blk + index - ei.fofs; + goto got_it; + } + + set_new_dnode(&dn, inode, NULL, NULL, 0); + err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE); + if (err) + goto put_page; + f2fs_put_dnode(&dn); + + if (unlikely(!f2fs_is_valid_blkaddr(sbi, dn.data_blkaddr, + DATA_GENERIC))) { + err = -EFAULT; + goto put_page; + } +got_it: + /* read page */ + fio.page = page; + fio.new_blkaddr = fio.old_blkaddr = dn.data_blkaddr; + + fio.encrypted_page = f2fs_pagecache_get_page(META_MAPPING(sbi), + dn.data_blkaddr, + FGP_LOCK | FGP_CREAT, GFP_NOFS); + if (!fio.encrypted_page) { + err = -ENOMEM; + goto put_page; + } + + err = f2fs_submit_page_bio(&fio); + if (err) + goto put_encrypted_page; + f2fs_put_page(fio.encrypted_page, 0); + f2fs_put_page(page, 1); + return 0; +put_encrypted_page: + f2fs_put_page(fio.encrypted_page, 1); +put_page: + f2fs_put_page(page, 1); + return err; +} + /* * Move data block via META_MAPPING while keeping locked data page. * This can be used to move blocks, aka LBAs, directly on disk. @@ -620,7 +686,7 @@ static void move_data_block(struct inode *inode, block_t bidx, struct dnode_of_data dn; struct f2fs_summary sum; struct node_info ni; - struct page *page; + struct page *page, *mpage; block_t newaddr; int err; bool lfs_mode = test_opt(fio.sbi, LFS); @@ -683,6 +749,23 @@ static void move_data_block(struct inode *inode, block_t bidx, goto recover_block; } + mpage = f2fs_pagecache_get_page(META_MAPPING(fio.sbi), + fio.old_blkaddr, FGP_LOCK, GFP_NOFS); + if (mpage) { + bool updated = false; + + if (PageUptodate(mpage)) { + memcpy(page_address(fio.encrypted_page), + page_address(mpage), PAGE_SIZE); + updated = true; + } + f2fs_put_page(mpage, 1); + invalidate_mapping_pages(META_MAPPING(fio.sbi), + fio.old_blkaddr, fio.old_blkaddr); + if (updated) + goto write_page; + } + err = f2fs_submit_page_bio(&fio); if (err) goto put_page_out; @@ -699,6 +782,7 @@ static void move_data_block(struct inode *inode, block_t bidx, goto put_page_out; } +write_page: set_page_dirty(fio.encrypted_page); f2fs_wait_on_page_writeback(fio.encrypted_page, DATA, true); if (clear_page_dirty_for_io(fio.encrypted_page)) @@ -873,12 +957,6 @@ next_step: if (IS_ERR(inode) || is_bad_inode(inode)) continue; - /* if inode uses special I/O path, let's go phase 3 */ - if (f2fs_post_read_required(inode)) { - add_gc_inode(gc_list, inode); - continue; - } - if (!down_write_trylock( &F2FS_I(inode)->i_gc_rwsem[WRITE])) { iput(inode); @@ -886,10 +964,23 @@ next_step: continue; } - start_bidx = f2fs_start_bidx_of_node(nofs, inode); + start_bidx = f2fs_start_bidx_of_node(nofs, inode) + + ofs_in_node; + + if (f2fs_post_read_required(inode)) { + int err = ra_data_block(inode, start_bidx); + + up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + if (err) { + iput(inode); + continue; + } + add_gc_inode(gc_list, inode); + continue; + } + data_page = f2fs_get_read_data_page(inode, - start_bidx + ofs_in_node, REQ_RAHEAD, - true); + start_bidx, REQ_RAHEAD, true); up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); if (IS_ERR(data_page)) { iput(inode); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 7dcfe38e70cc1..30779aaa9dbae 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2079,6 +2079,8 @@ void f2fs_invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr) if (addr == NEW_ADDR) return; + invalidate_mapping_pages(META_MAPPING(sbi), addr, addr); + /* add it into sit main buffer */ down_write(&sit_i->sentry_lock); @@ -2978,6 +2980,9 @@ static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio) reallocate: f2fs_allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr, &fio->new_blkaddr, sum, type, fio, true); + if (GET_SEGNO(fio->sbi, fio->old_blkaddr) != NULL_SEGNO) + invalidate_mapping_pages(META_MAPPING(fio->sbi), + fio->old_blkaddr, fio->old_blkaddr); /* writeout dirty page into bdev */ f2fs_submit_page_write(fio); @@ -3132,8 +3137,11 @@ void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, if (!recover_curseg || recover_newaddr) update_sit_entry(sbi, new_blkaddr, 1); - if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO) + if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO) { + invalidate_mapping_pages(META_MAPPING(sbi), + old_blkaddr, old_blkaddr); update_sit_entry(sbi, old_blkaddr, -1); + } locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr)); locate_dirty_segment(sbi, GET_SEGNO(sbi, new_blkaddr)); -- GitLab From a08d7ea10bc7df3b0177d7de2aab3805899fb8fd Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 3 Sep 2018 11:08:15 -0700 Subject: [PATCH 0090/2269] act_ife: fix a potential use-after-free [ Upstream commit 6d784f1625ea68783cc1fb17de8f6cd3e1660c3f ] Immediately after module_put(), user could delete this module, so e->ops could be already freed before we call e->ops->release(). Fix this by moving module_put() after ops->release(). Fixes: ef6980b6becb ("introduce IFE action") Cc: Jamal Hadi Salim Signed-off-by: Cong Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/act_ife.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index 85757af7f1508..711ccc4607cc5 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -377,7 +377,6 @@ static void _tcf_ife_cleanup(struct tc_action *a, int bind) struct tcf_meta_info *e, *n; list_for_each_entry_safe(e, n, &ife->metalist, metalist) { - module_put(e->ops->owner); list_del(&e->metalist); if (e->metaval) { if (e->ops->release) @@ -385,6 +384,7 @@ static void _tcf_ife_cleanup(struct tc_action *a, int bind) else kfree(e->metaval); } + module_put(e->ops->owner); kfree(e); } } -- GitLab From a16405ad27f6952f7f18460cda40cf92605b5215 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 22 Aug 2018 13:30:45 -0700 Subject: [PATCH 0091/2269] ipv4: tcp: send zero IPID for RST and ACK sent in SYN-RECV and TIME-WAIT state [ Upstream commit 431280eebed9f5079553daf003011097763e71fd ] tcp uses per-cpu (and per namespace) sockets (net->ipv4.tcp_sk) internally to send some control packets. 1) RST packets, through tcp_v4_send_reset() 2) ACK packets in SYN-RECV and TIME-WAIT state, through tcp_v4_send_ack() These packets assert IP_DF, and also use the hashed IP ident generator to provide an IPv4 ID number. Geoff Alexander reported this could be used to build off-path attacks. These packets should not be fragmented, since their size is smaller than IPV4_MIN_MTU. Only some tunneled paths could eventually have to fragment, regardless of inner IPID. We really can use zero IPID, to address the flaw, and as a bonus, avoid a couple of atomic operations in ip_idents_reserve() Signed-off-by: Eric Dumazet Reported-by: Geoff Alexander Tested-by: Geoff Alexander Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_ipv4.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index a95ccdceb7979..0e1a670dabd98 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2468,6 +2468,12 @@ static int __net_init tcp_sk_init(struct net *net) if (res) goto fail; sock_set_flag(sk, SOCK_USE_WRITE_QUEUE); + + /* Please enforce IP_DF and IPID==0 for RST and + * ACK sent in SYN-RECV and TIME-WAIT state. + */ + inet_sk(sk)->pmtudisc = IP_PMTUDISC_DO; + *per_cpu_ptr(net->ipv4.tcp_sk, cpu) = sk; } -- GitLab From 1ef819e411f8d970cae5de9a07ec520ba307f02a Mon Sep 17 00:00:00 2001 From: Doug Berger Date: Tue, 28 Aug 2018 12:33:15 -0700 Subject: [PATCH 0092/2269] net: bcmgenet: use MAC link status for fixed phy [ Upstream commit c3c397c1f16c51601a3fac4fe0c63ad8aa85a904 ] When using the fixed PHY with GENET (e.g. MOCA) the PHY link status can be determined from the internal link status captured by the MAC. This allows the PHY state machine to use the correct link state with the fixed PHY even if MAC link event interrupts are missed when the net device is opened. Fixes: 8d88c6ebb34c ("net: bcmgenet: enable MoCA link state change detection") Signed-off-by: Doug Berger Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/broadcom/genet/bcmgenet.h | 3 +++ drivers/net/ethernet/broadcom/genet/bcmmii.c | 10 ++++++++-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.h b/drivers/net/ethernet/broadcom/genet/bcmgenet.h index 4c49d0b977483..9d499c5c8f8aa 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h @@ -185,6 +185,9 @@ struct bcmgenet_mib_counters { #define UMAC_MAC1 0x010 #define UMAC_MAX_FRAME_LEN 0x014 +#define UMAC_MODE 0x44 +#define MODE_LINK_STATUS (1 << 5) + #define UMAC_EEE_CTRL 0x064 #define EN_LPI_RX_PAUSE (1 << 0) #define EN_LPI_TX_PFC (1 << 1) diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c index 18f5723be2c91..6ad0ca7ed3e91 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmmii.c +++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c @@ -115,8 +115,14 @@ void bcmgenet_mii_setup(struct net_device *dev) static int bcmgenet_fixed_phy_link_update(struct net_device *dev, struct fixed_phy_status *status) { - if (dev && dev->phydev && status) - status->link = dev->phydev->link; + struct bcmgenet_priv *priv; + u32 reg; + + if (dev && dev->phydev && status) { + priv = netdev_priv(dev); + reg = bcmgenet_umac_readl(priv, UMAC_MODE); + status->link = !!(reg & MODE_LINK_STATUS); + } return 0; } -- GitLab From cb765f5c3c5b1e982112e4ccccb80bd90f7cb7bf Mon Sep 17 00:00:00 2001 From: Anssi Hannula Date: Thu, 23 Aug 2018 10:45:22 +0300 Subject: [PATCH 0093/2269] net: macb: do not disable MDIO bus at open/close time [ Upstream commit 0da70f808029476001109b6cb076737bc04cea2e ] macb_reset_hw() is called from macb_close() and indirectly from macb_open(). macb_reset_hw() zeroes the NCR register, including the MPE (Management Port Enable) bit. This will prevent accessing any other PHYs for other Ethernet MACs on the MDIO bus, which remains registered at macb_reset_hw() time, until macb_init_hw() is called from macb_open() which sets the MPE bit again. I.e. currently the MDIO bus has a short disruption at open time and is disabled at close time until the interface is opened again. Fix that by only touching the RE and TE bits when enabling and disabling RX/TX. v2: Make macb_init_hw() NCR write a single statement. Fixes: 6c36a7074436 ("macb: Use generic PHY layer") Signed-off-by: Anssi Hannula Reviewed-by: Claudiu Beznea Tested-by: Claudiu Beznea Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/cadence/macb_main.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 6df2cad61647a..dfef4ec167c18 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -1884,14 +1884,17 @@ static void macb_reset_hw(struct macb *bp) { struct macb_queue *queue; unsigned int q; + u32 ctrl = macb_readl(bp, NCR); /* Disable RX and TX (XXX: Should we halt the transmission * more gracefully?) */ - macb_writel(bp, NCR, 0); + ctrl &= ~(MACB_BIT(RE) | MACB_BIT(TE)); /* Clear the stats registers (XXX: Update stats first?) */ - macb_writel(bp, NCR, MACB_BIT(CLRSTAT)); + ctrl |= MACB_BIT(CLRSTAT); + + macb_writel(bp, NCR, ctrl); /* Clear all status flags */ macb_writel(bp, TSR, -1); @@ -2070,7 +2073,7 @@ static void macb_init_hw(struct macb *bp) } /* Enable TX and RX */ - macb_writel(bp, NCR, MACB_BIT(RE) | MACB_BIT(TE) | MACB_BIT(MPE)); + macb_writel(bp, NCR, macb_readl(bp, NCR) | MACB_BIT(RE) | MACB_BIT(TE)); } /* The hash address register is 64 bits long and takes up two -- GitLab From 7f1e6ec4ff1234bb2bcf1dd7763cd16e66d6a3fe Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sat, 25 Aug 2018 22:58:01 -0700 Subject: [PATCH 0094/2269] net: sched: Fix memory exposure from short TCA_U32_SEL [ Upstream commit 98c8f125fd8a6240ea343c1aa50a1be9047791b8 ] Via u32_change(), TCA_U32_SEL has an unspecified type in the netlink policy, so max length isn't enforced, only minimum. This means nkeys (from userspace) was being trusted without checking the actual size of nla_len(), which could lead to a memory over-read, and ultimately an exposure via a call to u32_dump(). Reachability is CAP_NET_ADMIN within a namespace. Reported-by: Al Viro Cc: Jamal Hadi Salim Cc: Cong Wang Cc: Jiri Pirko Cc: "David S. Miller" Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/cls_u32.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index ba37d8f57e686..0c9bc29dcf97a 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -903,6 +903,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, struct nlattr *opt = tca[TCA_OPTIONS]; struct nlattr *tb[TCA_U32_MAX + 1]; u32 htid, flags = 0; + size_t sel_size; int err; #ifdef CONFIG_CLS_U32_PERF size_t size; @@ -1024,8 +1025,11 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, return -EINVAL; s = nla_data(tb[TCA_U32_SEL]); + sel_size = sizeof(*s) + sizeof(*s->keys) * s->nkeys; + if (nla_len(tb[TCA_U32_SEL]) < sel_size) + return -EINVAL; - n = kzalloc(sizeof(*n) + s->nkeys*sizeof(struct tc_u32_key), GFP_KERNEL); + n = kzalloc(offsetof(typeof(*n), sel) + sel_size, GFP_KERNEL); if (n == NULL) return -ENOBUFS; @@ -1038,7 +1042,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, } #endif - memcpy(&n->sel, s, sizeof(*s) + s->nkeys*sizeof(struct tc_u32_key)); + memcpy(&n->sel, s, sel_size); RCU_INIT_POINTER(n->ht_up, ht); n->handle = handle; n->fshift = s->hmask ? ffs(ntohl(s->hmask)) - 1 : 0; -- GitLab From d19688e347a17edcdca2f90be9d15cc5cc09daea Mon Sep 17 00:00:00 2001 From: Manish Chopra Date: Thu, 23 Aug 2018 13:20:52 -0700 Subject: [PATCH 0095/2269] qlge: Fix netdev features configuration. [ Upstream commit 6750c87074c5b534d82fdaabb1deb45b8f1f57de ] qlge_fix_features() is not supposed to modify hardware or driver state, rather it is supposed to only fix requested fetures bits. Currently qlge_fix_features() also goes for interface down and up unnecessarily if there is not even any change in features set. This patch changes/fixes following - 1) Move reload of interface or device re-config from qlge_fix_features() to qlge_set_features(). 2) Reload of interface in qlge_set_features() only if relevant feature bit (NETIF_F_HW_VLAN_CTAG_RX) is changed. 3) Get rid of qlge_fix_features() since driver is not really required to fix any features bit. Signed-off-by: Manish Reviewed-by: Benjamin Poirier Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/qlogic/qlge/qlge_main.c | 23 +++++++------------- 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qlge/qlge_main.c b/drivers/net/ethernet/qlogic/qlge/qlge_main.c index 9feec70094435..0e3b2890b9258 100644 --- a/drivers/net/ethernet/qlogic/qlge/qlge_main.c +++ b/drivers/net/ethernet/qlogic/qlge/qlge_main.c @@ -2386,26 +2386,20 @@ static int qlge_update_hw_vlan_features(struct net_device *ndev, return status; } -static netdev_features_t qlge_fix_features(struct net_device *ndev, - netdev_features_t features) -{ - int err; - - /* Update the behavior of vlan accel in the adapter */ - err = qlge_update_hw_vlan_features(ndev, features); - if (err) - return err; - - return features; -} - static int qlge_set_features(struct net_device *ndev, netdev_features_t features) { netdev_features_t changed = ndev->features ^ features; + int err; + + if (changed & NETIF_F_HW_VLAN_CTAG_RX) { + /* Update the behavior of vlan accel in the adapter */ + err = qlge_update_hw_vlan_features(ndev, features); + if (err) + return err; - if (changed & NETIF_F_HW_VLAN_CTAG_RX) qlge_vlan_mode(ndev, features); + } return 0; } @@ -4719,7 +4713,6 @@ static const struct net_device_ops qlge_netdev_ops = { .ndo_set_mac_address = qlge_set_mac_address, .ndo_validate_addr = eth_validate_addr, .ndo_tx_timeout = qlge_tx_timeout, - .ndo_fix_features = qlge_fix_features, .ndo_set_features = qlge_set_features, .ndo_vlan_rx_add_vid = qlge_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = qlge_vlan_rx_kill_vid, -- GitLab From 3eada53de462d8bbeb826e52ebbd419fea1ee00d Mon Sep 17 00:00:00 2001 From: Anthony Wong Date: Fri, 31 Aug 2018 20:06:42 +0800 Subject: [PATCH 0096/2269] r8169: add support for NCube 8168 network card [ Upstream commit 9fd0e09a4e86499639653243edfcb417a05c5c46 ] This card identifies itself as: Ethernet controller [0200]: NCube Device [10ff:8168] (rev 06) Subsystem: TP-LINK Technologies Co., Ltd. Device [7470:3468] Adding a new entry to rtl8169_pci_tbl makes the card work. Link: http://launchpad.net/bugs/1788730 Signed-off-by: Anthony Wong Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/realtek/r8169.c | 1 + include/linux/pci_ids.h | 2 ++ 2 files changed, 3 insertions(+) diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index b98fcc9e93e5a..3669005b9294a 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -329,6 +329,7 @@ static const struct pci_device_id rtl8169_pci_tbl[] = { { PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0x8161), 0, 0, RTL_CFG_1 }, { PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0x8167), 0, 0, RTL_CFG_0 }, { PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0x8168), 0, 0, RTL_CFG_1 }, + { PCI_DEVICE(PCI_VENDOR_ID_NCUBE, 0x8168), 0, 0, RTL_CFG_1 }, { PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0x8169), 0, 0, RTL_CFG_0 }, { PCI_VENDOR_ID_DLINK, 0x4300, PCI_VENDOR_ID_DLINK, 0x4b10, 0, 0, RTL_CFG_1 }, diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index ab20dc5db423c..7fa3f1498b340 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -3062,4 +3062,6 @@ #define PCI_VENDOR_ID_OCZ 0x1b85 +#define PCI_VENDOR_ID_NCUBE 0x10ff + #endif /* _LINUX_PCI_IDS_H */ -- GitLab From e4b6c5fd31bddadb97058f38db2ae1a819d65d42 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 30 Aug 2018 14:24:29 +0200 Subject: [PATCH 0097/2269] tcp: do not restart timewait timer on rst reception [ Upstream commit 63cc357f7bba6729869565a12df08441a5995d9a ] RFC 1337 says: ''Ignore RST segments in TIME-WAIT state. If the 2 minute MSL is enforced, this fix avoids all three hazards.'' So with net.ipv4.tcp_rfc1337=1, expected behaviour is to have TIME-WAIT sk expire rather than removing it instantly when a reset is received. However, Linux will also re-start the TIME-WAIT timer. This causes connect to fail when tying to re-use ports or very long delays (until syn retry interval exceeds MSL). packetdrill test case: // Demonstrate bogus rearming of TIME-WAIT timer in rfc1337 mode. `sysctl net.ipv4.tcp_rfc1337=1` 0.000 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 0.000 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 0.000 bind(3, ..., ...) = 0 0.000 listen(3, 1) = 0 0.100 < S 0:0(0) win 29200 0.100 > S. 0:0(0) ack 1 0.200 < . 1:1(0) ack 1 win 257 0.200 accept(3, ..., ...) = 4 // Receive first segment 0.310 < P. 1:1001(1000) ack 1 win 46 // Send one ACK 0.310 > . 1:1(0) ack 1001 // read 1000 byte 0.310 read(4, ..., 1000) = 1000 // Application writes 100 bytes 0.350 write(4, ..., 100) = 100 0.350 > P. 1:101(100) ack 1001 // ACK 0.500 < . 1001:1001(0) ack 101 win 257 // close the connection 0.600 close(4) = 0 0.600 > F. 101:101(0) ack 1001 win 244 // Our side is in FIN_WAIT_1 & waits for ack to fin 0.7 < . 1001:1001(0) ack 102 win 244 // Our side is in FIN_WAIT_2 with no outstanding data. 0.8 < F. 1001:1001(0) ack 102 win 244 0.8 > . 102:102(0) ack 1002 win 244 // Our side is now in TIME_WAIT state, send ack for fin. 0.9 < F. 1002:1002(0) ack 102 win 244 0.9 > . 102:102(0) ack 1002 win 244 // Peer reopens with in-window SYN: 1.000 < S 1000:1000(0) win 9200 // Therefore, reply with ACK. 1.000 > . 102:102(0) ack 1002 win 244 // Peer sends RST for this ACK. Normally this RST results // in tw socket removal, but rfc1337=1 setting prevents this. 1.100 < R 1002:1002(0) win 244 // second syn. Due to rfc1337=1 expect another pure ACK. 31.0 < S 1000:1000(0) win 9200 31.0 > . 102:102(0) ack 1002 win 244 // .. and another RST from peer. 31.1 < R 1002:1002(0) win 244 31.2 `echo no timer restart;ss -m -e -a -i -n -t -o state TIME-WAIT` // third syn after one minute. Time-Wait socket should have expired by now. 63.0 < S 1000:1000(0) win 9200 // so we expect a syn-ack & 3whs to proceed from here on. 63.0 > S. 0:0(0) ack 1 Without this patch, 'ss' shows restarts of tw timer and last packet is thus just another pure ack, more than one minute later. This restores the original code from commit 283fd6cf0be690a83 ("Merge in ANK networking jumbo patch") in netdev-vger-cvs.git . For some reason the else branch was removed/lost in 1f28b683339f7 ("Merge in TCP/UDP optimizations and [..]") and timer restart became unconditional. Reported-by: Michal Tesar Signed-off-by: Florian Westphal Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_minisocks.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 420fecbb98fe7..61584638dba7f 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -185,8 +185,9 @@ kill: inet_twsk_deschedule_put(tw); return TCP_TW_SUCCESS; } + } else { + inet_twsk_reschedule(tw, TCP_TIMEWAIT_LEN); } - inet_twsk_reschedule(tw, TCP_TIMEWAIT_LEN); if (tmp_opt.saw_tstamp) { tcptw->tw_ts_recent = tmp_opt.rcv_tsval; -- GitLab From 56af4184d356e768f977a4bf75839722b83ce97b Mon Sep 17 00:00:00 2001 From: Alexey Kodanev Date: Thu, 23 Aug 2018 19:49:54 +0300 Subject: [PATCH 0098/2269] vti6: remove !skb->ignore_df check from vti6_xmit() [ Upstream commit 9f2895461439fda2801a7906fb4c5fb3dbb37a0a ] Before the commit d6990976af7c ("vti6: fix PMTU caching and reporting on xmit") '!skb->ignore_df' check was always true because the function skb_scrub_packet() was called before it, resetting ignore_df to zero. In the commit, skb_scrub_packet() was moved below, and now this check can be false for the packet, e.g. when sending it in the two fragments, this prevents successful PMTU updates in such case. The next attempts to send the packet lead to the same tx error. Moreover, vti6 initial MTU value relies on PMTU adjustments. This issue can be reproduced with the following LTP test script: udp_ipsec_vti.sh -6 -p ah -m tunnel -s 2000 Fixes: ccd740cbc6e0 ("vti6: Add pmtu handling to vti6_xmit.") Signed-off-by: Alexey Kodanev Acked-by: Steffen Klassert Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ip6_vti.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index b9e638cc955fa..db5a24f093352 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -481,7 +481,7 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl) } mtu = dst_mtu(dst); - if (!skb->ignore_df && skb->len > mtu) { + if (skb->len > mtu) { skb_dst_update_pmtu(skb, mtu); if (skb->protocol == htons(ETH_P_IPV6)) { -- GitLab From 417b068a6a7a42073d72480e1d5722dbe01e878e Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Mon, 27 Aug 2018 22:56:22 +0200 Subject: [PATCH 0099/2269] net/sched: act_pedit: fix dump of extended layered op [ Upstream commit 85eb9af182243ce9a8b72410d5321c440ac5f8d7 ] in the (rare) case of failure in nla_nest_start(), missing NULL checks in tcf_pedit_key_ex_dump() can make the following command # tc action add action pedit ex munge ip ttl set 64 dereference a NULL pointer: BUG: unable to handle kernel NULL pointer dereference at 0000000000000000 PGD 800000007d1cd067 P4D 800000007d1cd067 PUD 7acd3067 PMD 0 Oops: 0002 [#1] SMP PTI CPU: 0 PID: 3336 Comm: tc Tainted: G E 4.18.0.pedit+ #425 Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011 RIP: 0010:tcf_pedit_dump+0x19d/0x358 [act_pedit] Code: be 02 00 00 00 48 89 df 66 89 44 24 20 e8 9b b1 fd e0 85 c0 75 46 8b 83 c8 00 00 00 49 83 c5 08 48 03 83 d0 00 00 00 4d 39 f5 <66> 89 04 25 00 00 00 00 0f 84 81 01 00 00 41 8b 45 00 48 8d 4c 24 RSP: 0018:ffffb5d4004478a8 EFLAGS: 00010246 RAX: ffff8880fcda2070 RBX: ffff8880fadd2900 RCX: 0000000000000000 RDX: 0000000000000002 RSI: ffffb5d4004478ca RDI: ffff8880fcda206e RBP: ffff8880fb9cb900 R08: 0000000000000008 R09: ffff8880fcda206e R10: ffff8880fadd2900 R11: 0000000000000000 R12: ffff8880fd26cf40 R13: ffff8880fc957430 R14: ffff8880fc957430 R15: ffff8880fb9cb988 FS: 00007f75a537a740(0000) GS:ffff8880fda00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 000000007a2fa005 CR4: 00000000001606f0 Call Trace: ? __nla_reserve+0x38/0x50 tcf_action_dump_1+0xd2/0x130 tcf_action_dump+0x6a/0xf0 tca_get_fill.constprop.31+0xa3/0x120 tcf_action_add+0xd1/0x170 tc_ctl_action+0x137/0x150 rtnetlink_rcv_msg+0x263/0x2d0 ? _cond_resched+0x15/0x40 ? rtnl_calcit.isra.30+0x110/0x110 netlink_rcv_skb+0x4d/0x130 netlink_unicast+0x1a3/0x250 netlink_sendmsg+0x2ae/0x3a0 sock_sendmsg+0x36/0x40 ___sys_sendmsg+0x26f/0x2d0 ? do_wp_page+0x8e/0x5f0 ? handle_pte_fault+0x6c3/0xf50 ? __handle_mm_fault+0x38e/0x520 ? __sys_sendmsg+0x5e/0xa0 __sys_sendmsg+0x5e/0xa0 do_syscall_64+0x5b/0x180 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7f75a4583ba0 Code: c3 48 8b 05 f2 62 2c 00 f7 db 64 89 18 48 83 cb ff eb dd 0f 1f 80 00 00 00 00 83 3d fd c3 2c 00 00 75 10 b8 2e 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 31 c3 48 83 ec 08 e8 ae cc 00 00 48 89 04 24 RSP: 002b:00007fff60ee7418 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 00007fff60ee7540 RCX: 00007f75a4583ba0 RDX: 0000000000000000 RSI: 00007fff60ee7490 RDI: 0000000000000003 RBP: 000000005b842d3e R08: 0000000000000002 R09: 0000000000000000 R10: 00007fff60ee6ea0 R11: 0000000000000246 R12: 0000000000000000 R13: 00007fff60ee7554 R14: 0000000000000001 R15: 000000000066c100 Modules linked in: act_pedit(E) ip6table_filter ip6_tables iptable_filter binfmt_misc crct10dif_pclmul ext4 crc32_pclmul mbcache ghash_clmulni_intel jbd2 pcbc snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hda_core snd_hwdep snd_seq snd_seq_device snd_pcm aesni_intel crypto_simd snd_timer cryptd glue_helper snd joydev pcspkr soundcore virtio_balloon i2c_piix4 nfsd auth_rpcgss nfs_acl lockd grace sunrpc ip_tables xfs libcrc32c ata_generic pata_acpi virtio_net net_failover virtio_blk virtio_console failover qxl crc32c_intel drm_kms_helper syscopyarea serio_raw sysfillrect sysimgblt fb_sys_fops ttm drm ata_piix virtio_pci libata virtio_ring i2c_core virtio floppy dm_mirror dm_region_hash dm_log dm_mod [last unloaded: act_pedit] CR2: 0000000000000000 Like it's done for other TC actions, give up dumping pedit rules and return an error if nla_nest_start() returns NULL. Fixes: 71d0ed7079df ("net/act_pedit: Support using offset relative to the conventional network headers") Signed-off-by: Davide Caratti Acked-by: Cong Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/act_pedit.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index 51ab463d9e168..656b6ada92210 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -109,16 +109,18 @@ static int tcf_pedit_key_ex_dump(struct sk_buff *skb, { struct nlattr *keys_start = nla_nest_start(skb, TCA_PEDIT_KEYS_EX); + if (!keys_start) + goto nla_failure; for (; n > 0; n--) { struct nlattr *key_start; key_start = nla_nest_start(skb, TCA_PEDIT_KEY_EX); + if (!key_start) + goto nla_failure; if (nla_put_u16(skb, TCA_PEDIT_KEY_EX_HTYPE, keys_ex->htype) || - nla_put_u16(skb, TCA_PEDIT_KEY_EX_CMD, keys_ex->cmd)) { - nlmsg_trim(skb, keys_start); - return -EINVAL; - } + nla_put_u16(skb, TCA_PEDIT_KEY_EX_CMD, keys_ex->cmd)) + goto nla_failure; nla_nest_end(skb, key_start); @@ -128,6 +130,9 @@ static int tcf_pedit_key_ex_dump(struct sk_buff *skb, nla_nest_end(skb, keys_start); return 0; +nla_failure: + nla_nest_cancel(skb, keys_start); + return -EINVAL; } static int tcf_pedit_init(struct net *net, struct nlattr *nla, @@ -395,7 +400,10 @@ static int tcf_pedit_dump(struct sk_buff *skb, struct tc_action *a, opt->bindcnt = p->tcf_bindcnt - bind; if (p->tcfp_keys_ex) { - tcf_pedit_key_ex_dump(skb, p->tcfp_keys_ex, p->tcfp_nkeys); + if (tcf_pedit_key_ex_dump(skb, + p->tcfp_keys_ex, + p->tcfp_nkeys)) + goto nla_put_failure; if (nla_put(skb, TCA_PEDIT_PARMS_EX, s, opt)) goto nla_put_failure; -- GitLab From 8fed734df37511ecfb522adb7dfe8541de362349 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 23 Aug 2018 16:19:44 -0700 Subject: [PATCH 0100/2269] tipc: fix a missing rhashtable_walk_exit() [ Upstream commit bd583fe30427500a2d0abe25724025b1cb5e2636 ] rhashtable_walk_exit() must be paired with rhashtable_walk_enter(). Fixes: 40f9f4397060 ("tipc: Fix tipc_sk_reinit race conditions") Cc: Herbert Xu Cc: Ying Xue Signed-off-by: Cong Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/tipc/socket.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 98a44ecb11e7b..0aebf0695ae02 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -2268,6 +2268,8 @@ void tipc_sk_reinit(struct net *net) walk_stop: rhashtable_walk_stop(&iter); } while (tsk == ERR_PTR(-EAGAIN)); + + rhashtable_walk_exit(&iter); } static struct tipc_sock *tipc_sk_lookup(struct net *net, u32 portid) -- GitLab From 456e46f05b37e016628c07fa5e63ec42749d13d3 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 29 Aug 2018 12:46:08 -0700 Subject: [PATCH 0101/2269] nfp: wait for posted reconfigs when disabling the device [ Upstream commit 9ad716b95fd6c6be46a4f2d5936e514b5bcd744d ] To avoid leaking a running timer we need to wait for the posted reconfigs after netdev is unregistered. In common case the process of deinitializing the device will perform synchronous reconfigs which wait for posted requests, but especially with VXLAN ports being actively added and removed there can be a race condition leaving a timer running after adapter structure is freed leading to a crash. Add an explicit flush after deregistering and for a good measure a warning to check if timer is running just before structures are freed. Fixes: 3d780b926a12 ("nfp: add async reconfiguration mechanism") Signed-off-by: Jakub Kicinski Reviewed-by: Dirk van der Merwe Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- .../ethernet/netronome/nfp/nfp_net_common.c | 48 +++++++++++++------ 1 file changed, 33 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 8d53a593fb274..b482a8fb0e927 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -227,29 +227,16 @@ done: spin_unlock_bh(&nn->reconfig_lock); } -/** - * nfp_net_reconfig() - Reconfigure the firmware - * @nn: NFP Net device to reconfigure - * @update: The value for the update field in the BAR config - * - * Write the update word to the BAR and ping the reconfig queue. The - * poll until the firmware has acknowledged the update by zeroing the - * update word. - * - * Return: Negative errno on error, 0 on success - */ -int nfp_net_reconfig(struct nfp_net *nn, u32 update) +static void nfp_net_reconfig_sync_enter(struct nfp_net *nn) { bool cancelled_timer = false; u32 pre_posted_requests; - int ret; spin_lock_bh(&nn->reconfig_lock); nn->reconfig_sync_present = true; if (nn->reconfig_timer_active) { - del_timer(&nn->reconfig_timer); nn->reconfig_timer_active = false; cancelled_timer = true; } @@ -258,14 +245,43 @@ int nfp_net_reconfig(struct nfp_net *nn, u32 update) spin_unlock_bh(&nn->reconfig_lock); - if (cancelled_timer) + if (cancelled_timer) { + del_timer_sync(&nn->reconfig_timer); nfp_net_reconfig_wait(nn, nn->reconfig_timer.expires); + } /* Run the posted reconfigs which were issued before we started */ if (pre_posted_requests) { nfp_net_reconfig_start(nn, pre_posted_requests); nfp_net_reconfig_wait(nn, jiffies + HZ * NFP_NET_POLL_TIMEOUT); } +} + +static void nfp_net_reconfig_wait_posted(struct nfp_net *nn) +{ + nfp_net_reconfig_sync_enter(nn); + + spin_lock_bh(&nn->reconfig_lock); + nn->reconfig_sync_present = false; + spin_unlock_bh(&nn->reconfig_lock); +} + +/** + * nfp_net_reconfig() - Reconfigure the firmware + * @nn: NFP Net device to reconfigure + * @update: The value for the update field in the BAR config + * + * Write the update word to the BAR and ping the reconfig queue. The + * poll until the firmware has acknowledged the update by zeroing the + * update word. + * + * Return: Negative errno on error, 0 on success + */ +int nfp_net_reconfig(struct nfp_net *nn, u32 update) +{ + int ret; + + nfp_net_reconfig_sync_enter(nn); nfp_net_reconfig_start(nn, update); ret = nfp_net_reconfig_wait(nn, jiffies + HZ * NFP_NET_POLL_TIMEOUT); @@ -3560,6 +3576,7 @@ struct nfp_net *nfp_net_alloc(struct pci_dev *pdev, bool needs_netdev, */ void nfp_net_free(struct nfp_net *nn) { + WARN_ON(timer_pending(&nn->reconfig_timer) || nn->reconfig_posted); if (nn->xdp_prog) bpf_prog_put(nn->xdp_prog); @@ -3829,4 +3846,5 @@ void nfp_net_clean(struct nfp_net *nn) return; unregister_netdev(nn->dp.netdev); + nfp_net_reconfig_wait_posted(nn); } -- GitLab From 3c035a48e7746d2b6f76719376473ad5d12e423a Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 27 Aug 2018 18:38:31 +0800 Subject: [PATCH 0102/2269] sctp: hold transport before accessing its asoc in sctp_transport_get_next [ Upstream commit bab1be79a5169ac748d8292b20c86d874022d7ba ] As Marcelo noticed, in sctp_transport_get_next, it is iterating over transports but then also accessing the association directly, without checking any refcnts before that, which can cause an use-after-free Read. So fix it by holding transport before accessing the association. With that, sctp_transport_hold calls can be removed in the later places. Fixes: 626d16f50f39 ("sctp: export some apis or variables for sctp_diag and reuse some for proc") Reported-by: syzbot+fe62a0c9aa6a85c6de16@syzkaller.appspotmail.com Signed-off-by: Xin Long Acked-by: Neil Horman Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sctp/proc.c | 4 ---- net/sctp/socket.c | 22 +++++++++++++++------- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/net/sctp/proc.c b/net/sctp/proc.c index 26b4be6b41720..6c82a959fc6e2 100644 --- a/net/sctp/proc.c +++ b/net/sctp/proc.c @@ -335,8 +335,6 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v) } transport = (struct sctp_transport *)v; - if (!sctp_transport_hold(transport)) - return 0; assoc = transport->asoc; epb = &assoc->base; sk = epb->sk; @@ -426,8 +424,6 @@ static int sctp_remaddr_seq_show(struct seq_file *seq, void *v) } transport = (struct sctp_transport *)v; - if (!sctp_transport_hold(transport)) - return 0; assoc = transport->asoc; list_for_each_entry_rcu(tsp, &assoc->peer.transport_addr_list, diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 2d6f612f32c3e..7900943111431 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4660,9 +4660,14 @@ struct sctp_transport *sctp_transport_get_next(struct net *net, break; } + if (!sctp_transport_hold(t)) + continue; + if (net_eq(sock_net(t->asoc->base.sk), net) && t->asoc->peer.primary_path == t) break; + + sctp_transport_put(t); } return t; @@ -4672,13 +4677,18 @@ struct sctp_transport *sctp_transport_get_idx(struct net *net, struct rhashtable_iter *iter, int pos) { - void *obj = SEQ_START_TOKEN; + struct sctp_transport *t; - while (pos && (obj = sctp_transport_get_next(net, iter)) && - !IS_ERR(obj)) - pos--; + if (!pos) + return SEQ_START_TOKEN; - return obj; + while ((t = sctp_transport_get_next(net, iter)) && !IS_ERR(t)) { + if (!--pos) + break; + sctp_transport_put(t); + } + + return t; } int sctp_for_each_endpoint(int (*cb)(struct sctp_endpoint *, void *), @@ -4738,8 +4748,6 @@ again: tsp = sctp_transport_get_idx(net, &hti, *pos + 1); for (; !IS_ERR_OR_NULL(tsp); tsp = sctp_transport_get_next(net, &hti)) { - if (!sctp_transport_hold(tsp)) - continue; ret = cb(tsp, p); if (ret) break; -- GitLab From fe0d111fea19e7d43d78e6b49f1d682e7177706d Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Fri, 24 Aug 2018 15:41:35 +0300 Subject: [PATCH 0103/2269] mlxsw: spectrum_switchdev: Do not leak RIFs when removing bridge [ Upstream commit 602b74eda81311dbdb5dbab08c30f789f648ebdc ] When a bridge device is removed, the VLANs are flushed from each configured port. This causes the ports to decrement the reference count on the associated FIDs (filtering identifier). If the reference count of a FID is 1 and it has a RIF (router interface), then this RIF is destroyed. However, if no port is member in the VLAN for which a RIF exists, then the RIF will continue to exist after the removal of the bridge. To reproduce: # ip link add name br0 type bridge vlan_filtering 1 # ip link set dev swp1 master br0 # ip link add link br0 name br0.10 type vlan id 10 # ip address add 192.0.2.0/24 dev br0.10 # ip link del dev br0 The RIF associated with br0.10 continues to exist. Fix this by iterating over all the bridge device uppers when it is destroyed and take care of destroying their RIFs. Fixes: 99f44bb3527b ("mlxsw: spectrum: Enable L3 interfaces on top of bridge devices") Signed-off-by: Ido Schimmel Reviewed-by: Petr Machata Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- .../net/ethernet/mellanox/mlxsw/spectrum.h | 2 ++ .../ethernet/mellanox/mlxsw/spectrum_router.c | 11 ++++++++++ .../mellanox/mlxsw/spectrum_switchdev.c | 20 +++++++++++++++++++ 3 files changed, 33 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 8c4ce0a0cc825..06eeea6b2f931 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -395,6 +395,8 @@ int mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event, void mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan); void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif); +void mlxsw_sp_rif_destroy_by_dev(struct mlxsw_sp *mlxsw_sp, + struct net_device *dev); /* spectrum_kvdl.c */ int mlxsw_sp_kvdl_alloc(struct mlxsw_sp *mlxsw_sp, unsigned int entry_count, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 516e632446067..3ed4fb346f235 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -5131,6 +5131,17 @@ void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif) mlxsw_sp_vr_put(vr); } +void mlxsw_sp_rif_destroy_by_dev(struct mlxsw_sp *mlxsw_sp, + struct net_device *dev) +{ + struct mlxsw_sp_rif *rif; + + rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev); + if (!rif) + return; + mlxsw_sp_rif_destroy(rif); +} + static void mlxsw_sp_rif_subport_params_init(struct mlxsw_sp_rif_params *params, struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index 7924f241e3ad0..32c25772f7558 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -140,6 +140,24 @@ bool mlxsw_sp_bridge_device_is_offloaded(const struct mlxsw_sp *mlxsw_sp, return !!mlxsw_sp_bridge_device_find(mlxsw_sp->bridge, br_dev); } +static int mlxsw_sp_bridge_device_upper_rif_destroy(struct net_device *dev, + void *data) +{ + struct mlxsw_sp *mlxsw_sp = data; + + mlxsw_sp_rif_destroy_by_dev(mlxsw_sp, dev); + return 0; +} + +static void mlxsw_sp_bridge_device_rifs_destroy(struct mlxsw_sp *mlxsw_sp, + struct net_device *dev) +{ + mlxsw_sp_rif_destroy_by_dev(mlxsw_sp, dev); + netdev_walk_all_upper_dev_rcu(dev, + mlxsw_sp_bridge_device_upper_rif_destroy, + mlxsw_sp); +} + static struct mlxsw_sp_bridge_device * mlxsw_sp_bridge_device_create(struct mlxsw_sp_bridge *bridge, struct net_device *br_dev) @@ -176,6 +194,8 @@ static void mlxsw_sp_bridge_device_destroy(struct mlxsw_sp_bridge *bridge, struct mlxsw_sp_bridge_device *bridge_device) { + mlxsw_sp_bridge_device_rifs_destroy(bridge->mlxsw_sp, + bridge_device->dev); list_del(&bridge_device->list); if (bridge_device->vlan_enabled) bridge->vlan_enabled_exists = false; -- GitLab From bf82c2cb14509281df54cd1801b783cd15c0c98b Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Fri, 24 Aug 2018 16:53:13 +0800 Subject: [PATCH 0104/2269] vhost: correctly check the iova range when waking virtqueue [ Upstream commit 2d66f997f0545c8f7fc5cf0b49af1decb35170e7 ] We don't wakeup the virtqueue if the first byte of pending iova range is the last byte of the range we just got updated. This will lead a virtqueue to wait for IOTLB updating forever. Fixing by correct the check and wake up the virtqueue in this case. Fixes: 6b1e6cc7855b ("vhost: new device IOTLB API") Reported-by: Peter Xu Signed-off-by: Jason Wang Reviewed-by: Peter Xu Tested-by: Peter Xu Acked-by: Michael S. Tsirkin Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/vhost/vhost.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 3cf74f54c7a1e..7ee3167bc083e 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -960,7 +960,7 @@ static void vhost_iotlb_notify_vq(struct vhost_dev *d, list_for_each_entry_safe(node, n, &d->pending_list, node) { struct vhost_iotlb_msg *vq_msg = &node->msg.iotlb; if (msg->iova <= vq_msg->iova && - msg->iova + msg->size - 1 > vq_msg->iova && + msg->iova + msg->size - 1 >= vq_msg->iova && vq_msg->type == VHOST_IOTLB_MISS) { vhost_poll_queue(&node->vq->poll); list_del(&node->node); -- GitLab From dadb0110a49dd50b94efd89c42157e39bceaed19 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 21 Aug 2018 10:40:38 -0700 Subject: [PATCH 0105/2269] hv_netvsc: ignore devices that are not PCI [ Upstream commit b93c1b5ac8643cc08bb74fa8ae21d6c63dfcb23d ] Registering another device with same MAC address (such as TAP, VPN or DPDK KNI) will confuse the VF autobinding logic. Restrict the search to only run if the device is known to be a PCI attached VF. Fixes: e8ff40d4bff1 ("hv_netvsc: improve VF device matching") Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/hyperv/netvsc_drv.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 6a77ef38c5495..66bbbef0fd2ce 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -1895,11 +1896,15 @@ static int netvsc_register_vf(struct net_device *vf_netdev) { struct net_device *ndev; struct net_device_context *net_device_ctx; + struct device *pdev = vf_netdev->dev.parent; struct netvsc_device *netvsc_dev; if (vf_netdev->addr_len != ETH_ALEN) return NOTIFY_DONE; + if (!pdev || !dev_is_pci(pdev) || dev_is_pf(pdev)) + return NOTIFY_DONE; + /* * We will use the MAC address to locate the synthetic interface to * associate with the VF interface. If we don't find a matching -- GitLab From 8aa07625eb810fa845311a8480f9ec70d7d93faa Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Thu, 30 Aug 2018 05:42:13 +0000 Subject: [PATCH 0106/2269] hv_netvsc: Fix a deadlock by getting rtnl lock earlier in netvsc_probe() [ Upstream commit e04e7a7bbd4bbabef4e1a58367e5fc9b2edc3b10 ] This patch fixes the race between netvsc_probe() and rndis_set_subchannel(), which can cause a deadlock. These are the related 3 paths which show the deadlock: path #1: Workqueue: hv_vmbus_con vmbus_onmessage_work [hv_vmbus] Call Trace: schedule schedule_preempt_disabled __mutex_lock __device_attach bus_probe_device device_add vmbus_device_register vmbus_onoffer vmbus_onmessage_work process_one_work worker_thread kthread ret_from_fork path #2: schedule schedule_preempt_disabled __mutex_lock netvsc_probe vmbus_probe really_probe __driver_attach bus_for_each_dev driver_attach_async async_run_entry_fn process_one_work worker_thread kthread ret_from_fork path #3: Workqueue: events netvsc_subchan_work [hv_netvsc] Call Trace: schedule rndis_set_subchannel netvsc_subchan_work process_one_work worker_thread kthread ret_from_fork Before path #1 finishes, path #2 can start to run, because just before the "bus_probe_device(dev);" in device_add() in path #1, there is a line "object_uevent(&dev->kobj, KOBJ_ADD);", so systemd-udevd can immediately try to load hv_netvsc and hence path #2 can start to run. Next, path #2 offloads the subchannal's initialization to a workqueue, i.e. path #3, so we can end up in a deadlock situation like this: Path #2 gets the device lock, and is trying to get the rtnl lock; Path #3 gets the rtnl lock and is waiting for all the subchannel messages to be processed; Path #1 is trying to get the device lock, but since #2 is not releasing the device lock, path #1 has to sleep; since the VMBus messages are processed one by one, this means the sub-channel messages can't be procedded, so #3 has to sleep with the rtnl lock held, and finally #2 has to sleep... Now all the 3 paths are sleeping and we hit the deadlock. With the patch, we can make sure #2 gets both the device lock and the rtnl lock together, gets its job done, and releases the locks, so #1 and #3 will not be blocked for ever. Fixes: 8195b1396ec8 ("hv_netvsc: fix deadlock on hotplug") Signed-off-by: Dexuan Cui Cc: Stephen Hemminger Cc: K. Y. Srinivasan Cc: Haiyang Zhang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/hyperv/netvsc_drv.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 66bbbef0fd2ce..aba16d81e9bba 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -2044,6 +2044,16 @@ static int netvsc_probe(struct hv_device *dev, memcpy(net->dev_addr, device_info.mac_adr, ETH_ALEN); + /* We must get rtnl lock before scheduling nvdev->subchan_work, + * otherwise netvsc_subchan_work() can get rtnl lock first and wait + * all subchannels to show up, but that may not happen because + * netvsc_probe() can't get rtnl lock and as a result vmbus_onoffer() + * -> ... -> device_add() -> ... -> __device_attach() can't get + * the device lock, so all the subchannels can't be processed -- + * finally netvsc_subchan_work() hangs for ever. + */ + rtnl_lock(); + if (nvdev->num_chn > 1) schedule_work(&nvdev->subchan_work); @@ -2062,7 +2072,6 @@ static int netvsc_probe(struct hv_device *dev, else net->max_mtu = ETH_DATA_LEN; - rtnl_lock(); ret = register_netdevice(net); if (ret != 0) { pr_err("Unable to register netdev.\n"); -- GitLab From cd7330c06195d3ff8c7a601bac449b0e8b3f2bec Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sun, 19 Aug 2018 12:22:12 -0700 Subject: [PATCH 0107/2269] act_ife: move tcfa_lock down to where necessary [ Upstream commit 4e407ff5cd67ec76eeeea1deec227b7982dc7f66 ] The only time we need to take tcfa_lock is when adding a new metainfo to an existing ife->metalist. We don't need to take tcfa_lock so early and so broadly in tcf_ife_init(). This means we can always take ife_mod_lock first, avoid the reverse locking ordering warning as reported by Vlad. Reported-by: Vlad Buslov Tested-by: Vlad Buslov Cc: Vlad Buslov Cc: Jamal Hadi Salim Signed-off-by: Cong Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/act_ife.c | 37 +++++++++++++------------------------ 1 file changed, 13 insertions(+), 24 deletions(-) diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index 711ccc4607cc5..408584314af36 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -249,10 +249,8 @@ static int ife_validate_metatype(struct tcf_meta_ops *ops, void *val, int len) } /* called when adding new meta information - * under ife->tcf_lock for existing action */ -static int load_metaops_and_vet(struct tcf_ife_info *ife, u32 metaid, - void *val, int len, bool exists) +static int load_metaops_and_vet(u32 metaid, void *val, int len) { struct tcf_meta_ops *ops = find_ife_oplist(metaid); int ret = 0; @@ -260,13 +258,9 @@ static int load_metaops_and_vet(struct tcf_ife_info *ife, u32 metaid, if (!ops) { ret = -ENOENT; #ifdef CONFIG_MODULES - if (exists) - spin_unlock_bh(&ife->tcf_lock); rtnl_unlock(); request_module("ifemeta%u", metaid); rtnl_lock(); - if (exists) - spin_lock_bh(&ife->tcf_lock); ops = find_ife_oplist(metaid); #endif } @@ -283,10 +277,9 @@ static int load_metaops_and_vet(struct tcf_ife_info *ife, u32 metaid, } /* called when adding new meta information - * under ife->tcf_lock for existing action */ static int add_metainfo(struct tcf_ife_info *ife, u32 metaid, void *metaval, - int len, bool atomic) + int len, bool atomic, bool exists) { struct tcf_meta_info *mi = NULL; struct tcf_meta_ops *ops = find_ife_oplist(metaid); @@ -313,12 +306,16 @@ static int add_metainfo(struct tcf_ife_info *ife, u32 metaid, void *metaval, } } + if (exists) + spin_lock_bh(&ife->tcf_lock); list_add_tail(&mi->metalist, &ife->metalist); + if (exists) + spin_unlock_bh(&ife->tcf_lock); return ret; } -static int use_all_metadata(struct tcf_ife_info *ife) +static int use_all_metadata(struct tcf_ife_info *ife, bool exists) { struct tcf_meta_ops *o; int rc = 0; @@ -326,7 +323,7 @@ static int use_all_metadata(struct tcf_ife_info *ife) read_lock(&ife_mod_lock); list_for_each_entry(o, &ifeoplist, list) { - rc = add_metainfo(ife, o->metaid, NULL, 0, true); + rc = add_metainfo(ife, o->metaid, NULL, 0, true, exists); if (rc == 0) installed += 1; } @@ -398,7 +395,6 @@ static void tcf_ife_cleanup(struct tc_action *a, int bind) spin_unlock_bh(&ife->tcf_lock); } -/* under ife->tcf_lock for existing action */ static int populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb, bool exists) { @@ -412,11 +408,11 @@ static int populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb, val = nla_data(tb[i]); len = nla_len(tb[i]); - rc = load_metaops_and_vet(ife, i, val, len, exists); + rc = load_metaops_and_vet(i, val, len); if (rc != 0) return rc; - rc = add_metainfo(ife, i, val, len, exists); + rc = add_metainfo(ife, i, val, len, false, exists); if (rc) return rc; } @@ -481,6 +477,8 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, if (exists) spin_lock_bh(&ife->tcf_lock); ife->tcf_action = parm->action; + if (exists) + spin_unlock_bh(&ife->tcf_lock); if (parm->flags & IFE_ENCODE) { if (daddr) @@ -508,9 +506,6 @@ metadata_parse_err: tcf_idr_release(*a, bind); if (ret == ACT_P_CREATED) _tcf_ife_cleanup(*a, bind); - - if (exists) - spin_unlock_bh(&ife->tcf_lock); return err; } @@ -524,20 +519,14 @@ metadata_parse_err: * as we can. You better have at least one else we are * going to bail out */ - err = use_all_metadata(ife); + err = use_all_metadata(ife, exists); if (err) { if (ret == ACT_P_CREATED) _tcf_ife_cleanup(*a, bind); - - if (exists) - spin_unlock_bh(&ife->tcf_lock); return err; } } - if (exists) - spin_unlock_bh(&ife->tcf_lock); - if (ret == ACT_P_CREATED) tcf_idr_insert(tn, *a); -- GitLab From e54c50709019d8680ba55b520407f1ba82432683 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sun, 19 Aug 2018 12:22:13 -0700 Subject: [PATCH 0108/2269] act_ife: fix a potential deadlock [ Upstream commit 5ffe57da29b3802baeddaa40909682bbb4cb4d48 ] use_all_metadata() acquires read_lock(&ife_mod_lock), then calls add_metainfo() which calls find_ife_oplist() which acquires the same lock again. Deadlock! Introduce __add_metainfo() which accepts struct tcf_meta_ops *ops as an additional parameter and let its callers to decide how to find it. For use_all_metadata(), it already has ops, no need to find it again, just call __add_metainfo() directly. And, as ife_mod_lock is only needed for find_ife_oplist(), this means we can make non-atomic allocation for populate_metalist() now. Fixes: 817e9f2c5c26 ("act_ife: acquire ife_mod_lock before reading ifeoplist") Cc: Jamal Hadi Salim Signed-off-by: Cong Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/act_ife.c | 34 +++++++++++++++++++++------------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index 408584314af36..0f81b81c1ff62 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -278,22 +278,16 @@ static int load_metaops_and_vet(u32 metaid, void *val, int len) /* called when adding new meta information */ -static int add_metainfo(struct tcf_ife_info *ife, u32 metaid, void *metaval, - int len, bool atomic, bool exists) +static int __add_metainfo(const struct tcf_meta_ops *ops, + struct tcf_ife_info *ife, u32 metaid, void *metaval, + int len, bool atomic, bool exists) { struct tcf_meta_info *mi = NULL; - struct tcf_meta_ops *ops = find_ife_oplist(metaid); int ret = 0; - if (!ops) - return -ENOENT; - mi = kzalloc(sizeof(*mi), atomic ? GFP_ATOMIC : GFP_KERNEL); - if (!mi) { - /*put back what find_ife_oplist took */ - module_put(ops->owner); + if (!mi) return -ENOMEM; - } mi->metaid = metaid; mi->ops = ops; @@ -301,7 +295,6 @@ static int add_metainfo(struct tcf_ife_info *ife, u32 metaid, void *metaval, ret = ops->alloc(mi, metaval, atomic ? GFP_ATOMIC : GFP_KERNEL); if (ret != 0) { kfree(mi); - module_put(ops->owner); return ret; } } @@ -315,6 +308,21 @@ static int add_metainfo(struct tcf_ife_info *ife, u32 metaid, void *metaval, return ret; } +static int add_metainfo(struct tcf_ife_info *ife, u32 metaid, void *metaval, + int len, bool exists) +{ + const struct tcf_meta_ops *ops = find_ife_oplist(metaid); + int ret; + + if (!ops) + return -ENOENT; + ret = __add_metainfo(ops, ife, metaid, metaval, len, false, exists); + if (ret) + /*put back what find_ife_oplist took */ + module_put(ops->owner); + return ret; +} + static int use_all_metadata(struct tcf_ife_info *ife, bool exists) { struct tcf_meta_ops *o; @@ -323,7 +331,7 @@ static int use_all_metadata(struct tcf_ife_info *ife, bool exists) read_lock(&ife_mod_lock); list_for_each_entry(o, &ifeoplist, list) { - rc = add_metainfo(ife, o->metaid, NULL, 0, true, exists); + rc = __add_metainfo(o, ife, o->metaid, NULL, 0, true, exists); if (rc == 0) installed += 1; } @@ -412,7 +420,7 @@ static int populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb, if (rc != 0) return rc; - rc = add_metainfo(ife, i, val, len, false, exists); + rc = add_metainfo(ife, i, val, len, exists); if (rc) return rc; } -- GitLab From c9ba1b82ce96256bd4b05120dfb84d7062ba5376 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Tue, 4 Sep 2018 00:44:42 +0300 Subject: [PATCH 0109/2269] net: sched: action_ife: take reference to meta module [ Upstream commit 84cb8eb26cb9ce3c79928094962a475a9d850a53 ] Recent refactoring of add_metainfo() caused use_all_metadata() to add metainfo to ife action metalist without taking reference to module. This causes warning in module_put called from ife action cleanup function. Implement add_metainfo_and_get_ops() function that returns with reference to module taken if metainfo was added successfully, and call it from use_all_metadata(), instead of calling __add_metainfo() directly. Example warning: [ 646.344393] WARNING: CPU: 1 PID: 2278 at kernel/module.c:1139 module_put+0x1cb/0x230 [ 646.352437] Modules linked in: act_meta_skbtcindex act_meta_mark act_meta_skbprio act_ife ife veth nfsv3 nfs fscache xt_CHECKSUM iptable_mangle ipt_MASQUERADE iptable_nat nf_nat_ipv4 nf_nat xt_conntrack nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 libcrc32c tun ebtable_filter ebtables ip6table_filter ip6_tables bridge stp llc mlx5_ib ib_uverbs ib_core intel_rapl sb_edac x86_pkg_temp_thermal mlx5_core coretemp kvm_intel kvm nfsd igb irqbypass crct10dif_pclmul devlink crc32_pclmul mei_me joydev ses crc32c_intel enclosure auth_rpcgss i2c_algo_bit ioatdma ptp mei pps_core ghash_clmulni_intel iTCO_wdt iTCO_vendor_support pcspkr dca ipmi_ssif lpc_ich target_core_mod i2c_i801 ipmi_si ipmi_devintf pcc_cpufreq wmi ipmi_msghandler nfs_acl lockd acpi_pad acpi_power_meter grace sunrpc mpt3sas raid_class scsi_transport_sas [ 646.425631] CPU: 1 PID: 2278 Comm: tc Not tainted 4.19.0-rc1+ #799 [ 646.432187] Hardware name: Supermicro SYS-2028TP-DECR/X10DRT-P, BIOS 2.0b 03/30/2017 [ 646.440595] RIP: 0010:module_put+0x1cb/0x230 [ 646.445238] Code: f3 66 94 02 e8 26 ff fa ff 85 c0 74 11 0f b6 1d 51 30 94 02 80 fb 01 77 60 83 e3 01 74 13 65 ff 0d 3a 83 db 73 e9 2b ff ff ff <0f> 0b e9 00 ff ff ff e8 59 01 fb ff 85 c0 75 e4 48 c7 c2 20 62 6b [ 646.464997] RSP: 0018:ffff880354d37068 EFLAGS: 00010286 [ 646.470599] RAX: 0000000000000000 RBX: ffffffffc0a52518 RCX: ffffffff8c2668db [ 646.478118] RDX: 0000000000000003 RSI: dffffc0000000000 RDI: ffffffffc0a52518 [ 646.485641] RBP: ffffffffc0a52180 R08: fffffbfff814a4a4 R09: fffffbfff814a4a3 [ 646.493164] R10: ffffffffc0a5251b R11: fffffbfff814a4a4 R12: 1ffff1006a9a6e0d [ 646.500687] R13: 00000000ffffffff R14: ffff880362bab890 R15: dead000000000100 [ 646.508213] FS: 00007f4164c99800(0000) GS:ffff88036fe40000(0000) knlGS:0000000000000000 [ 646.516961] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 646.523080] CR2: 00007f41638b8420 CR3: 0000000351df0004 CR4: 00000000001606e0 [ 646.530595] Call Trace: [ 646.533408] ? find_symbol_in_section+0x260/0x260 [ 646.538509] tcf_ife_cleanup+0x11b/0x200 [act_ife] [ 646.543695] tcf_action_cleanup+0x29/0xa0 [ 646.548078] __tcf_action_put+0x5a/0xb0 [ 646.552289] ? nla_put+0x65/0xe0 [ 646.555889] __tcf_idr_release+0x48/0x60 [ 646.560187] tcf_generic_walker+0x448/0x6b0 [ 646.564764] ? tcf_action_dump_1+0x450/0x450 [ 646.569411] ? __lock_is_held+0x84/0x110 [ 646.573720] ? tcf_ife_walker+0x10c/0x20f [act_ife] [ 646.578982] tca_action_gd+0x972/0xc40 [ 646.583129] ? tca_get_fill.constprop.17+0x250/0x250 [ 646.588471] ? mark_lock+0xcf/0x980 [ 646.592324] ? check_chain_key+0x140/0x1f0 [ 646.596832] ? debug_show_all_locks+0x240/0x240 [ 646.601839] ? memset+0x1f/0x40 [ 646.605350] ? nla_parse+0xca/0x1a0 [ 646.609217] tc_ctl_action+0x215/0x230 [ 646.613339] ? tcf_action_add+0x220/0x220 [ 646.617748] rtnetlink_rcv_msg+0x56a/0x6d0 [ 646.622227] ? rtnl_fdb_del+0x3f0/0x3f0 [ 646.626466] netlink_rcv_skb+0x18d/0x200 [ 646.630752] ? rtnl_fdb_del+0x3f0/0x3f0 [ 646.634959] ? netlink_ack+0x500/0x500 [ 646.639106] netlink_unicast+0x2d0/0x370 [ 646.643409] ? netlink_attachskb+0x340/0x340 [ 646.648050] ? _copy_from_iter_full+0xe9/0x3e0 [ 646.652870] ? import_iovec+0x11e/0x1c0 [ 646.657083] netlink_sendmsg+0x3b9/0x6a0 [ 646.661388] ? netlink_unicast+0x370/0x370 [ 646.665877] ? netlink_unicast+0x370/0x370 [ 646.670351] sock_sendmsg+0x6b/0x80 [ 646.674212] ___sys_sendmsg+0x4a1/0x520 [ 646.678443] ? copy_msghdr_from_user+0x210/0x210 [ 646.683463] ? lock_downgrade+0x320/0x320 [ 646.687849] ? debug_show_all_locks+0x240/0x240 [ 646.692760] ? do_raw_spin_unlock+0xa2/0x130 [ 646.697418] ? _raw_spin_unlock+0x24/0x30 [ 646.701798] ? __handle_mm_fault+0x1819/0x1c10 [ 646.706619] ? __pmd_alloc+0x320/0x320 [ 646.710738] ? debug_show_all_locks+0x240/0x240 [ 646.715649] ? restore_nameidata+0x7b/0xa0 [ 646.720117] ? check_chain_key+0x140/0x1f0 [ 646.724590] ? check_chain_key+0x140/0x1f0 [ 646.729070] ? __fget_light+0xbc/0xd0 [ 646.733121] ? __sys_sendmsg+0xd7/0x150 [ 646.737329] __sys_sendmsg+0xd7/0x150 [ 646.741359] ? __ia32_sys_shutdown+0x30/0x30 [ 646.746003] ? up_read+0x53/0x90 [ 646.749601] ? __do_page_fault+0x484/0x780 [ 646.754105] ? do_syscall_64+0x1e/0x2c0 [ 646.758320] do_syscall_64+0x72/0x2c0 [ 646.762353] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 646.767776] RIP: 0033:0x7f4163872150 [ 646.771713] Code: 8b 15 3c 7d 2b 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb cd 66 0f 1f 44 00 00 83 3d b9 d5 2b 00 00 75 10 b8 2e 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 31 c3 48 83 ec 08 e8 be cd 00 00 48 89 04 24 [ 646.791474] RSP: 002b:00007ffdef7d6b58 EFLAGS: 00000246 ORIG_RAX: 000000000000002e [ 646.799721] RAX: ffffffffffffffda RBX: 0000000000000024 RCX: 00007f4163872150 [ 646.807240] RDX: 0000000000000000 RSI: 00007ffdef7d6bd0 RDI: 0000000000000003 [ 646.814760] RBP: 000000005b8b9482 R08: 0000000000000001 R09: 0000000000000000 [ 646.822286] R10: 00000000000005e7 R11: 0000000000000246 R12: 00007ffdef7dad20 [ 646.829807] R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000679bc0 [ 646.837360] irq event stamp: 6083 [ 646.841043] hardirqs last enabled at (6081): [] __call_rcu+0x17d/0x500 [ 646.849882] hardirqs last disabled at (6083): [] trace_hardirqs_off_thunk+0x1a/0x1c [ 646.859775] softirqs last enabled at (5968): [] __do_softirq+0x4a1/0x6ee [ 646.868784] softirqs last disabled at (6082): [] tcf_ife_cleanup+0x39/0x200 [act_ife] [ 646.878845] ---[ end trace b1b8c12ffe51e657 ]--- Fixes: 5ffe57da29b3 ("act_ife: fix a potential deadlock") Signed-off-by: Vlad Buslov Acked-by: Cong Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/act_ife.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index 0f81b81c1ff62..31de26c990231 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -308,6 +308,20 @@ static int __add_metainfo(const struct tcf_meta_ops *ops, return ret; } +static int add_metainfo_and_get_ops(const struct tcf_meta_ops *ops, + struct tcf_ife_info *ife, u32 metaid, + bool exists) +{ + int ret; + + if (!try_module_get(ops->owner)) + return -ENOENT; + ret = __add_metainfo(ops, ife, metaid, NULL, 0, true, exists); + if (ret) + module_put(ops->owner); + return ret; +} + static int add_metainfo(struct tcf_ife_info *ife, u32 metaid, void *metaval, int len, bool exists) { @@ -331,7 +345,7 @@ static int use_all_metadata(struct tcf_ife_info *ife, bool exists) read_lock(&ife_mod_lock); list_for_each_entry(o, &ifeoplist, list) { - rc = __add_metainfo(o, ife, o->metaid, NULL, 0, true, exists); + rc = add_metainfo_and_get_ops(o, ife, o->metaid, exists); if (rc == 0) installed += 1; } -- GitLab From d4e42116f77e41216cee1178fa515a1b4a1beba6 Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Wed, 22 Aug 2018 12:19:24 +1000 Subject: [PATCH 0110/2269] cifs: check if SMB2 PDU size has been padded and suppress the warning [ Upstream commit e6c47dd0da1e3a484e778046fc10da0b20606a86 ] Some SMB2/3 servers, Win2016 but possibly others too, adds padding not only between PDUs in a compound but also to the final PDU. This padding extends the PDU to a multiple of 8 bytes. Check if the unexpected length looks like this might be the case and avoid triggering the log messages for : "SMB2 server sent bad RFC1001 len %d not %d\n" Signed-off-by: Ronnie Sahlberg Signed-off-by: Steve French Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/cifs/smb2misc.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c index 7b08a1446a7fb..efdfdb47a7dd3 100644 --- a/fs/cifs/smb2misc.c +++ b/fs/cifs/smb2misc.c @@ -211,6 +211,13 @@ smb2_check_message(char *buf, unsigned int length, struct TCP_Server_Info *srvr) if (clc_len == 4 + len + 1) return 0; + /* + * Some windows servers (win2016) will pad also the final + * PDU in a compound to 8 bytes. + */ + if (((clc_len + 7) & ~7) == len) + return 0; + /* * MacOS server pads after SMB2.1 write response with 3 bytes * of junk. Other servers match RFC1001 len to actual -- GitLab From 7d6eba211a1a3766378d10eadcffcca717537a8e Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Tue, 21 Aug 2018 21:59:12 -0700 Subject: [PATCH 0111/2269] hfsplus: don't return 0 when fill_super() failed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 7464726cb5998846306ed0a7d6714afb2e37b25d ] syzbot is reporting NULL pointer dereference at mount_fs() [1]. This is because hfsplus_fill_super() is by error returning 0 when hfsplus_fill_super() detected invalid filesystem image, and mount_bdev() is returning NULL because dget(s->s_root) == NULL if s->s_root == NULL, and mount_fs() is accessing root->d_sb because IS_ERR(root) == false if root == NULL. Fix this by returning -EINVAL when hfsplus_fill_super() detected invalid filesystem image. [1] https://syzkaller.appspot.com/bug?id=21acb6850cecbc960c927229e597158cf35f33d0 Link: http://lkml.kernel.org/r/d83ce31a-874c-dd5b-f790-41405983a5be@I-love.SAKURA.ne.jp Signed-off-by: Tetsuo Handa Reported-by: syzbot Reviewed-by: Ernesto A. Fernández Reviewed-by: Andrew Morton Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/hfsplus/super.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index 3cba08c931eeb..410f59372f19d 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -524,8 +524,10 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) goto out_put_root; if (!hfs_brec_read(&fd, &entry, sizeof(entry))) { hfs_find_exit(&fd); - if (entry.type != cpu_to_be16(HFSPLUS_FOLDER)) + if (entry.type != cpu_to_be16(HFSPLUS_FOLDER)) { + err = -EINVAL; goto out_put_root; + } inode = hfsplus_iget(sb, be32_to_cpu(entry.folder.id)); if (IS_ERR(inode)) { err = PTR_ERR(inode); -- GitLab From ece4ba1c0c9003a1cde72862b0945bb163013e4f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ernesto=20A=2E=20Fern=C3=A1ndez?= Date: Thu, 23 Aug 2018 17:00:31 -0700 Subject: [PATCH 0112/2269] hfs: prevent crash on exit from failed search MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit dc2572791d3a41bab94400af2b6bca9d71ccd303 ] hfs_find_exit() expects fd->bnode to be NULL after a search has failed. hfs_brec_insert() may instead set it to an error-valued pointer. Fix this to prevent a crash. Link: http://lkml.kernel.org/r/53d9749a029c41b4016c495fc5838c9dba3afc52.1530294815.git.ernesto.mnd.fernandez@gmail.com Signed-off-by: Ernesto A. Fernández Cc: Anatoly Trosinenko Cc: Viacheslav Dubeyko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/hfs/brec.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/hfs/brec.c b/fs/hfs/brec.c index ad04a57410169..9a8772465a907 100644 --- a/fs/hfs/brec.c +++ b/fs/hfs/brec.c @@ -75,9 +75,10 @@ int hfs_brec_insert(struct hfs_find_data *fd, void *entry, int entry_len) if (!fd->bnode) { if (!tree->root) hfs_btree_inc_height(tree); - fd->bnode = hfs_bnode_find(tree, tree->leaf_head); - if (IS_ERR(fd->bnode)) - return PTR_ERR(fd->bnode); + node = hfs_bnode_find(tree, tree->leaf_head); + if (IS_ERR(node)) + return PTR_ERR(node); + fd->bnode = node; fd->record = -1; } new_node = NULL; -- GitLab From 8b89affb42ae14e549c233d0a90c8e46e533811f Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Fri, 17 Aug 2018 14:43:54 -0700 Subject: [PATCH 0113/2269] sunrpc: Don't use stack buffer with scatterlist [ Upstream commit 44090cc876926277329e1608bafc01b9f6da627f ] Fedora got a bug report from NFS: kernel BUG at include/linux/scatterlist.h:143! ... RIP: 0010:sg_init_one+0x7d/0x90 .. make_checksum+0x4e7/0x760 [rpcsec_gss_krb5] gss_get_mic_kerberos+0x26e/0x310 [rpcsec_gss_krb5] gss_marshal+0x126/0x1a0 [auth_rpcgss] ? __local_bh_enable_ip+0x80/0xe0 ? call_transmit_status+0x1d0/0x1d0 [sunrpc] call_transmit+0x137/0x230 [sunrpc] __rpc_execute+0x9b/0x490 [sunrpc] rpc_run_task+0x119/0x150 [sunrpc] nfs4_run_exchange_id+0x1bd/0x250 [nfsv4] _nfs4_proc_exchange_id+0x2d/0x490 [nfsv4] nfs41_discover_server_trunking+0x1c/0xa0 [nfsv4] nfs4_discover_server_trunking+0x80/0x270 [nfsv4] nfs4_init_client+0x16e/0x240 [nfsv4] ? nfs_get_client+0x4c9/0x5d0 [nfs] ? _raw_spin_unlock+0x24/0x30 ? nfs_get_client+0x4c9/0x5d0 [nfs] nfs4_set_client+0xb2/0x100 [nfsv4] nfs4_create_server+0xff/0x290 [nfsv4] nfs4_remote_mount+0x28/0x50 [nfsv4] mount_fs+0x3b/0x16a vfs_kern_mount.part.35+0x54/0x160 nfs_do_root_mount+0x7f/0xc0 [nfsv4] nfs4_try_mount+0x43/0x70 [nfsv4] ? get_nfs_version+0x21/0x80 [nfs] nfs_fs_mount+0x789/0xbf0 [nfs] ? pcpu_alloc+0x6ca/0x7e0 ? nfs_clone_super+0x70/0x70 [nfs] ? nfs_parse_mount_options+0xb40/0xb40 [nfs] mount_fs+0x3b/0x16a vfs_kern_mount.part.35+0x54/0x160 do_mount+0x1fd/0xd50 ksys_mount+0xba/0xd0 __x64_sys_mount+0x21/0x30 do_syscall_64+0x60/0x1f0 entry_SYSCALL_64_after_hwframe+0x49/0xbe This is BUG_ON(!virt_addr_valid(buf)) triggered by using a stack allocated buffer with a scatterlist. Convert the buffer for rc4salt to be dynamically allocated instead. Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1615258 Signed-off-by: Laura Abbott Signed-off-by: J. Bruce Fields Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/sunrpc/auth_gss/gss_krb5_crypto.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c index 8654494b4d0a3..834eb2b9e41b5 100644 --- a/net/sunrpc/auth_gss/gss_krb5_crypto.c +++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c @@ -169,7 +169,7 @@ make_checksum_hmac_md5(struct krb5_ctx *kctx, char *header, int hdrlen, struct scatterlist sg[1]; int err = -1; u8 *checksumdata; - u8 rc4salt[4]; + u8 *rc4salt; struct crypto_ahash *md5; struct crypto_ahash *hmac_md5; struct ahash_request *req; @@ -183,14 +183,18 @@ make_checksum_hmac_md5(struct krb5_ctx *kctx, char *header, int hdrlen, return GSS_S_FAILURE; } + rc4salt = kmalloc_array(4, sizeof(*rc4salt), GFP_NOFS); + if (!rc4salt) + return GSS_S_FAILURE; + if (arcfour_hmac_md5_usage_to_salt(usage, rc4salt)) { dprintk("%s: invalid usage value %u\n", __func__, usage); - return GSS_S_FAILURE; + goto out_free_rc4salt; } checksumdata = kmalloc(GSS_KRB5_MAX_CKSUM_LEN, GFP_NOFS); if (!checksumdata) - return GSS_S_FAILURE; + goto out_free_rc4salt; md5 = crypto_alloc_ahash("md5", 0, CRYPTO_ALG_ASYNC); if (IS_ERR(md5)) @@ -258,6 +262,8 @@ out_free_md5: crypto_free_ahash(md5); out_free_cksum: kfree(checksumdata); +out_free_rc4salt: + kfree(rc4salt); return err ? GSS_S_FAILURE : 0; } -- GitLab From f552f8c28d34ffea62dec915c0a63d900464504b Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Tue, 21 Aug 2018 22:00:58 -0700 Subject: [PATCH 0114/2269] fork: don't copy inconsistent signal handler state to child [ Upstream commit 06e62a46bbba20aa5286102016a04214bb446141 ] Before this change, if a multithreaded process forks while one of its threads is changing a signal handler using sigaction(), the memcpy() in copy_sighand() can race with the struct assignment in do_sigaction(). It isn't clear whether this can cause corruption of the userspace signal handler pointer, but it definitely can cause inconsistency between different fields of struct sigaction. Take the appropriate spinlock to avoid this. I have tested that this patch prevents inconsistency between sa_sigaction and sa_flags, which is possible before this patch. Link: http://lkml.kernel.org/r/20180702145108.73189-1-jannh@google.com Signed-off-by: Jann Horn Acked-by: Michal Hocko Reviewed-by: Andrew Morton Cc: Rik van Riel Cc: "Peter Zijlstra (Intel)" Cc: Kees Cook Cc: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- kernel/fork.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/fork.c b/kernel/fork.c index 91907a3701ce5..6a219fea49269 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1350,7 +1350,9 @@ static int copy_sighand(unsigned long clone_flags, struct task_struct *tsk) return -ENOMEM; atomic_set(&sig->count, 1); + spin_lock_irq(¤t->sighand->siglock); memcpy(sig->action, current->sighand->action, sizeof(sig->action)); + spin_unlock_irq(¤t->sighand->siglock); return 0; } -- GitLab From 003d4c3bf5a5a722f162fda8da59d2d39fff0c2a Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 21 Aug 2018 21:59:34 -0700 Subject: [PATCH 0115/2269] reiserfs: change j_timestamp type to time64_t [ Upstream commit 8b73ce6a4bae4fe12bcb2c361c0da4183c2e1b6f ] This uses the deprecated time_t type but is write-only, and could be removed, but as Jeff explains, having a timestamp can be usefule for post-mortem analysis in crash dumps. In order to remove one of the last instances of time_t, this changes the type to time64_t, same as j_trans_start_time. Link: http://lkml.kernel.org/r/20180622133315.221210-1-arnd@arndb.de Signed-off-by: Arnd Bergmann Cc: Jan Kara Cc: Jeff Mahoney Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/reiserfs/reiserfs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/reiserfs/reiserfs.h b/fs/reiserfs/reiserfs.h index 48835a659948f..eabf85371ece6 100644 --- a/fs/reiserfs/reiserfs.h +++ b/fs/reiserfs/reiserfs.h @@ -271,7 +271,7 @@ struct reiserfs_journal_list { struct mutex j_commit_mutex; unsigned int j_trans_id; - time_t j_timestamp; + time64_t j_timestamp; /* write-only but useful for crash dump analysis */ struct reiserfs_list_bitmap *j_list_bitmap; struct buffer_head *j_commit_bh; /* commit buffer head */ struct reiserfs_journal_cnode *j_realblock; -- GitLab From 68e787c3c80059c776d1d7afb20f5eb9f20237a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ernesto=20A=2E=20Fern=C3=A1ndez?= Date: Thu, 23 Aug 2018 17:00:25 -0700 Subject: [PATCH 0116/2269] hfsplus: fix NULL dereference in hfsplus_lookup() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit a7ec7a4193a2eb3b5341243fc0b621c1ac9e4ec4 ] An HFS+ filesystem can be mounted read-only without having a metadata directory, which is needed to support hardlinks. But if the catalog data is corrupted, a directory lookup may still find dentries claiming to be hardlinks. hfsplus_lookup() does check that ->hidden_dir is not NULL in such a situation, but mistakenly does so after dereferencing it for the first time. Reorder this check to prevent a crash. This happens when looking up corrupted catalog data (dentry) on a filesystem with no metadata directory (this could only ever happen on a read-only mount). Wen Xu sent the replication steps in detail to the fsdevel list: https://bugzilla.kernel.org/show_bug.cgi?id=200297 Link: http://lkml.kernel.org/r/20180712215344.q44dyrhymm4ajkao@eaf Signed-off-by: Ernesto A. Fernández Reported-by: Wen Xu Cc: Viacheslav Dubeyko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/hfsplus/dir.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c index e8120a282435d..1a44c4621e74b 100644 --- a/fs/hfsplus/dir.c +++ b/fs/hfsplus/dir.c @@ -78,13 +78,13 @@ again: cpu_to_be32(HFSP_HARDLINK_TYPE) && entry.file.user_info.fdCreator == cpu_to_be32(HFSP_HFSPLUS_CREATOR) && + HFSPLUS_SB(sb)->hidden_dir && (entry.file.create_date == HFSPLUS_I(HFSPLUS_SB(sb)->hidden_dir)-> create_date || entry.file.create_date == HFSPLUS_I(d_inode(sb->s_root))-> - create_date) && - HFSPLUS_SB(sb)->hidden_dir) { + create_date)) { struct qstr str; char name[32]; -- GitLab From 305277dae99e0917d4729e464cc254b0a12cf559 Mon Sep 17 00:00:00 2001 From: James Morse Date: Tue, 21 Aug 2018 21:54:48 -0700 Subject: [PATCH 0117/2269] fs/proc/kcore.c: use __pa_symbol() for KCORE_TEXT list entries [ Upstream commit df865e8337c397471b95f51017fea559bc8abb4a ] elf_kcore_store_hdr() uses __pa() to find the physical address of KCORE_RAM or KCORE_TEXT entries exported as program headers. This trips CONFIG_DEBUG_VIRTUAL's checks, as the KCORE_TEXT entries are not in the linear map. Handle these two cases separately, using __pa_symbol() for the KCORE_TEXT entries. Link: http://lkml.kernel.org/r/20180711131944.15252-1-james.morse@arm.com Signed-off-by: James Morse Cc: Alexey Dobriyan Cc: Omar Sandoval Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/proc/kcore.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index e64ecb9f27209..66c373230e60e 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c @@ -384,8 +384,10 @@ static void elf_kcore_store_hdr(char *bufp, int nphdr, int dataoff) phdr->p_flags = PF_R|PF_W|PF_X; phdr->p_offset = kc_vaddr_to_offset(m->addr) + dataoff; phdr->p_vaddr = (size_t)m->addr; - if (m->type == KCORE_RAM || m->type == KCORE_TEXT) + if (m->type == KCORE_RAM) phdr->p_paddr = __pa(m->addr); + else if (m->type == KCORE_TEXT) + phdr->p_paddr = __pa_symbol(m->addr); else phdr->p_paddr = (elf_addr_t)-1; phdr->p_filesz = phdr->p_memsz = m->size; -- GitLab From 7bbf1e8a2471d3f604bf91578ebed067461ba4e7 Mon Sep 17 00:00:00 2001 From: OGAWA Hirofumi Date: Tue, 21 Aug 2018 21:59:44 -0700 Subject: [PATCH 0118/2269] fat: validate ->i_start before using [ Upstream commit 0afa9626667c3659ef8bd82d42a11e39fedf235c ] On corrupted FATfs may have invalid ->i_start. To handle it, this checks ->i_start before using, and return proper error code. Link: http://lkml.kernel.org/r/87o9f8y1t5.fsf_-_@mail.parknet.co.jp Signed-off-by: OGAWA Hirofumi Reported-by: Anatoly Trosinenko Tested-by: Anatoly Trosinenko Cc: Alan Cox Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/fat/cache.c | 19 ++++++++++++------- fs/fat/fat.h | 5 +++++ fs/fat/fatent.c | 6 +++--- 3 files changed, 20 insertions(+), 10 deletions(-) diff --git a/fs/fat/cache.c b/fs/fat/cache.c index e9bed49df6b71..78d501c1fb658 100644 --- a/fs/fat/cache.c +++ b/fs/fat/cache.c @@ -225,7 +225,8 @@ static inline void cache_init(struct fat_cache_id *cid, int fclus, int dclus) int fat_get_cluster(struct inode *inode, int cluster, int *fclus, int *dclus) { struct super_block *sb = inode->i_sb; - const int limit = sb->s_maxbytes >> MSDOS_SB(sb)->cluster_bits; + struct msdos_sb_info *sbi = MSDOS_SB(sb); + const int limit = sb->s_maxbytes >> sbi->cluster_bits; struct fat_entry fatent; struct fat_cache_id cid; int nr; @@ -234,6 +235,12 @@ int fat_get_cluster(struct inode *inode, int cluster, int *fclus, int *dclus) *fclus = 0; *dclus = MSDOS_I(inode)->i_start; + if (!fat_valid_entry(sbi, *dclus)) { + fat_fs_error_ratelimit(sb, + "%s: invalid start cluster (i_pos %lld, start %08x)", + __func__, MSDOS_I(inode)->i_pos, *dclus); + return -EIO; + } if (cluster == 0) return 0; @@ -250,9 +257,8 @@ int fat_get_cluster(struct inode *inode, int cluster, int *fclus, int *dclus) /* prevent the infinite loop of cluster chain */ if (*fclus > limit) { fat_fs_error_ratelimit(sb, - "%s: detected the cluster chain loop" - " (i_pos %lld)", __func__, - MSDOS_I(inode)->i_pos); + "%s: detected the cluster chain loop (i_pos %lld)", + __func__, MSDOS_I(inode)->i_pos); nr = -EIO; goto out; } @@ -262,9 +268,8 @@ int fat_get_cluster(struct inode *inode, int cluster, int *fclus, int *dclus) goto out; else if (nr == FAT_ENT_FREE) { fat_fs_error_ratelimit(sb, - "%s: invalid cluster chain (i_pos %lld)", - __func__, - MSDOS_I(inode)->i_pos); + "%s: invalid cluster chain (i_pos %lld)", + __func__, MSDOS_I(inode)->i_pos); nr = -EIO; goto out; } else if (nr == FAT_ENT_EOF) { diff --git a/fs/fat/fat.h b/fs/fat/fat.h index 8fc1093da47d3..a0a00f3734bc1 100644 --- a/fs/fat/fat.h +++ b/fs/fat/fat.h @@ -348,6 +348,11 @@ static inline void fatent_brelse(struct fat_entry *fatent) fatent->fat_inode = NULL; } +static inline bool fat_valid_entry(struct msdos_sb_info *sbi, int entry) +{ + return FAT_START_ENT <= entry && entry < sbi->max_cluster; +} + extern void fat_ent_access_init(struct super_block *sb); extern int fat_ent_read(struct inode *inode, struct fat_entry *fatent, int entry); diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c index 48b2336692f9f..a40f36b1b292e 100644 --- a/fs/fat/fatent.c +++ b/fs/fat/fatent.c @@ -23,7 +23,7 @@ static void fat12_ent_blocknr(struct super_block *sb, int entry, { struct msdos_sb_info *sbi = MSDOS_SB(sb); int bytes = entry + (entry >> 1); - WARN_ON(entry < FAT_START_ENT || sbi->max_cluster <= entry); + WARN_ON(!fat_valid_entry(sbi, entry)); *offset = bytes & (sb->s_blocksize - 1); *blocknr = sbi->fat_start + (bytes >> sb->s_blocksize_bits); } @@ -33,7 +33,7 @@ static void fat_ent_blocknr(struct super_block *sb, int entry, { struct msdos_sb_info *sbi = MSDOS_SB(sb); int bytes = (entry << sbi->fatent_shift); - WARN_ON(entry < FAT_START_ENT || sbi->max_cluster <= entry); + WARN_ON(!fat_valid_entry(sbi, entry)); *offset = bytes & (sb->s_blocksize - 1); *blocknr = sbi->fat_start + (bytes >> sb->s_blocksize_bits); } @@ -353,7 +353,7 @@ int fat_ent_read(struct inode *inode, struct fat_entry *fatent, int entry) int err, offset; sector_t blocknr; - if (entry < FAT_START_ENT || sbi->max_cluster <= entry) { + if (!fat_valid_entry(sbi, entry)) { fatent_brelse(fatent); fat_fs_error(sb, "invalid access to FAT (entry 0x%08x)", entry); return -EIO; -- GitLab From ab4bddc2d032b175c4c9689a6a6f3e50d1c66f5a Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 15 Aug 2018 12:30:38 -0700 Subject: [PATCH 0119/2269] scripts: modpost: check memory allocation results [ Upstream commit 1f3aa9002dc6a0d59a4b599b4fc8f01cf43ef014 ] Fix missing error check for memory allocation functions in scripts/mod/modpost.c. Fixes kernel bugzilla #200319: https://bugzilla.kernel.org/show_bug.cgi?id=200319 Signed-off-by: Randy Dunlap Cc: Yuexing Wang Signed-off-by: Masahiro Yamada Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- scripts/mod/modpost.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 54deaa1066cf0..957f6041dd79e 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -677,7 +677,7 @@ static void handle_modversions(struct module *mod, struct elf_info *info, if (ELF_ST_TYPE(sym->st_info) == STT_SPARC_REGISTER) break; if (symname[0] == '.') { - char *munged = strdup(symname); + char *munged = NOFAIL(strdup(symname)); munged[0] = '_'; munged[1] = toupper(munged[1]); symname = munged; @@ -1329,7 +1329,7 @@ static Elf_Sym *find_elf_symbol2(struct elf_info *elf, Elf_Addr addr, static char *sec2annotation(const char *s) { if (match(s, init_exit_sections)) { - char *p = malloc(20); + char *p = NOFAIL(malloc(20)); char *r = p; *p++ = '_'; @@ -1349,7 +1349,7 @@ static char *sec2annotation(const char *s) strcat(p, " "); return r; } else { - return strdup(""); + return NOFAIL(strdup("")); } } @@ -2050,7 +2050,7 @@ void buf_write(struct buffer *buf, const char *s, int len) { if (buf->size - buf->pos < len) { buf->size += len + SZ; - buf->p = realloc(buf->p, buf->size); + buf->p = NOFAIL(realloc(buf->p, buf->size)); } strncpy(buf->p + buf->pos, s, len); buf->pos += len; -- GitLab From b7540b6235f070b2f02f20f58d6ba46995cb2b9d Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Wed, 18 Jul 2018 10:18:45 +0100 Subject: [PATCH 0120/2269] virtio: pci-legacy: Validate queue pfn [ Upstream commit 69599206ea9a3f8f2e94d46580579cbf9d08ad6c ] Legacy PCI over virtio uses a 32bit PFN for the queue. If the queue pfn is too large to fit in 32bits, which we could hit on arm64 systems with 52bit physical addresses (even with 64K page size), we simply miss out a proper link to the other side of the queue. Add a check to validate the PFN, rather than silently breaking the devices. Cc: "Michael S. Tsirkin" Cc: Jason Wang Cc: Marc Zyngier Cc: Christoffer Dall Cc: Peter Maydel Cc: Jean-Philippe Brucker Signed-off-by: Suzuki K Poulose Signed-off-by: Michael S. Tsirkin Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/virtio/virtio_pci_legacy.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/virtio/virtio_pci_legacy.c b/drivers/virtio/virtio_pci_legacy.c index 2780886e8ba3d..de062fb201bc2 100644 --- a/drivers/virtio/virtio_pci_legacy.c +++ b/drivers/virtio/virtio_pci_legacy.c @@ -122,6 +122,7 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, struct virtqueue *vq; u16 num; int err; + u64 q_pfn; /* Select the queue we're interested in */ iowrite16(index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL); @@ -141,9 +142,17 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, if (!vq) return ERR_PTR(-ENOMEM); + q_pfn = virtqueue_get_desc_addr(vq) >> VIRTIO_PCI_QUEUE_ADDR_SHIFT; + if (q_pfn >> 32) { + dev_err(&vp_dev->pci_dev->dev, + "platform bug: legacy virtio-mmio must not be used with RAM above 0x%llxGB\n", + 0x1ULL << (32 + PAGE_SHIFT - 30)); + err = -E2BIG; + goto out_del_vq; + } + /* activate the queue */ - iowrite32(virtqueue_get_desc_addr(vq) >> VIRTIO_PCI_QUEUE_ADDR_SHIFT, - vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN); + iowrite32(q_pfn, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN); vq->priv = (void __force *)vp_dev->ioaddr + VIRTIO_PCI_QUEUE_NOTIFY; @@ -160,6 +169,7 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, out_deactivate: iowrite32(0, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN); +out_del_vq: vring_del_virtqueue(vq); return ERR_PTR(err); } -- GitLab From e5ef973b137fcef7d9fea58ad11b7e1c20c913e6 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 17 Aug 2018 12:01:36 +0200 Subject: [PATCH 0121/2269] x86/mce: Add notifier_block forward declaration [ Upstream commit 704ae091b061082b37a9968621af4c290c641d50 ] Without linux/irq.h, there is no declaration of notifier_block, leading to a build warning: In file included from arch/x86/kernel/cpu/mcheck/threshold.c:10: arch/x86/include/asm/mce.h:151:46: error: 'struct notifier_block' declared inside parameter list will not be visible outside of this definition or declaration [-Werror] It's sufficient to declare the struct tag here, which avoids pulling in more header files. Fixes: 447ae3166702 ("x86: Don't include linux/irq.h from asm/hardirq.h") Signed-off-by: Arnd Bergmann Signed-off-by: Thomas Gleixner Cc: Nicolai Stange Cc: "H. Peter Anvin" Cc: Greg Kroah-Hartman Cc: Borislav Petkov Link: https://lkml.kernel.org/r/20180817100156.3009043-1-arnd@arndb.de Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/mce.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 340070415c2c3..90fef69e4c5a1 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -200,6 +200,7 @@ enum mce_notifier_prios { MCE_PRIO_LOWEST = 0, }; +struct notifier_block; extern void mce_register_decode_chain(struct notifier_block *nb); extern void mce_unregister_decode_chain(struct notifier_block *nb); -- GitLab From 52ec8484a7c53ccc81bdba93468c583d926dc6de Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Wed, 15 Aug 2018 22:54:49 -0700 Subject: [PATCH 0122/2269] IB/hfi1: Invalid NUMA node information can cause a divide by zero [ Upstream commit c513de490f808d8480346f9a58e6a4a5f3de12e7 ] If the system BIOS does not supply NUMA node information to the PCI devices, the NUMA node is selected by choosing the current node. This can lead to the following crash: divide error: 0000 SMP CPU: 0 PID: 4 Comm: kworker/0:0 Tainted: G IOE ------------ 3.10.0-693.21.1.el7.x86_64 #1 Hardware name: Intel Corporation S2600KP/S2600KP, BIOS SE5C610.86B.01.01.0005.101720141054 10/17/2014 Workqueue: events work_for_cpu_fn task: ffff880174480fd0 ti: ffff880174488000 task.ti: ffff880174488000 RIP: 0010: [] hfi1_dev_affinity_init+0x129/0x6a0 [hfi1] RSP: 0018:ffff88017448bbf8 EFLAGS: 00010246 RAX: 0000000000000011 RBX: ffff88107ffba6c0 RCX: ffff88085c22e130 RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff880824ad0000 RBP: ffff88017448bc48 R08: 0000000000000011 R09: 0000000000000002 R10: ffff8808582b6ca0 R11: 0000000000003151 R12: ffff8808582b6ca0 R13: ffff8808582b6518 R14: ffff8808582b6010 R15: 0000000000000012 FS: 0000000000000000(0000) GS:ffff88085ec00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007efc707404f0 CR3: 0000000001a02000 CR4: 00000000001607f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Call Trace: hfi1_init_dd+0x14b3/0x27a0 [hfi1] ? pcie_capability_write_word+0x46/0x70 ? hfi1_pcie_init+0xc0/0x200 [hfi1] do_init_one+0x153/0x4c0 [hfi1] ? sched_clock_cpu+0x85/0xc0 init_one+0x1b5/0x260 [hfi1] local_pci_probe+0x4a/0xb0 work_for_cpu_fn+0x1a/0x30 process_one_work+0x17f/0x440 worker_thread+0x278/0x3c0 ? manage_workers.isra.24+0x2a0/0x2a0 kthread+0xd1/0xe0 ? insert_kthread_work+0x40/0x40 ret_from_fork+0x77/0xb0 ? insert_kthread_work+0x40/0x40 If the BIOS is not supplying NUMA information: - set the default table count to 1 for all possible nodes - select node 0 (instead of current NUMA) node to get consistent performance - generate an error indicating that the BIOS should be upgraded Reviewed-by: Gary Leshner Reviewed-by: Mike Marciniszyn Signed-off-by: Michael J. Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/hfi1/affinity.c | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c index b5fab55cc2750..b197e925fe363 100644 --- a/drivers/infiniband/hw/hfi1/affinity.c +++ b/drivers/infiniband/hw/hfi1/affinity.c @@ -146,7 +146,7 @@ int node_affinity_init(void) while ((dev = pci_get_device(ids->vendor, ids->device, dev))) { node = pcibus_to_node(dev->bus); if (node < 0) - node = numa_node_id(); + goto out; hfi1_per_node_cntr[node]++; } @@ -154,6 +154,18 @@ int node_affinity_init(void) } return 0; + +out: + /* + * Invalid PCI NUMA node information found, note it, and populate + * our database 1:1. + */ + pr_err("HFI: Invalid PCI NUMA node. Performance may be affected\n"); + pr_err("HFI: System BIOS may need to be upgraded\n"); + for (node = 0; node < node_affinity.num_possible_nodes; node++) + hfi1_per_node_cntr[node] = 1; + + return 0; } void node_affinity_destroy(void) @@ -227,8 +239,14 @@ int hfi1_dev_affinity_init(struct hfi1_devdata *dd) const struct cpumask *local_mask; int curr_cpu, possible, i; - if (node < 0) - node = numa_node_id(); + /* + * If the BIOS does not have the NUMA node information set, select + * NUMA 0 so we get consistent performance. + */ + if (node < 0) { + dd_dev_err(dd, "Invalid PCI NUMA node. Performance may be affected\n"); + node = 0; + } dd->node = node; local_mask = cpumask_of_node(dd->node); -- GitLab From 2dc61587a1035b6f0ab333e5151fe89e240c7606 Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Wed, 1 Aug 2018 12:57:20 +0200 Subject: [PATCH 0123/2269] pwm: meson: Fix mux clock names [ Upstream commit b96e9eb62841c519ba1db32d036628be3cdef91f ] Current clock name looks like this: /soc/bus@ffd00000/pwm@1b000#mux0 This is bad because CCF uses the clock to create a directory in clk debugfs. With such name, the directory creation (silently) fails and the debugfs entry end up being created at the debugfs root. With this change, the clock name will now be: ffd1b000.pwm#mux0 This matches the clock naming scheme used in the ethernet and mmc driver. It also fixes the problem with debugfs. Fixes: 36af66a79056 ("pwm: Convert to using %pOF instead of full_name") Signed-off-by: Jerome Brunet Acked-by: Neil Armstrong Signed-off-by: Thierry Reding Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/pwm/pwm-meson.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/pwm/pwm-meson.c b/drivers/pwm/pwm-meson.c index d589331d1884b..3540d00425d03 100644 --- a/drivers/pwm/pwm-meson.c +++ b/drivers/pwm/pwm-meson.c @@ -432,7 +432,6 @@ static int meson_pwm_init_channels(struct meson_pwm *meson, struct meson_pwm_channel *channels) { struct device *dev = meson->chip.dev; - struct device_node *np = dev->of_node; struct clk_init_data init; unsigned int i; char name[255]; @@ -441,7 +440,7 @@ static int meson_pwm_init_channels(struct meson_pwm *meson, for (i = 0; i < meson->chip.npwm; i++) { struct meson_pwm_channel *channel = &channels[i]; - snprintf(name, sizeof(name), "%pOF#mux%u", np, i); + snprintf(name, sizeof(name), "%s#mux%u", dev_name(dev), i); init.name = name; init.ops = &clk_mux_ops; -- GitLab From 4570403f6e116701503052011d129e7f86e44abd Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Fri, 17 Aug 2018 15:46:57 -0700 Subject: [PATCH 0124/2269] mm/fadvise.c: fix signed overflow UBSAN complaint [ Upstream commit a718e28f538441a3b6612da9ff226973376cdf0f ] Signed integer overflow is undefined according to the C standard. The overflow in ksys_fadvise64_64() is deliberate, but since it is signed overflow, UBSAN complains: UBSAN: Undefined behaviour in mm/fadvise.c:76:10 signed integer overflow: 4 + 9223372036854775805 cannot be represented in type 'long long int' Use unsigned types to do math. Unsigned overflow is defined so UBSAN will not complain about it. This patch doesn't change generated code. [akpm@linux-foundation.org: add comment explaining the casts] Link: http://lkml.kernel.org/r/20180629184453.7614-1-aryabinin@virtuozzo.com Signed-off-by: Andrey Ryabinin Reported-by: Reviewed-by: Andrew Morton Cc: Alexander Potapenko Cc: Dmitry Vyukov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- mm/fadvise.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/mm/fadvise.c b/mm/fadvise.c index 767887f5f3bfd..3f5f68ad57080 100644 --- a/mm/fadvise.c +++ b/mm/fadvise.c @@ -71,8 +71,12 @@ SYSCALL_DEFINE4(fadvise64_64, int, fd, loff_t, offset, loff_t, len, int, advice) goto out; } - /* Careful about overflows. Len == 0 means "as much as possible" */ - endbyte = offset + len; + /* + * Careful about overflows. Len == 0 means "as much as possible". Use + * unsigned math because signed overflows are undefined and UBSan + * complains. + */ + endbyte = (u64)offset + (u64)len; if (!len || endbyte < len) endbyte = -1; else -- GitLab From e20c4abf91867c906fa1df432850d658cc1cf19f Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Fri, 17 Aug 2018 15:44:34 -0700 Subject: [PATCH 0125/2269] fs/dcache.c: fix kmemcheck splat at take_dentry_name_snapshot() [ Upstream commit 6cd00a01f0c1ae6a852b09c59b8dd55cc6c35d1d ] Since only dentry->d_name.len + 1 bytes out of DNAME_INLINE_LEN bytes are initialized at __d_alloc(), we can't copy the whole size unconditionally. WARNING: kmemcheck: Caught 32-bit read from uninitialized memory (ffff8fa27465ac50) 636f6e66696766732e746d70000000000010000000000000020000000188ffff i i i i i i i i i i i i i u u u u u u u u u u i i i i i u u u u ^ RIP: 0010:take_dentry_name_snapshot+0x28/0x50 RSP: 0018:ffffa83000f5bdf8 EFLAGS: 00010246 RAX: 0000000000000020 RBX: ffff8fa274b20550 RCX: 0000000000000002 RDX: ffffa83000f5be40 RSI: ffff8fa27465ac50 RDI: ffffa83000f5be60 RBP: ffffa83000f5bdf8 R08: ffffa83000f5be48 R09: 0000000000000001 R10: ffff8fa27465ac00 R11: ffff8fa27465acc0 R12: ffff8fa27465ac00 R13: ffff8fa27465acc0 R14: 0000000000000000 R15: 0000000000000000 FS: 00007f79737ac8c0(0000) GS:ffffffff8fc30000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffff8fa274c0b000 CR3: 0000000134aa7002 CR4: 00000000000606f0 take_dentry_name_snapshot+0x28/0x50 vfs_rename+0x128/0x870 SyS_rename+0x3b2/0x3d0 entry_SYSCALL_64_fastpath+0x1a/0xa4 0xffffffffffffffff Link: http://lkml.kernel.org/r/201709131912.GBG39012.QMJLOVFSFFOOtH@I-love.SAKURA.ne.jp Signed-off-by: Tetsuo Handa Cc: Vegard Nossum Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/dcache.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/dcache.c b/fs/dcache.c index 8d4935978fecc..c1a7c174a9050 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -291,7 +291,8 @@ void take_dentry_name_snapshot(struct name_snapshot *name, struct dentry *dentry spin_unlock(&dentry->d_lock); name->name = p->name; } else { - memcpy(name->inline_name, dentry->d_iname, DNAME_INLINE_LEN); + memcpy(name->inline_name, dentry->d_iname, + dentry->d_name.len + 1); spin_unlock(&dentry->d_lock); name->name = name->inline_name; } -- GitLab From f95c5cde34e8d5f6a632b084889092dc8a2bdec8 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 15 Aug 2018 09:12:07 -0700 Subject: [PATCH 0126/2269] platform/x86: intel_punit_ipc: fix build errors [ Upstream commit 340fd4cff43f18bace9358d4decdc9b6ed0715be ] Fix build errors by #including . ../drivers/platform/x86/intel_punit_ipc.c: In function 'ipc_read_status': ../drivers/platform/x86/intel_punit_ipc.c:55:2: error: implicit declaration of function 'readl' [-Werror=implicit-function-declaration] return readl(ipcdev->base[type][BASE_IFACE]); ../drivers/platform/x86/intel_punit_ipc.c: In function 'ipc_write_cmd': ../drivers/platform/x86/intel_punit_ipc.c:60:2: error: implicit declaration of function 'writel' [-Werror=implicit-function-declaration] writel(cmd, ipcdev->base[type][BASE_IFACE]); Fixes: 447ae3166702 ("x86: Don't include linux/irq.h from asm/hardirq.h") Signed-off-by: Randy Dunlap Cc: Zha Qipeng Cc: platform-driver-x86@vger.kernel.org Signed-off-by: Andy Shevchenko Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/platform/x86/intel_punit_ipc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/platform/x86/intel_punit_ipc.c b/drivers/platform/x86/intel_punit_ipc.c index b5b890127479f..b7dfe06261f1e 100644 --- a/drivers/platform/x86/intel_punit_ipc.c +++ b/drivers/platform/x86/intel_punit_ipc.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include -- GitLab From 2b81b00edba15c61117e7e0b727757b6f28dc5dd Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 25 Jul 2018 21:38:43 +0200 Subject: [PATCH 0127/2269] netfilter: ip6t_rpfilter: set F_IFACE for linklocal addresses [ Upstream commit da786717e0894886301ed2536843c13f9e8fd53e ] Roman reports that DHCPv6 client no longer sees replies from server due to ip6tables -t raw -A PREROUTING -m rpfilter --invert -j DROP rule. We need to set the F_IFACE flag for linklocal addresses, they are scoped per-device. Fixes: 47b7e7f82802 ("netfilter: don't set F_IFACE on ipv6 fib lookups") Reported-by: Roman Mamedov Tested-by: Roman Mamedov Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/ipv6/netfilter/ip6t_rpfilter.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/net/ipv6/netfilter/ip6t_rpfilter.c b/net/ipv6/netfilter/ip6t_rpfilter.c index 1c4a5de3f301a..40eb16bd97860 100644 --- a/net/ipv6/netfilter/ip6t_rpfilter.c +++ b/net/ipv6/netfilter/ip6t_rpfilter.c @@ -26,6 +26,12 @@ static bool rpfilter_addr_unicast(const struct in6_addr *addr) return addr_type & IPV6_ADDR_UNICAST; } +static bool rpfilter_addr_linklocal(const struct in6_addr *addr) +{ + int addr_type = ipv6_addr_type(addr); + return addr_type & IPV6_ADDR_LINKLOCAL; +} + static bool rpfilter_lookup_reverse6(struct net *net, const struct sk_buff *skb, const struct net_device *dev, u8 flags) { @@ -48,7 +54,11 @@ static bool rpfilter_lookup_reverse6(struct net *net, const struct sk_buff *skb, } fl6.flowi6_mark = flags & XT_RPFILTER_VALID_MARK ? skb->mark : 0; - if ((flags & XT_RPFILTER_LOOSE) == 0) + + if (rpfilter_addr_linklocal(&iph->saddr)) { + lookup_flags |= RT6_LOOKUP_F_IFACE; + fl6.flowi6_oif = dev->ifindex; + } else if ((flags & XT_RPFILTER_LOOSE) == 0) fl6.flowi6_oif = dev->ifindex; rt = (void *) ip6_route_lookup(net, &fl6, lookup_flags); -- GitLab From a2805f40c77e8db5ad4a0534bf1b82ad01b5c7a7 Mon Sep 17 00:00:00 2001 From: Philipp Rudo Date: Mon, 13 Aug 2018 11:16:57 +0200 Subject: [PATCH 0128/2269] s390/kdump: Fix memleak in nt_vmcoreinfo [ Upstream commit 2d2e7075b87181ed0c675e4936e20bdadba02e1f ] The vmcoreinfo of a crashed system is potentially fragmented. Thus the crash kernel has an intermediate step where the vmcoreinfo is copied into a temporary, continuous buffer in the crash kernel memory. This temporary buffer is never freed. Free it now to prevent the memleak. While at it replace all occurrences of "VMCOREINFO" by its corresponding macro to prevent potential renaming issues. Signed-off-by: Philipp Rudo Acked-by: Heiko Carstens Signed-off-by: Heiko Carstens Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/s390/kernel/crash_dump.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c index 9f5ea9d870690..9b0216d571adc 100644 --- a/arch/s390/kernel/crash_dump.c +++ b/arch/s390/kernel/crash_dump.c @@ -404,11 +404,13 @@ static void *get_vmcoreinfo_old(unsigned long *size) if (copy_oldmem_kernel(nt_name, addr + sizeof(note), sizeof(nt_name) - 1)) return NULL; - if (strcmp(nt_name, "VMCOREINFO") != 0) + if (strcmp(nt_name, VMCOREINFO_NOTE_NAME) != 0) return NULL; vmcoreinfo = kzalloc_panic(note.n_descsz); - if (copy_oldmem_kernel(vmcoreinfo, addr + 24, note.n_descsz)) + if (copy_oldmem_kernel(vmcoreinfo, addr + 24, note.n_descsz)) { + kfree(vmcoreinfo); return NULL; + } *size = note.n_descsz; return vmcoreinfo; } @@ -418,15 +420,20 @@ static void *get_vmcoreinfo_old(unsigned long *size) */ static void *nt_vmcoreinfo(void *ptr) { + const char *name = VMCOREINFO_NOTE_NAME; unsigned long size; void *vmcoreinfo; vmcoreinfo = os_info_old_entry(OS_INFO_VMCOREINFO, &size); - if (!vmcoreinfo) - vmcoreinfo = get_vmcoreinfo_old(&size); + if (vmcoreinfo) + return nt_init_name(ptr, 0, vmcoreinfo, size, name); + + vmcoreinfo = get_vmcoreinfo_old(&size); if (!vmcoreinfo) return ptr; - return nt_init_name(ptr, 0, vmcoreinfo, size, "VMCOREINFO"); + ptr = nt_init_name(ptr, 0, vmcoreinfo, size, name); + kfree(vmcoreinfo); + return ptr; } /* -- GitLab From ccf1ae823e4a55839873f4e892a19d06f2723f59 Mon Sep 17 00:00:00 2001 From: Tan Hu Date: Wed, 25 Jul 2018 15:23:07 +0800 Subject: [PATCH 0129/2269] ipvs: fix race between ip_vs_conn_new() and ip_vs_del_dest() [ Upstream commit a53b42c11815d2357e31a9403ae3950517525894 ] We came across infinite loop in ipvs when using ipvs in docker env. When ipvs receives new packets and cannot find an ipvs connection, it will create a new connection, then if the dest is unavailable (i.e. IP_VS_DEST_F_AVAILABLE), the packet will be dropped sliently. But if the dropped packet is the first packet of this connection, the connection control timer never has a chance to start and the ipvs connection cannot be released. This will lead to memory leak, or infinite loop in cleanup_net() when net namespace is released like this: ip_vs_conn_net_cleanup at ffffffffa0a9f31a [ip_vs] __ip_vs_cleanup at ffffffffa0a9f60a [ip_vs] ops_exit_list at ffffffff81567a49 cleanup_net at ffffffff81568b40 process_one_work at ffffffff810a851b worker_thread at ffffffff810a9356 kthread at ffffffff810b0b6f ret_from_fork at ffffffff81697a18 race condition: CPU1 CPU2 ip_vs_in() ip_vs_conn_new() ip_vs_del_dest() __ip_vs_unlink_dest() ~IP_VS_DEST_F_AVAILABLE cp->dest && !IP_VS_DEST_F_AVAILABLE __ip_vs_conn_put ... cleanup_net ---> infinite looping Fix this by checking whether the timer already started. Signed-off-by: Tan Hu Reviewed-by: Jiang Biao Acked-by: Julian Anastasov Acked-by: Simon Horman Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/netfilter/ipvs/ip_vs_core.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 5cb7cac9177d8..1bd53b1e76723 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -1960,13 +1960,20 @@ ip_vs_in(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, int if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) { /* the destination server is not available */ - if (sysctl_expire_nodest_conn(ipvs)) { + __u32 flags = cp->flags; + + /* when timer already started, silently drop the packet.*/ + if (timer_pending(&cp->timer)) + __ip_vs_conn_put(cp); + else + ip_vs_conn_put(cp); + + if (sysctl_expire_nodest_conn(ipvs) && + !(flags & IP_VS_CONN_F_ONE_PACKET)) { /* try to expire the connection immediately */ ip_vs_conn_expire_now(cp); } - /* don't restart its timer, and silently - drop the packet. */ - __ip_vs_conn_put(cp); + return NF_DROP; } -- GitLab From 044e9463ec2056d1b98e6a7c69075ded1be19426 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Fri, 3 Aug 2018 20:59:51 -0700 Subject: [PATCH 0130/2269] mfd: sm501: Set coherent_dma_mask when creating subdevices [ Upstream commit 2f606da78230f09cf1a71fde6ee91d0c710fa2b2 ] Instantiating the sm501 OHCI subdevice results in a kernel warning. sm501-usb sm501-usb: SM501 OHCI sm501-usb sm501-usb: new USB bus registered, assigned bus number 1 WARNING: CPU: 0 PID: 1 at ./include/linux/dma-mapping.h:516 ohci_init+0x194/0x2d8 Modules linked in: CPU: 0 PID: 1 Comm: swapper Tainted: G W 4.18.0-rc7-00178-g0b5b1f9a78b5 #1 PC is at ohci_init+0x194/0x2d8 PR is at ohci_init+0x168/0x2d8 PC : 8c27844c SP : 8f81dd94 SR : 40008001 TEA : 29613060 R0 : 00000000 R1 : 00000000 R2 : 00000000 R3 : 00000202 R4 : 8fa98b88 R5 : 8c277e68 R6 : 00000000 R7 : 00000000 R8 : 8f965814 R9 : 8c388100 R10 : 8fa98800 R11 : 8fa98928 R12 : 8c48302c R13 : 8fa98920 R14 : 8c48302c MACH: 00000096 MACL: 0000017c GBR : 00000000 PR : 8c278420 Call trace: [<(ptrval)>] usb_add_hcd+0x1e8/0x6ec [<(ptrval)>] _dev_info+0x0/0x54 [<(ptrval)>] arch_local_save_flags+0x0/0x8 [<(ptrval)>] arch_local_irq_restore+0x0/0x24 [<(ptrval)>] ohci_hcd_sm501_drv_probe+0x114/0x2d8 ... Initialize coherent_dma_mask when creating SM501 subdevices to fix the problem. Fixes: b6d6454fdb66f ("mfd: SM501 core driver") Signed-off-by: Guenter Roeck Signed-off-by: Lee Jones Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/mfd/sm501.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/mfd/sm501.c b/drivers/mfd/sm501.c index 40534352e5748..3270b8dbc9498 100644 --- a/drivers/mfd/sm501.c +++ b/drivers/mfd/sm501.c @@ -714,6 +714,7 @@ sm501_create_subdev(struct sm501_devdata *sm, char *name, smdev->pdev.name = name; smdev->pdev.id = sm->pdev_id; smdev->pdev.dev.parent = sm->dev; + smdev->pdev.dev.coherent_dma_mask = 0xffffffff; if (res_count) { smdev->pdev.resource = (struct resource *)(smdev+1); -- GitLab From e4f419133dfaa72f9830f258ca10527e3ab359a9 Mon Sep 17 00:00:00 2001 From: Aleh Filipovich Date: Fri, 10 Aug 2018 22:07:25 +0200 Subject: [PATCH 0131/2269] platform/x86: asus-nb-wmi: Add keymap entry for lid flip action on UX360 [ Upstream commit 880b29ac107d15644bf4da228376ba3cd6af6d71 ] Add entry to WMI keymap for lid flip event on Asus UX360. On Asus Zenbook ux360 flipping lid from/to tablet mode triggers keyscan code 0xfa which cannot be handled and results in kernel log message "Unknown key fa pressed". Signed-off-by: Aleh Filipovich Signed-off-by: Andy Shevchenko Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/platform/x86/asus-nb-wmi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/platform/x86/asus-nb-wmi.c b/drivers/platform/x86/asus-nb-wmi.c index 5269a01d9bdd9..a6a33327f5e7f 100644 --- a/drivers/platform/x86/asus-nb-wmi.c +++ b/drivers/platform/x86/asus-nb-wmi.c @@ -487,6 +487,7 @@ static const struct key_entry asus_nb_wmi_keymap[] = { { KE_KEY, 0xC4, { KEY_KBDILLUMUP } }, { KE_KEY, 0xC5, { KEY_KBDILLUMDOWN } }, { KE_IGNORE, 0xC6, }, /* Ambient Light Sensor notification */ + { KE_KEY, 0xFA, { KEY_PROG2 } }, /* Lid flip action */ { KE_END, 0}, }; -- GitLab From 58de2cef2b6834dce6d2c7014215739603d43eab Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 31 Jul 2018 13:41:23 +0200 Subject: [PATCH 0132/2269] netfilter: fix memory leaks on netlink_dump_start error [ Upstream commit 3e673b23b541b8e7f773b2d378d6eb99831741cd ] Shaochun Chen points out we leak dumper filter state allocations stored in dump_control->data in case there is an error before netlink sets cb_running (after which ->done will be called at some point). In order to fix this, add .start functions and move allocations there. Same pattern as used in commit 90fd131afc565159c9e0ea742f082b337e10f8c6 ("netfilter: nf_tables: move dumper state allocation into ->start"). Reported-by: shaochun chen Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nf_conntrack_netlink.c | 26 ++++++++++++++++--------- net/netfilter/nfnetlink_acct.c | 29 +++++++++++++--------------- 2 files changed, 30 insertions(+), 25 deletions(-) diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index de4053d84364b..48dab1403b2c7 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -788,6 +788,21 @@ ctnetlink_alloc_filter(const struct nlattr * const cda[]) #endif } +static int ctnetlink_start(struct netlink_callback *cb) +{ + const struct nlattr * const *cda = cb->data; + struct ctnetlink_filter *filter = NULL; + + if (cda[CTA_MARK] && cda[CTA_MARK_MASK]) { + filter = ctnetlink_alloc_filter(cda); + if (IS_ERR(filter)) + return PTR_ERR(filter); + } + + cb->data = filter; + return 0; +} + static int ctnetlink_filter_match(struct nf_conn *ct, void *data) { struct ctnetlink_filter *filter = data; @@ -1194,19 +1209,12 @@ static int ctnetlink_get_conntrack(struct net *net, struct sock *ctnl, if (nlh->nlmsg_flags & NLM_F_DUMP) { struct netlink_dump_control c = { + .start = ctnetlink_start, .dump = ctnetlink_dump_table, .done = ctnetlink_done, + .data = (void *)cda, }; - if (cda[CTA_MARK] && cda[CTA_MARK_MASK]) { - struct ctnetlink_filter *filter; - - filter = ctnetlink_alloc_filter(cda); - if (IS_ERR(filter)) - return PTR_ERR(filter); - - c.data = filter; - } return netlink_dump_start(ctnl, skb, nlh, &c); } diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c index c45e6d4358abe..75624d17fc69e 100644 --- a/net/netfilter/nfnetlink_acct.c +++ b/net/netfilter/nfnetlink_acct.c @@ -238,29 +238,33 @@ static const struct nla_policy filter_policy[NFACCT_FILTER_MAX + 1] = { [NFACCT_FILTER_VALUE] = { .type = NLA_U32 }, }; -static struct nfacct_filter * -nfacct_filter_alloc(const struct nlattr * const attr) +static int nfnl_acct_start(struct netlink_callback *cb) { - struct nfacct_filter *filter; + const struct nlattr *const attr = cb->data; struct nlattr *tb[NFACCT_FILTER_MAX + 1]; + struct nfacct_filter *filter; int err; + if (!attr) + return 0; + err = nla_parse_nested(tb, NFACCT_FILTER_MAX, attr, filter_policy, NULL); if (err < 0) - return ERR_PTR(err); + return err; if (!tb[NFACCT_FILTER_MASK] || !tb[NFACCT_FILTER_VALUE]) - return ERR_PTR(-EINVAL); + return -EINVAL; filter = kzalloc(sizeof(struct nfacct_filter), GFP_KERNEL); if (!filter) - return ERR_PTR(-ENOMEM); + return -ENOMEM; filter->mask = ntohl(nla_get_be32(tb[NFACCT_FILTER_MASK])); filter->value = ntohl(nla_get_be32(tb[NFACCT_FILTER_VALUE])); + cb->data = filter; - return filter; + return 0; } static int nfnl_acct_get(struct net *net, struct sock *nfnl, @@ -275,18 +279,11 @@ static int nfnl_acct_get(struct net *net, struct sock *nfnl, if (nlh->nlmsg_flags & NLM_F_DUMP) { struct netlink_dump_control c = { .dump = nfnl_acct_dump, + .start = nfnl_acct_start, .done = nfnl_acct_done, + .data = (void *)tb[NFACCT_FILTER], }; - if (tb[NFACCT_FILTER]) { - struct nfacct_filter *filter; - - filter = nfacct_filter_alloc(tb[NFACCT_FILTER]); - if (IS_ERR(filter)) - return PTR_ERR(filter); - - c.data = filter; - } return netlink_dump_start(nfnl, skb, nlh, &c); } -- GitLab From 0c02e0c3fd13f9b6eb4b5f7ce3b739e7c348ae2b Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 16 Aug 2018 21:49:06 +0200 Subject: [PATCH 0133/2269] tcp, ulp: add alias for all ulp modules [ Upstream commit 037b0b86ecf5646f8eae777d8b52ff8b401692ec ] Lets not turn the TCP ULP lookup into an arbitrary module loader as we only intend to load ULP modules through this mechanism, not other unrelated kernel modules: [root@bar]# cat foo.c #include #include #include #include int main(void) { int sock = socket(PF_INET, SOCK_STREAM, 0); setsockopt(sock, IPPROTO_TCP, TCP_ULP, "sctp", sizeof("sctp")); return 0; } [root@bar]# gcc foo.c -O2 -Wall [root@bar]# lsmod | grep sctp [root@bar]# ./a.out [root@bar]# lsmod | grep sctp sctp 1077248 4 libcrc32c 16384 3 nf_conntrack,nf_nat,sctp [root@bar]# Fix it by adding module alias to TCP ULP modules, so probing module via request_module() will be limited to tcp-ulp-[name]. The existing modules like kTLS will load fine given tcp-ulp-tls alias, but others will fail to load: [root@bar]# lsmod | grep sctp [root@bar]# ./a.out [root@bar]# lsmod | grep sctp [root@bar]# Sockmap is not affected from this since it's either built-in or not. Fixes: 734942cc4ea6 ("tcp: ULP infrastructure") Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Acked-by: Song Liu Signed-off-by: Alexei Starovoitov Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- include/net/tcp.h | 4 ++++ net/ipv4/tcp_ulp.c | 2 +- net/tls/tls_main.c | 1 + 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index eca8d65cad1e9..0c828aac7e044 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -2063,6 +2063,10 @@ int tcp_set_ulp(struct sock *sk, const char *name); void tcp_get_available_ulp(char *buf, size_t len); void tcp_cleanup_ulp(struct sock *sk); +#define MODULE_ALIAS_TCP_ULP(name) \ + __MODULE_INFO(alias, alias_userspace, name); \ + __MODULE_INFO(alias, alias_tcp_ulp, "tcp-ulp-" name) + /* Call BPF_SOCK_OPS program that returns an int. If the return value * is < 0, then the BPF op failed (for example if the loaded BPF * program does not support the chosen operation or there is no BPF diff --git a/net/ipv4/tcp_ulp.c b/net/ipv4/tcp_ulp.c index 6bb9e14c710a7..1feecb72f4fc8 100644 --- a/net/ipv4/tcp_ulp.c +++ b/net/ipv4/tcp_ulp.c @@ -39,7 +39,7 @@ static const struct tcp_ulp_ops *__tcp_ulp_find_autoload(const char *name) #ifdef CONFIG_MODULES if (!ulp && capable(CAP_NET_ADMIN)) { rcu_read_unlock(); - request_module("%s", name); + request_module("tcp-ulp-%s", name); rcu_read_lock(); ulp = tcp_ulp_find(name); } diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index ffb1a3a69bdd9..055b9992d8c78 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -44,6 +44,7 @@ MODULE_AUTHOR("Mellanox Technologies"); MODULE_DESCRIPTION("Transport Layer Security Support"); MODULE_LICENSE("Dual BSD/GPL"); +MODULE_ALIAS_TCP_ULP("tls"); static struct proto tls_base_prot; static struct proto tls_sw_prot; -- GitLab From c16a0727c71a3e05e6b9fdaac759d76c79375215 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Thu, 9 Aug 2018 22:00:47 +0300 Subject: [PATCH 0134/2269] RDMA/hns: Fix usage of bitmap allocation functions return values [ Upstream commit a1ceeca679dccc492235f0f629d9e9f7b3d51ca8 ] hns bitmap allocation functions return 0 on success and -1 on failure. Callers of these functions wrongly used their return value as an errno, fix that by making a proper conversion. Fixes: a598c6f4c5a8 ("IB/hns: Simplify function of pd alloc and qp alloc") Signed-off-by: Gal Pressman Acked-by: Lijun Ou Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/hns/hns_roce_pd.c | 2 +- drivers/infiniband/hw/hns/hns_roce_qp.c | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_pd.c b/drivers/infiniband/hw/hns/hns_roce_pd.c index a64500fa11453..3cef53c651331 100644 --- a/drivers/infiniband/hw/hns/hns_roce_pd.c +++ b/drivers/infiniband/hw/hns/hns_roce_pd.c @@ -35,7 +35,7 @@ static int hns_roce_pd_alloc(struct hns_roce_dev *hr_dev, unsigned long *pdn) { - return hns_roce_bitmap_alloc(&hr_dev->pd_bitmap, pdn); + return hns_roce_bitmap_alloc(&hr_dev->pd_bitmap, pdn) ? -ENOMEM : 0; } static void hns_roce_pd_free(struct hns_roce_dev *hr_dev, unsigned long pdn) diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index f5dd21c2d275e..3a37d26889df1 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -114,7 +114,10 @@ static int hns_roce_reserve_range_qp(struct hns_roce_dev *hr_dev, int cnt, { struct hns_roce_qp_table *qp_table = &hr_dev->qp_table; - return hns_roce_bitmap_alloc_range(&qp_table->bitmap, cnt, align, base); + return hns_roce_bitmap_alloc_range(&qp_table->bitmap, cnt, align, + base) ? + -ENOMEM : + 0; } enum hns_roce_qp_state to_hns_roce_state(enum ib_qp_state state) -- GitLab From a77439e5fb28b8396eaec38f24d373393d65e95b Mon Sep 17 00:00:00 2001 From: Xi Wang Date: Tue, 14 Aug 2018 17:13:13 +0100 Subject: [PATCH 0135/2269] net: hns3: Fix for command format parsing error in hclge_is_all_function_id_zero [ Upstream commit 6c39d5278e62956238a681e4cfc69fae5507fc57 ] According to the functional specification of hardware, the first descriptor of response from command 'lookup vlan talbe' is not valid. Currently, the first descriptor is parsed as normal value, which will cause an expected error. This patch fixes this problem by skipping the first descriptor. Fixes: 46a3df9f9718 ("net: hns3: Add HNS3 Acceleration Engine & Compatibility Layer Support") Signed-off-by: Xi Wang Signed-off-by: Peng Li Signed-off-by: Salil Mehta Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index c133491ad9fa0..654aad6e748bd 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -3105,7 +3105,7 @@ static bool hclge_is_all_function_id_zero(struct hclge_desc *desc) #define HCLGE_FUNC_NUMBER_PER_DESC 6 int i, j; - for (i = 0; i < HCLGE_DESC_NUMBER; i++) + for (i = 1; i < HCLGE_DESC_NUMBER; i++) for (j = 0; j < HCLGE_FUNC_NUMBER_PER_DESC; j++) if (desc[i].data[j]) return false; -- GitLab From 800dbcee10cd3933991e4e1865e54ba6dd73220e Mon Sep 17 00:00:00 2001 From: Jian Shen Date: Tue, 14 Aug 2018 17:13:15 +0100 Subject: [PATCH 0136/2269] net: hns3: Fix for phy link issue when using marvell phy driver [ Upstream commit 60081dcc4fce385ade26d3145b2479789df0b7e5 ] For marvell phy m88e1510, bit SUPPORTED_FIBRE of phydev->supported is default on. Both phy_resume() and phy_suspend() will check the SUPPORTED_FIBRE bit and write register of fibre page. Currently in hns3 driver, the SUPPORTED_FIBRE bit will be cleared after phy_connect_direct() finished. Because phy_resume() is called in phy_connect_direct(), and phy_suspend() is called when disconnect phy device, so the operation for fibre page register is not symmetrical. It will cause phy link issue when reload hns3 driver. This patch fixes it by disable the SUPPORTED_FIBRE before connecting phy. Fixes: 256727da7395 ("net: hns3: Add MDIO support to HNS3 Ethernet driver for hip08 SoC") Signed-off-by: Jian Shen Signed-off-by: Peng Li Signed-off-by: Salil Mehta Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c index f32d719c4f77a..8f90dd1be6b59 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c @@ -187,6 +187,8 @@ int hclge_mac_start_phy(struct hclge_dev *hdev) if (!phydev) return 0; + phydev->supported &= ~SUPPORTED_FIBRE; + ret = phy_connect_direct(netdev, phydev, hclge_mac_adjust_link, PHY_INTERFACE_MODE_SGMII); -- GitLab From 5e51aa84f4e82017ebd53b6d5e60c4435d9dd7f3 Mon Sep 17 00:00:00 2001 From: Benno Evers Date: Fri, 10 Aug 2018 15:36:13 +0200 Subject: [PATCH 0137/2269] perf tools: Check for null when copying nsinfo. [ Upstream commit 3f4417d693b43fa240ac8bde4487f67745ca23d8 ] The argument to nsinfo__copy() was assumed to be valid, but some code paths exist that will lead to NULL being passed. In particular, running 'perf script -D' on a perf.data file containing an PERF_RECORD_MMAP event associating the '[vdso]' dso with pid 0 earlier in the event stream will lead to a segfault. Since all calling code is already checking for a non-null return value, just return NULL for this case as well. Signed-off-by: Benno Evers Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Krister Johansen Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180810133614.9925-1-bevers@mesosphere.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/namespaces.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/perf/util/namespaces.c b/tools/perf/util/namespaces.c index a58e91197729a..1ef0049860a8b 100644 --- a/tools/perf/util/namespaces.c +++ b/tools/perf/util/namespaces.c @@ -138,6 +138,9 @@ struct nsinfo *nsinfo__copy(struct nsinfo *nsi) { struct nsinfo *nnsi; + if (nsi == NULL) + return NULL; + nnsi = calloc(1, sizeof(*nnsi)); if (nnsi != NULL) { nnsi->pid = nsi->pid; -- GitLab From 5859129cf62ea3e96bcaf9592942344a8eff1853 Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Thu, 9 Aug 2018 10:59:01 +0200 Subject: [PATCH 0138/2269] irqchip/bcm7038-l1: Hide cpu offline callback when building for !SMP [ Upstream commit 0702bc4d2fe793018ad9aa0eb14bff7f526c4095 ] When compiling bmips with SMP disabled, the build fails with: drivers/irqchip/irq-bcm7038-l1.o: In function `bcm7038_l1_cpu_offline': drivers/irqchip/irq-bcm7038-l1.c:242: undefined reference to `irq_set_affinity_locked' make[5]: *** [vmlinux] Error 1 Fix this by adding and setting bcm7038_l1_cpu_offline only when actually compiling for SMP. It wouldn't have been used anyway, as it requires CPU_HOTPLUG, which in turn requires SMP. Fixes: 34c535793bcb ("irqchip/bcm7038-l1: Implement irq_cpu_offline() callback") Signed-off-by: Jonas Gorski Signed-off-by: Marc Zyngier Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/irqchip/irq-bcm7038-l1.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/irqchip/irq-bcm7038-l1.c b/drivers/irqchip/irq-bcm7038-l1.c index 55cfb986225be..0b9a8b709abf8 100644 --- a/drivers/irqchip/irq-bcm7038-l1.c +++ b/drivers/irqchip/irq-bcm7038-l1.c @@ -217,6 +217,7 @@ static int bcm7038_l1_set_affinity(struct irq_data *d, return 0; } +#ifdef CONFIG_SMP static void bcm7038_l1_cpu_offline(struct irq_data *d) { struct cpumask *mask = irq_data_get_affinity_mask(d); @@ -241,6 +242,7 @@ static void bcm7038_l1_cpu_offline(struct irq_data *d) } irq_set_affinity_locked(d, &new_affinity, false); } +#endif static int __init bcm7038_l1_init_one(struct device_node *dn, unsigned int idx, @@ -293,7 +295,9 @@ static struct irq_chip bcm7038_l1_irq_chip = { .irq_mask = bcm7038_l1_mask, .irq_unmask = bcm7038_l1_unmask, .irq_set_affinity = bcm7038_l1_set_affinity, +#ifdef CONFIG_SMP .irq_cpu_offline = bcm7038_l1_cpu_offline, +#endif }; static int bcm7038_l1_map(struct irq_domain *d, unsigned int virq, -- GitLab From 4cd195263e420311d31451cc053fde8037f326f1 Mon Sep 17 00:00:00 2001 From: Tomas Bortoli Date: Mon, 23 Jul 2018 20:42:53 +0200 Subject: [PATCH 0139/2269] net/9p/trans_fd.c: fix race by holding the lock [ Upstream commit 9f476d7c540cb57556d3cc7e78704e6cd5100f5f ] It may be possible to run p9_fd_cancel() with a deleted req->req_list and incur in a double del. To fix hold the client->lock while changing the status, so the other threads will be synchronized. Link: http://lkml.kernel.org/r/20180723184253.6682-1-tomasbortoli@gmail.com Signed-off-by: Tomas Bortoli Reported-by: syzbot+735d926e9d1317c3310c@syzkaller.appspotmail.com To: Eric Van Hensbergen To: Ron Minnich To: Latchesar Ionkov Cc: Yiwen Jiang Cc: David S. Miller Signed-off-by: Dominique Martinet Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/9p/trans_fd.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index 38e21a1e97bcf..a9c65f13b7f51 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -199,15 +199,14 @@ static void p9_mux_poll_stop(struct p9_conn *m) static void p9_conn_cancel(struct p9_conn *m, int err) { struct p9_req_t *req, *rtmp; - unsigned long flags; LIST_HEAD(cancel_list); p9_debug(P9_DEBUG_ERROR, "mux %p err %d\n", m, err); - spin_lock_irqsave(&m->client->lock, flags); + spin_lock(&m->client->lock); if (m->err) { - spin_unlock_irqrestore(&m->client->lock, flags); + spin_unlock(&m->client->lock); return; } @@ -219,7 +218,6 @@ static void p9_conn_cancel(struct p9_conn *m, int err) list_for_each_entry_safe(req, rtmp, &m->unsent_req_list, req_list) { list_move(&req->req_list, &cancel_list); } - spin_unlock_irqrestore(&m->client->lock, flags); list_for_each_entry_safe(req, rtmp, &cancel_list, req_list) { p9_debug(P9_DEBUG_ERROR, "call back req %p\n", req); @@ -228,6 +226,7 @@ static void p9_conn_cancel(struct p9_conn *m, int err) req->t_err = err; p9_client_cb(m->client, req, REQ_STATUS_ERROR); } + spin_unlock(&m->client->lock); } static int @@ -385,8 +384,9 @@ static void p9_read_work(struct work_struct *work) if (m->req->status != REQ_STATUS_ERROR) status = REQ_STATUS_RCVD; list_del(&m->req->req_list); - spin_unlock(&m->client->lock); + /* update req->status while holding client->lock */ p9_client_cb(m->client, m->req, status); + spin_unlock(&m->client->lock); m->rc.sdata = NULL; m->rc.offset = 0; m->rc.capacity = 0; -- GitLab From c2842800d6bde2e28574017180ff475826fba75d Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Tue, 17 Jul 2018 19:14:45 -0700 Subject: [PATCH 0140/2269] net/9p: fix error path of p9_virtio_probe [ Upstream commit 92aef4675d5b1b55404e1532379e343bed0e5cf2 ] Currently when virtio_find_single_vq fails, we go through del_vqs which throws a warning (Trying to free already-free IRQ). Skip del_vqs if vq allocation failed. Link: http://lkml.kernel.org/r/20180524101021.49880-1-jean-philippe.brucker@arm.com Signed-off-by: Jean-Philippe Brucker Reviewed-by: Greg Kurz Cc: Eric Van Hensbergen Cc: Ron Minnich Cc: Latchesar Ionkov Signed-off-by: Andrew Morton Signed-off-by: Dominique Martinet Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/9p/trans_virtio.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index da0d3b2574593..e73fd647065a1 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -571,7 +571,7 @@ static int p9_virtio_probe(struct virtio_device *vdev) chan->vq = virtio_find_single_vq(vdev, req_done, "requests"); if (IS_ERR(chan->vq)) { err = PTR_ERR(chan->vq); - goto out_free_vq; + goto out_free_chan; } chan->vq->vdev->priv = chan; spin_lock_init(&chan->lock); @@ -624,6 +624,7 @@ out_free_tag: kfree(tag); out_free_vq: vdev->config->del_vqs(vdev); +out_free_chan: kfree(chan); fail: return err; -- GitLab From 47425c36d3fbd00c9f5d1f76d6c70d05fa0274a1 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sun, 29 Jul 2018 12:16:59 +0800 Subject: [PATCH 0141/2269] f2fs: fix to clear PG_checked flag in set_page_dirty() [ Upstream commit 66110abc4c931f879d70e83e1281f891699364bf ] PG_checked flag will be set on data page during GC, later, we can recognize such page by the flag and migrate page to cold segment. But previously, we don't clear this flag when invalidating data page, after page redirtying, we will write it into wrong log. Let's clear PG_checked flag in set_page_dirty() to avoid this. Signed-off-by: Weichao Guo Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/f2fs/data.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 85142e5df88b0..e10bd73f0723e 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -2190,6 +2190,10 @@ static int f2fs_set_data_page_dirty(struct page *page) if (!PageUptodate(page)) SetPageUptodate(page); + /* don't remain PG_checked flag which was set during GC */ + if (is_cold_data(page)) + clear_cold_data(page); + if (f2fs_is_atomic_file(inode) && !f2fs_is_commit_atomic_write(inode)) { if (!IS_ATOMIC_WRITTEN_PAGE(page)) { register_inmem_page(inode, page); -- GitLab From c20a5e06b75063d0903872adf583b38cd84966e5 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 10 Jul 2018 16:20:56 +1000 Subject: [PATCH 0142/2269] powerpc/uaccess: Enable get_user(u64, *p) on 32-bit [ Upstream commit f7a6947cd49b7ff4e03f1b4f7e7b223003d752ca ] Currently if you build a 32-bit powerpc kernel and use get_user() to load a u64 value it will fail to build with eg: kernel/rseq.o: In function `rseq_get_rseq_cs': kernel/rseq.c:123: undefined reference to `__get_user_bad' This is hitting the check in __get_user_size() that makes sure the size we're copying doesn't exceed the size of the destination: #define __get_user_size(x, ptr, size, retval) do { retval = 0; __chk_user_ptr(ptr); if (size > sizeof(x)) (x) = __get_user_bad(); Which doesn't immediately make sense because the size of the destination is u64, but it's not really, because __get_user_check() etc. internally create an unsigned long and copy into that: #define __get_user_check(x, ptr, size) ({ long __gu_err = -EFAULT; unsigned long __gu_val = 0; The problem being that on 32-bit unsigned long is not big enough to hold a u64. We can fix this with a trick from hpa in the x86 code, we statically check the type of x and set the type of __gu_val to either unsigned long or unsigned long long. Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/include/asm/uaccess.h | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index 11f4bd07cce0e..565cead12be2f 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -223,10 +223,17 @@ do { \ } \ } while (0) +/* + * This is a type: either unsigned long, if the argument fits into + * that type, or otherwise unsigned long long. + */ +#define __long_type(x) \ + __typeof__(__builtin_choose_expr(sizeof(x) > sizeof(0UL), 0ULL, 0UL)) + #define __get_user_nocheck(x, ptr, size) \ ({ \ long __gu_err; \ - unsigned long __gu_val; \ + __long_type(*(ptr)) __gu_val; \ const __typeof__(*(ptr)) __user *__gu_addr = (ptr); \ __chk_user_ptr(ptr); \ if (!is_kernel_addr((unsigned long)__gu_addr)) \ @@ -239,7 +246,7 @@ do { \ #define __get_user_check(x, ptr, size) \ ({ \ long __gu_err = -EFAULT; \ - unsigned long __gu_val = 0; \ + __long_type(*(ptr)) __gu_val = 0; \ const __typeof__(*(ptr)) __user *__gu_addr = (ptr); \ might_fault(); \ if (access_ok(VERIFY_READ, __gu_addr, (size))) \ @@ -251,7 +258,7 @@ do { \ #define __get_user_nosleep(x, ptr, size) \ ({ \ long __gu_err; \ - unsigned long __gu_val; \ + __long_type(*(ptr)) __gu_val; \ const __typeof__(*(ptr)) __user *__gu_addr = (ptr); \ __chk_user_ptr(ptr); \ __get_user_size(__gu_val, __gu_addr, (size), __gu_err); \ -- GitLab From af675a6eefe905585bf954d0db309fdc022c6af2 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 8 Aug 2018 14:57:24 +0300 Subject: [PATCH 0143/2269] powerpc: Fix size calculation using resource_size() [ Upstream commit c42d3be0c06f0c1c416054022aa535c08a1f9b39 ] The problem is the the calculation should be "end - start + 1" but the plus one is missing in this calculation. Fixes: 8626816e905e ("powerpc: add support for MPIC message register API") Signed-off-by: Dan Carpenter Reviewed-by: Tyrel Datwyler Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/sysdev/mpic_msgr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/sysdev/mpic_msgr.c b/arch/powerpc/sysdev/mpic_msgr.c index eb69a51862431..280e964e1aa88 100644 --- a/arch/powerpc/sysdev/mpic_msgr.c +++ b/arch/powerpc/sysdev/mpic_msgr.c @@ -196,7 +196,7 @@ static int mpic_msgr_probe(struct platform_device *dev) /* IO map the message register block. */ of_address_to_resource(np, 0, &rsrc); - msgr_block_addr = ioremap(rsrc.start, rsrc.end - rsrc.start); + msgr_block_addr = ioremap(rsrc.start, resource_size(&rsrc)); if (!msgr_block_addr) { dev_err(&dev->dev, "Failed to iomap MPIC message registers"); return -EFAULT; -- GitLab From 6952b4ed9e4229485dde7b52d5511cc16e2c0df8 Mon Sep 17 00:00:00 2001 From: Sandipan Das Date: Thu, 9 Aug 2018 21:49:29 +0530 Subject: [PATCH 0144/2269] perf probe powerpc: Fix trace event post-processing [ Upstream commit 354b064b8ebc1e1ede58550ca9e08bfa81e6af43 ] In some cases, a symbol may have multiple aliases. Attempting to add an entry probe for such symbols results in a probe being added at an incorrect location while it fails altogether for return probes. This is only applicable for binaries with debug information. During the arch-dependent post-processing, the offset from the start of the symbol at which the probe is to be attached is determined and added to the start address of the symbol to get the probe's location. In case there are multiple aliases, this offset gets added multiple times for each alias of the symbol and we end up with an incorrect probe location. This can be verified on a powerpc64le system as shown below. $ nm /lib/modules/$(uname -r)/build/vmlinux | grep "sys_open$" ... c000000000414290 T __se_sys_open c000000000414290 T sys_open $ objdump -d /lib/modules/$(uname -r)/build/vmlinux | grep -A 10 "<__se_sys_open>:" c000000000414290 <__se_sys_open>: c000000000414290: 19 01 4c 3c addis r2,r12,281 c000000000414294: 70 c4 42 38 addi r2,r2,-15248 c000000000414298: a6 02 08 7c mflr r0 c00000000041429c: e8 ff a1 fb std r29,-24(r1) c0000000004142a0: f0 ff c1 fb std r30,-16(r1) c0000000004142a4: f8 ff e1 fb std r31,-8(r1) c0000000004142a8: 10 00 01 f8 std r0,16(r1) c0000000004142ac: c1 ff 21 f8 stdu r1,-64(r1) c0000000004142b0: 78 23 9f 7c mr r31,r4 c0000000004142b4: 78 1b 7e 7c mr r30,r3 For both the entry probe and the return probe, the probe location should be _text+4276888 (0xc000000000414298). Since another alias exists for 'sys_open', the post-processing code will end up adding the offset (8 for powerpc64le) twice and perf will attempt to add the probe at _text+4276896 (0xc0000000004142a0) instead. Before: # perf probe -v -a sys_open probe-definition(0): sys_open symbol:sys_open file:(null) line:0 offset:0 return:0 lazy:(null) 0 arguments Looking at the vmlinux_path (8 entries long) Using /lib/modules/4.18.0-rc8+/build/vmlinux for symbols Open Debuginfo file: /lib/modules/4.18.0-rc8+/build/vmlinux Try to find probe point from debuginfo. Symbol sys_open address found : c000000000414290 Matched function: __se_sys_open [2ad03a0] Probe point found: __se_sys_open+0 Found 1 probe_trace_events. Opening /sys/kernel/debug/tracing/kprobe_events write=1 Writing event: p:probe/sys_open _text+4276896 Added new event: probe:sys_open (on sys_open) ... # perf probe -v -a sys_open%return $retval probe-definition(0): sys_open%return symbol:sys_open file:(null) line:0 offset:0 return:1 lazy:(null) 0 arguments Looking at the vmlinux_path (8 entries long) Using /lib/modules/4.18.0-rc8+/build/vmlinux for symbols Open Debuginfo file: /lib/modules/4.18.0-rc8+/build/vmlinux Try to find probe point from debuginfo. Symbol sys_open address found : c000000000414290 Matched function: __se_sys_open [2ad03a0] Probe point found: __se_sys_open+0 Found 1 probe_trace_events. Opening /sys/kernel/debug/tracing/README write=0 Opening /sys/kernel/debug/tracing/kprobe_events write=1 Parsing probe_events: p:probe/sys_open _text+4276896 Group:probe Event:sys_open probe:p Writing event: r:probe/sys_open__return _text+4276896 Failed to write event: Invalid argument Error: Failed to add events. Reason: Invalid argument (Code: -22) After: # perf probe -v -a sys_open probe-definition(0): sys_open symbol:sys_open file:(null) line:0 offset:0 return:0 lazy:(null) 0 arguments Looking at the vmlinux_path (8 entries long) Using /lib/modules/4.18.0-rc8+/build/vmlinux for symbols Open Debuginfo file: /lib/modules/4.18.0-rc8+/build/vmlinux Try to find probe point from debuginfo. Symbol sys_open address found : c000000000414290 Matched function: __se_sys_open [2ad03a0] Probe point found: __se_sys_open+0 Found 1 probe_trace_events. Opening /sys/kernel/debug/tracing/kprobe_events write=1 Writing event: p:probe/sys_open _text+4276888 Added new event: probe:sys_open (on sys_open) ... # perf probe -v -a sys_open%return $retval probe-definition(0): sys_open%return symbol:sys_open file:(null) line:0 offset:0 return:1 lazy:(null) 0 arguments Looking at the vmlinux_path (8 entries long) Using /lib/modules/4.18.0-rc8+/build/vmlinux for symbols Open Debuginfo file: /lib/modules/4.18.0-rc8+/build/vmlinux Try to find probe point from debuginfo. Symbol sys_open address found : c000000000414290 Matched function: __se_sys_open [2ad03a0] Probe point found: __se_sys_open+0 Found 1 probe_trace_events. Opening /sys/kernel/debug/tracing/README write=0 Opening /sys/kernel/debug/tracing/kprobe_events write=1 Parsing probe_events: p:probe/sys_open _text+4276888 Group:probe Event:sys_open probe:p Writing event: r:probe/sys_open__return _text+4276888 Added new event: probe:sys_open__return (on sys_open%return) ... Reported-by: Aneesh Kumar Signed-off-by: Sandipan Das Acked-by: Naveen N. Rao Cc: Aneesh Kumar Cc: Jiri Olsa Cc: Ravi Bangoria Fixes: 99e608b5954c ("perf probe ppc64le: Fix probe location when using DWARF") Link: http://lkml.kernel.org/r/20180809161929.35058-1-sandipan@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/perf/arch/powerpc/util/sym-handling.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/perf/arch/powerpc/util/sym-handling.c b/tools/perf/arch/powerpc/util/sym-handling.c index 53d83d7e6a096..20e7d74d86cd1 100644 --- a/tools/perf/arch/powerpc/util/sym-handling.c +++ b/tools/perf/arch/powerpc/util/sym-handling.c @@ -141,8 +141,10 @@ void arch__post_process_probe_trace_events(struct perf_probe_event *pev, for (i = 0; i < ntevs; i++) { tev = &pev->tevs[i]; map__for_each_symbol(map, sym, tmp) { - if (map->unmap_ip(map, sym->start) == tev->point.address) + if (map->unmap_ip(map, sym->start) == tev->point.address) { arch__fix_tev_from_maps(pev, tev, map, sym); + break; + } } } } -- GitLab From d67c7c9dd14f3f6d6743d802ef017f14ddc73d3a Mon Sep 17 00:00:00 2001 From: Greg Edwards Date: Wed, 8 Aug 2018 13:27:53 -0600 Subject: [PATCH 0145/2269] block: bvec_nr_vecs() returns value for wrong slab [ Upstream commit d6c02a9beb67f13d5f14f23e72fa9981e8b84477 ] In commit ed996a52c868 ("block: simplify and cleanup bvec pool handling"), the value of the slab index is incremented by one in bvec_alloc() after the allocation is done to indicate an index value of 0 does not need to be later freed. bvec_nr_vecs() was not updated accordingly, and thus returns the wrong value. Decrement idx before performing the lookup. Fixes: ed996a52c868 ("block: simplify and cleanup bvec pool handling") Signed-off-by: Greg Edwards Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- block/bio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/bio.c b/block/bio.c index 194d28cdc642b..2e5d881423b82 100644 --- a/block/bio.c +++ b/block/bio.c @@ -156,7 +156,7 @@ out: unsigned int bvec_nr_vecs(unsigned short idx) { - return bvec_slabs[idx].nr_vecs; + return bvec_slabs[--idx].nr_vecs; } void bvec_free(mempool_t *pool, struct bio_vec *bv, unsigned int idx) -- GitLab From ab16afe8c624e63a9e11205f70883f25f82d5ec9 Mon Sep 17 00:00:00 2001 From: Stefan Haberland Date: Wed, 25 Jul 2018 14:00:47 +0200 Subject: [PATCH 0146/2269] s390/dasd: fix hanging offline processing due to canceled worker [ Upstream commit 669f3765b755fd8739ab46ce3a9c6292ce8b3d2a ] During offline processing two worker threads are canceled without freeing the device reference which leads to a hanging offline process. Reviewed-by: Jan Hoeppner Signed-off-by: Stefan Haberland Signed-off-by: Martin Schwidefsky Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/s390/block/dasd_eckd.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index 5ede251c52ca9..4f966613b36e9 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -2032,8 +2032,11 @@ static int dasd_eckd_basic_to_ready(struct dasd_device *device) static int dasd_eckd_online_to_ready(struct dasd_device *device) { - cancel_work_sync(&device->reload_device); - cancel_work_sync(&device->kick_validate); + if (cancel_work_sync(&device->reload_device)) + dasd_put_device(device); + if (cancel_work_sync(&device->kick_validate)) + dasd_put_device(device); + return 0; }; -- GitLab From c953a5038d2d3accdf91f703a42ebd1e1274b693 Mon Sep 17 00:00:00 2001 From: Stefan Haberland Date: Wed, 25 Jul 2018 13:27:10 +0200 Subject: [PATCH 0147/2269] s390/dasd: fix panic for failed online processing [ Upstream commit 7c6553d4db03350dad0110c3224194c19df76a8f ] Fix a panic that occurs for a device that got an error in dasd_eckd_check_characteristics() during online processing. For example the read configuration data command may have failed. If this error occurs the device is not being set online and the earlier invoked steps during online processing are rolled back. Therefore dasd_eckd_uncheck_device() is called which needs a valid private structure. But this pointer is not valid if dasd_eckd_check_characteristics() has failed. Check for a valid device->private pointer to prevent a panic. Reviewed-by: Jan Hoeppner Signed-off-by: Stefan Haberland Signed-off-by: Martin Schwidefsky Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/s390/block/dasd_eckd.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index 4f966613b36e9..4c7c8455da961 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -1778,6 +1778,9 @@ static void dasd_eckd_uncheck_device(struct dasd_device *device) struct dasd_eckd_private *private = device->private; int i; + if (!private) + return; + dasd_alias_disconnect_device_from_lcu(device); private->ned = NULL; private->sneq = NULL; -- GitLab From 24165131bcab624409b16bcb31f6214c43c1ac7d Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 8 Aug 2018 10:30:03 +0200 Subject: [PATCH 0148/2269] ACPI / scan: Initialize status to ACPI_STA_DEFAULT [ Upstream commit 5971b0c1594d6c34e257101ed5fdffec65205c50 ] Since commit 63347db0affa "ACPI / scan: Use acpi_bus_get_status() to initialize ACPI_TYPE_DEVICE devs" the status field of normal acpi_devices gets set to 0 by acpi_bus_type_and_status() and filled with its actual value later when acpi_add_single_object() calls acpi_bus_get_status(). This means that any acpi_match_device_ids() calls in between will always fail with -ENOENT. We already have a workaround for this, which temporary forces status to ACPI_STA_DEFAULT in drivers/acpi/x86/utils.c: acpi_device_always_present() and the next commit in this series adds another acpi_match_device_ids() call between status being initialized as 0 and the acpi_bus_get_status() call. Rather then adding another workaround, this commit makes acpi_bus_type_and_status() initialize status to ACPI_STA_DEFAULT, this is safe to do as the only code looking at status between the initialization and the acpi_bus_get_status() call is those acpi_match_device_ids() calls. Note this does mean that we need to (re)set status to 0 in case the acpi_bus_get_status() call fails. Signed-off-by: Hans de Goede Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/scan.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index c0984d33c4c85..2eddbb1fae6a0 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -1599,7 +1599,8 @@ static int acpi_add_single_object(struct acpi_device **child, * Note this must be done before the get power-/wakeup_dev-flags calls. */ if (type == ACPI_BUS_TYPE_DEVICE) - acpi_bus_get_status(device); + if (acpi_bus_get_status(device) < 0) + acpi_set_device_status(device, 0); acpi_bus_get_power_flags(device); acpi_bus_get_wakeup_device_flags(device); @@ -1677,7 +1678,7 @@ static int acpi_bus_type_and_status(acpi_handle handle, int *type, * acpi_add_single_object updates this once we've an acpi_device * so that acpi_bus_get_status' quirk handling can be used. */ - *sta = 0; + *sta = ACPI_STA_DEFAULT; break; case ACPI_TYPE_PROCESSOR: *type = ACPI_BUS_TYPE_PROCESSOR; -- GitLab From 8ac6b147534cb09addbcf6eff099e9b0bffe859a Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 8 Aug 2018 17:29:09 +0300 Subject: [PATCH 0149/2269] scsi: aic94xx: fix an error code in aic94xx_init() [ Upstream commit 0756c57bce3d26da2592d834d8910b6887021701 ] We accidentally return success instead of -ENOMEM on this error path. Fixes: 2908d778ab3e ("[SCSI] aic94xx: new driver") Signed-off-by: Dan Carpenter Reviewed-by: Johannes Thumshirn Reviewed-by: John Garry Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/aic94xx/aic94xx_init.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/aic94xx/aic94xx_init.c b/drivers/scsi/aic94xx/aic94xx_init.c index 6c838865ac5a7..4a4746cc6745f 100644 --- a/drivers/scsi/aic94xx/aic94xx_init.c +++ b/drivers/scsi/aic94xx/aic94xx_init.c @@ -1030,8 +1030,10 @@ static int __init aic94xx_init(void) aic94xx_transport_template = sas_domain_attach_transport(&aic94xx_transport_functions); - if (!aic94xx_transport_template) + if (!aic94xx_transport_template) { + err = -ENOMEM; goto out_destroy_caches; + } err = pci_register_driver(&aic94xx_pci_driver); if (err) -- GitLab From a9fac97e2e5012fe1e784e2bda60bdb812fc8c43 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Thu, 2 Aug 2018 05:42:04 +0000 Subject: [PATCH 0150/2269] NFSv4: Fix error handling in nfs4_sp4_select_mode() [ Upstream commit 72bf75cfc00c02aa66ef6133048f37aa5d88825c ] Error code is set in the error handling cases but never used. Fix it. Fixes: 937e3133cd0b ("NFSv4.1: Ensure we clear the SP4_MACH_CRED flags in nfs4_sp4_select_mode()") Signed-off-by: Wei Yongjun Signed-off-by: Anna Schumaker Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/nfs/nfs4proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index dda4a3a3ef6e0..77c7d29fcd3b8 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -7497,7 +7497,7 @@ static int nfs4_sp4_select_mode(struct nfs_client *clp, } out: clp->cl_sp4_flags = flags; - return 0; + return ret; } struct nfs41_exchange_id_data { -- GitLab From a333f3f2a25597fecaae27a5b89f58f00a418a61 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Mon, 6 Aug 2018 15:10:40 -0700 Subject: [PATCH 0151/2269] Input: do not use WARN() in input_alloc_absinfo() [ Upstream commit 100294cee9a98bfd4d6cb2d1c8a8aef0e959b0c4 ] Some of fuzzers set panic_on_warn=1 so that they can handle WARN()ings the same way they handle full-blown kernel crashes. We used WARN() in input_alloc_absinfo() to get a better idea where memory allocation failed, but since then kmalloc() and friends started dumping call stack on memory allocation failures anyway, so we are not getting anything extra from WARN(). Because of the above, let's replace WARN with dev_err(). We use dev_err() instead of simply removing message and relying on kcalloc() to give us stack dump so that we'd know the instance of hardware device to which we were trying to attach input device. Reported-by: Dmitry Vyukov Acked-by: Dmitry Vyukov Signed-off-by: Dmitry Torokhov Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/input/input.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/input/input.c b/drivers/input/input.c index 762bfb9487dc9..50d425fe6706f 100644 --- a/drivers/input/input.c +++ b/drivers/input/input.c @@ -480,11 +480,19 @@ EXPORT_SYMBOL(input_inject_event); */ void input_alloc_absinfo(struct input_dev *dev) { - if (!dev->absinfo) - dev->absinfo = kcalloc(ABS_CNT, sizeof(*dev->absinfo), - GFP_KERNEL); + if (dev->absinfo) + return; - WARN(!dev->absinfo, "%s(): kcalloc() failed?\n", __func__); + dev->absinfo = kcalloc(ABS_CNT, sizeof(*dev->absinfo), GFP_KERNEL); + if (!dev->absinfo) { + dev_err(dev->dev.parent ?: &dev->dev, + "%s: unable to allocate memory\n", __func__); + /* + * We will handle this allocation failure in + * input_register_device() when we refuse to register input + * device with ABS bits but without absinfo. + */ + } } EXPORT_SYMBOL(input_alloc_absinfo); -- GitLab From 301ae59106485b18feba6f95ff3866231f5d7109 Mon Sep 17 00:00:00 2001 From: Roger Pau Monne Date: Wed, 8 Aug 2018 13:46:41 +0200 Subject: [PATCH 0152/2269] xen/balloon: fix balloon initialization for PVH Dom0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 3596924a233e45aa918c961a902170fc4916461b ] The current balloon code tries to calculate a delta factor for the balloon target when running in HVM mode in order to account for memory used by the firmware. This workaround for memory accounting doesn't work properly on a PVH Dom0, that has a static-max value different from the target value even at startup. Note that this is not a problem for DomUs because guests are started with a static-max value that matches the amount of RAM in the memory map. Fix this by forcefully setting target_diff for Dom0, regardless of it's mode. Reported-by: Gabriel Bercarug Signed-off-by: Roger Pau Monné Reviewed-by: Juergen Gross Signed-off-by: Boris Ostrovsky Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/xen/xen-balloon.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/xen/xen-balloon.c b/drivers/xen/xen-balloon.c index b437fccd4e624..294f35ce9e46b 100644 --- a/drivers/xen/xen-balloon.c +++ b/drivers/xen/xen-balloon.c @@ -81,7 +81,7 @@ static void watch_target(struct xenbus_watch *watch, static_max = new_target; else static_max >>= PAGE_SHIFT - 10; - target_diff = xen_pv_domain() ? 0 + target_diff = (xen_pv_domain() || xen_initial_domain()) ? 0 : static_max - balloon_stats.target_pages; } -- GitLab From 50956ef66cdbcd226eeb06adab328cd44de2aca7 Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Fri, 3 Aug 2018 16:38:44 +0200 Subject: [PATCH 0153/2269] PCI: mvebu: Fix I/O space end address calculation [ Upstream commit dfd0309fd7b30a5baffaf47b2fccb88b46d64d69 ] pcie->realio.end should be the address of last byte of the area, therefore using resource_size() of another resource is not correct, we must substract 1 to get the address of the last byte. Fixes: 11be65472a427 ("PCI: mvebu: Adapt to the new device tree layout") Signed-off-by: Thomas Petazzoni Signed-off-by: Lorenzo Pieralisi Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/pci/host/pci-mvebu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/host/pci-mvebu.c b/drivers/pci/host/pci-mvebu.c index 8d88f19dc1711..12c1c1851ee63 100644 --- a/drivers/pci/host/pci-mvebu.c +++ b/drivers/pci/host/pci-mvebu.c @@ -1220,7 +1220,7 @@ static int mvebu_pcie_probe(struct platform_device *pdev) pcie->realio.start = PCIBIOS_MIN_IO; pcie->realio.end = min_t(resource_size_t, IO_SPACE_LIMIT, - resource_size(&pcie->io)); + resource_size(&pcie->io) - 1); } else pcie->realio = pcie->io; -- GitLab From 120130a757247f3c2608fbba75e5cd7e55563a51 Mon Sep 17 00:00:00 2001 From: John Pittman Date: Mon, 6 Aug 2018 15:53:12 -0400 Subject: [PATCH 0154/2269] dm kcopyd: avoid softlockup in run_complete_job [ Upstream commit 784c9a29e99eb40b842c29ecf1cc3a79e00fb629 ] It was reported that softlockups occur when using dm-snapshot ontop of slow (rbd) storage. E.g.: [ 4047.990647] watchdog: BUG: soft lockup - CPU#10 stuck for 22s! [kworker/10:23:26177] ... [ 4048.034151] Workqueue: kcopyd do_work [dm_mod] [ 4048.034156] RIP: 0010:copy_callback+0x41/0x160 [dm_snapshot] ... [ 4048.034190] Call Trace: [ 4048.034196] ? __chunk_is_tracked+0x70/0x70 [dm_snapshot] [ 4048.034200] run_complete_job+0x5f/0xb0 [dm_mod] [ 4048.034205] process_jobs+0x91/0x220 [dm_mod] [ 4048.034210] ? kcopyd_put_pages+0x40/0x40 [dm_mod] [ 4048.034214] do_work+0x46/0xa0 [dm_mod] [ 4048.034219] process_one_work+0x171/0x370 [ 4048.034221] worker_thread+0x1fc/0x3f0 [ 4048.034224] kthread+0xf8/0x130 [ 4048.034226] ? max_active_store+0x80/0x80 [ 4048.034227] ? kthread_bind+0x10/0x10 [ 4048.034231] ret_from_fork+0x35/0x40 [ 4048.034233] Kernel panic - not syncing: softlockup: hung tasks Fix this by calling cond_resched() after run_complete_job()'s callout to the dm_kcopyd_notify_fn (which is dm-snap.c:copy_callback in the above trace). Signed-off-by: John Pittman Signed-off-by: Mike Snitzer Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-kcopyd.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c index cf2c67e35eafe..d4b326914f068 100644 --- a/drivers/md/dm-kcopyd.c +++ b/drivers/md/dm-kcopyd.c @@ -484,6 +484,8 @@ static int run_complete_job(struct kcopyd_job *job) if (atomic_dec_and_test(&kc->nr_jobs)) wake_up(&kc->destroyq); + cond_resched(); + return 0; } -- GitLab From 3e86f51576615af5e8baa7c38a1d3733f1eeff82 Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Mon, 6 Aug 2018 11:05:13 +0100 Subject: [PATCH 0155/2269] staging: comedi: ni_mio_common: fix subdevice flags for PFI subdevice [ Upstream commit e083926b3e269d4064825dcf2ad50c636fddf8cf ] The PFI subdevice flags indicate that the subdevice is readable and writeable, but that is only true for the supported "M-series" boards, not the older "E-series" boards. Only set the SDF_READABLE and SDF_WRITABLE subdevice flags for the M-series boards. These two flags are mainly for informational purposes. Signed-off-by: Ian Abbott Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/staging/comedi/drivers/ni_mio_common.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/staging/comedi/drivers/ni_mio_common.c b/drivers/staging/comedi/drivers/ni_mio_common.c index 2cac160993bbd..158f3e83efb66 100644 --- a/drivers/staging/comedi/drivers/ni_mio_common.c +++ b/drivers/staging/comedi/drivers/ni_mio_common.c @@ -5453,11 +5453,11 @@ static int ni_E_init(struct comedi_device *dev, /* Digital I/O (PFI) subdevice */ s = &dev->subdevices[NI_PFI_DIO_SUBDEV]; s->type = COMEDI_SUBD_DIO; - s->subdev_flags = SDF_READABLE | SDF_WRITABLE | SDF_INTERNAL; s->maxdata = 1; if (devpriv->is_m_series) { s->n_chan = 16; s->insn_bits = ni_pfi_insn_bits; + s->subdev_flags = SDF_READABLE | SDF_WRITABLE | SDF_INTERNAL; ni_writew(dev, s->state, NI_M_PFI_DO_REG); for (i = 0; i < NUM_PFI_OUTPUT_SELECT_REGS; ++i) { @@ -5466,6 +5466,7 @@ static int ni_E_init(struct comedi_device *dev, } } else { s->n_chan = 10; + s->subdev_flags = SDF_INTERNAL; } s->insn_config = ni_pfi_insn_config; -- GitLab From 0a65ab39f8b649c4f9c4a09edbfd179ed7b3da47 Mon Sep 17 00:00:00 2001 From: Matthias Kaehlcke Date: Tue, 7 Aug 2018 10:19:40 -0700 Subject: [PATCH 0156/2269] ASoC: rt5677: Fix initialization of rt5677_of_match.data [ Upstream commit f861e3e28a3016a2064d9f600eaa92a530b732b4 ] The driver expects to find the device id in rt5677_of_match.data, however it is currently assigned to rt5677_of_match.type. Fix this. The problem was found with the help of clang: sound/soc/codecs/rt5677.c:5010:36: warning: expression which evaluates to zero treated as a null pointer constant of type 'const void *' [-Wnon-literal-null-conversion] { .compatible = "realtek,rt5677", RT5677 }, ^~~~~~ Fixes: ddc9e69b9dc2 ("ASoC: rt5677: Hide platform data in the module sources") Signed-off-by: Matthias Kaehlcke Reviewed-by: Guenter Roeck Acked-by: Andy Shevchenko Signed-off-by: Mark Brown Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- sound/soc/codecs/rt5677.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/rt5677.c b/sound/soc/codecs/rt5677.c index 0791fec398fb6..1cd20b88a3a9c 100644 --- a/sound/soc/codecs/rt5677.c +++ b/sound/soc/codecs/rt5677.c @@ -5017,7 +5017,7 @@ static const struct i2c_device_id rt5677_i2c_id[] = { MODULE_DEVICE_TABLE(i2c, rt5677_i2c_id); static const struct of_device_id rt5677_of_match[] = { - { .compatible = "realtek,rt5677", RT5677 }, + { .compatible = "realtek,rt5677", .data = (const void *)RT5677 }, { } }; MODULE_DEVICE_TABLE(of, rt5677_of_match); -- GitLab From c861151b1fe9175e37395c2c7f076da63fc62170 Mon Sep 17 00:00:00 2001 From: Ralf Goebel Date: Mon, 6 Aug 2018 17:00:36 +0200 Subject: [PATCH 0157/2269] iommu/omap: Fix cache flushes on L2 table entries [ Upstream commit 04c532a1cdc7e423656c07937aa4b5c1c2b064f9 ] The base address used for DMA operations on the second-level table did incorrectly include the offset for the table entry. The offset was then added again which lead to incorrect behavior. Operations on the L1 table are not affected. The calculation of the base address is changed to point to the beginning of the L2 table. Fixes: bfee0cf0ee1d ("iommu/omap: Use DMA-API for performing cache flushes") Acked-by: Suman Anna Signed-off-by: Ralf Goebel Signed-off-by: Joerg Roedel Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/omap-iommu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c index bd67e1b2c64ea..57960cb5e0455 100644 --- a/drivers/iommu/omap-iommu.c +++ b/drivers/iommu/omap-iommu.c @@ -529,7 +529,7 @@ static u32 *iopte_alloc(struct omap_iommu *obj, u32 *iopgd, pte_ready: iopte = iopte_offset(iopgd, da); - *pt_dma = virt_to_phys(iopte); + *pt_dma = iopgd_page_paddr(iopgd); dev_vdbg(obj->dev, "%s: da:%08x pgd:%p *pgd:%08x pte:%p *pte:%08x\n", __func__, da, iopgd, *iopgd, iopte, *iopte); @@ -717,7 +717,7 @@ static size_t iopgtable_clear_entry_core(struct omap_iommu *obj, u32 da) } bytes *= nent; memset(iopte, 0, nent * sizeof(*iopte)); - pt_dma = virt_to_phys(iopte); + pt_dma = iopgd_page_paddr(iopgd); flush_iopte_range(obj->dev, pt_dma, pt_offset, nent); /* -- GitLab From e33c8a2896ecbce3563eed297c55e1c16da9c76c Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Tue, 7 Aug 2018 11:15:39 -0300 Subject: [PATCH 0158/2269] selftests/powerpc: Kill child processes on SIGINT [ Upstream commit 7c27a26e1ed5a7dd709aa19685d2c98f64e1cf0c ] There are some powerpc selftests, as tm/tm-unavailable, that run for a long period (>120 seconds), and if it is interrupted, as pressing CRTL-C (SIGINT), the foreground process (harness) dies but the child process and threads continue to execute (with PPID = 1 now) in background. In this case, you'd think the whole test exited, but there are remaining threads and processes being executed in background. Sometimes these zombies processes are doing annoying things, as consuming the whole CPU or dumping things to STDOUT. This patch fixes this problem by attaching an empty signal handler to SIGINT in the harness process. This handler will interrupt (EINTR) the parent process waitpid() call, letting the code to follow through the normal flow, which will kill all the processes in the child process group. This patch also fixes a typo. Signed-off-by: Breno Leitao Signed-off-by: Gustavo Romero Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/powerpc/harness.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/powerpc/harness.c b/tools/testing/selftests/powerpc/harness.c index 66d31de60b9ae..9d7166dfad1ea 100644 --- a/tools/testing/selftests/powerpc/harness.c +++ b/tools/testing/selftests/powerpc/harness.c @@ -85,13 +85,13 @@ wait: return status; } -static void alarm_handler(int signum) +static void sig_handler(int signum) { - /* Jut wake us up from waitpid */ + /* Just wake us up from waitpid */ } -static struct sigaction alarm_action = { - .sa_handler = alarm_handler, +static struct sigaction sig_action = { + .sa_handler = sig_handler, }; void test_harness_set_timeout(uint64_t time) @@ -106,8 +106,14 @@ int test_harness(int (test_function)(void), char *name) test_start(name); test_set_git_version(GIT_VERSION); - if (sigaction(SIGALRM, &alarm_action, NULL)) { - perror("sigaction"); + if (sigaction(SIGINT, &sig_action, NULL)) { + perror("sigaction (sigint)"); + test_error(name); + return 1; + } + + if (sigaction(SIGALRM, &sig_action, NULL)) { + perror("sigaction (sigalrm)"); test_error(name); return 1; } -- GitLab From 9b3fa26f0092061b0acd81cc294dd3083aa6dc45 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Tue, 7 Aug 2018 19:34:16 +0800 Subject: [PATCH 0159/2269] RDS: IB: fix 'passing zero to ERR_PTR()' warning [ Upstream commit 5941923da29e84bc9e2a1abb2c14fffaf8d71e2f ] Fix a static code checker warning: net/rds/ib_frmr.c:82 rds_ib_alloc_frmr() warn: passing zero to 'ERR_PTR' The error path for ib_alloc_mr failure should set err to PTR_ERR. Fixes: 1659185fb4d0 ("RDS: IB: Support Fastreg MR (FRMR) memory registration mode") Signed-off-by: YueHaibing Acked-by: Santosh Shilimkar Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/rds/ib_frmr.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/rds/ib_frmr.c b/net/rds/ib_frmr.c index 48332a6ed7383..d290416e79e96 100644 --- a/net/rds/ib_frmr.c +++ b/net/rds/ib_frmr.c @@ -61,6 +61,7 @@ static struct rds_ib_mr *rds_ib_alloc_frmr(struct rds_ib_device *rds_ibdev, pool->fmr_attr.max_pages); if (IS_ERR(frmr->mr)) { pr_warn("RDS/IB: %s failed to allocate MR", __func__); + err = PTR_ERR(frmr->mr); goto out_no_cigar; } -- GitLab From 9dd38052a3eb5fda89c3b72c35d8ee25bd202b38 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 7 Aug 2018 16:17:29 -0700 Subject: [PATCH 0160/2269] cfq: Suppress compiler warnings about comparisons [ Upstream commit f7ecb1b109da1006a08d5675debe60990e824432 ] This patch does not change any functionality but avoids that gcc reports the following warnings when building with W=1: block/cfq-iosched.c: In function ?cfq_back_seek_max_store?: block/cfq-iosched.c:4741:13: warning: comparison of unsigned expression < 0 is always false [-Wtype-limits] if (__data < (MIN)) \ ^ block/cfq-iosched.c:4756:1: note: in expansion of macro ?STORE_FUNCTION? STORE_FUNCTION(cfq_back_seek_max_store, &cfqd->cfq_back_max, 0, UINT_MAX, 0); ^~~~~~~~~~~~~~ block/cfq-iosched.c: In function ?cfq_slice_idle_store?: block/cfq-iosched.c:4741:13: warning: comparison of unsigned expression < 0 is always false [-Wtype-limits] if (__data < (MIN)) \ ^ block/cfq-iosched.c:4759:1: note: in expansion of macro ?STORE_FUNCTION? STORE_FUNCTION(cfq_slice_idle_store, &cfqd->cfq_slice_idle, 0, UINT_MAX, 1); ^~~~~~~~~~~~~~ block/cfq-iosched.c: In function ?cfq_group_idle_store?: block/cfq-iosched.c:4741:13: warning: comparison of unsigned expression < 0 is always false [-Wtype-limits] if (__data < (MIN)) \ ^ block/cfq-iosched.c:4760:1: note: in expansion of macro ?STORE_FUNCTION? STORE_FUNCTION(cfq_group_idle_store, &cfqd->cfq_group_idle, 0, UINT_MAX, 1); ^~~~~~~~~~~~~~ block/cfq-iosched.c: In function ?cfq_low_latency_store?: block/cfq-iosched.c:4741:13: warning: comparison of unsigned expression < 0 is always false [-Wtype-limits] if (__data < (MIN)) \ ^ block/cfq-iosched.c:4765:1: note: in expansion of macro ?STORE_FUNCTION? STORE_FUNCTION(cfq_low_latency_store, &cfqd->cfq_latency, 0, 1, 0); ^~~~~~~~~~~~~~ block/cfq-iosched.c: In function ?cfq_slice_idle_us_store?: block/cfq-iosched.c:4775:13: warning: comparison of unsigned expression < 0 is always false [-Wtype-limits] if (__data < (MIN)) \ ^ block/cfq-iosched.c:4782:1: note: in expansion of macro ?USEC_STORE_FUNCTION? USEC_STORE_FUNCTION(cfq_slice_idle_us_store, &cfqd->cfq_slice_idle, 0, UINT_MAX); ^~~~~~~~~~~~~~~~~~~ block/cfq-iosched.c: In function ?cfq_group_idle_us_store?: block/cfq-iosched.c:4775:13: warning: comparison of unsigned expression < 0 is always false [-Wtype-limits] if (__data < (MIN)) \ ^ block/cfq-iosched.c:4783:1: note: in expansion of macro ?USEC_STORE_FUNCTION? USEC_STORE_FUNCTION(cfq_group_idle_us_store, &cfqd->cfq_group_idle, 0, UINT_MAX); ^~~~~~~~~~~~~~~~~~~ Signed-off-by: Bart Van Assche Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- block/cfq-iosched.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 9f342ef1ad426..9c4f1c496c90c 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -4741,12 +4741,13 @@ USEC_SHOW_FUNCTION(cfq_target_latency_us_show, cfqd->cfq_target_latency); static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count) \ { \ struct cfq_data *cfqd = e->elevator_data; \ - unsigned int __data; \ + unsigned int __data, __min = (MIN), __max = (MAX); \ + \ cfq_var_store(&__data, (page)); \ - if (__data < (MIN)) \ - __data = (MIN); \ - else if (__data > (MAX)) \ - __data = (MAX); \ + if (__data < __min) \ + __data = __min; \ + else if (__data > __max) \ + __data = __max; \ if (__CONV) \ *(__PTR) = (u64)__data * NSEC_PER_MSEC; \ else \ @@ -4775,12 +4776,13 @@ STORE_FUNCTION(cfq_target_latency_store, &cfqd->cfq_target_latency, 1, UINT_MAX, static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count) \ { \ struct cfq_data *cfqd = e->elevator_data; \ - unsigned int __data; \ + unsigned int __data, __min = (MIN), __max = (MAX); \ + \ cfq_var_store(&__data, (page)); \ - if (__data < (MIN)) \ - __data = (MIN); \ - else if (__data > (MAX)) \ - __data = (MAX); \ + if (__data < __min) \ + __data = __min; \ + else if (__data > __max) \ + __data = __max; \ *(__PTR) = (u64)__data * NSEC_PER_USEC; \ return count; \ } -- GitLab From f6a01ab96067b686472762e6a026eb575bf96141 Mon Sep 17 00:00:00 2001 From: Steve French Date: Wed, 1 Aug 2018 00:56:12 -0500 Subject: [PATCH 0161/2269] smb3: fix reset of bytes read and written stats [ Upstream commit c281bc0c7412308c7ec0888904f7c99353da4796 ] echo 0 > /proc/fs/cifs/Stats is supposed to reset the stats but there were four (see example below) that were not reset (bytes read and witten, total vfs ops and max ops at one time). ... 0 session 0 share reconnects Total vfs operations: 100 maximum at one time: 2 1) \\localhost\test SMBs: 0 Bytes read: 502092 Bytes written: 31457286 TreeConnects: 0 total 0 failed TreeDisconnects: 0 total 0 failed ... This patch fixes cifs_stats_proc_write to properly reset those four. Signed-off-by: Steve French Reviewed-by: Aurelien Aptel Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/cifs/cifs_debug.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c index 53c9c49f0fbbd..2565cee702e48 100644 --- a/fs/cifs/cifs_debug.c +++ b/fs/cifs/cifs_debug.c @@ -289,6 +289,10 @@ static ssize_t cifs_stats_proc_write(struct file *file, atomic_set(&totBufAllocCount, 0); atomic_set(&totSmBufAllocCount, 0); #endif /* CONFIG_CIFS_STATS2 */ + spin_lock(&GlobalMid_Lock); + GlobalMaxActiveXid = 0; + GlobalCurrentXid = 0; + spin_unlock(&GlobalMid_Lock); spin_lock(&cifs_tcp_ses_lock); list_for_each(tmp1, &cifs_tcp_ses_list) { server = list_entry(tmp1, struct TCP_Server_Info, @@ -301,6 +305,10 @@ static ssize_t cifs_stats_proc_write(struct file *file, struct cifs_tcon, tcon_list); atomic_set(&tcon->num_smbs_sent, 0); + spin_lock(&tcon->stat_lock); + tcon->bytes_read = 0; + tcon->bytes_written = 0; + spin_unlock(&tcon->stat_lock); if (server->ops->clear_stats) server->ops->clear_stats(tcon); } -- GitLab From e5f0192fe22096be359c72e285633e0ac0e506e8 Mon Sep 17 00:00:00 2001 From: Steve French Date: Mon, 23 Jul 2018 09:15:18 -0500 Subject: [PATCH 0162/2269] SMB3: Number of requests sent should be displayed for SMB3 not just CIFS [ Upstream commit 289131e1f1e6ad8c661ec05e176b8f0915672059 ] For SMB2/SMB3 the number of requests sent was not displayed in /proc/fs/cifs/Stats unless CONFIG_CIFS_STATS2 was enabled (only number of failed requests displayed). As with earlier dialects, we should be displaying these counters if CONFIG_CIFS_STATS is enabled. They are important for debugging. e.g. when you cat /proc/fs/cifs/Stats (before the patch) Resources in use CIFS Session: 1 Share (unique mount targets): 2 SMB Request/Response Buffer: 1 Pool size: 5 SMB Small Req/Resp Buffer: 1 Pool size: 30 Operations (MIDs): 0 0 session 0 share reconnects Total vfs operations: 690 maximum at one time: 2 1) \\localhost\test SMBs: 975 Negotiates: 0 sent 0 failed SessionSetups: 0 sent 0 failed Logoffs: 0 sent 0 failed TreeConnects: 0 sent 0 failed TreeDisconnects: 0 sent 0 failed Creates: 0 sent 2 failed Closes: 0 sent 0 failed Flushes: 0 sent 0 failed Reads: 0 sent 0 failed Writes: 0 sent 0 failed Locks: 0 sent 0 failed IOCTLs: 0 sent 1 failed Cancels: 0 sent 0 failed Echos: 0 sent 0 failed QueryDirectories: 0 sent 63 failed Signed-off-by: Steve French Reviewed-by: Aurelien Aptel Reviewed-by: Pavel Shilovsky Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/cifs/smb2pdu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index e317e9a400c14..58842b36481d7 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -393,7 +393,7 @@ small_smb2_init(__le16 smb2_command, struct cifs_tcon *tcon, pdu->hdr.smb2_buf_length = cpu_to_be32(total_len); if (tcon != NULL) { -#ifdef CONFIG_CIFS_STATS2 +#ifdef CONFIG_CIFS_STATS uint16_t com_code = le16_to_cpu(smb2_command); cifs_stats_inc(&tcon->stats.smb2_stats.smb2_com_sent[com_code]); #endif -- GitLab From 17b1473e6433ede7fc882df136144455139e2f5b Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 15 Jul 2018 10:34:46 -0700 Subject: [PATCH 0163/2269] powerpc/platforms/85xx: fix t1042rdb_diu.c build errors & warning [ Upstream commit f5daf77a55ef0e695cc90c440ed6503073ac5e07 ] Fix build errors and warnings in t1042rdb_diu.c by adding header files and MODULE_LICENSE(). ../arch/powerpc/platforms/85xx/t1042rdb_diu.c:152:1: warning: data definition has no type or storage class early_initcall(t1042rdb_diu_init); ../arch/powerpc/platforms/85xx/t1042rdb_diu.c:152:1: error: type defaults to 'int' in declaration of 'early_initcall' [-Werror=implicit-int] ../arch/powerpc/platforms/85xx/t1042rdb_diu.c:152:1: warning: parameter names (without types) in function declaration and WARNING: modpost: missing MODULE_LICENSE() in arch/powerpc/platforms/85xx/t1042rdb_diu.o Signed-off-by: Randy Dunlap Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman Cc: Scott Wood Cc: Kumar Gala Cc: linuxppc-dev@lists.ozlabs.org Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/platforms/85xx/t1042rdb_diu.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/powerpc/platforms/85xx/t1042rdb_diu.c b/arch/powerpc/platforms/85xx/t1042rdb_diu.c index 58fa3d319f1c1..dac36ba82fea8 100644 --- a/arch/powerpc/platforms/85xx/t1042rdb_diu.c +++ b/arch/powerpc/platforms/85xx/t1042rdb_diu.c @@ -9,8 +9,10 @@ * option) any later version. */ +#include #include #include +#include #include #include @@ -150,3 +152,5 @@ static int __init t1042rdb_diu_init(void) } early_initcall(t1042rdb_diu_init); + +MODULE_LICENSE("GPL"); -- GitLab From f3c124c3077d91ed3651ef8cd5e5c53efc0efd65 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 26 Jul 2018 22:42:44 +1000 Subject: [PATCH 0164/2269] powerpc/64s: Make rfi_flush_fallback a little more robust [ Upstream commit 78ee9946371f5848ddfc88ab1a43867df8f17d83 ] Because rfi_flush_fallback runs immediately before the return to userspace it currently runs with the user r1 (stack pointer). This means if we oops in there we will report a bad kernel stack pointer in the exception entry path, eg: Bad kernel stack pointer 7ffff7150e40 at c0000000000023b4 Oops: Bad kernel stack pointer, sig: 6 [#1] LE SMP NR_CPUS=32 NUMA PowerNV Modules linked in: CPU: 0 PID: 1246 Comm: klogd Not tainted 4.18.0-rc2-gcc-7.3.1-00175-g0443f8a69ba3 #7 NIP: c0000000000023b4 LR: 0000000010053e00 CTR: 0000000000000040 REGS: c0000000fffe7d40 TRAP: 4100 Not tainted (4.18.0-rc2-gcc-7.3.1-00175-g0443f8a69ba3) MSR: 9000000002803031 CR: 44000442 XER: 20000000 CFAR: c00000000000bac8 IRQMASK: c0000000f1e66a80 GPR00: 0000000002000000 00007ffff7150e40 00007fff93a99900 0000000000000020 ... NIP [c0000000000023b4] rfi_flush_fallback+0x34/0x80 LR [0000000010053e00] 0x10053e00 Although the NIP tells us where we were, and the TRAP number tells us what happened, it would still be nicer if we could report the actual exception rather than barfing about the stack pointer. We an do that fairly simply by loading the kernel stack pointer on entry and restoring the user value before returning. That way we see a regular oops such as: Unrecoverable exception 4100 at c00000000000239c Oops: Unrecoverable exception, sig: 6 [#1] LE SMP NR_CPUS=32 NUMA PowerNV Modules linked in: CPU: 0 PID: 1251 Comm: klogd Not tainted 4.18.0-rc3-gcc-7.3.1-00097-g4ebfcac65acd-dirty #40 NIP: c00000000000239c LR: 0000000010053e00 CTR: 0000000000000040 REGS: c0000000f1e17bb0 TRAP: 4100 Not tainted (4.18.0-rc3-gcc-7.3.1-00097-g4ebfcac65acd-dirty) MSR: 9000000002803031 CR: 44000442 XER: 20000000 CFAR: c00000000000bac8 IRQMASK: 0 ... NIP [c00000000000239c] rfi_flush_fallback+0x3c/0x80 LR [0000000010053e00] 0x10053e00 Call Trace: [c0000000f1e17e30] [c00000000000b9e4] system_call+0x5c/0x70 (unreliable) Note this shouldn't make the kernel stack pointer vulnerable to a meltdown attack, because it should be flushed from the cache before we return to userspace. The user r1 value will be in the cache, because we load it in the return path, but that is harmless. Signed-off-by: Michael Ellerman Reviewed-by: Nicholas Piggin Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/exceptions-64s.S | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index c09f0a6f84954..f65bb53df43bf 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1452,6 +1452,8 @@ TRAMP_REAL_BEGIN(stf_barrier_fallback) TRAMP_REAL_BEGIN(rfi_flush_fallback) SET_SCRATCH0(r13); GET_PACA(r13); + std r1,PACA_EXRFI+EX_R12(r13) + ld r1,PACAKSAVE(r13) std r9,PACA_EXRFI+EX_R9(r13) std r10,PACA_EXRFI+EX_R10(r13) std r11,PACA_EXRFI+EX_R11(r13) @@ -1486,12 +1488,15 @@ TRAMP_REAL_BEGIN(rfi_flush_fallback) ld r9,PACA_EXRFI+EX_R9(r13) ld r10,PACA_EXRFI+EX_R10(r13) ld r11,PACA_EXRFI+EX_R11(r13) + ld r1,PACA_EXRFI+EX_R12(r13) GET_SCRATCH0(r13); rfid TRAMP_REAL_BEGIN(hrfi_flush_fallback) SET_SCRATCH0(r13); GET_PACA(r13); + std r1,PACA_EXRFI+EX_R12(r13) + ld r1,PACAKSAVE(r13) std r9,PACA_EXRFI+EX_R9(r13) std r10,PACA_EXRFI+EX_R10(r13) std r11,PACA_EXRFI+EX_R11(r13) @@ -1526,6 +1531,7 @@ TRAMP_REAL_BEGIN(hrfi_flush_fallback) ld r9,PACA_EXRFI+EX_R9(r13) ld r10,PACA_EXRFI+EX_R10(r13) ld r11,PACA_EXRFI+EX_R11(r13) + ld r1,PACA_EXRFI+EX_R12(r13) GET_SCRATCH0(r13); hrfid -- GitLab From 92ef9425c6e121b01e734f512e20d699370b04a6 Mon Sep 17 00:00:00 2001 From: Mahesh Salgaonkar Date: Wed, 4 Jul 2018 23:27:02 +0530 Subject: [PATCH 0165/2269] powerpc/pseries: Avoid using the size greater than RTAS_ERROR_LOG_MAX. [ Upstream commit 74e96bf44f430cf7a01de19ba6cf49b361cdfd6e ] The global mce data buffer that used to copy rtas error log is of 2048 (RTAS_ERROR_LOG_MAX) bytes in size. Before the copy we read extended_log_length from rtas error log header, then use max of extended_log_length and RTAS_ERROR_LOG_MAX as a size of data to be copied. Ideally the platform (phyp) will never send extended error log with size > 2048. But if that happens, then we have a risk of buffer overrun and corruption. Fix this by using min_t instead. Fixes: d368514c3097 ("powerpc: Fix corruption when grabbing FWNMI data") Reported-by: Michal Suchanek Signed-off-by: Mahesh Salgaonkar Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/platforms/pseries/ras.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index 2edc673be1374..99d1152ae2241 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c @@ -371,7 +371,7 @@ static struct rtas_error_log *fwnmi_get_errinfo(struct pt_regs *regs) int len, error_log_length; error_log_length = 8 + rtas_error_extended_log_length(h); - len = max_t(int, error_log_length, RTAS_ERROR_LOG_MAX); + len = min_t(int, error_log_length, RTAS_ERROR_LOG_MAX); memset(global_mce_data_buf, 0, RTAS_ERROR_LOG_MAX); memcpy(global_mce_data_buf, h, len); errhdr = (struct rtas_error_log *)global_mce_data_buf; -- GitLab From 0aa49a4ddc8e10c7c924ad72d6c00346f421afad Mon Sep 17 00:00:00 2001 From: Levin Du Date: Sat, 4 Aug 2018 15:31:02 +0800 Subject: [PATCH 0166/2269] clk: rockchip: Add pclk_rkpwm_pmu to PMU critical clocks in rk3399 [ Upstream commit 640332d1a089909df08bc9f3e42888a2019c66e2 ] PWM2 is commonly used to control voltage of PWM regulator of VDD_LOG in RK3399. On the Firefly-RK3399 board, PWM2 outputs 40 KHz square wave from power on and the VDD_LOG is about 0.9V. When the kernel boots normally into the system, the PWM2 keeps outputing PWM signal. But the kernel hangs randomly after "Starting kernel ..." line on that board. When it happens, PWM2 outputs high level which causes VDD_LOG drops to 0.4V below the normal operating voltage. By adding "pclk_rkpwm_pmu" to the rk3399_pmucru_critical_clocks array, PWM clock is ensured to be prepared at startup and the PWM2 output is normal. After repeated tests, the early boot hang is gone. This patch works on both Firefly-RK3399 and ROC-RK3399-PC boards. Signed-off-by: Levin Du Signed-off-by: Heiko Stuebner Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/clk/rockchip/clk-rk3399.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/clk/rockchip/clk-rk3399.c b/drivers/clk/rockchip/clk-rk3399.c index 62d0a69f8da01..3acf5f041e3c3 100644 --- a/drivers/clk/rockchip/clk-rk3399.c +++ b/drivers/clk/rockchip/clk-rk3399.c @@ -1522,6 +1522,7 @@ static const char *const rk3399_pmucru_critical_clocks[] __initconst = { "pclk_pmu_src", "fclk_cm0s_src_pmu", "clk_timer_src_pmu", + "pclk_rkpwm_pmu", }; static void __init rk3399_clk_init(struct device_node *np) -- GitLab From 1187e0186d81f5ae0018f75acf8df979b86095a1 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Mon, 23 Jul 2018 12:32:42 -0700 Subject: [PATCH 0167/2269] KVM: vmx: track host_state.loaded using a loaded_vmcs pointer [ Upstream commit bd9966de4e14fb559e89a06f7f5c9aab2cc028b9 ] Using 'struct loaded_vmcs*' to track whether the CPU registers contain host or guest state kills two birds with one stone. 1. The (effective) boolean host_state.loaded is poorly named. It does not track whether or not host state is loaded into the CPU registers (which most readers would expect), but rather tracks if host state has been saved AND guest state is loaded. 2. Using a loaded_vmcs pointer provides a more robust framework for the optimized guest/host state switching, especially when consideration per-VMCS enhancements. To that end, WARN_ONCE if we try to switch to host state with a different VMCS than was last used to save host state. Resolve an occurrence of the new WARN by setting loaded_vmcs after the call to vmx_vcpu_put() in vmx_switch_vmcs(). Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index a466ee14ad41b..a60d8aafe4214 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -749,17 +749,21 @@ struct vcpu_vmx { /* * loaded_vmcs points to the VMCS currently used in this vcpu. For a * non-nested (L1) guest, it always points to vmcs01. For a nested - * guest (L2), it points to a different VMCS. + * guest (L2), it points to a different VMCS. loaded_cpu_state points + * to the VMCS whose state is loaded into the CPU registers that only + * need to be switched when transitioning to/from the kernel; a NULL + * value indicates that host state is loaded. */ struct loaded_vmcs vmcs01; struct loaded_vmcs *loaded_vmcs; + struct loaded_vmcs *loaded_cpu_state; bool __launched; /* temporary, used in vmx_vcpu_run */ struct msr_autoload { struct vmx_msrs guest; struct vmx_msrs host; } msr_autoload; + struct { - int loaded; u16 fs_sel, gs_sel, ldt_sel; #ifdef CONFIG_X86_64 u16 ds_sel, es_sel; @@ -2336,10 +2340,11 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu) struct vcpu_vmx *vmx = to_vmx(vcpu); int i; - if (vmx->host_state.loaded) + if (vmx->loaded_cpu_state) return; - vmx->host_state.loaded = 1; + vmx->loaded_cpu_state = vmx->loaded_vmcs; + /* * Set host fs and gs selectors. Unfortunately, 22.2.3 does not * allow segment selectors with cpl > 0 or ti == 1. @@ -2390,11 +2395,14 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu) static void __vmx_load_host_state(struct vcpu_vmx *vmx) { - if (!vmx->host_state.loaded) + if (!vmx->loaded_cpu_state) return; + WARN_ON_ONCE(vmx->loaded_cpu_state != vmx->loaded_vmcs); + ++vmx->vcpu.stat.host_state_reload; - vmx->host_state.loaded = 0; + vmx->loaded_cpu_state = NULL; + #ifdef CONFIG_X86_64 if (is_long_mode(&vmx->vcpu)) rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base); @@ -9944,8 +9952,8 @@ static void vmx_switch_vmcs(struct kvm_vcpu *vcpu, struct loaded_vmcs *vmcs) return; cpu = get_cpu(); - vmx->loaded_vmcs = vmcs; vmx_vcpu_put(vcpu); + vmx->loaded_vmcs = vmcs; vmx_vcpu_load(vcpu, cpu); vcpu->cpu = cpu; put_cpu(); -- GitLab From 80f97c79f3e1ce183ed5cc1cf08e01cb85999219 Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Fri, 27 Jul 2018 09:18:50 -0700 Subject: [PATCH 0168/2269] kvm: nVMX: Fix fault vector for VMX operation at CPL > 0 [ Upstream commit 36090bf43a6b835a42f515cb515ff6fa293a25fe ] The fault that should be raised for a privilege level violation is #GP rather than #UD. Fixes: 727ba748e110b4 ("kvm: nVMX: Enforce cpl=0 for VMX instructions") Signed-off-by: Jim Mattson Reviewed-by: David Hildenbrand Signed-off-by: Paolo Bonzini Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index a60d8aafe4214..4e5a8e30cc4e8 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -7590,7 +7590,7 @@ static int handle_vmon(struct kvm_vcpu *vcpu) /* CPL=0 must be checked manually. */ if (vmx_get_cpl(vcpu)) { - kvm_queue_exception(vcpu, UD_VECTOR); + kvm_inject_gp(vcpu, 0); return 1; } @@ -7654,7 +7654,7 @@ static int handle_vmon(struct kvm_vcpu *vcpu) static int nested_vmx_check_permission(struct kvm_vcpu *vcpu) { if (vmx_get_cpl(vcpu)) { - kvm_queue_exception(vcpu, UD_VECTOR); + kvm_inject_gp(vcpu, 0); return 0; } -- GitLab From 145b1f56b99238ec37dfa2b0e9dec039d4d1d2cd Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Wed, 1 Aug 2018 10:37:20 +0800 Subject: [PATCH 0169/2269] btrfs: Exit gracefully when chunk map cannot be inserted to the tree [ Upstream commit 64f64f43c89aca1782aa672e0586f6903c5d8979 ] It's entirely possible that a crafted btrfs image contains overlapping chunks. Although we can't detect such problem by tree-checker, it's not a catastrophic problem, current extent map can already detect such problem and return -EEXIST. We just only need to exit gracefully and fail the mount. Reported-by: Xu Wen Link: https://bugzilla.kernel.org/show_bug.cgi?id=200409 Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/volumes.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 08afafb6ecf76..a39b1f0b06066 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -6492,10 +6492,14 @@ static int read_one_chunk(struct btrfs_fs_info *fs_info, struct btrfs_key *key, write_lock(&map_tree->map_tree.lock); ret = add_extent_mapping(&map_tree->map_tree, em, 0); write_unlock(&map_tree->map_tree.lock); - BUG_ON(ret); /* Tree corruption */ + if (ret < 0) { + btrfs_err(fs_info, + "failed to add chunk map, start=%llu len=%llu: %d", + em->start, em->len, ret); + } free_extent_map(em); - return 0; + return ret; } static void fill_device_from_item(struct extent_buffer *leaf, -- GitLab From 2f92584bf1f679ea5111625277d837f48ae33386 Mon Sep 17 00:00:00 2001 From: Misono Tomohiro Date: Tue, 31 Jul 2018 16:20:21 +0900 Subject: [PATCH 0170/2269] btrfs: replace: Reset on-disk dev stats value after replace [ Upstream commit 1e7e1f9e3aba00c9b9c323bfeeddafe69ff21ff6 ] on-disk devs stats value is updated in btrfs_run_dev_stats(), which is called during commit transaction, if device->dev_stats_ccnt is not zero. Since current replace operation does not touch dev_stats_ccnt, on-disk dev stats value is not updated. Therefore "btrfs device stats" may return old device's value after umount/mount (Example: See "btrfs ins dump-t -t DEV $DEV" after btrfs/100 finish). Fix this by just incrementing dev_stats_ccnt in btrfs_dev_replace_finishing() when replace is succeeded and this will update the values. Signed-off-by: Misono Tomohiro Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/dev-replace.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index 7c655f9a7a504..dd80a1bdf9e27 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -588,6 +588,12 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, btrfs_rm_dev_replace_unblocked(fs_info); + /* + * Increment dev_stats_ccnt so that btrfs_run_dev_stats() will + * update on-disk dev stats value during commit transaction + */ + atomic_inc(&tgt_device->dev_stats_ccnt); + /* * this is again a consistent state where no dev_replace procedure * is running, the target device is part of the filesystem, the -- GitLab From 0cdbc3faf960de16ebe8a427feb3b0544ad983cc Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Tue, 3 Jul 2018 17:10:07 +0800 Subject: [PATCH 0171/2269] btrfs: relocation: Only remove reloc rb_trees if reloc control has been initialized [ Upstream commit 389305b2aa68723c754f88d9dbd268a400e10664 ] Invalid reloc tree can cause kernel NULL pointer dereference when btrfs does some cleanup of the reloc roots. It turns out that fs_info::reloc_ctl can be NULL in btrfs_recover_relocation() as we allocate relocation control after all reloc roots have been verified. So when we hit: note, we haven't called set_reloc_control() thus fs_info::reloc_ctl is still NULL. Link: https://bugzilla.kernel.org/show_bug.cgi?id=199833 Reported-by: Xu Wen Signed-off-by: Qu Wenruo Tested-by: Gu Jinxiang Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/relocation.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 9841faef08ea7..b80b03e0c5d3f 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -1334,18 +1334,19 @@ static void __del_reloc_root(struct btrfs_root *root) struct mapping_node *node = NULL; struct reloc_control *rc = fs_info->reloc_ctl; - spin_lock(&rc->reloc_root_tree.lock); - rb_node = tree_search(&rc->reloc_root_tree.rb_root, - root->node->start); - if (rb_node) { - node = rb_entry(rb_node, struct mapping_node, rb_node); - rb_erase(&node->rb_node, &rc->reloc_root_tree.rb_root); + if (rc) { + spin_lock(&rc->reloc_root_tree.lock); + rb_node = tree_search(&rc->reloc_root_tree.rb_root, + root->node->start); + if (rb_node) { + node = rb_entry(rb_node, struct mapping_node, rb_node); + rb_erase(&node->rb_node, &rc->reloc_root_tree.rb_root); + } + spin_unlock(&rc->reloc_root_tree.lock); + if (!node) + return; + BUG_ON((struct btrfs_root *)node->data != root); } - spin_unlock(&rc->reloc_root_tree.lock); - - if (!node) - return; - BUG_ON((struct btrfs_root *)node->data != root); spin_lock(&fs_info->trans_lock); list_del_init(&root->root_list); -- GitLab From 33f4c5c680447e8eae9b90c649915ce618a22ad0 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Fri, 22 Jun 2018 12:35:00 +0800 Subject: [PATCH 0172/2269] btrfs: Don't remove block group that still has pinned down bytes [ Upstream commit 43794446548730ac8461be30bbe47d5d027d1d16 ] [BUG] Under certain KVM load and LTP tests, it is possible to hit the following calltrace if quota is enabled: BTRFS critical (device vda2): unable to find logical 8820195328 length 4096 BTRFS critical (device vda2): unable to find logical 8820195328 length 4096 WARNING: CPU: 0 PID: 49 at ../block/blk-core.c:172 blk_status_to_errno+0x1a/0x30 CPU: 0 PID: 49 Comm: kworker/u2:1 Not tainted 4.12.14-15-default #1 SLE15 (unreleased) Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.0.0-prebuilt.qemu-project.org 04/01/2014 Workqueue: btrfs-endio-write btrfs_endio_write_helper [btrfs] task: ffff9f827b340bc0 task.stack: ffffb4f8c0304000 RIP: 0010:blk_status_to_errno+0x1a/0x30 Call Trace: submit_extent_page+0x191/0x270 [btrfs] ? btrfs_create_repair_bio+0x130/0x130 [btrfs] __do_readpage+0x2d2/0x810 [btrfs] ? btrfs_create_repair_bio+0x130/0x130 [btrfs] ? run_one_async_done+0xc0/0xc0 [btrfs] __extent_read_full_page+0xe7/0x100 [btrfs] ? run_one_async_done+0xc0/0xc0 [btrfs] read_extent_buffer_pages+0x1ab/0x2d0 [btrfs] ? run_one_async_done+0xc0/0xc0 [btrfs] btree_read_extent_buffer_pages+0x94/0xf0 [btrfs] read_tree_block+0x31/0x60 [btrfs] read_block_for_search.isra.35+0xf0/0x2e0 [btrfs] btrfs_search_slot+0x46b/0xa00 [btrfs] ? kmem_cache_alloc+0x1a8/0x510 ? btrfs_get_token_32+0x5b/0x120 [btrfs] find_parent_nodes+0x11d/0xeb0 [btrfs] ? leaf_space_used+0xb8/0xd0 [btrfs] ? btrfs_leaf_free_space+0x49/0x90 [btrfs] ? btrfs_find_all_roots_safe+0x93/0x100 [btrfs] btrfs_find_all_roots_safe+0x93/0x100 [btrfs] btrfs_find_all_roots+0x45/0x60 [btrfs] btrfs_qgroup_trace_extent_post+0x20/0x40 [btrfs] btrfs_add_delayed_data_ref+0x1a3/0x1d0 [btrfs] btrfs_alloc_reserved_file_extent+0x38/0x40 [btrfs] insert_reserved_file_extent.constprop.71+0x289/0x2e0 [btrfs] btrfs_finish_ordered_io+0x2f4/0x7f0 [btrfs] ? pick_next_task_fair+0x2cd/0x530 ? __switch_to+0x92/0x4b0 btrfs_worker_helper+0x81/0x300 [btrfs] process_one_work+0x1da/0x3f0 worker_thread+0x2b/0x3f0 ? process_one_work+0x3f0/0x3f0 kthread+0x11a/0x130 ? kthread_create_on_node+0x40/0x40 ret_from_fork+0x35/0x40 BTRFS critical (device vda2): unable to find logical 8820195328 length 16384 BTRFS: error (device vda2) in btrfs_finish_ordered_io:3023: errno=-5 IO failure BTRFS info (device vda2): forced readonly BTRFS error (device vda2): pending csums is 2887680 [CAUSE] It's caused by race with block group auto removal: - There is a meta block group X, which has only one tree block The tree block belongs to fs tree 257. - In current transaction, some operation modified fs tree 257 The tree block gets COWed, so the block group X is empty, and marked as unused, queued to be deleted. - Some workload (like fsync) wakes up cleaner_kthread() Which will call btrfs_delete_unused_bgs() to remove unused block groups. So block group X along its chunk map get removed. - Some delalloc work finished for fs tree 257 Quota needs to get the original reference of the extent, which will read tree blocks of commit root of 257. Then since the chunk map gets removed, the above warning gets triggered. [FIX] Just let btrfs_delete_unused_bgs() skip block group which still has pinned bytes. However there is a minor side effect: currently we only queue empty blocks at update_block_group(), and such empty block group with pinned bytes won't go through update_block_group() again, such block group won't be removed, until it gets new extent allocated and removed. Signed-off-by: Qu Wenruo Reviewed-by: Filipe Manana Signed-off-by: David Sterba Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/extent-tree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index bbabe37c2e8cd..f96f72659693a 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -10757,7 +10757,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) /* Don't want to race with allocators so take the groups_sem */ down_write(&space_info->groups_sem); spin_lock(&block_group->lock); - if (block_group->reserved || + if (block_group->reserved || block_group->pinned || btrfs_block_group_used(&block_group->item) || block_group->ro || list_is_singular(&block_group->list)) { -- GitLab From 1f62d35cd2e176d26a2fd9c67e0b60ee9da71239 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 24 Aug 2018 16:06:35 +0100 Subject: [PATCH 0173/2269] arm64: rockchip: Force CONFIG_PM on Rockchip systems [ Upstream commit 7db7a8f5638a2ffe0c0c0d55b5186b6191fd6af7 ] A number of the Rockchip-specific drivers (IOMMU, display controllers) are now assuming that CONFIG_PM is set, and may completely misbehave if that's not the case. Since there is hardly any reason for this configuration option not to be selected anyway, let's require it (in the same way Tegra already does). Signed-off-by: Marc Zyngier Signed-off-by: Olof Johansson Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arm64/Kconfig.platforms | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/Kconfig.platforms b/arch/arm64/Kconfig.platforms index 6b54ee8c1262d..456d215422502 100644 --- a/arch/arm64/Kconfig.platforms +++ b/arch/arm64/Kconfig.platforms @@ -148,6 +148,7 @@ config ARCH_ROCKCHIP select GPIOLIB select PINCTRL select PINCTRL_ROCKCHIP + select PM select ROCKCHIP_TIMER help This enables support for the ARMv8 based Rockchip chipsets, -- GitLab From a35d3352434d6136b7ad8933bbcc2441cb9f2007 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 24 Aug 2018 16:06:34 +0100 Subject: [PATCH 0174/2269] ARM: rockchip: Force CONFIG_PM on Rockchip systems [ Upstream commit d1558dfd9f22c99a5b8e1354ad881ee40749da89 ] A number of the Rockchip-specific drivers (IOMMU, display controllers) are now assuming that CONFIG_PM is set, and may completely misbehave if that's not the case. Since there is hardly any reason for this configuration option not to be selected anyway, let's require it (in the same way Tegra already does). Signed-off-by: Marc Zyngier Signed-off-by: Olof Johansson Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-rockchip/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/mach-rockchip/Kconfig b/arch/arm/mach-rockchip/Kconfig index a4065966881ae..57f0bc4cd9b89 100644 --- a/arch/arm/mach-rockchip/Kconfig +++ b/arch/arm/mach-rockchip/Kconfig @@ -18,6 +18,7 @@ config ARCH_ROCKCHIP select ARM_GLOBAL_TIMER select CLKSRC_ARM_GLOBAL_TIMER_SCHED_CLOCK select ZONE_DMA if ARM_LPAE + select PM help Support for Rockchip's Cortex-A9 Single-to-Quad-Core-SoCs containing the RK2928, RK30xx and RK31xx series. -- GitLab From 7e1a6951ca99e78a7d8f3b3c77c6bbaef2462bf8 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 2 Aug 2018 15:04:16 +0100 Subject: [PATCH 0175/2269] drm/i915/lpe: Mark LPE audio runtime pm as "no callbacks" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 75eef0f1ed478284911b8723a5bdb659499a7aac upstream. The LPE audio is a child device of i915, it is powered up and down alongside the igfx and presents no independent runtime interface. This aptly fulfils the description of a "No-Callback" Device, so mark it thus. Fixes: 183c00350ccd ("drm/i915: Fix runtime PM for LPE audio") Testcase: igt/pm_rpm/basic-pci-d3-state Testcase: igt/pm_rpm/basic-rte Signed-off-by: Chris Wilson Cc: Takashi Iwai Cc: Pierre-Louis Bossart Cc: Ville Syrjälä Cc: stable@vger.kernel.org Reviewed-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20180802140416.6062-1-chris@chris-wilson.co.uk (cherry picked from commit 46e831abe864a6b59fa3de253a681c0f2ee1bf2f) Signed-off-by: Rodrigo Vivi Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/intel_lpe_audio.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lpe_audio.c b/drivers/gpu/drm/i915/intel_lpe_audio.c index 2fdf302ebdad0..8a541d0e3e80d 100644 --- a/drivers/gpu/drm/i915/intel_lpe_audio.c +++ b/drivers/gpu/drm/i915/intel_lpe_audio.c @@ -128,9 +128,7 @@ lpe_audio_platdev_create(struct drm_i915_private *dev_priv) kfree(rsc); - pm_runtime_forbid(&platdev->dev); - pm_runtime_set_active(&platdev->dev); - pm_runtime_enable(&platdev->dev); + pm_runtime_no_callbacks(&platdev->dev); return platdev; -- GitLab From c46a1b2622fa12f328ebb3f62072a04c4bf7100e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michel=20D=C3=A4nzer?= Date: Thu, 19 Jul 2018 18:33:39 +0200 Subject: [PATCH 0176/2269] drm/amdgpu: Fix RLC safe mode test in gfx_v9_0_enter_rlc_safe_mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 226127a67e31a9518d9516d3e4890759b379d874 upstream. We were testing the register offset, instead of the value stored in the register, therefore always timing out the loop. This reduces suspend time of the system in the bug report below by ~600 ms. Cc: stable@vger.kernel.org Bugzilla: https://bugs.freedesktop.org/107277 Tested-by: Paul Menzel Reviewed-by: Alex Deucher Reviewed-by: Junwei Zhang Signed-off-by: Michel Dänzer Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 1a30c54a0889f..3981915e2311f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -3113,7 +3113,7 @@ static void gfx_v9_0_enter_rlc_safe_mode(struct amdgpu_device *adev) /* wait for RLC_SAFE_MODE */ for (i = 0; i < adev->usec_timeout; i++) { - if (!REG_GET_FIELD(SOC15_REG_OFFSET(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD)) + if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD)) break; udelay(1); } -- GitLab From d991f8ae4071e435b0fed4a4313822be6f09e417 Mon Sep 17 00:00:00 2001 From: Rex Zhu Date: Fri, 20 Jul 2018 16:26:46 +0800 Subject: [PATCH 0177/2269] drm/amd/pp/Polaris12: Fix a chunk of registers missed to program commit 2d227ec2c11c568910299e8f913bac2dda47397c upstream. DIDTConfig_Polaris12[] table missed a big chunk of data. Pointed by aidan.fabius Reviewed-by: Alex Deucher Signed-off-by: Rex Zhu Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- .../drm/amd/powerplay/hwmgr/smu7_powertune.c | 43 +++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_powertune.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_powertune.c index 1dc31aa727817..12856de09f572 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_powertune.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_powertune.c @@ -403,6 +403,49 @@ static const struct gpu_pt_config_reg DIDTConfig_Polaris12[] = { { ixDIDT_SQ_CTRL1, DIDT_SQ_CTRL1__MAX_POWER_MASK, DIDT_SQ_CTRL1__MAX_POWER__SHIFT, 0xffff, GPU_CONFIGREG_DIDT_IND }, { ixDIDT_SQ_CTRL_OCP, DIDT_SQ_CTRL_OCP__UNUSED_0_MASK, DIDT_SQ_CTRL_OCP__UNUSED_0__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_SQ_CTRL_OCP, DIDT_SQ_CTRL_OCP__OCP_MAX_POWER_MASK, DIDT_SQ_CTRL_OCP__OCP_MAX_POWER__SHIFT, 0xffff, GPU_CONFIGREG_DIDT_IND }, + + { ixDIDT_SQ_CTRL2, DIDT_SQ_CTRL2__MAX_POWER_DELTA_MASK, DIDT_SQ_CTRL2__MAX_POWER_DELTA__SHIFT, 0x3853, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_SQ_CTRL2, DIDT_SQ_CTRL2__UNUSED_0_MASK, DIDT_SQ_CTRL2__UNUSED_0__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_SQ_CTRL2, DIDT_SQ_CTRL2__SHORT_TERM_INTERVAL_SIZE_MASK, DIDT_SQ_CTRL2__SHORT_TERM_INTERVAL_SIZE__SHIFT, 0x005a, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_SQ_CTRL2, DIDT_SQ_CTRL2__UNUSED_1_MASK, DIDT_SQ_CTRL2__UNUSED_1__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_SQ_CTRL2, DIDT_SQ_CTRL2__LONG_TERM_INTERVAL_RATIO_MASK, DIDT_SQ_CTRL2__LONG_TERM_INTERVAL_RATIO__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_SQ_CTRL2, DIDT_SQ_CTRL2__UNUSED_2_MASK, DIDT_SQ_CTRL2__UNUSED_2__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND }, + + { ixDIDT_SQ_STALL_CTRL, DIDT_SQ_STALL_CTRL__DIDT_STALL_CTRL_ENABLE_MASK, DIDT_SQ_STALL_CTRL__DIDT_STALL_CTRL_ENABLE__SHIFT, 0x0001, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_SQ_STALL_CTRL, DIDT_SQ_STALL_CTRL__DIDT_STALL_DELAY_HI_MASK, DIDT_SQ_STALL_CTRL__DIDT_STALL_DELAY_HI__SHIFT, 0x0001, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_SQ_STALL_CTRL, DIDT_SQ_STALL_CTRL__DIDT_STALL_DELAY_LO_MASK, DIDT_SQ_STALL_CTRL__DIDT_STALL_DELAY_LO__SHIFT, 0x0001, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_SQ_STALL_CTRL, DIDT_SQ_STALL_CTRL__DIDT_HI_POWER_THRESHOLD_MASK, DIDT_SQ_STALL_CTRL__DIDT_HI_POWER_THRESHOLD__SHIFT, 0x0ebb, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_SQ_STALL_CTRL, DIDT_SQ_STALL_CTRL__UNUSED_0_MASK, DIDT_SQ_STALL_CTRL__UNUSED_0__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND }, + + { ixDIDT_SQ_TUNING_CTRL, DIDT_SQ_TUNING_CTRL__DIDT_TUNING_ENABLE_MASK, DIDT_SQ_TUNING_CTRL__DIDT_TUNING_ENABLE__SHIFT, 0x0001, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_SQ_TUNING_CTRL, DIDT_SQ_TUNING_CTRL__MAX_POWER_DELTA_HI_MASK, DIDT_SQ_TUNING_CTRL__MAX_POWER_DELTA_HI__SHIFT, 0x3853, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_SQ_TUNING_CTRL, DIDT_SQ_TUNING_CTRL__MAX_POWER_DELTA_LO_MASK, DIDT_SQ_TUNING_CTRL__MAX_POWER_DELTA_LO__SHIFT, 0x3153, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_SQ_TUNING_CTRL, DIDT_SQ_TUNING_CTRL__UNUSED_0_MASK, DIDT_SQ_TUNING_CTRL__UNUSED_0__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND }, + + { ixDIDT_SQ_CTRL0, DIDT_SQ_CTRL0__DIDT_CTRL_EN_MASK, DIDT_SQ_CTRL0__DIDT_CTRL_EN__SHIFT, 0x0001, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_SQ_CTRL0, DIDT_SQ_CTRL0__USE_REF_CLOCK_MASK, DIDT_SQ_CTRL0__USE_REF_CLOCK__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_SQ_CTRL0, DIDT_SQ_CTRL0__PHASE_OFFSET_MASK, DIDT_SQ_CTRL0__PHASE_OFFSET__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_SQ_CTRL0, DIDT_SQ_CTRL0__DIDT_CTRL_RST_MASK, DIDT_SQ_CTRL0__DIDT_CTRL_RST__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_SQ_CTRL0, DIDT_SQ_CTRL0__DIDT_CLK_EN_OVERRIDE_MASK, DIDT_SQ_CTRL0__DIDT_CLK_EN_OVERRIDE__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_SQ_CTRL0, DIDT_SQ_CTRL0__DIDT_MAX_STALLS_ALLOWED_HI_MASK, DIDT_SQ_CTRL0__DIDT_MAX_STALLS_ALLOWED_HI__SHIFT, 0x0010, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_SQ_CTRL0, DIDT_SQ_CTRL0__DIDT_MAX_STALLS_ALLOWED_LO_MASK, DIDT_SQ_CTRL0__DIDT_MAX_STALLS_ALLOWED_LO__SHIFT, 0x0010, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_SQ_CTRL0, DIDT_SQ_CTRL0__UNUSED_0_MASK, DIDT_SQ_CTRL0__UNUSED_0__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND }, + + { ixDIDT_TD_WEIGHT0_3, DIDT_TD_WEIGHT0_3__WEIGHT0_MASK, DIDT_TD_WEIGHT0_3__WEIGHT0__SHIFT, 0x000a, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TD_WEIGHT0_3, DIDT_TD_WEIGHT0_3__WEIGHT1_MASK, DIDT_TD_WEIGHT0_3__WEIGHT1__SHIFT, 0x0010, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TD_WEIGHT0_3, DIDT_TD_WEIGHT0_3__WEIGHT2_MASK, DIDT_TD_WEIGHT0_3__WEIGHT2__SHIFT, 0x0017, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TD_WEIGHT0_3, DIDT_TD_WEIGHT0_3__WEIGHT3_MASK, DIDT_TD_WEIGHT0_3__WEIGHT3__SHIFT, 0x002f, GPU_CONFIGREG_DIDT_IND }, + + { ixDIDT_TD_WEIGHT4_7, DIDT_TD_WEIGHT4_7__WEIGHT4_MASK, DIDT_TD_WEIGHT4_7__WEIGHT4__SHIFT, 0x0046, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TD_WEIGHT4_7, DIDT_TD_WEIGHT4_7__WEIGHT5_MASK, DIDT_TD_WEIGHT4_7__WEIGHT5__SHIFT, 0x005d, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TD_WEIGHT4_7, DIDT_TD_WEIGHT4_7__WEIGHT6_MASK, DIDT_TD_WEIGHT4_7__WEIGHT6__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TD_WEIGHT4_7, DIDT_TD_WEIGHT4_7__WEIGHT7_MASK, DIDT_TD_WEIGHT4_7__WEIGHT7__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND }, + + { ixDIDT_TD_CTRL1, DIDT_TD_CTRL1__MIN_POWER_MASK, DIDT_TD_CTRL1__MIN_POWER__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TD_CTRL1, DIDT_TD_CTRL1__MAX_POWER_MASK, DIDT_TD_CTRL1__MAX_POWER__SHIFT, 0xffff, GPU_CONFIGREG_DIDT_IND }, + + { ixDIDT_TD_CTRL_OCP, DIDT_TD_CTRL_OCP__UNUSED_0_MASK, DIDT_TD_CTRL_OCP__UNUSED_0__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND }, { ixDIDT_TD_CTRL_OCP, DIDT_TD_CTRL_OCP__OCP_MAX_POWER_MASK, DIDT_TD_CTRL_OCP__OCP_MAX_POWER__SHIFT, 0x00ff, GPU_CONFIGREG_DIDT_IND }, { ixDIDT_TD_CTRL2, DIDT_TD_CTRL2__MAX_POWER_DELTA_MASK, DIDT_TD_CTRL2__MAX_POWER_DELTA__SHIFT, 0x3fff, GPU_CONFIGREG_DIDT_IND }, -- GitLab From f40ecf3fe04a080a9de1231592f90ddb01de702a Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Thu, 23 Aug 2018 05:53:32 +0000 Subject: [PATCH 0178/2269] drm/edid: Add 6 bpc quirk for SDC panel in Lenovo B50-80 commit 25da75043f8690fd083878447c91f289dfb63b87 upstream. Another panel that reports "DFP 1.x compliant TMDS" but it supports 6bpc instead of 8 bpc. Apply 6 bpc quirk for the panel to fix it. BugLink: https://bugs.launchpad.net/bugs/1788308 Cc: # v4.8+ Signed-off-by: Kai-Heng Feng Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20180823055332.7723-1-kai.heng.feng@canonical.com Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_edid.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index 1f1fd3139c5b8..c29dea8956056 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -114,6 +114,9 @@ static const struct edid_quirk { /* CPT panel of Asus UX303LA reports 8 bpc, but is a 6 bpc panel */ { "CPT", 0x17df, EDID_QUIRK_FORCE_6BPC }, + /* SDC panel of Lenovo B50-80 reports 8 bpc, but is a 6 bpc panel */ + { "SDC", 0x3652, EDID_QUIRK_FORCE_6BPC }, + /* Belinea 10 15 55 */ { "MAX", 1516, EDID_QUIRK_PREFER_LARGE_60 }, { "MAX", 0x77e, EDID_QUIRK_PREFER_LARGE_60 }, -- GitLab From 65d20e40552f5a1f13540ef33d47ec95b22719f3 Mon Sep 17 00:00:00 2001 From: James Zhu Date: Fri, 10 Aug 2018 00:31:39 +0800 Subject: [PATCH 0179/2269] drm/amdgpu: update tmr mc address commit 435198f33b56d7b875a8173a0227ddf0de285aa1 upstream. Update tmr mc address with firmware loading address which is returned from PSP firmware Signed-off-by: James Zhu Reviewed-by: Alex Deucher Acked-by: Huang Rui Reviewed-by: Likun Gao Signed-off-by: Likun Gao Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 8c2204c7b3847..7ad8fa891ce6b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -134,6 +134,11 @@ psp_cmd_submit_buf(struct psp_context *psp, msleep(1); } + if (ucode) { + ucode->tmr_mc_addr_lo = psp->cmd_buf_mem->resp.fw_addr_lo; + ucode->tmr_mc_addr_hi = psp->cmd_buf_mem->resp.fw_addr_hi; + } + return ret; } -- GitLab From 997157c4eb3883dba69e5a36c9b5319dfa7666c5 Mon Sep 17 00:00:00 2001 From: James Zhu Date: Fri, 10 Aug 2018 00:31:38 +0800 Subject: [PATCH 0180/2269] drm/amdgpu:add tmr mc address into amdgpu_firmware_info commit abf412b3efb2f943d9b98a489e9aca836be21333 upstream. amdgpu IP blocks booting need Trust Memory Region(tmr) mc address of its firmware which is loaded by PSP Signed-off-by: James Zhu Reviewed-by: Alex Deucher Acked-by: Huang Rui Reviewed-by: Likun Gao Signed-off-by: Likun Gao Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h index 30b5500dc1521..2526271f3e2b4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h @@ -204,6 +204,9 @@ struct amdgpu_firmware_info { void *kaddr; /* ucode_size_bytes */ uint32_t ucode_size; + /* starting tmr mc address */ + uint32_t tmr_mc_addr_lo; + uint32_t tmr_mc_addr_hi; }; void amdgpu_ucode_print_mc_hdr(const struct common_firmware_header *hdr); -- GitLab From d47d14de2438f007df90c401dc252d04d91ba0bf Mon Sep 17 00:00:00 2001 From: Likun Gao Date: Fri, 10 Aug 2018 00:31:40 +0800 Subject: [PATCH 0181/2269] drm/amdgpu:add new firmware id for VCN commit c9ca989696ff28ffb015cc2b7c5577938ef2626c upstream. Add the new firmware id for VCN into the enum Signed-off-by: James Zhu Reviewed-by: Alex Deucher Acked-by: Huang Rui Reviewed-by: Likun Gao Signed-off-by: Likun Gao Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h index 2526271f3e2b4..fa7b25e1e5d29 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h @@ -172,6 +172,7 @@ enum AMDGPU_UCODE_ID { AMDGPU_UCODE_ID_SMC, AMDGPU_UCODE_ID_UVD, AMDGPU_UCODE_ID_VCE, + AMDGPU_UCODE_ID_VCN, AMDGPU_UCODE_ID_MAXIMUM, }; -- GitLab From 4a4afcf76c96a0cc345c8ab4a6918006d49b9aaa Mon Sep 17 00:00:00 2001 From: Likun Gao Date: Fri, 10 Aug 2018 00:31:41 +0800 Subject: [PATCH 0182/2269] drm/amdgpu:add VCN support in PSP driver commit 235ac9de625a0a586093ad81b3de6f7d7ab913ed upstream. Add VCN support in PSP driver Signed-off-by: James Zhu Reviewed-by: Alex Deucher Acked-by: Huang Rui Reviewed-by: Likun Gao Signed-off-by: Likun Gao Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/psp_v10_0.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c index f7cf994b1da28..86db90ff693a9 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c @@ -78,6 +78,9 @@ psp_v10_0_get_fw_type(struct amdgpu_firmware_info *ucode, enum psp_gfx_fw_type * case AMDGPU_UCODE_ID_VCE: *type = GFX_FW_TYPE_VCE; break; + case AMDGPU_UCODE_ID_VCN: + *type = GFX_FW_TYPE_VCN; + break; case AMDGPU_UCODE_ID_MAXIMUM: default: return -EINVAL; -- GitLab From 589a8eafc74e0c7c21a822cdde868d46a3850b01 Mon Sep 17 00:00:00 2001 From: Likun Gao Date: Fri, 10 Aug 2018 00:31:42 +0800 Subject: [PATCH 0183/2269] drm/amdgpu:add VCN booting with firmware loaded by PSP commit 4d77c0f676e910fb1f1870738aa4bd168f253621 upstream. Setup psp firmware loading for VCN, and make VCN block booting from tmr mac address. Signed-off-by: James Zhu Reviewed-by: Alex Deucher Acked-by: Huang Rui Reviewed-by: Likun Gao Signed-off-by: Likun Gao Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 17 ++++++----- drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c | 40 +++++++++++++++++++------ 2 files changed, 41 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index 308a9755eae39..1612d8aa6ad60 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -93,9 +93,10 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) version_major, version_minor, family_id); - bo_size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8) - + AMDGPU_VCN_STACK_SIZE + AMDGPU_VCN_HEAP_SIZE + bo_size = AMDGPU_VCN_STACK_SIZE + AMDGPU_VCN_HEAP_SIZE + AMDGPU_VCN_SESSION_SIZE * 40; + if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) + bo_size += AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8); r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, &adev->vcn.vcpu_bo, &adev->vcn.gpu_addr, &adev->vcn.cpu_addr); @@ -191,11 +192,13 @@ int amdgpu_vcn_resume(struct amdgpu_device *adev) unsigned offset; hdr = (const struct common_firmware_header *)adev->vcn.fw->data; - offset = le32_to_cpu(hdr->ucode_array_offset_bytes); - memcpy_toio(adev->vcn.cpu_addr, adev->vcn.fw->data + offset, - le32_to_cpu(hdr->ucode_size_bytes)); - size -= le32_to_cpu(hdr->ucode_size_bytes); - ptr += le32_to_cpu(hdr->ucode_size_bytes); + if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { + offset = le32_to_cpu(hdr->ucode_array_offset_bytes); + memcpy_toio(adev->vcn.cpu_addr, adev->vcn.fw->data + offset, + le32_to_cpu(hdr->ucode_size_bytes)); + size -= le32_to_cpu(hdr->ucode_size_bytes); + ptr += le32_to_cpu(hdr->ucode_size_bytes); + } memset_io(ptr, 0, size); } diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c index a098712bdd2ff..f7b8caccab9fe 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c @@ -91,6 +91,16 @@ static int vcn_v1_0_sw_init(void *handle) if (r) return r; + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + const struct common_firmware_header *hdr; + hdr = (const struct common_firmware_header *)adev->vcn.fw->data; + adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].ucode_id = AMDGPU_UCODE_ID_VCN; + adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].fw = adev->vcn.fw; + adev->firmware.fw_size += + ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE); + DRM_INFO("PSP loading VCN firmware\n"); + } + r = amdgpu_vcn_resume(adev); if (r) return r; @@ -248,26 +258,38 @@ static int vcn_v1_0_resume(void *handle) static void vcn_v1_0_mc_resume(struct amdgpu_device *adev) { uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4); - - WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, + uint32_t offset; + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, + (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_lo)); + WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, + (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_hi)); + WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0, 0); + offset = 0; + } else { + WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, lower_32_bits(adev->vcn.gpu_addr)); - WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, + WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, upper_32_bits(adev->vcn.gpu_addr)); - WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0, - AMDGPU_UVD_FIRMWARE_OFFSET >> 3); + offset = size; + WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0, + AMDGPU_UVD_FIRMWARE_OFFSET >> 3); + } + WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE0, size); WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW, - lower_32_bits(adev->vcn.gpu_addr + size)); + lower_32_bits(adev->vcn.gpu_addr + offset)); WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH, - upper_32_bits(adev->vcn.gpu_addr + size)); + upper_32_bits(adev->vcn.gpu_addr + offset)); WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET1, 0); WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_HEAP_SIZE); WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW, - lower_32_bits(adev->vcn.gpu_addr + size + AMDGPU_VCN_HEAP_SIZE)); + lower_32_bits(adev->vcn.gpu_addr + offset + AMDGPU_VCN_HEAP_SIZE)); WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH, - upper_32_bits(adev->vcn.gpu_addr + size + AMDGPU_VCN_HEAP_SIZE)); + upper_32_bits(adev->vcn.gpu_addr + offset + AMDGPU_VCN_HEAP_SIZE)); WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET2, 0); WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_STACK_SIZE + (AMDGPU_VCN_SESSION_SIZE * 40)); -- GitLab From 448b5498f6c6685e0f79dda9dea0ae6d821902a9 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 4 Sep 2018 15:46:13 -0700 Subject: [PATCH 0184/2269] uapi/linux/keyctl.h: don't use C++ reserved keyword as a struct member name commit 8a2336e549d385bb0b46880435b411df8d8200e8 upstream. Since this header is in "include/uapi/linux/", apparently people want to use it in userspace programs -- even in C++ ones. However, the header uses a C++ reserved keyword ("private"), so change that to "dh_private" instead to allow the header file to be used in C++ userspace. Fixes https://bugzilla.kernel.org/show_bug.cgi?id=191051 Link: http://lkml.kernel.org/r/0db6c314-1ef4-9bfa-1baa-7214dd2ee061@infradead.org Fixes: ddbb41148724 ("KEYS: Add KEYCTL_DH_COMPUTE command") Signed-off-by: Randy Dunlap Reviewed-by: Andrew Morton Cc: David Howells Cc: James Morris Cc: "Serge E. Hallyn" Cc: Mat Martineau Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/keyctl.h | 2 +- security/keys/dh.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/keyctl.h b/include/uapi/linux/keyctl.h index 7b8c9e19bad1c..910cc4334b215 100644 --- a/include/uapi/linux/keyctl.h +++ b/include/uapi/linux/keyctl.h @@ -65,7 +65,7 @@ /* keyctl structures */ struct keyctl_dh_params { - __s32 private; + __s32 dh_private; __s32 prime; __s32 base; }; diff --git a/security/keys/dh.c b/security/keys/dh.c index d1ea9f325f947..35543f04e7594 100644 --- a/security/keys/dh.c +++ b/security/keys/dh.c @@ -307,7 +307,7 @@ long __keyctl_dh_compute(struct keyctl_dh_params __user *params, } dh_inputs.g_size = dlen; - dlen = dh_data_from_key(pcopy.private, &dh_inputs.key); + dlen = dh_data_from_key(pcopy.dh_private, &dh_inputs.key); if (dlen < 0) { ret = dlen; goto out2; -- GitLab From 8d015a362a84930994c99dd44bc7dc945520e05c Mon Sep 17 00:00:00 2001 From: "Joel Fernandes (Google)" Date: Mon, 23 Jul 2018 14:25:31 -0700 Subject: [PATCH 0185/2269] debugobjects: Make stack check warning more informative commit fc91a3c4c27acdca0bc13af6fbb68c35cfd519f2 upstream. While debugging an issue debugobject tracking warned about an annotation issue of an object on stack. It turned out that the issue was due to the object in concern being on a different stack which was due to another issue. Thomas suggested to print the pointers and the location of the stack for the currently running task. This helped to figure out that the object was on the wrong stack. As this is general useful information for debugging similar issues, make the error message more informative by printing the pointers. [ tglx: Massaged changelog ] Signed-off-by: Joel Fernandes (Google) Signed-off-by: Thomas Gleixner Acked-by: Waiman Long Acked-by: Yang Shi Cc: kernel-team@android.com Cc: Arnd Bergmann Cc: astrachan@google.com Link: https://lkml.kernel.org/r/20180723212531.202328-1-joel@joelfernandes.org Signed-off-by: Greg Kroah-Hartman --- lib/debugobjects.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/lib/debugobjects.c b/lib/debugobjects.c index 2f5349c6e81ae..99308479b1c80 100644 --- a/lib/debugobjects.c +++ b/lib/debugobjects.c @@ -322,9 +322,12 @@ static void debug_object_is_on_stack(void *addr, int onstack) limit++; if (is_on_stack) - pr_warn("object is on stack, but not annotated\n"); + pr_warn("object %p is on stack %p, but NOT annotated.\n", addr, + task_stack_page(current)); else - pr_warn("object is not on stack, but annotated\n"); + pr_warn("object %p is NOT on stack %p, but annotated.\n", addr, + task_stack_page(current)); + WARN_ON(1); } -- GitLab From fd8cb2e71cdd8e814cbdadddd0d0e6e3d49eaa2c Mon Sep 17 00:00:00 2001 From: Luca Abeni Date: Thu, 7 Sep 2017 12:09:29 +0200 Subject: [PATCH 0186/2269] sched/deadline: Fix switching to -deadline commit 295d6d5e373607729bcc8182c25afe964655714f upstream. Fix a bug introduced in: 72f9f3fdc928 ("sched/deadline: Remove dl_new from struct sched_dl_entity") After that commit, when switching to -deadline if the scheduling deadline of a task is in the past then switched_to_dl() calls setup_new_entity() to properly initialize the scheduling deadline and runtime. The problem is that the task is enqueued _before_ having its parameters initialized by setup_new_entity(), and this can cause problems. For example, a task with its out-of-date deadline in the past will potentially be enqueued as the highest priority one; however, its adjusted deadline may not be the earliest one. This patch fixes the problem by initializing the task's parameters before enqueuing it. Signed-off-by: luca abeni Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Daniel Bristot de Oliveira Cc: Juri Lelli Cc: Linus Torvalds Cc: Mathieu Poirier Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1504778971-13573-3-git-send-email-luca.abeni@santannapisa.it Signed-off-by: Ingo Molnar Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- kernel/sched/deadline.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 501f17c642ab8..b2589c7e9439b 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -1365,6 +1365,10 @@ enqueue_dl_entity(struct sched_dl_entity *dl_se, update_dl_entity(dl_se, pi_se); } else if (flags & ENQUEUE_REPLENISH) { replenish_dl_entity(dl_se, pi_se); + } else if ((flags & ENQUEUE_RESTORE) && + dl_time_before(dl_se->deadline, + rq_clock(rq_of_dl_rq(dl_rq_of_se(dl_se))))) { + setup_new_dl_entity(dl_se); } __enqueue_dl_entity(dl_se); @@ -2256,13 +2260,6 @@ static void switched_to_dl(struct rq *rq, struct task_struct *p) return; } - /* - * If p is boosted we already updated its params in - * rt_mutex_setprio()->enqueue_task(..., ENQUEUE_REPLENISH), - * p's deadline being now already after rq_clock(rq). - */ - if (dl_time_before(p->dl.deadline, rq_clock(rq))) - setup_new_dl_entity(&p->dl); if (rq->curr != p) { #ifdef CONFIG_SMP -- GitLab From 1508043c32a9dfe4af214eb712ab4b5b33158f27 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Gonz=C3=A1lez?= Date: Fri, 13 Oct 2017 14:46:03 +0200 Subject: [PATCH 0187/2269] lightnvm: pblk: free padded entries in write buffer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit cd8ddbf7a5e206fe6995ab0aee245d597dd6a7f2 upstream. When a REQ_FLUSH reaches pblk, the bio cannot be directly completed. Instead, data on the write buffer is flushed and the bio is completed on the completion pah. This might require some sectors to be padded in order to guarantee a successful write. This patch fixes a memory leak on the padded pages. A consequence of this bad free was that internal bios not containing data (only a flush) were not being completed. Fixes: a4bd217b4326 ("lightnvm: physical block device (pblk) target") Signed-off-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- drivers/lightnvm/pblk-core.c | 1 - drivers/lightnvm/pblk-write.c | 7 ++++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c index 3f0ddc0d7393f..3fb65778e03d2 100644 --- a/drivers/lightnvm/pblk-core.c +++ b/drivers/lightnvm/pblk-core.c @@ -190,7 +190,6 @@ void pblk_bio_free_pages(struct pblk *pblk, struct bio *bio, int off, WARN_ON(off + nr_pages != bio->bi_vcnt); - bio_advance(bio, off * PBLK_EXPOSED_PAGE_SIZE); for (i = off; i < nr_pages + off; i++) { bv = bio->bi_io_vec[i]; mempool_free(bv.bv_page, pblk->page_bio_pool); diff --git a/drivers/lightnvm/pblk-write.c b/drivers/lightnvm/pblk-write.c index 3ad9e56d24734..d89ac573f8d83 100644 --- a/drivers/lightnvm/pblk-write.c +++ b/drivers/lightnvm/pblk-write.c @@ -33,6 +33,10 @@ static unsigned long pblk_end_w_bio(struct pblk *pblk, struct nvm_rq *rqd, bio_endio(original_bio); } + if (c_ctx->nr_padded) + pblk_bio_free_pages(pblk, rqd->bio, c_ctx->nr_valid, + c_ctx->nr_padded); + #ifdef CONFIG_NVM_DEBUG atomic_long_add(c_ctx->nr_valid, &pblk->sync_writes); #endif @@ -521,7 +525,8 @@ static void pblk_free_write_rqd(struct pblk *pblk, struct nvm_rq *rqd) struct bio *bio = rqd->bio; if (c_ctx->nr_padded) - pblk_bio_free_pages(pblk, bio, rqd->nr_ppas, c_ctx->nr_padded); + pblk_bio_free_pages(pblk, bio, c_ctx->nr_valid, + c_ctx->nr_padded); } static int pblk_submit_write(struct pblk *pblk) -- GitLab From 801fc191bb15655f7e694ba611ee02189f323291 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20H=2E=20Sch=C3=B6nherr?= Date: Fri, 19 Jan 2018 16:26:33 -0800 Subject: [PATCH 0188/2269] mm: Fix devm_memremap_pages() collision handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 77dd66a3c67c93ab401ccc15efff25578be281fd upstream. If devm_memremap_pages() detects a collision while adding entries to the radix-tree, we call pgmap_radix_release(). Unfortunately, the function removes *all* entries for the range -- including the entries that caused the collision in the first place. Modify pgmap_radix_release() to take an additional argument to indicate where to stop, so that only newly added entries are removed from the tree. Cc: Fixes: 9476df7d80df ("mm: introduce find_dev_pagemap()") Signed-off-by: Jan H. Schönherr Signed-off-by: Dan Williams Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- kernel/memremap.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/kernel/memremap.c b/kernel/memremap.c index 4712ce646e04b..2b136d4988f78 100644 --- a/kernel/memremap.c +++ b/kernel/memremap.c @@ -248,13 +248,16 @@ int device_private_entry_fault(struct vm_area_struct *vma, EXPORT_SYMBOL(device_private_entry_fault); #endif /* CONFIG_DEVICE_PRIVATE */ -static void pgmap_radix_release(struct resource *res) +static void pgmap_radix_release(struct resource *res, unsigned long end_pgoff) { unsigned long pgoff, order; mutex_lock(&pgmap_lock); - foreach_order_pgoff(res, order, pgoff) + foreach_order_pgoff(res, order, pgoff) { + if (pgoff >= end_pgoff) + break; radix_tree_delete(&pgmap_radix, PHYS_PFN(res->start) + pgoff); + } mutex_unlock(&pgmap_lock); synchronize_rcu(); @@ -309,7 +312,7 @@ static void devm_memremap_pages_release(struct device *dev, void *data) mem_hotplug_done(); untrack_pfn(NULL, PHYS_PFN(align_start), align_size); - pgmap_radix_release(res); + pgmap_radix_release(res, -1); dev_WARN_ONCE(dev, pgmap->altmap && pgmap->altmap->alloc, "%s: failed to free all reserved pages\n", __func__); } @@ -459,7 +462,7 @@ void *devm_memremap_pages(struct device *dev, struct resource *res, untrack_pfn(NULL, PHYS_PFN(align_start), align_size); err_pfn_remap: err_radix: - pgmap_radix_release(res); + pgmap_radix_release(res, pgoff); devres_free(page_map); return ERR_PTR(error); } -- GitLab From 44e5d0b6ce634ccb74c70e9457c491b75299d96b Mon Sep 17 00:00:00 2001 From: Dave Young Date: Fri, 1 Dec 2017 20:19:34 +0800 Subject: [PATCH 0189/2269] HID: add quirk for another PIXART OEM mouse used by HP commit 01cffe9ded15c0d664e0beb33c594e00c0d57bba upstream. This mouse keep disconnecting in runleve 3 like below, add it needs the quirk to mute the anoying messages. [ 111.230555] usb 2-2: USB disconnect, device number 6 [ 112.718156] usb 2-2: new low-speed USB device number 7 using xhci_hcd [ 112.941594] usb 2-2: New USB device found, idVendor=03f0, idProduct=094a [ 112.984866] usb 2-2: New USB device strings: Mfr=1, Product=2, SerialNumber=0 [ 113.027731] usb 2-2: Product: HP USB Optical Mouse [ 113.069977] usb 2-2: Manufacturer: PixArt [ 113.113500] input: PixArt HP USB Optical Mouse as /devices/pci0000:00/0000:00:14.0/usb2/2-2/2-2:1.0/0003:03F0:094A.0002/input/input14 [ 113.156787] hid-generic 0003:03F0:094A.0002: input: USB HID v1.11 Mouse [PixArt HP USB Optical Mouse] on usb-0000:00:14.0-2/input0 [ 173.262642] usb 2-2: USB disconnect, device number 7 [ 174.750244] usb 2-2: new low-speed USB device number 8 using xhci_hcd [ 174.935740] usb 2-2: New USB device found, idVendor=03f0, idProduct=094a [ 174.990435] usb 2-2: New USB device strings: Mfr=1, Product=2, SerialNumber=0 [ 175.014984] usb 2-2: Product: HP USB Optical Mouse [ 175.037886] usb 2-2: Manufacturer: PixArt [ 175.061794] input: PixArt HP USB Optical Mouse as /devices/pci0000:00/0000:00:14.0/usb2/2-2/2-2:1.0/0003:03F0:094A.0003/input/input15 [ 175.084946] hid-generic 0003:03F0:094A.0003: input: USB HID v1.11 Mouse [PixArt HP USB Optical Mouse] on usb-0000:00:14.0-2/input0 Signed-off-by: Dave Young Cc: stable@vger.kernel.org Reviewed-by: Benjamin Tissoires Signed-off-by: Jiri Kosina Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- drivers/hid/hid-ids.h | 1 + drivers/hid/usbhid/hid-quirks.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 9e478f03e8456..81ee1d026648b 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -528,6 +528,7 @@ #define USB_PRODUCT_ID_HP_LOGITECH_OEM_USB_OPTICAL_MOUSE_0A4A 0x0a4a #define USB_PRODUCT_ID_HP_LOGITECH_OEM_USB_OPTICAL_MOUSE_0B4A 0x0b4a #define USB_PRODUCT_ID_HP_PIXART_OEM_USB_OPTICAL_MOUSE 0x134a +#define USB_PRODUCT_ID_HP_PIXART_OEM_USB_OPTICAL_MOUSE_094A 0x094a #define USB_VENDOR_ID_HUION 0x256c #define USB_DEVICE_ID_HUION_TABLET 0x006e diff --git a/drivers/hid/usbhid/hid-quirks.c b/drivers/hid/usbhid/hid-quirks.c index f489a5cfcb48c..e10eda031b010 100644 --- a/drivers/hid/usbhid/hid-quirks.c +++ b/drivers/hid/usbhid/hid-quirks.c @@ -99,6 +99,7 @@ static const struct hid_blacklist { { USB_VENDOR_ID_HP, USB_PRODUCT_ID_HP_LOGITECH_OEM_USB_OPTICAL_MOUSE_0A4A, HID_QUIRK_ALWAYS_POLL }, { USB_VENDOR_ID_HP, USB_PRODUCT_ID_HP_LOGITECH_OEM_USB_OPTICAL_MOUSE_0B4A, HID_QUIRK_ALWAYS_POLL }, { USB_VENDOR_ID_HP, USB_PRODUCT_ID_HP_PIXART_OEM_USB_OPTICAL_MOUSE, HID_QUIRK_ALWAYS_POLL }, + { USB_VENDOR_ID_HP, USB_PRODUCT_ID_HP_PIXART_OEM_USB_OPTICAL_MOUSE_094A, HID_QUIRK_ALWAYS_POLL }, { USB_VENDOR_ID_IDEACOM, USB_DEVICE_ID_IDEACOM_IDC6680, HID_QUIRK_MULTI_INPUT }, { USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_C007, HID_QUIRK_ALWAYS_POLL }, { USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_C077, HID_QUIRK_ALWAYS_POLL }, -- GitLab From 685a452ce3bfd030700a0360483ce59515facc41 Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Mon, 12 Feb 2018 15:30:08 +0200 Subject: [PATCH 0190/2269] usb: dwc3: core: Fix ULPI PHYs and prevent phy_get/ulpi_init during suspend/resume commit 98112041bcca164676367e261c8c1073ef70cb51 upstream. In order for ULPI PHYs to work, dwc3_phy_setup() and dwc3_ulpi_init() must be doene before dwc3_core_get_phy(). commit 541768b08a40 ("usb: dwc3: core: Call dwc3_core_get_phy() before initializing phys") broke this. The other issue is that dwc3_core_get_phy() and dwc3_ulpi_init() should be called only once during the life cycle of the driver. However, as dwc3_core_init() is called during system suspend/resume it will result in multiple calls to dwc3_core_get_phy() and dwc3_ulpi_init() which is wrong. Fix this by moving dwc3_ulpi_init() out of dwc3_phy_setup() into dwc3_core_ulpi_init(). Use a flag 'ulpi_ready' to ensure that dwc3_core_ulpi_init() is called only once from dwc3_core_init(). Use another flag 'phys_ready' to call dwc3_core_get_phy() only once from dwc3_core_init(). Fixes: 541768b08a40 ("usb: dwc3: core: Call dwc3_core_get_phy() before initializing phys") Fixes: f54edb539c11 ("usb: dwc3: core: initialize ULPI before trying to get the PHY") Cc: linux-stable # >= v4.13 Signed-off-by: Roger Quadros Signed-off-by: Felipe Balbi Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/core.c | 47 +++++++++++++++++++++++++++++++---------- drivers/usb/dwc3/core.h | 5 +++++ 2 files changed, 41 insertions(+), 11 deletions(-) diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c index dca78bb20e5d5..8b323a360e037 100644 --- a/drivers/usb/dwc3/core.c +++ b/drivers/usb/dwc3/core.c @@ -511,6 +511,22 @@ static void dwc3_cache_hwparams(struct dwc3 *dwc) parms->hwparams8 = dwc3_readl(dwc->regs, DWC3_GHWPARAMS8); } +static int dwc3_core_ulpi_init(struct dwc3 *dwc) +{ + int intf; + int ret = 0; + + intf = DWC3_GHWPARAMS3_HSPHY_IFC(dwc->hwparams.hwparams3); + + if (intf == DWC3_GHWPARAMS3_HSPHY_IFC_ULPI || + (intf == DWC3_GHWPARAMS3_HSPHY_IFC_UTMI_ULPI && + dwc->hsphy_interface && + !strncmp(dwc->hsphy_interface, "ulpi", 4))) + ret = dwc3_ulpi_init(dwc); + + return ret; +} + /** * dwc3_phy_setup - Configure USB PHY Interface of DWC3 Core * @dwc: Pointer to our controller context structure @@ -522,7 +538,6 @@ static void dwc3_cache_hwparams(struct dwc3 *dwc) static int dwc3_phy_setup(struct dwc3 *dwc) { u32 reg; - int ret; reg = dwc3_readl(dwc->regs, DWC3_GUSB3PIPECTL(0)); @@ -593,9 +608,6 @@ static int dwc3_phy_setup(struct dwc3 *dwc) } /* FALLTHROUGH */ case DWC3_GHWPARAMS3_HSPHY_IFC_ULPI: - ret = dwc3_ulpi_init(dwc); - if (ret) - return ret; /* FALLTHROUGH */ default: break; @@ -752,6 +764,7 @@ static void dwc3_core_setup_global_control(struct dwc3 *dwc) } static int dwc3_core_get_phy(struct dwc3 *dwc); +static int dwc3_core_ulpi_init(struct dwc3 *dwc); /** * dwc3_core_init - Low-level initialization of DWC3 Core @@ -783,17 +796,27 @@ static int dwc3_core_init(struct dwc3 *dwc) dwc->maximum_speed = USB_SPEED_HIGH; } - ret = dwc3_core_get_phy(dwc); + ret = dwc3_phy_setup(dwc); if (ret) goto err0; - ret = dwc3_core_soft_reset(dwc); - if (ret) - goto err0; + if (!dwc->ulpi_ready) { + ret = dwc3_core_ulpi_init(dwc); + if (ret) + goto err0; + dwc->ulpi_ready = true; + } - ret = dwc3_phy_setup(dwc); + if (!dwc->phys_ready) { + ret = dwc3_core_get_phy(dwc); + if (ret) + goto err0a; + dwc->phys_ready = true; + } + + ret = dwc3_core_soft_reset(dwc); if (ret) - goto err0; + goto err0a; dwc3_core_setup_global_control(dwc); dwc3_core_num_eps(dwc); @@ -866,6 +889,9 @@ err1: phy_exit(dwc->usb2_generic_phy); phy_exit(dwc->usb3_generic_phy); +err0a: + dwc3_ulpi_exit(dwc); + err0: return ret; } @@ -1256,7 +1282,6 @@ err4: err3: dwc3_free_event_buffers(dwc); - dwc3_ulpi_exit(dwc); err2: pm_runtime_allow(&pdev->dev); diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h index b782ba58a7fc6..abd1142c9e4d9 100644 --- a/drivers/usb/dwc3/core.h +++ b/drivers/usb/dwc3/core.h @@ -805,7 +805,9 @@ struct dwc3_scratchpad_array { * @usb3_phy: pointer to USB3 PHY * @usb2_generic_phy: pointer to USB2 PHY * @usb3_generic_phy: pointer to USB3 PHY + * @phys_ready: flag to indicate that PHYs are ready * @ulpi: pointer to ulpi interface + * @ulpi_ready: flag to indicate that ULPI is initialized * @isoch_delay: wValue from Set Isochronous Delay request; * @u2sel: parameter from Set SEL request. * @u2pel: parameter from Set SEL request. @@ -903,7 +905,10 @@ struct dwc3 { struct phy *usb2_generic_phy; struct phy *usb3_generic_phy; + bool phys_ready; + struct ulpi *ulpi; + bool ulpi_ready; void __iomem *regs; size_t regs_size; -- GitLab From d85c2999a7b5e924172f0cc21f5804bbb1f34d69 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Tue, 21 Aug 2018 17:37:55 +0200 Subject: [PATCH 0191/2269] x86/pae: use 64 bit atomic xchg function in native_ptep_get_and_clear commit b2d7a075a1ccef2fb321d595802190c8e9b39004 upstream. Using only 32-bit writes for the pte will result in an intermediate L1TF vulnerable PTE. When running as a Xen PV guest this will at once switch the guest to shadow mode resulting in a loss of performance. Use arch_atomic64_xchg() instead which will perform the requested operation atomically with all 64 bits. Some performance considerations according to: https://software.intel.com/sites/default/files/managed/ad/dc/Intel-Xeon-Scalable-Processor-throughput-latency.pdf The main number should be the latency, as there is no tight loop around native_ptep_get_and_clear(). "lock cmpxchg8b" has a latency of 20 cycles, while "lock xchg" (with a memory operand) isn't mentioned in that document. "lock xadd" (with xadd having 3 cycles less latency than xchg) has a latency of 11, so we can assume a latency of 14 for "lock xchg". Signed-off-by: Juergen Gross Reviewed-by: Thomas Gleixner Reviewed-by: Jan Beulich Tested-by: Jason Andryuk Signed-off-by: Boris Ostrovsky [ Atomic operations gained an arch_ prefix in 8bf705d13039 ("locking/atomic/x86: Switch atomic.h to use atomic-instrumented.h") so s/arch_atomic64_xchg/atomic64_xchg/ for backport.] Signed-off-by: Jason Andryuk Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/pgtable-3level.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h index 9dc19b4a2a870..c5d4931d1ef9b 100644 --- a/arch/x86/include/asm/pgtable-3level.h +++ b/arch/x86/include/asm/pgtable-3level.h @@ -2,6 +2,8 @@ #ifndef _ASM_X86_PGTABLE_3LEVEL_H #define _ASM_X86_PGTABLE_3LEVEL_H +#include + /* * Intel Physical Address Extension (PAE) Mode - three-level page * tables on PPro+ CPUs. @@ -147,10 +149,7 @@ static inline pte_t native_ptep_get_and_clear(pte_t *ptep) { pte_t res; - /* xchg acts as a barrier before the setting of the high bits */ - res.pte_low = xchg(&ptep->pte_low, 0); - res.pte_high = ptep->pte_high; - ptep->pte_high = 0; + res.pte = (pteval_t)atomic64_xchg((atomic64_t *)ptep, 0); return res; } -- GitLab From 13b23ccfa28811333b866196e38d0bc35f68240d Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Tue, 21 Aug 2018 17:37:54 +0200 Subject: [PATCH 0192/2269] x86/xen: don't write ptes directly in 32-bit PV guests commit f7c90c2aa4004808dff777ba6ae2c7294dd06851 upstream. In some cases 32-bit PAE PV guests still write PTEs directly instead of using hypercalls. This is especially bad when clearing a PTE as this is done via 32-bit writes which will produce intermediate L1TF attackable PTEs. Change the code to use hypercalls instead. Signed-off-by: Juergen Gross Reviewed-by: Jan Beulich Signed-off-by: Boris Ostrovsky Signed-off-by: Greg Kroah-Hartman --- arch/x86/xen/mmu_pv.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index b3526a98a5a51..42cfad67b6acd 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -425,14 +425,13 @@ static void xen_set_pud(pud_t *ptr, pud_t val) static void xen_set_pte_atomic(pte_t *ptep, pte_t pte) { trace_xen_mmu_set_pte_atomic(ptep, pte); - set_64bit((u64 *)ptep, native_pte_val(pte)); + __xen_set_pte(ptep, pte); } static void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { trace_xen_mmu_pte_clear(mm, addr, ptep); - if (!xen_batched_set_pte(ptep, native_make_pte(0))) - native_pte_clear(mm, addr, ptep); + __xen_set_pte(ptep, native_make_pte(0)); } static void xen_pmd_clear(pmd_t *pmdp) @@ -1543,7 +1542,7 @@ static void __init xen_set_pte_init(pte_t *ptep, pte_t pte) pte = __pte_ma(((pte_val_ma(*ptep) & _PAGE_RW) | ~_PAGE_RW) & pte_val_ma(pte)); #endif - native_set_pte(ptep, pte); + __xen_set_pte(ptep, pte); } /* Early in boot, while setting up the initial pagetable, assume -- GitLab From 1e0750745bdffe0364ef8846af0561c9006bc77f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fredrik=20Sch=C3=B6n?= Date: Fri, 17 Aug 2018 22:07:28 +0200 Subject: [PATCH 0193/2269] drm/i915: Increase LSPCON timeout MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 299c2a904b1e8d5096d4813df6371357d97a6cd1 upstream. 100 ms is not enough time for the LSPCON adapter on Intel NUC devices to settle. This causes dropped display modes at boot or screen reconfiguration. Empirical testing can reproduce the error up to a timeout of 190 ms. Basic boot and stress testing at 200 ms has not (yet) failed. Increase timeout to 400 ms to get some margin of error. Changes from v1: The initial suggestion of 1000 ms was lowered due to concerns about delaying valid timeout cases. Update patch metadata. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=107503 Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1570392 Fixes: 357c0ae9198a ("drm/i915/lspcon: Wait for expected LSPCON mode to settle") Cc: Shashank Sharma Cc: Imre Deak Cc: Jani Nikula Cc: # v4.11+ Reviewed-by: Rodrigo Vivi Reviewed-by: Shashank Sharma Signed-off-by: Fredrik Schön Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20180817200728.8154-1-fredrik.schon@gmail.com (cherry picked from commit 59f1c8ab30d6f9042562949f42cbd3f3cf69de94) Signed-off-by: Rodrigo Vivi Reviewed-by: Shashank Sharma Signed-off-by: Fredrik Schön Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/intel_lspcon.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_lspcon.c b/drivers/gpu/drm/i915/intel_lspcon.c index beb9baaf2f2e4..f71fef10ecc61 100644 --- a/drivers/gpu/drm/i915/intel_lspcon.c +++ b/drivers/gpu/drm/i915/intel_lspcon.c @@ -75,7 +75,7 @@ static enum drm_lspcon_mode lspcon_wait_mode(struct intel_lspcon *lspcon, lspcon_mode_name(mode)); wait_for((current_mode = lspcon_get_current_mode(lspcon)) == mode || - current_mode == DRM_LSPCON_MODE_INVALID, 100); + current_mode == DRM_LSPCON_MODE_INVALID, 400); if (current_mode != mode) DRM_DEBUG_KMS("LSPCON mode hasn't settled\n"); -- GitLab From 9370868fd3ca87239332d4c288b2d0b3f91ffd2a Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 28 Aug 2018 12:59:10 -0700 Subject: [PATCH 0194/2269] kbuild: make missing $DEPMOD a Warning instead of an Error commit 914b087ff9e0e9a399a4927fa30793064afc0178 upstream. When $DEPMOD is not found, only print a warning instead of exiting with an error message and error status: Warning: 'make modules_install' requires /sbin/depmod. Please install it. This is probably in the kmod package. Change the Error to a Warning because "not all build hosts for cross compiling Linux are Linux systems and are able to provide a working port of depmod, especially at the file patch /sbin/depmod." I.e., "make modules_install" may be used to copy/install the loadable modules files to a target directory on a build system and then transferred to an embedded device where /sbin/depmod is run instead of it being run on the build system. Fixes: 934193a654c1 ("kbuild: verify that $DEPMOD is installed") Signed-off-by: Randy Dunlap Reported-by: H. Nikolaus Schaller Cc: stable@vger.kernel.org Cc: Lucas De Marchi Cc: Lucas De Marchi Cc: Michal Marek Cc: Jessica Yu Cc: Chih-Wei Huang Signed-off-by: Masahiro Yamada Signed-off-by: Maxim Zhukov Signed-off-by: Greg Kroah-Hartman --- scripts/depmod.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/depmod.sh b/scripts/depmod.sh index f41b0a4b575ce..cf5b2b24b3cf1 100755 --- a/scripts/depmod.sh +++ b/scripts/depmod.sh @@ -16,9 +16,9 @@ if ! test -r System.map ; then fi if [ -z $(command -v $DEPMOD) ]; then - echo "'make modules_install' requires $DEPMOD. Please install it." >&2 + echo "Warning: 'make modules_install' requires $DEPMOD. Please install it." >&2 echo "This is probably in the kmod package." >&2 - exit 1 + exit 0 fi # older versions of depmod don't support -P -- GitLab From 63fd9d43c0181d668e632832e66c123faf66cfd3 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Mon, 6 Aug 2018 13:49:47 +0200 Subject: [PATCH 0195/2269] s390/lib: use expoline for all bcr instructions commit 5eda25b10297684c1f46a14199ec00210f3c346e upstream. The memove, memset, memcpy, __memset16, __memset32 and __memset64 function have an additional indirect return branch in form of a "bzr" instruction. These need to use expolines as well. Cc: # v4.17+ Fixes: 97489e0663 ("s390/lib: use expoline for indirect branches") Reviewed-by: Heiko Carstens Signed-off-by: Martin Schwidefsky Signed-off-by: Greg Kroah-Hartman --- arch/s390/lib/mem.S | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/arch/s390/lib/mem.S b/arch/s390/lib/mem.S index e1fa974ac5005..37e52118d7e9e 100644 --- a/arch/s390/lib/mem.S +++ b/arch/s390/lib/mem.S @@ -17,7 +17,7 @@ ENTRY(memmove) ltgr %r4,%r4 lgr %r1,%r2 - bzr %r14 + jz .Lmemmove_exit aghi %r4,-1 clgr %r2,%r3 jnh .Lmemmove_forward @@ -36,6 +36,7 @@ ENTRY(memmove) .Lmemmove_forward_remainder: larl %r5,.Lmemmove_mvc ex %r4,0(%r5) +.Lmemmove_exit: BR_EX %r14 .Lmemmove_reverse: ic %r0,0(%r4,%r3) @@ -65,7 +66,7 @@ EXPORT_SYMBOL(memmove) */ ENTRY(memset) ltgr %r4,%r4 - bzr %r14 + jz .Lmemset_exit ltgr %r3,%r3 jnz .Lmemset_fill aghi %r4,-1 @@ -80,12 +81,13 @@ ENTRY(memset) .Lmemset_clear_remainder: larl %r3,.Lmemset_xc ex %r4,0(%r3) +.Lmemset_exit: BR_EX %r14 .Lmemset_fill: stc %r3,0(%r2) cghi %r4,1 lgr %r1,%r2 - ber %r14 + je .Lmemset_fill_exit aghi %r4,-2 srlg %r3,%r4,8 ltgr %r3,%r3 @@ -97,6 +99,7 @@ ENTRY(memset) .Lmemset_fill_remainder: larl %r3,.Lmemset_mvc ex %r4,0(%r3) +.Lmemset_fill_exit: BR_EX %r14 .Lmemset_xc: xc 0(1,%r1),0(%r1) @@ -111,7 +114,7 @@ EXPORT_SYMBOL(memset) */ ENTRY(memcpy) ltgr %r4,%r4 - bzr %r14 + jz .Lmemcpy_exit aghi %r4,-1 srlg %r5,%r4,8 ltgr %r5,%r5 @@ -120,6 +123,7 @@ ENTRY(memcpy) .Lmemcpy_remainder: larl %r5,.Lmemcpy_mvc ex %r4,0(%r5) +.Lmemcpy_exit: BR_EX %r14 .Lmemcpy_loop: mvc 0(256,%r1),0(%r3) -- GitLab From 77be9452d0e5768bab9a041a62116cbeb9dc3174 Mon Sep 17 00:00:00 2001 From: Tyler Hicks Date: Tue, 4 Sep 2018 15:24:04 +0000 Subject: [PATCH 0196/2269] irda: Fix memory leak caused by repeated binds of irda socket The irda_bind() function allocates memory for self->ias_obj without checking to see if the socket is already bound. A userspace process could repeatedly bind the socket, have each new object added into the LM-IAS database, and lose the reference to the old object assigned to the socket to exhaust memory resources. This patch errors out of the bind operation when self->ias_obj is already assigned. CVE-2018-6554 Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Tyler Hicks Reviewed-by: Seth Arnold Reviewed-by: Stefan Bader Signed-off-by: Greg Kroah-Hartman --- drivers/staging/irda/net/af_irda.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/staging/irda/net/af_irda.c b/drivers/staging/irda/net/af_irda.c index 23fa7c8b09a58..a08cd3dd7a6ec 100644 --- a/drivers/staging/irda/net/af_irda.c +++ b/drivers/staging/irda/net/af_irda.c @@ -775,6 +775,13 @@ static int irda_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) return -EINVAL; lock_sock(sk); + + /* Ensure that the socket is not already bound */ + if (self->ias_obj) { + err = -EINVAL; + goto out; + } + #ifdef CONFIG_IRDA_ULTRA /* Special care for Ultra sockets */ if ((sk->sk_type == SOCK_DGRAM) && -- GitLab From e37957305de356b4f8719a3a4c7bc1453a2f0ca3 Mon Sep 17 00:00:00 2001 From: Tyler Hicks Date: Tue, 4 Sep 2018 15:24:05 +0000 Subject: [PATCH 0197/2269] irda: Only insert new objects into the global database via setsockopt The irda_setsockopt() function conditionally allocates memory for a new self->ias_object or, in some cases, reuses the existing self->ias_object. Existing objects were incorrectly reinserted into the LM_IAS database which corrupted the doubly linked list used for the hashbin implementation of the LM_IAS database. When combined with a memory leak in irda_bind(), this issue could be leveraged to create a use-after-free vulnerability in the hashbin list. This patch fixes the issue by only inserting newly allocated objects into the database. CVE-2018-6555 Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Tyler Hicks Reviewed-by: Seth Arnold Reviewed-by: Stefan Bader Signed-off-by: Greg Kroah-Hartman --- drivers/staging/irda/net/af_irda.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/staging/irda/net/af_irda.c b/drivers/staging/irda/net/af_irda.c index a08cd3dd7a6ec..cebe9878ca036 100644 --- a/drivers/staging/irda/net/af_irda.c +++ b/drivers/staging/irda/net/af_irda.c @@ -2019,7 +2019,11 @@ static int irda_setsockopt(struct socket *sock, int level, int optname, err = -EINVAL; goto out; } - irias_insert_object(ias_obj); + + /* Only insert newly allocated objects */ + if (free_ias) + irias_insert_object(ias_obj); + kfree(ias_opt); break; case IRLMP_IAS_DEL: -- GitLab From 39cff99ba4692f8dc94b501d401cbd8b6ecaf1b2 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Mon, 3 Sep 2018 10:39:17 -0300 Subject: [PATCH 0198/2269] Revert "ARM: imx_v6_v7_defconfig: Select ULPI support" This reverts commit 2059e527a659cf16d6bb709f1c8509f7a7623fc4. This commit causes reboot to fail on imx6 wandboard, so let's revert it. Cc: #4.14 Reported-by: Rasmus Villemoes Signed-off-by: Fabio Estevam Signed-off-by: Greg Kroah-Hartman --- arch/arm/configs/imx_v6_v7_defconfig | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/arm/configs/imx_v6_v7_defconfig b/arch/arm/configs/imx_v6_v7_defconfig index 21ac9f02407e2..32acac9ab81aa 100644 --- a/arch/arm/configs/imx_v6_v7_defconfig +++ b/arch/arm/configs/imx_v6_v7_defconfig @@ -289,7 +289,6 @@ CONFIG_USB_STORAGE=y CONFIG_USB_CHIPIDEA=y CONFIG_USB_CHIPIDEA_UDC=y CONFIG_USB_CHIPIDEA_HOST=y -CONFIG_USB_CHIPIDEA_ULPI=y CONFIG_USB_SERIAL=m CONFIG_USB_SERIAL_GENERIC=y CONFIG_USB_SERIAL_FTDI_SIO=m @@ -326,7 +325,6 @@ CONFIG_USB_GADGETFS=m CONFIG_USB_FUNCTIONFS=m CONFIG_USB_MASS_STORAGE=m CONFIG_USB_G_SERIAL=m -CONFIG_USB_ULPI_BUS=y CONFIG_MMC=y CONFIG_MMC_SDHCI=y CONFIG_MMC_SDHCI_PLTFM=y -- GitLab From e02c9275beea3b4990b61bdfb684da18aef3029c Mon Sep 17 00:00:00 2001 From: Junaid Shahid Date: Tue, 14 Aug 2018 10:15:34 -0700 Subject: [PATCH 0199/2269] kvm: x86: Set highest physical address bits in non-present/reserved SPTEs commit 28a1f3ac1d0c8558ee4453d9634dad891a6e922e upstream. Always set the 5 upper-most supported physical address bits to 1 for SPTEs that are marked as non-present or reserved, to make them unusable for L1TF attacks from the guest. Currently, this just applies to MMIO SPTEs. (We do not need to mark PTEs that are completely 0 as physical page 0 is already reserved.) This allows mitigation of L1TF without disabling hyper-threading by using shadow paging mode instead of EPT. Signed-off-by: Junaid Shahid Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/mmu.c | 43 ++++++++++++++++++++++++++++++++++++++----- arch/x86/kvm/x86.c | 8 ++++++-- 2 files changed, 44 insertions(+), 7 deletions(-) diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 00e2ae033a0f5..1dfb808abd23f 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -220,6 +220,17 @@ static const u64 shadow_acc_track_saved_bits_mask = PT64_EPT_READABLE_MASK | PT64_EPT_EXECUTABLE_MASK; static const u64 shadow_acc_track_saved_bits_shift = PT64_SECOND_AVAIL_BITS_SHIFT; +/* + * This mask must be set on all non-zero Non-Present or Reserved SPTEs in order + * to guard against L1TF attacks. + */ +static u64 __read_mostly shadow_nonpresent_or_rsvd_mask; + +/* + * The number of high-order 1 bits to use in the mask above. + */ +static const u64 shadow_nonpresent_or_rsvd_mask_len = 5; + static void mmu_spte_set(u64 *sptep, u64 spte); static void mmu_free_roots(struct kvm_vcpu *vcpu); @@ -308,9 +319,13 @@ static void mark_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, u64 gfn, { unsigned int gen = kvm_current_mmio_generation(vcpu); u64 mask = generation_mmio_spte_mask(gen); + u64 gpa = gfn << PAGE_SHIFT; access &= ACC_WRITE_MASK | ACC_USER_MASK; - mask |= shadow_mmio_value | access | gfn << PAGE_SHIFT; + mask |= shadow_mmio_value | access; + mask |= gpa | shadow_nonpresent_or_rsvd_mask; + mask |= (gpa & shadow_nonpresent_or_rsvd_mask) + << shadow_nonpresent_or_rsvd_mask_len; trace_mark_mmio_spte(sptep, gfn, access, gen); mmu_spte_set(sptep, mask); @@ -323,8 +338,14 @@ static bool is_mmio_spte(u64 spte) static gfn_t get_mmio_spte_gfn(u64 spte) { - u64 mask = generation_mmio_spte_mask(MMIO_GEN_MASK) | shadow_mmio_mask; - return (spte & ~mask) >> PAGE_SHIFT; + u64 mask = generation_mmio_spte_mask(MMIO_GEN_MASK) | shadow_mmio_mask | + shadow_nonpresent_or_rsvd_mask; + u64 gpa = spte & ~mask; + + gpa |= (spte >> shadow_nonpresent_or_rsvd_mask_len) + & shadow_nonpresent_or_rsvd_mask; + + return gpa >> PAGE_SHIFT; } static unsigned get_mmio_spte_access(u64 spte) @@ -381,7 +402,7 @@ void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, } EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes); -void kvm_mmu_clear_all_pte_masks(void) +static void kvm_mmu_reset_all_pte_masks(void) { shadow_user_mask = 0; shadow_accessed_mask = 0; @@ -391,6 +412,18 @@ void kvm_mmu_clear_all_pte_masks(void) shadow_mmio_mask = 0; shadow_present_mask = 0; shadow_acc_track_mask = 0; + + /* + * If the CPU has 46 or less physical address bits, then set an + * appropriate mask to guard against L1TF attacks. Otherwise, it is + * assumed that the CPU is not vulnerable to L1TF. + */ + if (boot_cpu_data.x86_phys_bits < + 52 - shadow_nonpresent_or_rsvd_mask_len) + shadow_nonpresent_or_rsvd_mask = + rsvd_bits(boot_cpu_data.x86_phys_bits - + shadow_nonpresent_or_rsvd_mask_len, + boot_cpu_data.x86_phys_bits - 1); } static int is_cpuid_PSE36(void) @@ -5473,7 +5506,7 @@ static void mmu_destroy_caches(void) int kvm_mmu_module_init(void) { - kvm_mmu_clear_all_pte_masks(); + kvm_mmu_reset_all_pte_masks(); pte_list_desc_cache = kmem_cache_create("pte_list_desc", sizeof(struct pte_list_desc), diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 5c2c09f6c1c31..a4f200a069d90 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6194,8 +6194,12 @@ static void kvm_set_mmio_spte_mask(void) * Set the reserved bits and the present bit of an paging-structure * entry to generate page fault with PFER.RSV = 1. */ - /* Mask the reserved physical address bits. */ - mask = rsvd_bits(maxphyaddr, 51); + + /* + * Mask the uppermost physical address bit, which would be reserved as + * long as the supported physical address width is less than 52. + */ + mask = 1ull << 51; /* Set the present bit. */ mask |= 1ull; -- GitLab From 82a0e0f5cf407ba18efa965f0be0af5d8eac0e7c Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 20 Aug 2018 23:37:50 +0200 Subject: [PATCH 0200/2269] x86: kvm: avoid unused variable warning commit 7288bde1f9df6c1475675419bdd7725ce84dec56 upstream. Removing one of the two accesses of the maxphyaddr variable led to a harmless warning: arch/x86/kvm/x86.c: In function 'kvm_set_mmio_spte_mask': arch/x86/kvm/x86.c:6563:6: error: unused variable 'maxphyaddr' [-Werror=unused-variable] Removing the #ifdef seems to be the nicest workaround, as it makes the code look cleaner than adding another #ifdef. Fixes: 28a1f3ac1d0c ("kvm: x86: Set highest physical address bits in non-present/reserved SPTEs") Signed-off-by: Arnd Bergmann Cc: stable@vger.kernel.org # L1TF Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/x86.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index a4f200a069d90..3856828ee1dc9 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6204,14 +6204,12 @@ static void kvm_set_mmio_spte_mask(void) /* Set the present bit. */ mask |= 1ull; -#ifdef CONFIG_X86_64 /* * If reserved bit is not supported, clear the present bit to disable * mmio page fault. */ - if (maxphyaddr == 52) + if (IS_ENABLED(CONFIG_X86_64) && maxphyaddr == 52) mask &= ~1ull; -#endif kvm_mmu_set_mmio_spte_mask(mask, mask); } -- GitLab From 03717f80cf2cb24618d56d8eaf3aa958d23931fc Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 5 Jun 2018 13:50:07 +0200 Subject: [PATCH 0201/2269] arm64: cpu_errata: include required headers commit 94a5d8790e79ab78f499d2d9f1ff2cab63849d9f upstream. Without including psci.h and arm-smccc.h, we now get a build failure in some configurations: arch/arm64/kernel/cpu_errata.c: In function 'arm64_update_smccc_conduit': arch/arm64/kernel/cpu_errata.c:278:10: error: 'psci_ops' undeclared (first use in this function); did you mean 'sysfs_ops'? arch/arm64/kernel/cpu_errata.c: In function 'arm64_set_ssbd_mitigation': arch/arm64/kernel/cpu_errata.c:311:3: error: implicit declaration of function 'arm_smccc_1_1_hvc' [-Werror=implicit-function-declaration] arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_WORKAROUND_2, state, NULL); Signed-off-by: Arnd Bergmann Signed-off-by: Catalin Marinas Cc: Jisheng Zhang Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/cpu_errata.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index eccdb28b4a39c..4bdf72bd79de7 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -16,6 +16,8 @@ * along with this program. If not, see . */ +#include +#include #include #include #include -- GitLab From 0d2e80411a1a24cc39be3449e62d5621db57bed8 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 6 Aug 2018 07:14:51 -0500 Subject: [PATCH 0202/2269] ASoC: wm8994: Fix missing break in switch commit ad0eaee6195db1db1749dd46b9e6f4466793d178 upstream. Add missing break statement in order to prevent the code from falling through to the default case. Addresses-Coverity-ID: 115050 ("Missing break in switch") Reported-by: Valdis Kletnieks Signed-off-by: Gustavo A. R. Silva Acked-by: Charles Keepax Signed-off-by: Mark Brown Cc: stable@vger.kernel.org [Gustavo: Backported to 3.16..4.18 - Remove code comment removal] Signed-off-by: Gustavo A. R. Silva Signed-off-by: Greg Kroah-Hartman --- sound/soc/codecs/wm8994.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/codecs/wm8994.c b/sound/soc/codecs/wm8994.c index 3896523b71e9c..f289762cd676e 100644 --- a/sound/soc/codecs/wm8994.c +++ b/sound/soc/codecs/wm8994.c @@ -2431,6 +2431,7 @@ static int wm8994_set_dai_sysclk(struct snd_soc_dai *dai, snd_soc_update_bits(codec, WM8994_POWER_MANAGEMENT_2, WM8994_OPCLK_ENA, 0); } + break; default: return -EINVAL; -- GitLab From 469c89aa5d7e1a616a174d89d085011de5d75b75 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Wed, 4 Jul 2018 23:07:45 +0100 Subject: [PATCH 0203/2269] arm64: Fix mismatched cache line size detection commit 4c4a39dd5fe2d13e2d2fa5fceb8ef95d19fc389a upstream. If there is a mismatch in the I/D min line size, we must always use the system wide safe value both in applications and in the kernel, while performing cache operations. However, we have been checking more bits than just the min line sizes, which triggers false negatives. We may need to trap the user accesses in such cases, but not necessarily patch the kernel. This patch fixes the check to do the right thing as advertised. A new capability will be added to check mismatches in other fields and ensure we trap the CTR accesses. Fixes: be68a8aaf925 ("arm64: cpufeature: Fix CTR_EL0 field definitions") Cc: Cc: Mark Rutland Cc: Catalin Marinas Reported-by: Will Deacon Signed-off-by: Suzuki K Poulose Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/cache.h | 5 +++++ arch/arm64/kernel/cpu_errata.c | 6 ++++-- arch/arm64/kernel/cpufeature.c | 4 ++-- 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h index ea9bb4e0e9bbd..e40f8a2df5457 100644 --- a/arch/arm64/include/asm/cache.h +++ b/arch/arm64/include/asm/cache.h @@ -20,9 +20,14 @@ #define CTR_L1IP_SHIFT 14 #define CTR_L1IP_MASK 3 +#define CTR_DMINLINE_SHIFT 16 +#define CTR_IMINLINE_SHIFT 0 #define CTR_CWG_SHIFT 24 #define CTR_CWG_MASK 15 +#define CTR_CACHE_MINLINE_MASK \ + (0xf << CTR_DMINLINE_SHIFT | 0xf << CTR_IMINLINE_SHIFT) + #define CTR_L1IP(ctr) (((ctr) >> CTR_L1IP_SHIFT) & CTR_L1IP_MASK) #define ICACHE_POLICY_VPIPT 0 diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 4bdf72bd79de7..81016cc59d597 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -50,9 +50,11 @@ static bool has_mismatched_cache_line_size(const struct arm64_cpu_capabilities *entry, int scope) { + u64 mask = CTR_CACHE_MINLINE_MASK; + WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible()); - return (read_cpuid_cachetype() & arm64_ftr_reg_ctrel0.strict_mask) != - (arm64_ftr_reg_ctrel0.sys_val & arm64_ftr_reg_ctrel0.strict_mask); + return (read_cpuid_cachetype() & mask) != + (arm64_ftr_reg_ctrel0.sys_val & mask); } static int cpu_enable_trap_ctr_access(void *__unused) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 376cf12edf0c5..003dd39225a0a 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -180,14 +180,14 @@ static const struct arm64_ftr_bits ftr_ctr[] = { ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, 28, 1, 1), /* IDC */ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_HIGHER_SAFE, 24, 4, 0), /* CWG */ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_HIGHER_SAFE, 20, 4, 0), /* ERG */ - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, 16, 4, 1), /* DminLine */ + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, CTR_DMINLINE_SHIFT, 4, 1), /* * Linux can handle differing I-cache policies. Userspace JITs will * make use of *minLine. * If we have differing I-cache policies, report it as the weakest - VIPT. */ ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_EXACT, 14, 2, ICACHE_POLICY_VIPT), /* L1Ip */ - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0), /* IminLine */ + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, CTR_IMINLINE_SHIFT, 4, 0), ARM64_FTR_END, }; -- GitLab From ab088bc2911d773011f45872f1c72671ac8d1352 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Wed, 4 Jul 2018 23:07:46 +0100 Subject: [PATCH 0204/2269] arm64: Handle mismatched cache type commit 314d53d297980676011e6fd83dac60db4a01dc70 upstream. Track mismatches in the cache type register (CTR_EL0), other than the D/I min line sizes and trap user accesses if there are any. Fixes: be68a8aaf925 ("arm64: cpufeature: Fix CTR_EL0 field definitions") Cc: Cc: Mark Rutland Cc: Will Deacon Cc: Catalin Marinas Signed-off-by: Suzuki K Poulose Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/cpucaps.h | 3 ++- arch/arm64/kernel/cpu_errata.c | 17 ++++++++++++++--- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h index 76c0d23ca161f..7d6425d426acb 100644 --- a/arch/arm64/include/asm/cpucaps.h +++ b/arch/arm64/include/asm/cpucaps.h @@ -44,7 +44,8 @@ #define ARM64_HARDEN_BRANCH_PREDICTOR 24 #define ARM64_HARDEN_BP_POST_GUEST_EXIT 25 #define ARM64_SSBD 26 +#define ARM64_MISMATCHED_CACHE_TYPE 27 -#define ARM64_NCAPS 27 +#define ARM64_NCAPS 28 #endif /* __ASM_CPUCAPS_H */ diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 81016cc59d597..3d6d7fae45de5 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -47,11 +47,15 @@ is_kryo_midr(const struct arm64_cpu_capabilities *entry, int scope) } static bool -has_mismatched_cache_line_size(const struct arm64_cpu_capabilities *entry, - int scope) +has_mismatched_cache_type(const struct arm64_cpu_capabilities *entry, + int scope) { u64 mask = CTR_CACHE_MINLINE_MASK; + /* Skip matching the min line sizes for cache type check */ + if (entry->capability == ARM64_MISMATCHED_CACHE_TYPE) + mask ^= arm64_ftr_reg_ctrel0.strict_mask; + WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible()); return (read_cpuid_cachetype() & mask) != (arm64_ftr_reg_ctrel0.sys_val & mask); @@ -515,7 +519,14 @@ const struct arm64_cpu_capabilities arm64_errata[] = { { .desc = "Mismatched cache line size", .capability = ARM64_MISMATCHED_CACHE_LINE_SIZE, - .matches = has_mismatched_cache_line_size, + .matches = has_mismatched_cache_type, + .def_scope = SCOPE_LOCAL_CPU, + .enable = cpu_enable_trap_ctr_access, + }, + { + .desc = "Mismatched cache type", + .capability = ARM64_MISMATCHED_CACHE_TYPE, + .matches = has_mismatched_cache_type, .def_scope = SCOPE_LOCAL_CPU, .enable = cpu_enable_trap_ctr_access, }, -- GitLab From 5dfe87ac34e2326ae2957fc68b63212d84f78701 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 15 Sep 2018 09:45:37 +0200 Subject: [PATCH 0205/2269] Linux 4.14.70 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 3ecda1d2e23a3..aa458afa7fa2c 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 69 +SUBLEVEL = 70 EXTRAVERSION = NAME = Petit Gorille -- GitLab From 831223b294f8531616c2487d139a2f8d7b67d8c4 Mon Sep 17 00:00:00 2001 From: Shubhrajyoti Datta Date: Mon, 3 Sep 2018 15:11:11 +0530 Subject: [PATCH 0206/2269] i2c: xiic: Make the start and the byte count write atomic commit ae7304c3ea28a3ba47a7a8312c76c654ef24967e upstream. Disable interrupts while configuring the transfer and enable them back. We have below as the programming sequence 1. start and slave address 2. byte count and stop In some customer platform there was a lot of interrupts between 1 and 2 and after slave address (around 7 clock cyles) if 2 is not executed then the transaction is nacked. To fix this case make the 2 writes atomic. Signed-off-by: Shubhrajyoti Datta Signed-off-by: Michal Simek [wsa: added a newline for better readability] Signed-off-by: Wolfram Sang Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/busses/i2c-xiic.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/i2c/busses/i2c-xiic.c b/drivers/i2c/busses/i2c-xiic.c index ae6ed254e01db..732d6c456a6f1 100644 --- a/drivers/i2c/busses/i2c-xiic.c +++ b/drivers/i2c/busses/i2c-xiic.c @@ -538,6 +538,7 @@ static void xiic_start_recv(struct xiic_i2c *i2c) { u8 rx_watermark; struct i2c_msg *msg = i2c->rx_msg = i2c->tx_msg; + unsigned long flags; /* Clear and enable Rx full interrupt. */ xiic_irq_clr_en(i2c, XIIC_INTR_RX_FULL_MASK | XIIC_INTR_TX_ERROR_MASK); @@ -553,6 +554,7 @@ static void xiic_start_recv(struct xiic_i2c *i2c) rx_watermark = IIC_RX_FIFO_DEPTH; xiic_setreg8(i2c, XIIC_RFD_REG_OFFSET, rx_watermark - 1); + local_irq_save(flags); if (!(msg->flags & I2C_M_NOSTART)) /* write the address */ xiic_setreg16(i2c, XIIC_DTR_REG_OFFSET, @@ -563,6 +565,8 @@ static void xiic_start_recv(struct xiic_i2c *i2c) xiic_setreg16(i2c, XIIC_DTR_REG_OFFSET, msg->len | ((i2c->nmsgs == 1) ? XIIC_TX_DYN_STOP_MASK : 0)); + local_irq_restore(flags); + if (i2c->nmsgs == 1) /* very last, enable bus not busy as well */ xiic_irq_clr_en(i2c, XIIC_INTR_BNB_MASK); -- GitLab From d98b67089c0e34578d31257c08633ea60b087c1e Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Mon, 3 Sep 2018 11:24:57 +0300 Subject: [PATCH 0207/2269] i2c: i801: fix DNV's SMBCTRL register offset commit 851a15114895c5bce163a6f2d57e0aa4658a1be4 upstream. DNV's iTCO is slightly different with SMBCTRL sitting at a different offset when compared to all other devices. Let's fix so that we can properly use iTCO watchdog. Fixes: 84d7f2ebd70d ("i2c: i801: Add support for Intel DNV") Cc: # v4.4+ Signed-off-by: Felipe Balbi Reviewed-by: Jean Delvare Signed-off-by: Wolfram Sang Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/busses/i2c-i801.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c index ba8df2fde1b27..67cbd9f61acce 100644 --- a/drivers/i2c/busses/i2c-i801.c +++ b/drivers/i2c/busses/i2c-i801.c @@ -138,6 +138,7 @@ #define SBREG_BAR 0x10 #define SBREG_SMBCTRL 0xc6000c +#define SBREG_SMBCTRL_DNV 0xcf000c /* Host status bits for SMBPCISTS */ #define SMBPCISTS_INTS BIT(3) @@ -1395,7 +1396,11 @@ static void i801_add_tco(struct i801_priv *priv) spin_unlock(&p2sb_spinlock); res = &tco_res[ICH_RES_MEM_OFF]; - res->start = (resource_size_t)base64_addr + SBREG_SMBCTRL; + if (pci_dev->device == PCI_DEVICE_ID_INTEL_DNV_SMBUS) + res->start = (resource_size_t)base64_addr + SBREG_SMBCTRL_DNV; + else + res->start = (resource_size_t)base64_addr + SBREG_SMBCTRL; + res->end = res->start + 3; res->flags = IORESOURCE_MEM; -- GitLab From 425739151e03c00487910a9d9cdfbd55cceb9975 Mon Sep 17 00:00:00 2001 From: James Smart Date: Thu, 16 Aug 2018 16:04:05 -0700 Subject: [PATCH 0208/2269] scsi: lpfc: Correct MDS diag and nvmet configuration commit 53e13ee087a80e8d4fc95436318436e5c2c1f8c2 upstream. A recent change added some MDS processing in the lpfc_drain_txq routine that relies on the fcp_wq being allocated. For nvmet operation the fcp_wq is not allocated because it can only be an nvme-target. When the original MDS support was added LS_MDS_LOOPBACK was defined wrong, (0x16) it should have been 0x10 (decimal value used for hex setting). This incorrect value allowed MDS_LOOPBACK to be set simultaneously with LS_NPIV_FAB_SUPPORTED, causing the driver to crash when it accesses the non-existent fcp_wq. Correct the bad value setting for LS_MDS_LOOPBACK. Fixes: ae9e28f36a6c ("lpfc: Add MDS Diagnostic support.") Cc: # v4.12+ Signed-off-by: Dick Kennedy Signed-off-by: James Smart Tested-by: Ewan D. Milne Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/lpfc/lpfc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h index 8eb3f96fe0689..bc61cc8bc6f02 100644 --- a/drivers/scsi/lpfc/lpfc.h +++ b/drivers/scsi/lpfc/lpfc.h @@ -676,7 +676,7 @@ struct lpfc_hba { #define LS_NPIV_FAB_SUPPORTED 0x2 /* Fabric supports NPIV */ #define LS_IGNORE_ERATT 0x4 /* intr handler should ignore ERATT */ #define LS_MDS_LINK_DOWN 0x8 /* MDS Diagnostics Link Down */ -#define LS_MDS_LOOPBACK 0x16 /* MDS Diagnostics Link Up (Loopback) */ +#define LS_MDS_LOOPBACK 0x10 /* MDS Diagnostics Link Up (Loopback) */ uint32_t hba_flag; /* hba generic flags */ #define HBA_ERATT_HANDLED 0x1 /* This flag is set when eratt handled */ -- GitLab From 23ecbbad7bf921f83a5c99718ded203676ddd91c Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 4 Sep 2018 11:52:34 -0600 Subject: [PATCH 0209/2269] nbd: don't allow invalid blocksize settings commit bc811f05d77f47059c197a98b6ad242eb03999cb upstream. syzbot reports a divide-by-zero off the NBD_SET_BLKSIZE ioctl. We need proper validation of the input here. Not just if it's zero, but also if the value is a power-of-2 and in a valid range. Add that. Cc: stable@vger.kernel.org Reported-by: syzbot Reviewed-by: Josef Bacik Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/block/nbd.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 5e55d03d3d011..fe1414df0f331 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -1228,6 +1228,9 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, case NBD_SET_SOCK: return nbd_add_socket(nbd, arg, false); case NBD_SET_BLKSIZE: + if (!arg || !is_power_of_2(arg) || arg < 512 || + arg > PAGE_SIZE) + return -EINVAL; nbd_size_set(nbd, arg, div_s64(config->bytesize, arg)); return 0; -- GitLab From 381992bcccac4185ccdbbd2d9a3018238982b963 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Thu, 6 Sep 2018 11:05:44 +0300 Subject: [PATCH 0210/2269] block: bfq: swap puts in bfqg_and_blkg_put commit d5274b3cd6a814ccb2f56d81ee87cbbf51bd4cf7 upstream. Fix trivial use-after-free. This could be last reference to bfqg. Fixes: 8f9bebc33dd7 ("block, bfq: access and cache blkg data only when safe") Acked-by: Paolo Valente Signed-off-by: Konstantin Khlebnikov Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- block/bfq-cgroup.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c index 4b571f3ea0096..afbbe5750a1f8 100644 --- a/block/bfq-cgroup.c +++ b/block/bfq-cgroup.c @@ -224,9 +224,9 @@ static void bfqg_and_blkg_get(struct bfq_group *bfqg) void bfqg_and_blkg_put(struct bfq_group *bfqg) { - bfqg_put(bfqg); - blkg_put(bfqg_to_blkg(bfqg)); + + bfqg_put(bfqg); } void bfqg_stats_update_io_add(struct bfq_group *bfqg, struct bfq_queue *bfqq, -- GitLab From 492519107c9d5ecfe3298586e3f5a33ac968b62c Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Thu, 23 Aug 2018 14:29:56 +0900 Subject: [PATCH 0211/2269] android: binder: fix the race mmap and alloc_new_buf_locked commit da1b9564e85b1d7baf66cbfabcab27e183a1db63 upstream. There is RaceFuzzer report like below because we have no lock to close below the race between binder_mmap and binder_alloc_new_buf_locked. To close the race, let's use memory barrier so that if someone see alloc->vma is not NULL, alloc->vma_vm_mm should be never NULL. (I didn't add stable mark intentionallybecause standard android userspace libraries that interact with binder (libbinder & libhwbinder) prevent the mmap/ioctl race. - from Todd) " Thread interleaving: CPU0 (binder_alloc_mmap_handler) CPU1 (binder_alloc_new_buf_locked) ===== ===== // drivers/android/binder_alloc.c // #L718 (v4.18-rc3) alloc->vma = vma; // drivers/android/binder_alloc.c // #L346 (v4.18-rc3) if (alloc->vma == NULL) { ... // alloc->vma is not NULL at this point return ERR_PTR(-ESRCH); } ... // #L438 binder_update_page_range(alloc, 0, (void *)PAGE_ALIGN((uintptr_t)buffer->data), end_page_addr); // In binder_update_page_range() #L218 // But still alloc->vma_vm_mm is NULL here if (need_mm && mmget_not_zero(alloc->vma_vm_mm)) alloc->vma_vm_mm = vma->vm_mm; Crash Log: ================================================================== BUG: KASAN: null-ptr-deref in __atomic_add_unless include/asm-generic/atomic-instrumented.h:89 [inline] BUG: KASAN: null-ptr-deref in atomic_add_unless include/linux/atomic.h:533 [inline] BUG: KASAN: null-ptr-deref in mmget_not_zero include/linux/sched/mm.h:75 [inline] BUG: KASAN: null-ptr-deref in binder_update_page_range+0xece/0x18e0 drivers/android/binder_alloc.c:218 Write of size 4 at addr 0000000000000058 by task syz-executor0/11184 CPU: 1 PID: 11184 Comm: syz-executor0 Not tainted 4.18.0-rc3 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.8.2-0-g33fbe13 by qemu-project.org 04/01/2014 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x16e/0x22c lib/dump_stack.c:113 kasan_report_error mm/kasan/report.c:352 [inline] kasan_report+0x163/0x380 mm/kasan/report.c:412 check_memory_region_inline mm/kasan/kasan.c:260 [inline] check_memory_region+0x140/0x1a0 mm/kasan/kasan.c:267 kasan_check_write+0x14/0x20 mm/kasan/kasan.c:278 __atomic_add_unless include/asm-generic/atomic-instrumented.h:89 [inline] atomic_add_unless include/linux/atomic.h:533 [inline] mmget_not_zero include/linux/sched/mm.h:75 [inline] binder_update_page_range+0xece/0x18e0 drivers/android/binder_alloc.c:218 binder_alloc_new_buf_locked drivers/android/binder_alloc.c:443 [inline] binder_alloc_new_buf+0x467/0xc30 drivers/android/binder_alloc.c:513 binder_transaction+0x125b/0x4fb0 drivers/android/binder.c:2957 binder_thread_write+0xc08/0x2770 drivers/android/binder.c:3528 binder_ioctl_write_read.isra.39+0x24f/0x8e0 drivers/android/binder.c:4456 binder_ioctl+0xa86/0xf34 drivers/android/binder.c:4596 vfs_ioctl fs/ioctl.c:46 [inline] do_vfs_ioctl+0x154/0xd40 fs/ioctl.c:686 ksys_ioctl+0x94/0xb0 fs/ioctl.c:701 __do_sys_ioctl fs/ioctl.c:708 [inline] __se_sys_ioctl fs/ioctl.c:706 [inline] __x64_sys_ioctl+0x43/0x50 fs/ioctl.c:706 do_syscall_64+0x167/0x4b0 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe " Signed-off-by: Todd Kjos Signed-off-by: Minchan Kim Reviewed-by: Martijn Coenen Cc: stable Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder_alloc.c | 42 +++++++++++++++++++++++++++------- 1 file changed, 34 insertions(+), 8 deletions(-) diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c index 6cb1482686760..58e4658f9dd6c 100644 --- a/drivers/android/binder_alloc.c +++ b/drivers/android/binder_alloc.c @@ -324,6 +324,34 @@ err_no_vma: return vma ? -ENOMEM : -ESRCH; } +static inline void binder_alloc_set_vma(struct binder_alloc *alloc, + struct vm_area_struct *vma) +{ + if (vma) + alloc->vma_vm_mm = vma->vm_mm; + /* + * If we see alloc->vma is not NULL, buffer data structures set up + * completely. Look at smp_rmb side binder_alloc_get_vma. + * We also want to guarantee new alloc->vma_vm_mm is always visible + * if alloc->vma is set. + */ + smp_wmb(); + alloc->vma = vma; +} + +static inline struct vm_area_struct *binder_alloc_get_vma( + struct binder_alloc *alloc) +{ + struct vm_area_struct *vma = NULL; + + if (alloc->vma) { + /* Look at description in binder_alloc_set_vma */ + smp_rmb(); + vma = alloc->vma; + } + return vma; +} + struct binder_buffer *binder_alloc_new_buf_locked(struct binder_alloc *alloc, size_t data_size, size_t offsets_size, @@ -339,7 +367,7 @@ struct binder_buffer *binder_alloc_new_buf_locked(struct binder_alloc *alloc, size_t size, data_offsets_size; int ret; - if (alloc->vma == NULL) { + if (!binder_alloc_get_vma(alloc)) { pr_err("%d: binder_alloc_buf, no vma\n", alloc->pid); return ERR_PTR(-ESRCH); @@ -712,9 +740,7 @@ int binder_alloc_mmap_handler(struct binder_alloc *alloc, buffer->free = 1; binder_insert_free_buffer(alloc, buffer); alloc->free_async_space = alloc->buffer_size / 2; - barrier(); - alloc->vma = vma; - alloc->vma_vm_mm = vma->vm_mm; + binder_alloc_set_vma(alloc, vma); mmgrab(alloc->vma_vm_mm); return 0; @@ -741,10 +767,10 @@ void binder_alloc_deferred_release(struct binder_alloc *alloc) int buffers, page_count; struct binder_buffer *buffer; - BUG_ON(alloc->vma); - buffers = 0; mutex_lock(&alloc->mutex); + BUG_ON(alloc->vma); + while ((n = rb_first(&alloc->allocated_buffers))) { buffer = rb_entry(n, struct binder_buffer, rb_node); @@ -886,7 +912,7 @@ int binder_alloc_get_allocated_count(struct binder_alloc *alloc) */ void binder_alloc_vma_close(struct binder_alloc *alloc) { - WRITE_ONCE(alloc->vma, NULL); + binder_alloc_set_vma(alloc, NULL); } /** @@ -921,7 +947,7 @@ enum lru_status binder_alloc_free_page(struct list_head *item, index = page - alloc->pages; page_addr = (uintptr_t)alloc->buffer + index * PAGE_SIZE; - vma = alloc->vma; + vma = binder_alloc_get_vma(alloc); if (vma) { if (!mmget_not_zero(alloc->vma_vm_mm)) goto err_mmget; -- GitLab From 9efcaa7c4afba5628f2650a76f69c798f47eeb18 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Thu, 30 Aug 2018 11:01:21 -0700 Subject: [PATCH 0212/2269] MIPS: VDSO: Match data page cache colouring when D$ aliases commit 0f02cfbc3d9e413d450d8d0fd660077c23f67eff upstream. When a system suffers from dcache aliasing a user program may observe stale VDSO data from an aliased cache line. Notably this can break the expectation that clock_gettime(CLOCK_MONOTONIC, ...) is, as its name suggests, monotonic. In order to ensure that users observe updates to the VDSO data page as intended, align the user mappings of the VDSO data page such that their cache colouring matches that of the virtual address range which the kernel will use to update the data page - typically its unmapped address within kseg0. This ensures that we don't introduce aliasing cache lines for the VDSO data page, and therefore that userland will observe updates without requiring cache invalidation. Signed-off-by: Paul Burton Reported-by: Hauke Mehrtens Reported-by: Rene Nielsen Reported-by: Alexandre Belloni Fixes: ebb5e78cc634 ("MIPS: Initial implementation of a VDSO") Patchwork: https://patchwork.linux-mips.org/patch/20344/ Tested-by: Alexandre Belloni Tested-by: Hauke Mehrtens Cc: James Hogan Cc: linux-mips@linux-mips.org Cc: stable@vger.kernel.org # v4.4+ Signed-off-by: Greg Kroah-Hartman --- arch/mips/kernel/vdso.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/arch/mips/kernel/vdso.c b/arch/mips/kernel/vdso.c index 019035d7225c4..8f845f6e5f426 100644 --- a/arch/mips/kernel/vdso.c +++ b/arch/mips/kernel/vdso.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -20,6 +21,7 @@ #include #include +#include #include /* Kernel-provided data used by the VDSO. */ @@ -128,12 +130,30 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) vvar_size = gic_size + PAGE_SIZE; size = vvar_size + image->size; + /* + * Find a region that's large enough for us to perform the + * colour-matching alignment below. + */ + if (cpu_has_dc_aliases) + size += shm_align_mask + 1; + base = get_unmapped_area(NULL, 0, size, 0, 0); if (IS_ERR_VALUE(base)) { ret = base; goto out; } + /* + * If we suffer from dcache aliasing, ensure that the VDSO data page + * mapping is coloured the same as the kernel's mapping of that memory. + * This ensures that when the kernel updates the VDSO data userland + * will observe it without requiring cache invalidations. + */ + if (cpu_has_dc_aliases) { + base = __ALIGN_MASK(base, shm_align_mask); + base += ((unsigned long)&vdso_data - gic_size) & shm_align_mask; + } + data_addr = base + gic_size; vdso_addr = data_addr + PAGE_SIZE; -- GitLab From b0b69369eecc4cdc6680e1bf6e14a632d5a447d9 Mon Sep 17 00:00:00 2001 From: Steve French Date: Mon, 27 Aug 2018 17:04:13 -0500 Subject: [PATCH 0213/2269] SMB3: Backup intent flag missing for directory opens with backupuid mounts commit 5e19697b56a64004e2d0ff1bb952ea05493c088f upstream. When "backup intent" is requested on the mount (e.g. backupuid or backupgid mount options), the corresponding flag needs to be set on opens of directories (and files) but was missing in some places causing access denied trying to enumerate and backup servers. Fixes kernel bugzilla #200953 https://bugzilla.kernel.org/show_bug.cgi?id=200953 Reported-and-tested-by: Signed-off-by: Steve French CC: Stable Reviewed-by: Pavel Shilovsky Signed-off-by: Greg Kroah-Hartman --- fs/cifs/inode.c | 2 ++ fs/cifs/smb2ops.c | 25 ++++++++++++++++++++----- 2 files changed, 22 insertions(+), 5 deletions(-) diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index caf9cf91b825a..2cd0b3053439f 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -467,6 +467,8 @@ cifs_sfu_type(struct cifs_fattr *fattr, const char *path, oparms.cifs_sb = cifs_sb; oparms.desired_access = GENERIC_READ; oparms.create_options = CREATE_NOT_DIR; + if (backup_cred(cifs_sb)) + oparms.create_options |= CREATE_OPEN_BACKUP_INTENT; oparms.disposition = FILE_OPEN; oparms.path = path; oparms.fid = &fid; diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index e9f246fe9d800..012912219d323 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -385,7 +385,10 @@ smb2_is_path_accessible(const unsigned int xid, struct cifs_tcon *tcon, oparms.tcon = tcon; oparms.desired_access = FILE_READ_ATTRIBUTES; oparms.disposition = FILE_OPEN; - oparms.create_options = 0; + if (backup_cred(cifs_sb)) + oparms.create_options = CREATE_OPEN_BACKUP_INTENT; + else + oparms.create_options = 0; oparms.fid = &fid; oparms.reconnect = false; @@ -534,7 +537,10 @@ smb2_query_eas(const unsigned int xid, struct cifs_tcon *tcon, oparms.tcon = tcon; oparms.desired_access = FILE_READ_EA; oparms.disposition = FILE_OPEN; - oparms.create_options = 0; + if (backup_cred(cifs_sb)) + oparms.create_options = CREATE_OPEN_BACKUP_INTENT; + else + oparms.create_options = 0; oparms.fid = &fid; oparms.reconnect = false; @@ -613,7 +619,10 @@ smb2_set_ea(const unsigned int xid, struct cifs_tcon *tcon, oparms.tcon = tcon; oparms.desired_access = FILE_WRITE_EA; oparms.disposition = FILE_OPEN; - oparms.create_options = 0; + if (backup_cred(cifs_sb)) + oparms.create_options = CREATE_OPEN_BACKUP_INTENT; + else + oparms.create_options = 0; oparms.fid = &fid; oparms.reconnect = false; @@ -1215,7 +1224,10 @@ smb2_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon, oparms.tcon = tcon; oparms.desired_access = FILE_READ_ATTRIBUTES | FILE_READ_DATA; oparms.disposition = FILE_OPEN; - oparms.create_options = 0; + if (backup_cred(cifs_sb)) + oparms.create_options = CREATE_OPEN_BACKUP_INTENT; + else + oparms.create_options = 0; oparms.fid = fid; oparms.reconnect = false; @@ -1491,7 +1503,10 @@ smb2_query_symlink(const unsigned int xid, struct cifs_tcon *tcon, oparms.tcon = tcon; oparms.desired_access = FILE_READ_ATTRIBUTES; oparms.disposition = FILE_OPEN; - oparms.create_options = 0; + if (backup_cred(cifs_sb)) + oparms.create_options = CREATE_OPEN_BACKUP_INTENT; + else + oparms.create_options = 0; oparms.fid = &fid; oparms.reconnect = false; -- GitLab From 55bcfe019ac4a2ab2308cec4dae83b7830fa9b6a Mon Sep 17 00:00:00 2001 From: Steve French Date: Fri, 31 Aug 2018 15:12:10 -0500 Subject: [PATCH 0214/2269] smb3: check for and properly advertise directory lease support commit f801568332321e2b1e7a8bd26c3e4913a312a2ec upstream. Although servers will typically ignore unsupported features, we should advertise the support for directory leases (as Windows e.g. does) in the negotiate protocol capabilities we pass to the server, and should check for the server capability (CAP_DIRECTORY_LEASING) before sending a lease request for an open of a directory. This will prevent us from accidentally sending directory leases to SMB2.1 or SMB2 server for example. Signed-off-by: Steve French CC: Stable Reviewed-by: Ronnie Sahlberg Signed-off-by: Greg Kroah-Hartman --- fs/cifs/smb2ops.c | 10 +++++----- fs/cifs/smb2pdu.c | 3 +++ 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 012912219d323..759cbbf7b1af1 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -3215,7 +3215,7 @@ struct smb_version_values smb21_values = { struct smb_version_values smb3any_values = { .version_string = SMB3ANY_VERSION_STRING, .protocol_id = SMB302_PROT_ID, /* doesn't matter, send protocol array */ - .req_capabilities = SMB2_GLOBAL_CAP_DFS | SMB2_GLOBAL_CAP_LEASING | SMB2_GLOBAL_CAP_LARGE_MTU | SMB2_GLOBAL_CAP_PERSISTENT_HANDLES | SMB2_GLOBAL_CAP_ENCRYPTION, + .req_capabilities = SMB2_GLOBAL_CAP_DFS | SMB2_GLOBAL_CAP_LEASING | SMB2_GLOBAL_CAP_LARGE_MTU | SMB2_GLOBAL_CAP_PERSISTENT_HANDLES | SMB2_GLOBAL_CAP_ENCRYPTION | SMB2_GLOBAL_CAP_DIRECTORY_LEASING, .large_lock_type = 0, .exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE_LOCK, .shared_lock_type = SMB2_LOCKFLAG_SHARED_LOCK, @@ -3235,7 +3235,7 @@ struct smb_version_values smb3any_values = { struct smb_version_values smbdefault_values = { .version_string = SMBDEFAULT_VERSION_STRING, .protocol_id = SMB302_PROT_ID, /* doesn't matter, send protocol array */ - .req_capabilities = SMB2_GLOBAL_CAP_DFS | SMB2_GLOBAL_CAP_LEASING | SMB2_GLOBAL_CAP_LARGE_MTU | SMB2_GLOBAL_CAP_PERSISTENT_HANDLES | SMB2_GLOBAL_CAP_ENCRYPTION, + .req_capabilities = SMB2_GLOBAL_CAP_DFS | SMB2_GLOBAL_CAP_LEASING | SMB2_GLOBAL_CAP_LARGE_MTU | SMB2_GLOBAL_CAP_PERSISTENT_HANDLES | SMB2_GLOBAL_CAP_ENCRYPTION | SMB2_GLOBAL_CAP_DIRECTORY_LEASING, .large_lock_type = 0, .exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE_LOCK, .shared_lock_type = SMB2_LOCKFLAG_SHARED_LOCK, @@ -3255,7 +3255,7 @@ struct smb_version_values smbdefault_values = { struct smb_version_values smb30_values = { .version_string = SMB30_VERSION_STRING, .protocol_id = SMB30_PROT_ID, - .req_capabilities = SMB2_GLOBAL_CAP_DFS | SMB2_GLOBAL_CAP_LEASING | SMB2_GLOBAL_CAP_LARGE_MTU | SMB2_GLOBAL_CAP_PERSISTENT_HANDLES | SMB2_GLOBAL_CAP_ENCRYPTION, + .req_capabilities = SMB2_GLOBAL_CAP_DFS | SMB2_GLOBAL_CAP_LEASING | SMB2_GLOBAL_CAP_LARGE_MTU | SMB2_GLOBAL_CAP_PERSISTENT_HANDLES | SMB2_GLOBAL_CAP_ENCRYPTION | SMB2_GLOBAL_CAP_DIRECTORY_LEASING, .large_lock_type = 0, .exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE_LOCK, .shared_lock_type = SMB2_LOCKFLAG_SHARED_LOCK, @@ -3275,7 +3275,7 @@ struct smb_version_values smb30_values = { struct smb_version_values smb302_values = { .version_string = SMB302_VERSION_STRING, .protocol_id = SMB302_PROT_ID, - .req_capabilities = SMB2_GLOBAL_CAP_DFS | SMB2_GLOBAL_CAP_LEASING | SMB2_GLOBAL_CAP_LARGE_MTU | SMB2_GLOBAL_CAP_PERSISTENT_HANDLES | SMB2_GLOBAL_CAP_ENCRYPTION, + .req_capabilities = SMB2_GLOBAL_CAP_DFS | SMB2_GLOBAL_CAP_LEASING | SMB2_GLOBAL_CAP_LARGE_MTU | SMB2_GLOBAL_CAP_PERSISTENT_HANDLES | SMB2_GLOBAL_CAP_ENCRYPTION | SMB2_GLOBAL_CAP_DIRECTORY_LEASING, .large_lock_type = 0, .exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE_LOCK, .shared_lock_type = SMB2_LOCKFLAG_SHARED_LOCK, @@ -3296,7 +3296,7 @@ struct smb_version_values smb302_values = { struct smb_version_values smb311_values = { .version_string = SMB311_VERSION_STRING, .protocol_id = SMB311_PROT_ID, - .req_capabilities = SMB2_GLOBAL_CAP_DFS | SMB2_GLOBAL_CAP_LEASING | SMB2_GLOBAL_CAP_LARGE_MTU | SMB2_GLOBAL_CAP_PERSISTENT_HANDLES | SMB2_GLOBAL_CAP_ENCRYPTION, + .req_capabilities = SMB2_GLOBAL_CAP_DFS | SMB2_GLOBAL_CAP_LEASING | SMB2_GLOBAL_CAP_LARGE_MTU | SMB2_GLOBAL_CAP_PERSISTENT_HANDLES | SMB2_GLOBAL_CAP_ENCRYPTION | SMB2_GLOBAL_CAP_DIRECTORY_LEASING, .large_lock_type = 0, .exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE_LOCK, .shared_lock_type = SMB2_LOCKFLAG_SHARED_LOCK, diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 58842b36481d7..078ec705a5cc6 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -1816,6 +1816,9 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path, if (!(server->capabilities & SMB2_GLOBAL_CAP_LEASING) || *oplock == SMB2_OPLOCK_LEVEL_NONE) req->RequestedOplockLevel = *oplock; + else if (!(server->capabilities & SMB2_GLOBAL_CAP_DIRECTORY_LEASING) && + (oparms->create_options & CREATE_NOT_FILE)) + req->RequestedOplockLevel = *oplock; /* no srv lease support */ else { rc = add_lease_context(server, iov, &n_iov, oplock); if (rc) { -- GitLab From ae3968b416459f3754feefb0bc685e9f3261bb3e Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Fri, 17 Aug 2018 09:38:59 +0100 Subject: [PATCH 0215/2269] Btrfs: fix data corruption when deduplicating between different files commit de02b9f6bb65a6a1848f346f7a3617b7a9b930c0 upstream. If we deduplicate extents between two different files we can end up corrupting data if the source range ends at the size of the source file, the source file's size is not aligned to the filesystem's block size and the destination range does not go past the size of the destination file size. Example: $ mkfs.btrfs -f /dev/sdb $ mount /dev/sdb /mnt $ xfs_io -f -c "pwrite -S 0x6b 0 2518890" /mnt/foo # The first byte with a value of 0xae starts at an offset (2518890) # which is not a multiple of the sector size. $ xfs_io -c "pwrite -S 0xae 2518890 102398" /mnt/foo # Confirm the file content is full of bytes with values 0x6b and 0xae. $ od -t x1 /mnt/foo 0000000 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b * 11467540 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b ae ae ae ae ae ae 11467560 ae ae ae ae ae ae ae ae ae ae ae ae ae ae ae ae * 11777540 ae ae ae ae ae ae ae ae 11777550 # Create a second file with a length not aligned to the sector size, # whose bytes all have the value 0x6b, so that its extent(s) can be # deduplicated with the first file. $ xfs_io -f -c "pwrite -S 0x6b 0 557771" /mnt/bar # Now deduplicate the entire second file into a range of the first file # that also has all bytes with the value 0x6b. The destination range's # end offset must not be aligned to the sector size and must be less # then the offset of the first byte with the value 0xae (byte at offset # 2518890). $ xfs_io -c "dedupe /mnt/bar 0 1957888 557771" /mnt/foo # The bytes in the range starting at offset 2515659 (end of the # deduplication range) and ending at offset 2519040 (start offset # rounded up to the block size) must all have the value 0xae (and not # replaced with 0x00 values). In other words, we should have exactly # the same data we had before we asked for deduplication. $ od -t x1 /mnt/foo 0000000 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b * 11467540 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b ae ae ae ae ae ae 11467560 ae ae ae ae ae ae ae ae ae ae ae ae ae ae ae ae * 11777540 ae ae ae ae ae ae ae ae 11777550 # Unmount the filesystem and mount it again. This guarantees any file # data in the page cache is dropped. $ umount /dev/sdb $ mount /dev/sdb /mnt $ od -t x1 /mnt/foo 0000000 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b * 11461300 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 00 00 00 00 00 11461320 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 * 11470000 ae ae ae ae ae ae ae ae ae ae ae ae ae ae ae ae * 11777540 ae ae ae ae ae ae ae ae 11777550 # The bytes in range 2515659 to 2519040 have a value of 0x00 and not a # value of 0xae, data corruption happened due to the deduplication # operation. So fix this by rounding down, to the sector size, the length used for the deduplication when the following conditions are met: 1) Source file's range ends at its i_size; 2) Source file's i_size is not aligned to the sector size; 3) Destination range does not cross the i_size of the destination file. Fixes: e1d227a42ea2 ("btrfs: Handle unaligned length in extent_same") CC: stable@vger.kernel.org # 4.2+ Signed-off-by: Filipe Manana Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/ioctl.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 7303ba1081122..a507c0d253545 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3158,6 +3158,25 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen, same_lock_start = min_t(u64, loff, dst_loff); same_lock_len = max_t(u64, loff, dst_loff) + len - same_lock_start; + } else { + /* + * If the source and destination inodes are different, the + * source's range end offset matches the source's i_size, that + * i_size is not a multiple of the sector size, and the + * destination range does not go past the destination's i_size, + * we must round down the length to the nearest sector size + * multiple. If we don't do this adjustment we end replacing + * with zeroes the bytes in the range that starts at the + * deduplication range's end offset and ends at the next sector + * size multiple. + */ + if (loff + olen == i_size_read(src) && + dst_loff + len < i_size_read(dst)) { + const u64 sz = BTRFS_I(src)->root->fs_info->sectorsize; + + len = round_down(i_size_read(src), sz) - loff; + olen = len; + } } /* don't make the dst file partly checksummed */ -- GitLab From d5fca5314c4da9d17b2db5d01a57fce48ec0f081 Mon Sep 17 00:00:00 2001 From: Pierre Morel Date: Thu, 23 Aug 2018 12:25:54 +0200 Subject: [PATCH 0216/2269] KVM: s390: vsie: copy wrapping keys to right place commit 204c97245612b6c255edf4e21e24d417c4a0c008 upstream. Copy the key mask to the right offset inside the shadow CRYCB Fixes: bbeaa58b3 ("KVM: s390: vsie: support aes dea wrapping keys") Signed-off-by: Pierre Morel Reviewed-by: David Hildenbrand Reviewed-by: Cornelia Huck Reviewed-by: Janosch Frank Cc: stable@vger.kernel.org # v4.8+ Message-Id: <1535019956-23539-2-git-send-email-pmorel@linux.ibm.com> Signed-off-by: Janosch Frank Signed-off-by: Christian Borntraeger Signed-off-by: Greg Kroah-Hartman --- arch/s390/kvm/vsie.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c index 4f1f5fc8139d8..061906f98dc5c 100644 --- a/arch/s390/kvm/vsie.c +++ b/arch/s390/kvm/vsie.c @@ -170,7 +170,8 @@ static int shadow_crycb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) return set_validity_icpt(scb_s, 0x0039U); /* copy only the wrapping keys */ - if (read_guest_real(vcpu, crycb_addr + 72, &vsie_page->crycb, 56)) + if (read_guest_real(vcpu, crycb_addr + 72, + vsie_page->crycb.dea_wrapping_key_mask, 56)) return set_validity_icpt(scb_s, 0x0035U); scb_s->ecb3 |= ecb3_flags; -- GitLab From a709c46fdcd07940ae75bbca417b5b825bf92bce Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 23 Aug 2018 13:56:46 -0700 Subject: [PATCH 0217/2269] KVM: VMX: Do not allow reexecute_instruction() when skipping MMIO instr MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit c4409905cd6eb42cfd06126e9226b0150e05a715 upstream. Re-execution after an emulation decode failure is only intended to handle a case where two or vCPUs race to write a shadowed page, i.e. we should never re-execute an instruction as part of MMIO emulation. As handle_ept_misconfig() is only used for MMIO emulation, it should pass EMULTYPE_NO_REEXECUTE when using the emulator to skip an instr in the fast-MMIO case where VM_EXIT_INSTRUCTION_LEN is invalid. And because the cr2 value passed to x86_emulate_instruction() is only destined for use when retrying or reexecuting, we can simply call emulate_instruction(). Fixes: d391f1207067 ("x86/kvm/vmx: do not use vm-exit instruction length for fast MMIO when running nested") Cc: Vitaly Kuznetsov Signed-off-by: Sean Christopherson Cc: stable@vger.kernel.org Signed-off-by: Radim Krčmář Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 4e5a8e30cc4e8..fd46d890296c9 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -6965,8 +6965,8 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu) if (!static_cpu_has(X86_FEATURE_HYPERVISOR)) return kvm_skip_emulated_instruction(vcpu); else - return x86_emulate_instruction(vcpu, gpa, EMULTYPE_SKIP, - NULL, 0) == EMULATE_DONE; + return emulate_instruction(vcpu, EMULTYPE_SKIP) == + EMULATE_DONE; } ret = kvm_mmu_page_fault(vcpu, gpa, PFERR_RSVD_MASK, NULL, 0); -- GitLab From cc427108861907dffd590598e300b36420de7e75 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 30 Aug 2018 15:13:16 +0200 Subject: [PATCH 0218/2269] ALSA: hda - Fix cancel_work_sync() stall from jackpoll work commit 16037643969e095509cd8446a3f8e406a6dc3a2c upstream. On AMD/ATI controllers, the HD-audio controller driver allows a bus reset upon the error recovery, and its procedure includes the cancellation of pending jack polling work as found in snd_hda_bus_codec_reset(). This works usually fine, but it becomes a problem when the reset happens from the jack poll work itself; then calling cancel_work_sync() from the work being processed tries to wait the finish endlessly. As a workaround, this patch adds the check of current_work() and applies the cancel_work_sync() only when it's not from the jackpoll_work. This doesn't fix the root cause of the reported error below, but at least, it eases the unexpected stall of the whole system. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=200937 Cc: Cc: Lukas Wunner Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/hda_codec.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c index 417abbb1f72ca..8a027973f2adc 100644 --- a/sound/pci/hda/hda_codec.c +++ b/sound/pci/hda/hda_codec.c @@ -3923,7 +3923,8 @@ void snd_hda_bus_reset_codecs(struct hda_bus *bus) list_for_each_codec(codec, bus) { /* FIXME: maybe a better way needed for forced reset */ - cancel_delayed_work_sync(&codec->jackpoll_work); + if (current_work() != &codec->jackpoll_work.work) + cancel_delayed_work_sync(&codec->jackpoll_work); #ifdef CONFIG_PM if (hda_codec_is_power_on(codec)) { hda_call_codec_suspend(codec); -- GitLab From cb26258540913d80cc3fe2f36e9b02fe3b91cf0f Mon Sep 17 00:00:00 2001 From: Neeraj Upadhyay Date: Wed, 5 Sep 2018 11:22:07 +0530 Subject: [PATCH 0219/2269] cpu/hotplug: Adjust misplaced smb() in cpuhp_thread_fun() commit f8b7530aa0a1def79c93101216b5b17cf408a70a upstream. The smp_mb() in cpuhp_thread_fun() is misplaced. It needs to be after the load of st->should_run to prevent reordering of the later load/stores w.r.t. the load of st->should_run. Fixes: 4dddfb5faa61 ("smp/hotplug: Rewrite AP state machine core") Signed-off-by: Neeraj Upadhyay Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Cc: josh@joshtriplett.org Cc: peterz@infradead.org Cc: jiangshanlai@gmail.com Cc: dzickus@redhat.com Cc: brendan.jackman@arm.com Cc: malat@debian.org Cc: mojha@codeaurora.org Cc: sramana@codeaurora.org Cc: linux-arm-msm@vger.kernel.org Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/1536126727-11629-1-git-send-email-neeraju@codeaurora.org Signed-off-by: Greg Kroah-Hartman --- kernel/cpu.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/cpu.c b/kernel/cpu.c index 8f02f9b6e0469..9d9645da94fc5 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -612,15 +612,15 @@ static void cpuhp_thread_fun(unsigned int cpu) bool bringup = st->bringup; enum cpuhp_state state; + if (WARN_ON_ONCE(!st->should_run)) + return; + /* * ACQUIRE for the cpuhp_should_run() load of ->should_run. Ensures * that if we see ->should_run we also see the rest of the state. */ smp_mb(); - if (WARN_ON_ONCE(!st->should_run)) - return; - cpuhp_lock_acquire(bringup); if (st->single) { -- GitLab From 1d92a611db50f1b19d5d7ed27bd4dec6000d06e4 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 6 Sep 2018 15:21:38 +0200 Subject: [PATCH 0220/2269] cpu/hotplug: Prevent state corruption on error rollback commit 69fa6eb7d6a64801ea261025cce9723d9442d773 upstream. When a teardown callback fails, the CPU hotplug code brings the CPU back to the previous state. The previous state becomes the new target state. The rollback happens in undo_cpu_down() which increments the state unconditionally even if the state is already the same as the target. As a consequence the next CPU hotplug operation will start at the wrong state. This is easily to observe when __cpu_disable() fails. Prevent the unconditional undo by checking the state vs. target before incrementing state and fix up the consequently wrong conditional in the unplug code which handles the failure of the final CPU take down on the control CPU side. Fixes: 4dddfb5faa61 ("smp/hotplug: Rewrite AP state machine core") Reported-by: Neeraj Upadhyay Signed-off-by: Thomas Gleixner Tested-by: Geert Uytterhoeven Tested-by: Sudeep Holla Tested-by: Neeraj Upadhyay Cc: josh@joshtriplett.org Cc: peterz@infradead.org Cc: jiangshanlai@gmail.com Cc: dzickus@redhat.com Cc: brendan.jackman@arm.com Cc: malat@debian.org Cc: sramana@codeaurora.org Cc: linux-arm-msm@vger.kernel.org Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/alpine.DEB.2.21.1809051419580.1416@nanos.tec.linutronix.de Signed-off-by: Greg Kroah-Hartman ---- --- kernel/cpu.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kernel/cpu.c b/kernel/cpu.c index 9d9645da94fc5..f3f389e333430 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -932,7 +932,8 @@ static int cpuhp_down_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st, ret = cpuhp_invoke_callback(cpu, st->state, false, NULL, NULL); if (ret) { st->target = prev_state; - undo_cpu_down(cpu, st); + if (st->state < prev_state) + undo_cpu_down(cpu, st); break; } } @@ -985,7 +986,7 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen, * to do the further cleanups. */ ret = cpuhp_down_callbacks(cpu, st, target); - if (ret && st->state > CPUHP_TEARDOWN_CPU && st->state < prev_state) { + if (ret && st->state == CPUHP_TEARDOWN_CPU && st->state < prev_state) { cpuhp_reset_state(st, prev_state); __cpuhp_kick_ap(st); } -- GitLab From 05104410c9d109e1d9b4d2e59f7de9d9722ec47f Mon Sep 17 00:00:00 2001 From: Prarit Bhargava Date: Tue, 31 Jul 2018 07:27:39 -0400 Subject: [PATCH 0221/2269] x86/microcode: Make sure boot_cpu_data.microcode is up-to-date commit 370a132bb2227ff76278f98370e0e701d86ff752 upstream. When preparing an MCE record for logging, boot_cpu_data.microcode is used to read out the microcode revision on the box. However, on systems where late microcode update has happened, the microcode revision output in a MCE log record is wrong because boot_cpu_data.microcode is not updated when the microcode gets updated. But, the microcode revision saved in boot_cpu_data's microcode member should be kept up-to-date, regardless, for consistency. Make it so. Fixes: fa94d0c6e0f3 ("x86/MCE: Save microcode revision in machine check records") Signed-off-by: Prarit Bhargava Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Cc: Tony Luck Cc: sironi@amazon.de Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/20180731112739.32338-1-prarit@redhat.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/microcode/amd.c | 4 ++++ arch/x86/kernel/cpu/microcode/intel.c | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index 48179928ff38c..ae5003dd24053 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -537,6 +537,10 @@ static enum ucode_state apply_microcode_amd(int cpu) uci->cpu_sig.rev = mc_amd->hdr.patch_id; c->microcode = mc_amd->hdr.patch_id; + /* Update boot_cpu_data's revision too, if we're on the BSP: */ + if (c->cpu_index == boot_cpu_data.cpu_index) + boot_cpu_data.microcode = mc_amd->hdr.patch_id; + return UCODE_UPDATED; } diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index 97ccf4c3b45be..256d336cbc045 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -851,6 +851,10 @@ static enum ucode_state apply_microcode_intel(int cpu) uci->cpu_sig.rev = rev; c->microcode = rev; + /* Update boot_cpu_data's revision too, if we're on the BSP: */ + if (c->cpu_index == boot_cpu_data.cpu_index) + boot_cpu_data.microcode = rev; + return UCODE_UPDATED; } -- GitLab From ff225750dc4e70ce5c4288fe3850ae9dc69df024 Mon Sep 17 00:00:00 2001 From: Filippo Sironi Date: Tue, 31 Jul 2018 17:29:30 +0200 Subject: [PATCH 0222/2269] x86/microcode: Update the new microcode revision unconditionally commit 8da38ebaad23fe1b0c4a205438676f6356607cfc upstream. Handle the case where microcode gets loaded on the BSP's hyperthread sibling first and the boot_cpu_data's microcode revision doesn't get updated because of early exit due to the siblings sharing a microcode engine. For that, simply write the updated revision on all CPUs unconditionally. Signed-off-by: Filippo Sironi Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Cc: prarit@redhat.com Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/1533050970-14385-1-git-send-email-sironi@amazon.de Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/microcode/amd.c | 22 +++++++++++++--------- arch/x86/kernel/cpu/microcode/intel.c | 13 ++++++++----- 2 files changed, 21 insertions(+), 14 deletions(-) diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index ae5003dd24053..9d33dbf2489e2 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -504,6 +504,7 @@ static enum ucode_state apply_microcode_amd(int cpu) struct microcode_amd *mc_amd; struct ucode_cpu_info *uci; struct ucode_patch *p; + enum ucode_state ret; u32 rev, dummy; BUG_ON(raw_smp_processor_id() != cpu); @@ -521,9 +522,8 @@ static enum ucode_state apply_microcode_amd(int cpu) /* need to apply patch? */ if (rev >= mc_amd->hdr.patch_id) { - c->microcode = rev; - uci->cpu_sig.rev = rev; - return UCODE_OK; + ret = UCODE_OK; + goto out; } if (__apply_microcode_amd(mc_amd)) { @@ -531,17 +531,21 @@ static enum ucode_state apply_microcode_amd(int cpu) cpu, mc_amd->hdr.patch_id); return UCODE_ERROR; } - pr_info("CPU%d: new patch_level=0x%08x\n", cpu, - mc_amd->hdr.patch_id); - uci->cpu_sig.rev = mc_amd->hdr.patch_id; - c->microcode = mc_amd->hdr.patch_id; + rev = mc_amd->hdr.patch_id; + ret = UCODE_UPDATED; + + pr_info("CPU%d: new patch_level=0x%08x\n", cpu, rev); + +out: + uci->cpu_sig.rev = rev; + c->microcode = rev; /* Update boot_cpu_data's revision too, if we're on the BSP: */ if (c->cpu_index == boot_cpu_data.cpu_index) - boot_cpu_data.microcode = mc_amd->hdr.patch_id; + boot_cpu_data.microcode = rev; - return UCODE_UPDATED; + return ret; } static int install_equiv_cpu_table(const u8 *buf) diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index 256d336cbc045..16936a24795c8 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -795,6 +795,7 @@ static enum ucode_state apply_microcode_intel(int cpu) struct ucode_cpu_info *uci = ucode_cpu_info + cpu; struct cpuinfo_x86 *c = &cpu_data(cpu); struct microcode_intel *mc; + enum ucode_state ret; static int prev_rev; u32 rev; @@ -817,9 +818,8 @@ static enum ucode_state apply_microcode_intel(int cpu) */ rev = intel_get_microcode_revision(); if (rev >= mc->hdr.rev) { - uci->cpu_sig.rev = rev; - c->microcode = rev; - return UCODE_OK; + ret = UCODE_OK; + goto out; } /* @@ -848,14 +848,17 @@ static enum ucode_state apply_microcode_intel(int cpu) prev_rev = rev; } + ret = UCODE_UPDATED; + +out: uci->cpu_sig.rev = rev; - c->microcode = rev; + c->microcode = rev; /* Update boot_cpu_data's revision too, if we're on the BSP: */ if (c->cpu_index == boot_cpu_data.cpu_index) boot_cpu_data.microcode = rev; - return UCODE_UPDATED; + return ret; } static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size, -- GitLab From 0fad94cf10165084189bc160cc48f8004ea11ee8 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 16 Aug 2018 14:06:46 -0500 Subject: [PATCH 0223/2269] switchtec: Fix Spectre v1 vulnerability commit 46feb6b495f7628a6dbf36c4e6d80faf378372d4 upstream. p.port can is indirectly controlled by user-space, hence leading to a potential exploitation of the Spectre variant 1 vulnerability. This issue was detected with the help of Smatch: drivers/pci/switch/switchtec.c:912 ioctl_port_to_pff() warn: potential spectre issue 'pcfg->dsp_pff_inst_id' [r] Fix this by sanitizing p.port before using it to index pcfg->dsp_pff_inst_id Notice that given that speculation windows are large, the policy is to kill the speculation on the first load and not worry if it can be completed with a dependent load/store [1]. [1] https://marc.info/?l=linux-kernel&m=152449131114778&w=2 Signed-off-by: Gustavo A. R. Silva Signed-off-by: Bjorn Helgaas Acked-by: Logan Gunthorpe Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/pci/switch/switchtec.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/pci/switch/switchtec.c b/drivers/pci/switch/switchtec.c index af81b2dec42e9..620f5b995a129 100644 --- a/drivers/pci/switch/switchtec.c +++ b/drivers/pci/switch/switchtec.c @@ -24,6 +24,8 @@ #include #include +#include + MODULE_DESCRIPTION("Microsemi Switchtec(tm) PCIe Management Driver"); MODULE_VERSION("0.1"); MODULE_LICENSE("GPL"); @@ -1173,6 +1175,8 @@ static int ioctl_port_to_pff(struct switchtec_dev *stdev, default: if (p.port > ARRAY_SIZE(pcfg->dsp_pff_inst_id)) return -EINVAL; + p.port = array_index_nospec(p.port, + ARRAY_SIZE(pcfg->dsp_pff_inst_id) + 1); p.pff = ioread32(&pcfg->dsp_pff_inst_id[p.port - 1]); break; } -- GitLab From 64def6f35348f67c0ae99c38f5bd506fd396a1a2 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 15 Jan 2018 17:07:22 +0100 Subject: [PATCH 0224/2269] crypto: aes-generic - fix aes-generic regression on powerpc commit 6e36719fbe90213fbba9f50093fa2d4d69b0e93c upstream. My last bugfix added -Os on the command line, which unfortunately caused a build regression on powerpc in some configurations. I've done some more analysis of the original problem and found slightly different workaround that avoids this regression and also results in better performance on gcc-7.0: -fcode-hoisting is an optimization step that got added in gcc-7 and that for all gcc-7 versions causes worse performance. This disables -fcode-hoisting on all compilers that understand the option. For gcc-7.1 and 7.2 I found the same performance as my previous patch (using -Os), in gcc-7.0 it was even better. On gcc-8 I could see no change in performance from this patch. In theory, code hoisting should not be able make things better for the AES cipher, so leaving it disabled for gcc-8 only serves to simplify the Makefile change. Reported-by: kbuild test robot Link: https://www.mail-archive.com/linux-crypto@vger.kernel.org/msg30418.html Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=83356 Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=83651 Fixes: 148b974deea9 ("crypto: aes-generic - build with -Os on gcc-7+") Signed-off-by: Arnd Bergmann Signed-off-by: Herbert Xu Cc: Horia Geanta Signed-off-by: Greg Kroah-Hartman --- crypto/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crypto/Makefile b/crypto/Makefile index adaf2c63baeb6..56282e2d75ad9 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -98,7 +98,7 @@ obj-$(CONFIG_CRYPTO_TWOFISH_COMMON) += twofish_common.o obj-$(CONFIG_CRYPTO_SERPENT) += serpent_generic.o CFLAGS_serpent_generic.o := $(call cc-option,-fsched-pressure) # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=79149 obj-$(CONFIG_CRYPTO_AES) += aes_generic.o -CFLAGS_aes_generic.o := $(call cc-ifversion, -ge, 0701, -Os) # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=83356 +CFLAGS_aes_generic.o := $(call cc-option,-fno-code-hoisting) # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=83356 obj-$(CONFIG_CRYPTO_AES_TI) += aes_ti.o obj-$(CONFIG_CRYPTO_CAMELLIA) += camellia_generic.o obj-$(CONFIG_CRYPTO_CAST_COMMON) += cast_common.o -- GitLab From e5d857d5f1fc0893a2efa46a7acdec1ef67363d7 Mon Sep 17 00:00:00 2001 From: Tomas Winkler Date: Thu, 28 Jun 2018 18:13:33 +0300 Subject: [PATCH 0225/2269] tpm: separate cmd_ready/go_idle from runtime_pm commit 627448e85c766587f6fdde1ea3886d6615081c77 upstream. Fix tpm ptt initialization error: tpm tpm0: A TPM error (378) occurred get tpm pcr allocation. We cannot use go_idle cmd_ready commands via runtime_pm handles as with the introduction of localities this is no longer an optional feature, while runtime pm can be not enabled. Though cmd_ready/go_idle provides a power saving, it's also a part of TPM2 protocol and should be called explicitly. This patch exposes cmd_read/go_idle via tpm class ops and removes runtime pm support as it is not used by any driver. When calling from nested context always use both flags: TPM_TRANSMIT_UNLOCKED and TPM_TRANSMIT_RAW. Both are needed to resolve tpm spaces and locality request recursive calls to tpm_transmit(). TPM_TRANSMIT_RAW should never be used standalone as it will fail on double locking. While TPM_TRANSMIT_UNLOCKED standalone should be called from non-recursive locked contexts. New wrappers are added tpm_cmd_ready() and tpm_go_idle() to streamline tpm_try_transmit code. tpm_crb no longer needs own power saving functions and can drop using tpm_pm_suspend/resume. This patch cannot be really separated from the locality fix. Fixes: 888d867df441 (tpm: cmd_ready command can be issued only after granting locality) Cc: stable@vger.kernel.org Fixes: 888d867df441 (tpm: cmd_ready command can be issued only after granting locality) Signed-off-by: Tomas Winkler Tested-by: Jarkko Sakkinen Reviewed-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen Signed-off-by: Greg Kroah-Hartman --- drivers/char/tpm/tpm-interface.c | 50 ++++++++++++--- drivers/char/tpm/tpm.h | 12 +++- drivers/char/tpm/tpm2-space.c | 16 +++-- drivers/char/tpm/tpm_crb.c | 101 +++++++++---------------------- include/linux/tpm.h | 2 + 5 files changed, 90 insertions(+), 91 deletions(-) diff --git a/drivers/char/tpm/tpm-interface.c b/drivers/char/tpm/tpm-interface.c index 86b526b7d9900..a2070ab86c824 100644 --- a/drivers/char/tpm/tpm-interface.c +++ b/drivers/char/tpm/tpm-interface.c @@ -369,10 +369,13 @@ err_len: return -EINVAL; } -static int tpm_request_locality(struct tpm_chip *chip) +static int tpm_request_locality(struct tpm_chip *chip, unsigned int flags) { int rc; + if (flags & TPM_TRANSMIT_RAW) + return 0; + if (!chip->ops->request_locality) return 0; @@ -385,10 +388,13 @@ static int tpm_request_locality(struct tpm_chip *chip) return 0; } -static void tpm_relinquish_locality(struct tpm_chip *chip) +static void tpm_relinquish_locality(struct tpm_chip *chip, unsigned int flags) { int rc; + if (flags & TPM_TRANSMIT_RAW) + return; + if (!chip->ops->relinquish_locality) return; @@ -399,6 +405,28 @@ static void tpm_relinquish_locality(struct tpm_chip *chip) chip->locality = -1; } +static int tpm_cmd_ready(struct tpm_chip *chip, unsigned int flags) +{ + if (flags & TPM_TRANSMIT_RAW) + return 0; + + if (!chip->ops->cmd_ready) + return 0; + + return chip->ops->cmd_ready(chip); +} + +static int tpm_go_idle(struct tpm_chip *chip, unsigned int flags) +{ + if (flags & TPM_TRANSMIT_RAW) + return 0; + + if (!chip->ops->go_idle) + return 0; + + return chip->ops->go_idle(chip); +} + static ssize_t tpm_try_transmit(struct tpm_chip *chip, struct tpm_space *space, u8 *buf, size_t bufsiz, @@ -449,14 +477,15 @@ static ssize_t tpm_try_transmit(struct tpm_chip *chip, /* Store the decision as chip->locality will be changed. */ need_locality = chip->locality == -1; - if (!(flags & TPM_TRANSMIT_RAW) && need_locality) { - rc = tpm_request_locality(chip); + if (need_locality) { + rc = tpm_request_locality(chip, flags); if (rc < 0) goto out_no_locality; } - if (chip->dev.parent) - pm_runtime_get_sync(chip->dev.parent); + rc = tpm_cmd_ready(chip, flags); + if (rc) + goto out; rc = tpm2_prepare_space(chip, space, ordinal, buf); if (rc) @@ -516,13 +545,16 @@ out_recv: } rc = tpm2_commit_space(chip, space, ordinal, buf, &len); + if (rc) + dev_err(&chip->dev, "tpm2_commit_space: error %d\n", rc); out: - if (chip->dev.parent) - pm_runtime_put_sync(chip->dev.parent); + rc = tpm_go_idle(chip, flags); + if (rc) + goto out; if (need_locality) - tpm_relinquish_locality(chip); + tpm_relinquish_locality(chip, flags); out_no_locality: if (chip->ops->clk_enable != NULL) diff --git a/drivers/char/tpm/tpm.h b/drivers/char/tpm/tpm.h index b83b30a3eea5e..4bb9b4aa9b49c 100644 --- a/drivers/char/tpm/tpm.h +++ b/drivers/char/tpm/tpm.h @@ -511,9 +511,17 @@ extern const struct file_operations tpm_fops; extern const struct file_operations tpmrm_fops; extern struct idr dev_nums_idr; +/** + * enum tpm_transmit_flags + * + * @TPM_TRANSMIT_UNLOCKED: used to lock sequence of tpm_transmit calls. + * @TPM_TRANSMIT_RAW: prevent recursive calls into setup steps + * (go idle, locality,..). Always use with UNLOCKED + * as it will fail on double locking. + */ enum tpm_transmit_flags { - TPM_TRANSMIT_UNLOCKED = BIT(0), - TPM_TRANSMIT_RAW = BIT(1), + TPM_TRANSMIT_UNLOCKED = BIT(0), + TPM_TRANSMIT_RAW = BIT(1), }; ssize_t tpm_transmit(struct tpm_chip *chip, struct tpm_space *space, diff --git a/drivers/char/tpm/tpm2-space.c b/drivers/char/tpm/tpm2-space.c index d26ea7513226c..dabb2ae4e779a 100644 --- a/drivers/char/tpm/tpm2-space.c +++ b/drivers/char/tpm/tpm2-space.c @@ -39,7 +39,8 @@ static void tpm2_flush_sessions(struct tpm_chip *chip, struct tpm_space *space) for (i = 0; i < ARRAY_SIZE(space->session_tbl); i++) { if (space->session_tbl[i]) tpm2_flush_context_cmd(chip, space->session_tbl[i], - TPM_TRANSMIT_UNLOCKED); + TPM_TRANSMIT_UNLOCKED | + TPM_TRANSMIT_RAW); } } @@ -84,7 +85,7 @@ static int tpm2_load_context(struct tpm_chip *chip, u8 *buf, tpm_buf_append(&tbuf, &buf[*offset], body_size); rc = tpm_transmit_cmd(chip, NULL, tbuf.data, PAGE_SIZE, 4, - TPM_TRANSMIT_UNLOCKED, NULL); + TPM_TRANSMIT_UNLOCKED | TPM_TRANSMIT_RAW, NULL); if (rc < 0) { dev_warn(&chip->dev, "%s: failed with a system error %d\n", __func__, rc); @@ -133,7 +134,7 @@ static int tpm2_save_context(struct tpm_chip *chip, u32 handle, u8 *buf, tpm_buf_append_u32(&tbuf, handle); rc = tpm_transmit_cmd(chip, NULL, tbuf.data, PAGE_SIZE, 0, - TPM_TRANSMIT_UNLOCKED, NULL); + TPM_TRANSMIT_UNLOCKED | TPM_TRANSMIT_RAW, NULL); if (rc < 0) { dev_warn(&chip->dev, "%s: failed with a system error %d\n", __func__, rc); @@ -170,7 +171,8 @@ static void tpm2_flush_space(struct tpm_chip *chip) for (i = 0; i < ARRAY_SIZE(space->context_tbl); i++) if (space->context_tbl[i] && ~space->context_tbl[i]) tpm2_flush_context_cmd(chip, space->context_tbl[i], - TPM_TRANSMIT_UNLOCKED); + TPM_TRANSMIT_UNLOCKED | + TPM_TRANSMIT_RAW); tpm2_flush_sessions(chip, space); } @@ -377,7 +379,8 @@ static int tpm2_map_response_header(struct tpm_chip *chip, u32 cc, u8 *rsp, return 0; out_no_slots: - tpm2_flush_context_cmd(chip, phandle, TPM_TRANSMIT_UNLOCKED); + tpm2_flush_context_cmd(chip, phandle, + TPM_TRANSMIT_UNLOCKED | TPM_TRANSMIT_RAW); dev_warn(&chip->dev, "%s: out of slots for 0x%08X\n", __func__, phandle); return -ENOMEM; @@ -465,7 +468,8 @@ static int tpm2_save_space(struct tpm_chip *chip) return rc; tpm2_flush_context_cmd(chip, space->context_tbl[i], - TPM_TRANSMIT_UNLOCKED); + TPM_TRANSMIT_UNLOCKED | + TPM_TRANSMIT_RAW); space->context_tbl[i] = ~0; } diff --git a/drivers/char/tpm/tpm_crb.c b/drivers/char/tpm/tpm_crb.c index bb756ad7897e7..5c7ce5aaaf6fb 100644 --- a/drivers/char/tpm/tpm_crb.c +++ b/drivers/char/tpm/tpm_crb.c @@ -137,7 +137,7 @@ static bool crb_wait_for_reg_32(u32 __iomem *reg, u32 mask, u32 value, } /** - * crb_go_idle - request tpm crb device to go the idle state + * __crb_go_idle - request tpm crb device to go the idle state * * @dev: crb device * @priv: crb private data @@ -151,7 +151,7 @@ static bool crb_wait_for_reg_32(u32 __iomem *reg, u32 mask, u32 value, * * Return: 0 always */ -static int crb_go_idle(struct device *dev, struct crb_priv *priv) +static int __crb_go_idle(struct device *dev, struct crb_priv *priv) { if ((priv->flags & CRB_FL_ACPI_START) || (priv->flags & CRB_FL_CRB_SMC_START)) @@ -166,11 +166,20 @@ static int crb_go_idle(struct device *dev, struct crb_priv *priv) dev_warn(dev, "goIdle timed out\n"); return -ETIME; } + return 0; } +static int crb_go_idle(struct tpm_chip *chip) +{ + struct device *dev = &chip->dev; + struct crb_priv *priv = dev_get_drvdata(dev); + + return __crb_go_idle(dev, priv); +} + /** - * crb_cmd_ready - request tpm crb device to enter ready state + * __crb_cmd_ready - request tpm crb device to enter ready state * * @dev: crb device * @priv: crb private data @@ -183,7 +192,7 @@ static int crb_go_idle(struct device *dev, struct crb_priv *priv) * * Return: 0 on success -ETIME on timeout; */ -static int crb_cmd_ready(struct device *dev, struct crb_priv *priv) +static int __crb_cmd_ready(struct device *dev, struct crb_priv *priv) { if ((priv->flags & CRB_FL_ACPI_START) || (priv->flags & CRB_FL_CRB_SMC_START)) @@ -201,6 +210,14 @@ static int crb_cmd_ready(struct device *dev, struct crb_priv *priv) return 0; } +static int crb_cmd_ready(struct tpm_chip *chip) +{ + struct device *dev = &chip->dev; + struct crb_priv *priv = dev_get_drvdata(dev); + + return __crb_cmd_ready(dev, priv); +} + static int __crb_request_locality(struct device *dev, struct crb_priv *priv, int loc) { @@ -393,6 +410,8 @@ static const struct tpm_class_ops tpm_crb = { .send = crb_send, .cancel = crb_cancel, .req_canceled = crb_req_canceled, + .go_idle = crb_go_idle, + .cmd_ready = crb_cmd_ready, .request_locality = crb_request_locality, .relinquish_locality = crb_relinquish_locality, .req_complete_mask = CRB_DRV_STS_COMPLETE, @@ -508,7 +527,7 @@ static int crb_map_io(struct acpi_device *device, struct crb_priv *priv, * PTT HW bug w/a: wake up the device to access * possibly not retained registers. */ - ret = crb_cmd_ready(dev, priv); + ret = __crb_cmd_ready(dev, priv); if (ret) return ret; @@ -553,7 +572,7 @@ out: if (!ret) priv->cmd_size = cmd_size; - crb_go_idle(dev, priv); + __crb_go_idle(dev, priv); __crb_relinquish_locality(dev, priv, 0); @@ -624,32 +643,7 @@ static int crb_acpi_add(struct acpi_device *device) chip->acpi_dev_handle = device->handle; chip->flags = TPM_CHIP_FLAG_TPM2; - rc = __crb_request_locality(dev, priv, 0); - if (rc) - return rc; - - rc = crb_cmd_ready(dev, priv); - if (rc) - goto out; - - pm_runtime_get_noresume(dev); - pm_runtime_set_active(dev); - pm_runtime_enable(dev); - - rc = tpm_chip_register(chip); - if (rc) { - crb_go_idle(dev, priv); - pm_runtime_put_noidle(dev); - pm_runtime_disable(dev); - goto out; - } - - pm_runtime_put_sync(dev); - -out: - __crb_relinquish_locality(dev, priv, 0); - - return rc; + return tpm_chip_register(chip); } static int crb_acpi_remove(struct acpi_device *device) @@ -659,52 +653,11 @@ static int crb_acpi_remove(struct acpi_device *device) tpm_chip_unregister(chip); - pm_runtime_disable(dev); - return 0; } -static int __maybe_unused crb_pm_runtime_suspend(struct device *dev) -{ - struct tpm_chip *chip = dev_get_drvdata(dev); - struct crb_priv *priv = dev_get_drvdata(&chip->dev); - - return crb_go_idle(dev, priv); -} - -static int __maybe_unused crb_pm_runtime_resume(struct device *dev) -{ - struct tpm_chip *chip = dev_get_drvdata(dev); - struct crb_priv *priv = dev_get_drvdata(&chip->dev); - - return crb_cmd_ready(dev, priv); -} - -static int __maybe_unused crb_pm_suspend(struct device *dev) -{ - int ret; - - ret = tpm_pm_suspend(dev); - if (ret) - return ret; - - return crb_pm_runtime_suspend(dev); -} - -static int __maybe_unused crb_pm_resume(struct device *dev) -{ - int ret; - - ret = crb_pm_runtime_resume(dev); - if (ret) - return ret; - - return tpm_pm_resume(dev); -} - static const struct dev_pm_ops crb_pm = { - SET_SYSTEM_SLEEP_PM_OPS(crb_pm_suspend, crb_pm_resume) - SET_RUNTIME_PM_OPS(crb_pm_runtime_suspend, crb_pm_runtime_resume, NULL) + SET_SYSTEM_SLEEP_PM_OPS(tpm_pm_suspend, tpm_pm_resume) }; static const struct acpi_device_id crb_device_ids[] = { diff --git a/include/linux/tpm.h b/include/linux/tpm.h index 2a6c3d96b31f0..7f7b29f86c599 100644 --- a/include/linux/tpm.h +++ b/include/linux/tpm.h @@ -48,6 +48,8 @@ struct tpm_class_ops { u8 (*status) (struct tpm_chip *chip); bool (*update_timeouts)(struct tpm_chip *chip, unsigned long *timeout_cap); + int (*go_idle)(struct tpm_chip *chip); + int (*cmd_ready)(struct tpm_chip *chip); int (*request_locality)(struct tpm_chip *chip, int loc); int (*relinquish_locality)(struct tpm_chip *chip, int loc); void (*clk_enable)(struct tpm_chip *chip, bool value); -- GitLab From 0b2d28449e68ec90b811a36a3f5115995238978f Mon Sep 17 00:00:00 2001 From: Alexey Brodkin Date: Thu, 2 Aug 2018 11:50:16 +0300 Subject: [PATCH 0226/2269] ARC: [plat-axs*]: Enable SWAP commit c83532fb0fe053d2e43e9387354cb1b52ba26427 upstream. SWAP support on ARC was fixed earlier by commit 6e3761145a9b ("ARC: Fix CONFIG_SWAP") so now we may safely enable it on platforms that have external media like USB and SD-card. Note: it was already allowed for HSDK Signed-off-by: Alexey Brodkin Cc: stable@vger.kernel.org # 6e3761145a9b: ARC: Fix CONFIG_SWAP Signed-off-by: Vineet Gupta Signed-off-by: Greg Kroah-Hartman --- arch/arc/configs/axs101_defconfig | 1 - arch/arc/configs/axs103_defconfig | 1 - arch/arc/configs/axs103_smp_defconfig | 1 - 3 files changed, 3 deletions(-) diff --git a/arch/arc/configs/axs101_defconfig b/arch/arc/configs/axs101_defconfig index a8242362e5519..ece78630d7112 100644 --- a/arch/arc/configs/axs101_defconfig +++ b/arch/arc/configs/axs101_defconfig @@ -1,5 +1,4 @@ CONFIG_DEFAULT_HOSTNAME="ARCLinux" -# CONFIG_SWAP is not set CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y # CONFIG_CROSS_MEMORY_ATTACH is not set diff --git a/arch/arc/configs/axs103_defconfig b/arch/arc/configs/axs103_defconfig index ef3c31cd77378..240c9251a7d4a 100644 --- a/arch/arc/configs/axs103_defconfig +++ b/arch/arc/configs/axs103_defconfig @@ -1,5 +1,4 @@ CONFIG_DEFAULT_HOSTNAME="ARCLinux" -# CONFIG_SWAP is not set CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y # CONFIG_CROSS_MEMORY_ATTACH is not set diff --git a/arch/arc/configs/axs103_smp_defconfig b/arch/arc/configs/axs103_smp_defconfig index 1757ac9cecbc1..af54b96abee04 100644 --- a/arch/arc/configs/axs103_smp_defconfig +++ b/arch/arc/configs/axs103_smp_defconfig @@ -1,5 +1,4 @@ CONFIG_DEFAULT_HOSTNAME="ARCLinux" -# CONFIG_SWAP is not set CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y # CONFIG_CROSS_MEMORY_ATTACH is not set -- GitLab From 96e5b8cc3cfd367fd85d6f720eed456645dacade Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 2 Aug 2018 11:42:22 +0300 Subject: [PATCH 0227/2269] misc: mic: SCIF Fix scif_get_new_port() error handling [ Upstream commit a39284ae9d2ad09975c8ae33f1bd0f05fbfbf6ee ] There are only 2 callers of scif_get_new_port() and both appear to get the error handling wrong. Both treat zero returns as error, but it actually returns negative error codes and >= 0 on success. Fixes: e9089f43c9a7 ("misc: mic: SCIF open close bind and listen APIs") Signed-off-by: Dan Carpenter Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mic/scif/scif_api.c | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/drivers/misc/mic/scif/scif_api.c b/drivers/misc/mic/scif/scif_api.c index ddc9e4b08b5cd..56efa9d18a9af 100644 --- a/drivers/misc/mic/scif/scif_api.c +++ b/drivers/misc/mic/scif/scif_api.c @@ -370,11 +370,10 @@ int scif_bind(scif_epd_t epd, u16 pn) goto scif_bind_exit; } } else { - pn = scif_get_new_port(); - if (!pn) { - ret = -ENOSPC; + ret = scif_get_new_port(); + if (ret < 0) goto scif_bind_exit; - } + pn = ret; } ep->state = SCIFEP_BOUND; @@ -648,13 +647,12 @@ int __scif_connect(scif_epd_t epd, struct scif_port_id *dst, bool non_block) err = -EISCONN; break; case SCIFEP_UNBOUND: - ep->port.port = scif_get_new_port(); - if (!ep->port.port) { - err = -ENOSPC; - } else { - ep->port.node = scif_info.nodeid; - ep->conn_async_state = ASYNC_CONN_IDLE; - } + err = scif_get_new_port(); + if (err < 0) + break; + ep->port.port = err; + ep->port.node = scif_info.nodeid; + ep->conn_async_state = ASYNC_CONN_IDLE; /* Fall through */ case SCIFEP_BOUND: /* -- GitLab From 5ffdd121d0c4de83a13e42be1e52203e7582dde1 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Sat, 4 Aug 2018 14:20:40 -0700 Subject: [PATCH 0228/2269] ethtool: Remove trailing semicolon for static inline [ Upstream commit d89d41556141a527030a15233135ba622ba3350d ] Android's header sanitization tool chokes on static inline functions having a trailing semicolon, leading to an incorrectly parsed header file. While the tool should obviously be fixed, also fix the header files for the two affected functions: ethtool_get_flow_spec_ring() and ethtool_get_flow_spec_ring_vf(). Fixes: 8cf6f497de40 ("ethtool: Add helper routines to pass vf to rx_flow_spec") Reporetd-by: Blair Prescott Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/ethtool.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index ac71559314e73..9eae13eefc49e 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -898,13 +898,13 @@ struct ethtool_rx_flow_spec { static inline __u64 ethtool_get_flow_spec_ring(__u64 ring_cookie) { return ETHTOOL_RX_FLOW_SPEC_RING & ring_cookie; -}; +} static inline __u64 ethtool_get_flow_spec_ring_vf(__u64 ring_cookie) { return (ETHTOOL_RX_FLOW_SPEC_RING_VF & ring_cookie) >> ETHTOOL_RX_FLOW_SPEC_RING_VF_OFF; -}; +} /** * struct ethtool_rxnfc - command to get or set RX flow classification rules -- GitLab From 0455f5d63b3e8ddeaaf20d026e4d2b15e3263e0a Mon Sep 17 00:00:00 2001 From: Jae Hyun Yoo Date: Tue, 24 Jul 2018 13:36:15 -0700 Subject: [PATCH 0229/2269] i2c: aspeed: Add an explicit type casting for *get_clk_reg_val [ Upstream commit 5799c4b2f1dbc0166d9b1d94443deaafc6e7a070 ] This commit fixes this sparse warning: drivers/i2c/busses/i2c-aspeed.c:875:38: warning: incorrect type in assignment (different modifiers) drivers/i2c/busses/i2c-aspeed.c:875:38: expected unsigned int ( *get_clk_reg_val )( ... ) drivers/i2c/busses/i2c-aspeed.c:875:38: got void const *const data Reported-by: Wolfram Sang Signed-off-by: Jae Hyun Yoo Reviewed-by: Brendan Higgins Signed-off-by: Wolfram Sang Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/busses/i2c-aspeed.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-aspeed.c b/drivers/i2c/busses/i2c-aspeed.c index 284f8670dbeb7..2feae9a421e6c 100644 --- a/drivers/i2c/busses/i2c-aspeed.c +++ b/drivers/i2c/busses/i2c-aspeed.c @@ -859,7 +859,7 @@ static int aspeed_i2c_probe_bus(struct platform_device *pdev) if (!match) bus->get_clk_reg_val = aspeed_i2c_24xx_get_clk_reg_val; else - bus->get_clk_reg_val = match->data; + bus->get_clk_reg_val = (u32 (*)(u32))match->data; /* Initialize the I2C adapter */ spin_lock_init(&bus->lock); -- GitLab From fb281ed2e4dc876f749bad95128678d9b724d933 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Sat, 4 Aug 2018 23:40:26 +0300 Subject: [PATCH 0230/2269] Bluetooth: h5: Fix missing dependency on BT_HCIUART_SERDEV MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 6c3711ec64fd23a9abc8aaf59a9429569a6282df ] This driver was recently updated to use serdev, so add the appropriate dependency. Without this one can get compiler warnings like this if CONFIG_SERIAL_DEV_BUS is not enabled: CC [M] drivers/bluetooth/hci_h5.o drivers/bluetooth/hci_h5.c:934:36: warning: ‘h5_serdev_driver’ defined but not used [-Wunused-variable] static struct serdev_device_driver h5_serdev_driver = { ^~~~~~~~~~~~~~~~ Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/bluetooth/Kconfig b/drivers/bluetooth/Kconfig index b33c8d6eb8c75..500d4d632e486 100644 --- a/drivers/bluetooth/Kconfig +++ b/drivers/bluetooth/Kconfig @@ -146,6 +146,7 @@ config BT_HCIUART_LL config BT_HCIUART_3WIRE bool "Three-wire UART (H5) protocol support" depends on BT_HCIUART + depends on BT_HCIUART_SERDEV help The HCI Three-wire UART Transport Layer makes it possible to user the Bluetooth HCI over a serial port interface. The HCI -- GitLab From 16aa222d2293a9af37e33b350042ab2997a30dac Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Thu, 2 Aug 2018 14:11:44 +0300 Subject: [PATCH 0231/2269] gpio: tegra: Move driver registration to subsys_init level [ Upstream commit 40b25bce0adbe641a744d1291bc0e51fb7f3c3d8 ] There is a bug in regards to deferred probing within the drivers core that causes GPIO-driver to suspend after its users. The bug appears if GPIO-driver probe is getting deferred, which happens after introducing dependency on PINCTRL-driver for the GPIO-driver by defining "gpio-ranges" property in device-tree. The bug in the drivers core is old (more than 4 years now) and is well known, unfortunately there is no easy fix for it. The good news is that we can workaround the deferred probe issue by changing GPIO / PINCTRL drivers registration order and hence by moving PINCTRL driver registration to the arch_init level and GPIO to the subsys_init. Signed-off-by: Dmitry Osipenko Acked-by: Stefan Agner Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpio/gpio-tegra.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-tegra.c b/drivers/gpio/gpio-tegra.c index fbaf974277dff..1eb857e2f62f1 100644 --- a/drivers/gpio/gpio-tegra.c +++ b/drivers/gpio/gpio-tegra.c @@ -728,4 +728,4 @@ static int __init tegra_gpio_init(void) { return platform_driver_register(&tegra_gpio_driver); } -postcore_initcall(tegra_gpio_init); +subsys_initcall(tegra_gpio_init); -- GitLab From 274977d99c195f662ebd48142ac65c38708a7928 Mon Sep 17 00:00:00 2001 From: Reza Arbab Date: Thu, 2 Aug 2018 23:03:36 -0500 Subject: [PATCH 0232/2269] powerpc/powernv: Fix concurrency issue with npu->mmio_atsd_usage [ Upstream commit 9eab9901b015f489199105c470de1ffc337cfabb ] We've encountered a performance issue when multiple processors stress {get,put}_mmio_atsd_reg(). These functions contend for mmio_atsd_usage, an unsigned long used as a bitmask. The accesses to mmio_atsd_usage are done using test_and_set_bit_lock() and clear_bit_unlock(). As implemented, both of these will require a (successful) stwcx to that same cache line. What we end up with is thread A, attempting to unlock, being slowed by other threads repeatedly attempting to lock. A's stwcx instructions fail and retry because the memory reservation is lost every time a different thread beats it to the punch. There may be a long-term way to fix this at a larger scale, but for now resolve the immediate problem by gating our call to test_and_set_bit_lock() with one to test_bit(), which is obviously implemented without using a store. Fixes: 1ab66d1fbada ("powerpc/powernv: Introduce address translation services for Nvlink2") Signed-off-by: Reza Arbab Acked-by: Alistair Popple Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/platforms/powernv/npu-dma.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c index 63f007f2de7eb..4b95bdde22aa7 100644 --- a/arch/powerpc/platforms/powernv/npu-dma.c +++ b/arch/powerpc/platforms/powernv/npu-dma.c @@ -427,8 +427,9 @@ static int get_mmio_atsd_reg(struct npu *npu) int i; for (i = 0; i < npu->mmio_atsd_count; i++) { - if (!test_and_set_bit_lock(i, &npu->mmio_atsd_usage)) - return i; + if (!test_bit(i, &npu->mmio_atsd_usage)) + if (!test_and_set_bit_lock(i, &npu->mmio_atsd_usage)) + return i; } return -ENOSPC; -- GitLab From 77e120a9c64afb52b3ca199d23ef45b4c840dccc Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Thu, 2 Aug 2018 15:47:10 -0700 Subject: [PATCH 0233/2269] selftests/bpf: fix a typo in map in map test [ Upstream commit 0069fb854364da79fd99236ea620affc8e1152d5 ] Commit fbeb1603bf4e ("bpf: verifier: MOV64 don't mark dst reg unbounded") revealed a typo in commit fb30d4b71214 ("bpf: Add tests for map-in-map"): BPF_MOV64_REG(BPF_REG_0, 0) was used instead of BPF_MOV64_IMM(BPF_REG_0, 0). I've noticed the problem by running bpf kselftests. Fixes: fb30d4b71214 ("bpf: Add tests for map-in-map") Signed-off-by: Roman Gushchin Cc: Martin KaFai Lau Cc: Arthur Fabre Cc: Daniel Borkmann Cc: Alexei Starovoitov Acked-by: Martin KaFai Lau Signed-off-by: Daniel Borkmann Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/bpf/test_verifier.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 9167ee9763147..041dbbb30ff0a 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -5895,7 +5895,7 @@ static struct bpf_test tests[] = { BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_MOV64_REG(BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, .fixup_map_in_map = { 3 }, @@ -5918,7 +5918,7 @@ static struct bpf_test tests[] = { BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_MOV64_REG(BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, .fixup_map_in_map = { 3 }, @@ -5941,7 +5941,7 @@ static struct bpf_test tests[] = { BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_MOV64_REG(BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, .fixup_map_in_map = { 3 }, -- GitLab From cbd5e67820a998cfb7f3d7a1b995d0bc2a1f00f7 Mon Sep 17 00:00:00 2001 From: Anton Vasilyev Date: Fri, 27 Jul 2018 07:52:20 -0400 Subject: [PATCH 0234/2269] media: davinci: vpif_display: Mix memory leak on probe error path [ Upstream commit 61e641f36ed81ae473177c085f0bfd83ad3b55ed ] If vpif_probe() fails on v4l2_device_register() then memory allocated at initialize_vpif() for global vpif_obj.dev[i] become unreleased. The patch adds deallocation of vpif_obj.dev[i] on the error path and removes duplicated check on platform_data presence. Found by Linux Driver Verification project (linuxtesting.org). Signed-off-by: Anton Vasilyev Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/media/platform/davinci/vpif_display.c | 24 ++++++++++++------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/drivers/media/platform/davinci/vpif_display.c b/drivers/media/platform/davinci/vpif_display.c index 56fe4e5b396e1..4a65861433d68 100644 --- a/drivers/media/platform/davinci/vpif_display.c +++ b/drivers/media/platform/davinci/vpif_display.c @@ -1114,6 +1114,14 @@ vpif_init_free_channel_objects: return err; } +static void free_vpif_objs(void) +{ + int i; + + for (i = 0; i < VPIF_DISPLAY_MAX_DEVICES; i++) + kfree(vpif_obj.dev[i]); +} + static int vpif_async_bound(struct v4l2_async_notifier *notifier, struct v4l2_subdev *subdev, struct v4l2_async_subdev *asd) @@ -1250,11 +1258,6 @@ static __init int vpif_probe(struct platform_device *pdev) return -EINVAL; } - if (!pdev->dev.platform_data) { - dev_warn(&pdev->dev, "Missing platform data. Giving up.\n"); - return -EINVAL; - } - vpif_dev = &pdev->dev; err = initialize_vpif(); @@ -1266,7 +1269,7 @@ static __init int vpif_probe(struct platform_device *pdev) err = v4l2_device_register(vpif_dev, &vpif_obj.v4l2_dev); if (err) { v4l2_err(vpif_dev->driver, "Error registering v4l2 device\n"); - return err; + goto vpif_free; } while ((res = platform_get_resource(pdev, IORESOURCE_IRQ, res_idx))) { @@ -1309,7 +1312,10 @@ static __init int vpif_probe(struct platform_device *pdev) if (vpif_obj.sd[i]) vpif_obj.sd[i]->grp_id = 1 << i; } - vpif_probe_complete(); + err = vpif_probe_complete(); + if (err) { + goto probe_subdev_out; + } } else { vpif_obj.notifier.subdevs = vpif_obj.config->asd; vpif_obj.notifier.num_subdevs = vpif_obj.config->asd_sizes[0]; @@ -1330,6 +1336,8 @@ probe_subdev_out: kfree(vpif_obj.sd); vpif_unregister: v4l2_device_unregister(&vpif_obj.v4l2_dev); +vpif_free: + free_vpif_objs(); return err; } @@ -1351,8 +1359,8 @@ static int vpif_remove(struct platform_device *device) ch = vpif_obj.dev[i]; /* Unregister video device */ video_unregister_device(&ch->video_dev); - kfree(vpif_obj.dev[i]); } + free_vpif_objs(); return 0; } -- GitLab From 6b7c7186c210df8fd3f2bd6f074715f4ac07979b Mon Sep 17 00:00:00 2001 From: Anton Vasilyev Date: Mon, 23 Jul 2018 13:04:54 -0400 Subject: [PATCH 0235/2269] media: dw2102: Fix memleak on sequence of probes [ Upstream commit 299c7007e93645067e1d2743f4e50156de78c4ff ] Each call to dw2102_probe() allocates memory by kmemdup for structures p1100, s660, p7500 and s421, but there is no their deallocation. dvb_usb_device_init() copies the corresponding structure into dvb_usb_device->props, so there is no use of original structure after dvb_usb_device_init(). The patch moves structures from global scope to local and adds their deallocation. Found by Linux Driver Verification project (linuxtesting.org). Signed-off-by: Anton Vasilyev Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/media/usb/dvb-usb/dw2102.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/drivers/media/usb/dvb-usb/dw2102.c b/drivers/media/usb/dvb-usb/dw2102.c index b421329b21fae..3d09e1c879217 100644 --- a/drivers/media/usb/dvb-usb/dw2102.c +++ b/drivers/media/usb/dvb-usb/dw2102.c @@ -2103,14 +2103,12 @@ static struct dvb_usb_device_properties s6x0_properties = { } }; -static struct dvb_usb_device_properties *p1100; static const struct dvb_usb_device_description d1100 = { "Prof 1100 USB ", {&dw2102_table[PROF_1100], NULL}, {NULL}, }; -static struct dvb_usb_device_properties *s660; static const struct dvb_usb_device_description d660 = { "TeVii S660 USB", {&dw2102_table[TEVII_S660], NULL}, @@ -2129,14 +2127,12 @@ static const struct dvb_usb_device_description d480_2 = { {NULL}, }; -static struct dvb_usb_device_properties *p7500; static const struct dvb_usb_device_description d7500 = { "Prof 7500 USB DVB-S2", {&dw2102_table[PROF_7500], NULL}, {NULL}, }; -static struct dvb_usb_device_properties *s421; static const struct dvb_usb_device_description d421 = { "TeVii S421 PCI", {&dw2102_table[TEVII_S421], NULL}, @@ -2336,6 +2332,11 @@ static int dw2102_probe(struct usb_interface *intf, const struct usb_device_id *id) { int retval = -ENOMEM; + struct dvb_usb_device_properties *p1100; + struct dvb_usb_device_properties *s660; + struct dvb_usb_device_properties *p7500; + struct dvb_usb_device_properties *s421; + p1100 = kmemdup(&s6x0_properties, sizeof(struct dvb_usb_device_properties), GFP_KERNEL); if (!p1100) @@ -2404,8 +2405,16 @@ static int dw2102_probe(struct usb_interface *intf, 0 == dvb_usb_device_init(intf, &t220_properties, THIS_MODULE, NULL, adapter_nr) || 0 == dvb_usb_device_init(intf, &tt_s2_4600_properties, - THIS_MODULE, NULL, adapter_nr)) + THIS_MODULE, NULL, adapter_nr)) { + + /* clean up copied properties */ + kfree(s421); + kfree(p7500); + kfree(s660); + kfree(p1100); + return 0; + } retval = -ENODEV; kfree(s421); -- GitLab From 47a6917f4b7220cbabe9d9e11219c589abd8902d Mon Sep 17 00:00:00 2001 From: Arun Parameswaran Date: Wed, 1 Aug 2018 17:53:47 -0700 Subject: [PATCH 0236/2269] net: phy: Fix the register offsets in Broadcom iProc mdio mux driver [ Upstream commit 77fefa93bfebe4df44f154f2aa5938e32630d0bf ] Modify the register offsets in the Broadcom iProc mdio mux to start from the top of the register address space. Earlier, the base address pointed to the end of the block's register space. The base address will now point to the start of the mdio's address space. The offsets have been fixed to match this. Signed-off-by: Arun Parameswaran Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/phy/mdio-mux-bcm-iproc.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/drivers/net/phy/mdio-mux-bcm-iproc.c b/drivers/net/phy/mdio-mux-bcm-iproc.c index 0c5b68e7da51a..9b31670548433 100644 --- a/drivers/net/phy/mdio-mux-bcm-iproc.c +++ b/drivers/net/phy/mdio-mux-bcm-iproc.c @@ -22,7 +22,7 @@ #include #include -#define MDIO_PARAM_OFFSET 0x00 +#define MDIO_PARAM_OFFSET 0x23c #define MDIO_PARAM_MIIM_CYCLE 29 #define MDIO_PARAM_INTERNAL_SEL 25 #define MDIO_PARAM_BUS_ID 22 @@ -30,20 +30,22 @@ #define MDIO_PARAM_PHY_ID 16 #define MDIO_PARAM_PHY_DATA 0 -#define MDIO_READ_OFFSET 0x04 +#define MDIO_READ_OFFSET 0x240 #define MDIO_READ_DATA_MASK 0xffff -#define MDIO_ADDR_OFFSET 0x08 +#define MDIO_ADDR_OFFSET 0x244 -#define MDIO_CTRL_OFFSET 0x0C +#define MDIO_CTRL_OFFSET 0x248 #define MDIO_CTRL_WRITE_OP 0x1 #define MDIO_CTRL_READ_OP 0x2 -#define MDIO_STAT_OFFSET 0x10 +#define MDIO_STAT_OFFSET 0x24c #define MDIO_STAT_DONE 1 #define BUS_MAX_ADDR 32 #define EXT_BUS_START_ADDR 16 +#define MDIO_REG_ADDR_SPACE_SIZE 0x250 + struct iproc_mdiomux_desc { void *mux_handle; void __iomem *base; @@ -169,6 +171,14 @@ static int mdio_mux_iproc_probe(struct platform_device *pdev) md->dev = &pdev->dev; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (res->start & 0xfff) { + /* For backward compatibility in case the + * base address is specified with an offset. + */ + dev_info(&pdev->dev, "fix base address in dt-blob\n"); + res->start &= ~0xfff; + res->end = res->start + MDIO_REG_ADDR_SPACE_SIZE - 1; + } md->base = devm_ioremap_resource(&pdev->dev, res); if (IS_ERR(md->base)) { dev_err(&pdev->dev, "failed to ioremap register\n"); -- GitLab From 3ddbcd49bbb3e50145a9fdefb3bfd6ca6a12c353 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 2 Aug 2018 18:23:26 +0800 Subject: [PATCH 0237/2269] blk-mq: fix updating tags depth [ Upstream commit 75d6e175fc511e95ae3eb8f708680133bc211ed3 ] The passed 'nr' from userspace represents the total depth, meantime inside 'struct blk_mq_tags', 'nr_tags' stores the total tag depth, and 'nr_reserved_tags' stores the reserved part. There are two issues in blk_mq_tag_update_depth() now: 1) for growing tags, we should have used the passed 'nr', and keep the number of reserved tags not changed. 2) the passed 'nr' should have been used for checking against 'tags->nr_tags', instead of number of the normal part. This patch fixes the above two cases, and avoids kernel crash caused by wrong resizing sbitmap queue. Cc: "Ewan D. Milne" Cc: Christoph Hellwig Cc: Bart Van Assche Cc: Omar Sandoval Tested by: Marco Patalano Signed-off-by: Ming Lei Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- block/blk-mq-tag.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index 6714507aa6c75..3d2ab65d2dd15 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -416,8 +416,6 @@ int blk_mq_tag_update_depth(struct blk_mq_hw_ctx *hctx, if (tdepth <= tags->nr_reserved_tags) return -EINVAL; - tdepth -= tags->nr_reserved_tags; - /* * If we are allowed to grow beyond the original size, allocate * a new set of tags before freeing the old one. @@ -437,7 +435,8 @@ int blk_mq_tag_update_depth(struct blk_mq_hw_ctx *hctx, if (tdepth > 16 * BLKDEV_MAX_RQ) return -EINVAL; - new = blk_mq_alloc_rq_map(set, hctx->queue_num, tdepth, 0); + new = blk_mq_alloc_rq_map(set, hctx->queue_num, tdepth, + tags->nr_reserved_tags); if (!new) return -ENOMEM; ret = blk_mq_alloc_rqs(set, new, hctx->queue_num, tdepth); @@ -454,7 +453,8 @@ int blk_mq_tag_update_depth(struct blk_mq_hw_ctx *hctx, * Don't need (or can't) update reserved tags here, they * remain static and should never need resizing. */ - sbitmap_queue_resize(&tags->bitmap_tags, tdepth); + sbitmap_queue_resize(&tags->bitmap_tags, + tdepth - tags->nr_reserved_tags); } return 0; -- GitLab From 1f6324f4ea5b496fcb4d20350956b808ad43b977 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Thu, 2 Aug 2018 12:12:20 -0500 Subject: [PATCH 0238/2269] scsi: target: fix __transport_register_session locking [ Upstream commit 6a64f6e1591322beb8ce16e952a53582caf2a15c ] When __transport_register_session is called from transport_register_session irqs will already have been disabled, so we do not want the unlock irq call to enable them until the higher level has done the final spin_unlock_irqrestore/ spin_unlock_irq. This has __transport_register_session use the save/restore call. Signed-off-by: Mike Christie Reviewed-by: Bart Van Assche Reviewed-by: Christoph Hellwig Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/target/target_core_transport.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index e6d51135d1055..0d0be7d8b9d64 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -317,6 +317,7 @@ void __transport_register_session( { const struct target_core_fabric_ops *tfo = se_tpg->se_tpg_tfo; unsigned char buf[PR_REG_ISID_LEN]; + unsigned long flags; se_sess->se_tpg = se_tpg; se_sess->fabric_sess_ptr = fabric_sess_ptr; @@ -353,7 +354,7 @@ void __transport_register_session( se_sess->sess_bin_isid = get_unaligned_be64(&buf[0]); } - spin_lock_irq(&se_nacl->nacl_sess_lock); + spin_lock_irqsave(&se_nacl->nacl_sess_lock, flags); /* * The se_nacl->nacl_sess pointer will be set to the * last active I_T Nexus for each struct se_node_acl. @@ -362,7 +363,7 @@ void __transport_register_session( list_add_tail(&se_sess->sess_acl_list, &se_nacl->acl_sess_list); - spin_unlock_irq(&se_nacl->nacl_sess_lock); + spin_unlock_irqrestore(&se_nacl->nacl_sess_lock, flags); } list_add_tail(&se_sess->sess_list, &se_tpg->tpg_sess_list); -- GitLab From d1060bfcdc72353a48530674d344466eea7a0946 Mon Sep 17 00:00:00 2001 From: BingJing Chang Date: Wed, 1 Aug 2018 17:08:36 +0800 Subject: [PATCH 0239/2269] md/raid5: fix data corruption of replacements after originals dropped [ Upstream commit d63e2fc804c46e50eee825c5d3a7228e07048b47 ] During raid5 replacement, the stripes can be marked with R5_NeedReplace flag. Data can be read from being-replaced devices and written to replacing spares without reading all other devices. (It's 'replace' mode. s.replacing = 1) If a being-replaced device is dropped, the replacement progress will be interrupted and resumed with pure recovery mode. However, existing stripes before being interrupted cannot read from the dropped device anymore. It prints lots of WARN_ON messages. And it results in data corruption because existing stripes write problematic data into its replacement device and update the progress. \# Erase disks (1MB + 2GB) dd if=/dev/zero of=/dev/sda bs=1MB count=2049 dd if=/dev/zero of=/dev/sdb bs=1MB count=2049 dd if=/dev/zero of=/dev/sdc bs=1MB count=2049 dd if=/dev/zero of=/dev/sdd bs=1MB count=2049 mdadm -C /dev/md0 -amd -R -l5 -n3 -x0 /dev/sd[abc] -z 2097152 \# Ensure array stores non-zero data dd if=/root/data_4GB.iso of=/dev/md0 bs=1MB \# Start replacement mdadm /dev/md0 -a /dev/sdd mdadm /dev/md0 --replace /dev/sda Then, Hot-plug out /dev/sda during recovery, and wait for recovery done. echo check > /sys/block/md0/md/sync_action cat /sys/block/md0/md/mismatch_cnt # it will be greater than 0. Soon after you hot-plug out /dev/sda, you will see many WARN_ON messages. The replacement recovery will be interrupted shortly. After the recovery finishes, it will result in data corruption. Actually, it's just an unhandled case of replacement. In commit (md/raid5: fix interaction of 'replace' and 'recovery'.), if a NeedReplace device is not UPTODATE then that is an error, the commit just simply print WARN_ON but also mark these corrupted stripes with R5_WantReplace. (it means it's ready for writes.) To fix this case, we can leverage 'sync and replace' mode mentioned in commit <9a3e1101b827> (md/raid5: detect and handle replacements during recovery.). We can add logics to detect and use 'sync and replace' mode for these stripes. Reported-by: Alex Chen Reviewed-by: Alex Wu Reviewed-by: Chung-Chiang Cheng Signed-off-by: BingJing Chang Signed-off-by: Shaohua Li Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/md/raid5.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 07ca2fd10189f..5018fb2352c27 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -4516,6 +4516,12 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s) s->failed++; if (rdev && !test_bit(Faulty, &rdev->flags)) do_recovery = 1; + else if (!rdev) { + rdev = rcu_dereference( + conf->disks[i].replacement); + if (rdev && !test_bit(Faulty, &rdev->flags)) + do_recovery = 1; + } } if (test_bit(R5_InJournal, &dev->flags)) -- GitLab From cb71229f64835c6c570c7697884054e589630bfd Mon Sep 17 00:00:00 2001 From: Gaurav Kohli Date: Thu, 2 Aug 2018 14:21:03 +0530 Subject: [PATCH 0240/2269] timers: Clear timer_base::must_forward_clk with timer_base::lock held [ Upstream commit 363e934d8811d799c88faffc5bfca782fd728334 ] timer_base::must_forward_clock is indicating that the base clock might be stale due to a long idle sleep. The forwarding of the base clock takes place in the timer softirq or when a timer is enqueued to a base which is idle. If the enqueue of timer to an idle base happens from a remote CPU, then the following race can happen: CPU0 CPU1 run_timer_softirq mod_timer base = lock_timer_base(timer); base->must_forward_clk = false if (base->must_forward_clk) forward(base); -> skipped enqueue_timer(base, timer, idx); -> idx is calculated high due to stale base unlock_timer_base(timer); base = lock_timer_base(timer); forward(base); The root cause is that timer_base::must_forward_clk is cleared outside the timer_base::lock held region, so the remote queuing CPU observes it as cleared, but the base clock is still stale. This can cause large granularity values for timers, i.e. the accuracy of the expiry time suffers. Prevent this by clearing the flag with timer_base::lock held, so that the forwarding takes place before the cleared flag is observable by a remote CPU. Signed-off-by: Gaurav Kohli Signed-off-by: Thomas Gleixner Cc: john.stultz@linaro.org Cc: sboyd@kernel.org Cc: linux-arm-msm@vger.kernel.org Link: https://lkml.kernel.org/r/1533199863-22748-1-git-send-email-gkohli@codeaurora.org Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- kernel/time/timer.c | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/kernel/time/timer.c b/kernel/time/timer.c index 9fe525f410bf9..f17c76a1a05f5 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -1609,6 +1609,22 @@ static inline void __run_timers(struct timer_base *base) raw_spin_lock_irq(&base->lock); + /* + * timer_base::must_forward_clk must be cleared before running + * timers so that any timer functions that call mod_timer() will + * not try to forward the base. Idle tracking / clock forwarding + * logic is only used with BASE_STD timers. + * + * The must_forward_clk flag is cleared unconditionally also for + * the deferrable base. The deferrable base is not affected by idle + * tracking and never forwarded, so clearing the flag is a NOOP. + * + * The fact that the deferrable base is never forwarded can cause + * large variations in granularity for deferrable timers, but they + * can be deferred for long periods due to idle anyway. + */ + base->must_forward_clk = false; + while (time_after_eq(jiffies, base->clk)) { levels = collect_expired_timers(base, heads); @@ -1628,19 +1644,6 @@ static __latent_entropy void run_timer_softirq(struct softirq_action *h) { struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]); - /* - * must_forward_clk must be cleared before running timers so that any - * timer functions that call mod_timer will not try to forward the - * base. idle trcking / clock forwarding logic is only used with - * BASE_STD timers. - * - * The deferrable base does not do idle tracking at all, so we do - * not forward it. This can result in very large variations in - * granularity for deferrable timers, but they can be deferred for - * long periods due to idle. - */ - base->must_forward_clk = false; - __run_timers(base); if (IS_ENABLED(CONFIG_NO_HZ_COMMON)) __run_timers(this_cpu_ptr(&timer_bases[BASE_DEF])); -- GitLab From 3b9909896570390b8f3c35e7261fcfa0ce5abe54 Mon Sep 17 00:00:00 2001 From: Todor Tomov Date: Wed, 25 Jul 2018 12:38:20 -0400 Subject: [PATCH 0241/2269] media: camss: csid: Configure data type and decode format properly [ Upstream commit c628e78899ff8006b5f9d8206da54ed3bb994342 ] The CSID decodes the input data stream. When the input comes from the Test Generator the format of the stream is set on the source media pad. When the input comes from the CSIPHY the format is the one on the sink media pad. Use the proper format for each case. Signed-off-by: Todor Tomov Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- .../media/platform/qcom/camss-8x16/camss-csid.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/media/platform/qcom/camss-8x16/camss-csid.c b/drivers/media/platform/qcom/camss-8x16/camss-csid.c index 64df82817de3c..4882ee25bd754 100644 --- a/drivers/media/platform/qcom/camss-8x16/camss-csid.c +++ b/drivers/media/platform/qcom/camss-8x16/camss-csid.c @@ -392,9 +392,6 @@ static int csid_set_stream(struct v4l2_subdev *sd, int enable) !media_entity_remote_pad(&csid->pads[MSM_CSID_PAD_SINK])) return -ENOLINK; - dt = csid_get_fmt_entry(csid->fmt[MSM_CSID_PAD_SRC].code)-> - data_type; - if (tg->enabled) { /* Config Test Generator */ struct v4l2_mbus_framefmt *f = @@ -416,6 +413,9 @@ static int csid_set_stream(struct v4l2_subdev *sd, int enable) writel_relaxed(val, csid->base + CAMSS_CSID_TG_DT_n_CGG_0(0)); + dt = csid_get_fmt_entry( + csid->fmt[MSM_CSID_PAD_SRC].code)->data_type; + /* 5:0 data type */ val = dt; writel_relaxed(val, csid->base + @@ -425,6 +425,9 @@ static int csid_set_stream(struct v4l2_subdev *sd, int enable) val = tg->payload_mode; writel_relaxed(val, csid->base + CAMSS_CSID_TG_DT_n_CGG_2(0)); + + df = csid_get_fmt_entry( + csid->fmt[MSM_CSID_PAD_SRC].code)->decode_format; } else { struct csid_phy_config *phy = &csid->phy; @@ -439,13 +442,16 @@ static int csid_set_stream(struct v4l2_subdev *sd, int enable) writel_relaxed(val, csid->base + CAMSS_CSID_CORE_CTRL_1); + + dt = csid_get_fmt_entry( + csid->fmt[MSM_CSID_PAD_SINK].code)->data_type; + df = csid_get_fmt_entry( + csid->fmt[MSM_CSID_PAD_SINK].code)->decode_format; } /* Config LUT */ dt_shift = (cid % 4) * 8; - df = csid_get_fmt_entry(csid->fmt[MSM_CSID_PAD_SINK].code)-> - decode_format; val = readl_relaxed(csid->base + CAMSS_CSID_CID_LUT_VC_n(vc)); val &= ~(0xff << dt_shift); -- GitLab From 8e6ee30ad80f8a7acb371a0ce3599c435dcc9a15 Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Thu, 21 Jun 2018 21:13:38 +0200 Subject: [PATCH 0242/2269] gpu: ipu-v3: default to id 0 on missing OF alias [ Upstream commit 2d87e6c1b99c402360fdfe19ce4f579ab2f96adf ] This is better than storing -ENODEV in the id number. This fixes SoCs with only one IPU that don't specify an IPU alias in the device tree. Signed-off-by: Philipp Zabel Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/ipu-v3/ipu-common.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/ipu-v3/ipu-common.c b/drivers/gpu/ipu-v3/ipu-common.c index 658fa2d3e40c2..2c8411b8d050d 100644 --- a/drivers/gpu/ipu-v3/ipu-common.c +++ b/drivers/gpu/ipu-v3/ipu-common.c @@ -1401,6 +1401,8 @@ static int ipu_probe(struct platform_device *pdev) return -ENODEV; ipu->id = of_alias_get_id(np, "ipu"); + if (ipu->id < 0) + ipu->id = 0; if (of_device_is_compatible(np, "fsl,imx6qp-ipu") && IS_ENABLED(CONFIG_DRM)) { -- GitLab From 2458e91302aab6ee8333886b4bff820d92033a5e Mon Sep 17 00:00:00 2001 From: Anton Vasilyev Date: Fri, 27 Jul 2018 18:45:36 +0300 Subject: [PATCH 0243/2269] misc: ti-st: Fix memory leak in the error path of probe() [ Upstream commit 81ae962d7f180c0092859440c82996cccb254976 ] Free resources instead of direct return of the error code if kim_probe fails. Found by Linux Driver Verification project (linuxtesting.org). Signed-off-by: Anton Vasilyev Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/misc/ti-st/st_kim.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/misc/ti-st/st_kim.c b/drivers/misc/ti-st/st_kim.c index b77aacafc3fcd..dda3ed72d05bc 100644 --- a/drivers/misc/ti-st/st_kim.c +++ b/drivers/misc/ti-st/st_kim.c @@ -756,14 +756,14 @@ static int kim_probe(struct platform_device *pdev) err = gpio_request(kim_gdata->nshutdown, "kim"); if (unlikely(err)) { pr_err(" gpio %d request failed ", kim_gdata->nshutdown); - return err; + goto err_sysfs_group; } /* Configure nShutdown GPIO as output=0 */ err = gpio_direction_output(kim_gdata->nshutdown, 0); if (unlikely(err)) { pr_err(" unable to configure gpio %d", kim_gdata->nshutdown); - return err; + goto err_sysfs_group; } /* get reference of pdev for request_firmware */ -- GitLab From 16c6e01a4a98f3b212990746145e5021058c166b Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 2 Aug 2018 11:24:47 +0300 Subject: [PATCH 0244/2269] uio: potential double frees if __uio_register_device() fails [ Upstream commit f019f07ecf6a6b8bd6d7853bce70925d90af02d1 ] The uio_unregister_device() function assumes that if "info->uio_dev" is non-NULL that means "info" is fully allocated. Setting info->uio_de has to be the last thing in the function. In the current code, if request_threaded_irq() fails then we return with info->uio_dev set to non-NULL but info is not fully allocated and it can lead to double frees. Fixes: beafc54c4e2f ("UIO: Add the User IO core code") Signed-off-by: Dan Carpenter Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/uio/uio.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/uio/uio.c b/drivers/uio/uio.c index ff04b7f8549f0..41784798c789b 100644 --- a/drivers/uio/uio.c +++ b/drivers/uio/uio.c @@ -841,8 +841,6 @@ int __uio_register_device(struct module *owner, if (ret) goto err_uio_dev_add_attributes; - info->uio_dev = idev; - if (info->irq && (info->irq != UIO_IRQ_CUSTOM)) { /* * Note that we deliberately don't use devm_request_irq @@ -858,6 +856,7 @@ int __uio_register_device(struct module *owner, goto err_request_irq; } + info->uio_dev = idev; return 0; err_request_irq: -- GitLab From eec63d96d0e40c8a2aac9e0b1e777e089983d692 Mon Sep 17 00:00:00 2001 From: Anton Vasilyev Date: Tue, 24 Jul 2018 18:10:38 +0300 Subject: [PATCH 0245/2269] firmware: vpd: Fix section enabled flag on vpd_section_destroy [ Upstream commit 45ca3f76de0507ecf143f770570af2942f263812 ] static struct ro_vpd and rw_vpd are initialized by vpd_sections_init() in vpd_probe() based on header's ro and rw sizes. In vpd_remove() vpd_section_destroy() performs deinitialization based on enabled flag, which is set to true by vpd_sections_init(). This leads to call of vpd_section_destroy() on already destroyed section for probe-release-probe-release sequence if first probe performs ro_vpd initialization and second probe does not initialize it. The patch adds changing enabled flag on vpd_section_destroy and adds cleanup on the error path of vpd_sections_init. Found by Linux Driver Verification project (linuxtesting.org). Signed-off-by: Anton Vasilyev Reviewed-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/google/vpd.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/firmware/google/vpd.c b/drivers/firmware/google/vpd.c index e4b40f2b46274..9c0f7cf920afe 100644 --- a/drivers/firmware/google/vpd.c +++ b/drivers/firmware/google/vpd.c @@ -246,6 +246,7 @@ static int vpd_section_destroy(struct vpd_section *sec) sysfs_remove_bin_file(vpd_kobj, &sec->bin_attr); kfree(sec->raw_name); memunmap(sec->baseaddr); + sec->enabled = false; } return 0; @@ -279,8 +280,10 @@ static int vpd_sections_init(phys_addr_t physaddr) ret = vpd_section_init("rw", &rw_vpd, physaddr + sizeof(struct vpd_cbmem) + header.ro_size, header.rw_size); - if (ret) + if (ret) { + vpd_section_destroy(&ro_vpd); return ret; + } } return 0; -- GitLab From 4bb1d3ec658c9a1f071bab691c6e5778860efb6a Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Thu, 2 Aug 2018 03:08:25 +0000 Subject: [PATCH 0246/2269] Drivers: hv: vmbus: Cleanup synic memory free path [ Upstream commit 572086325ce9a9e348b8748e830653f3959e88b6 ] clk_evt memory is not being freed when the synic is shutdown or when there is an allocation error. Add the appropriate kfree() call, along with a comment to clarify how the memory gets freed after an allocation error. Make the free path consistent by removing checks for NULL since kfree() and free_page() already do the check. Signed-off-by: Michael Kelley Reported-by: Dan Carpenter Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/hv/hv.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c index 8267439dd1eec..d8101cd28dfa3 100644 --- a/drivers/hv/hv.c +++ b/drivers/hv/hv.c @@ -196,6 +196,10 @@ int hv_synic_alloc(void) return 0; err: + /* + * Any memory allocations that succeeded will be freed when + * the caller cleans up by calling hv_synic_free() + */ return -ENOMEM; } @@ -208,12 +212,10 @@ void hv_synic_free(void) struct hv_per_cpu_context *hv_cpu = per_cpu_ptr(hv_context.cpu_context, cpu); - if (hv_cpu->synic_event_page) - free_page((unsigned long)hv_cpu->synic_event_page); - if (hv_cpu->synic_message_page) - free_page((unsigned long)hv_cpu->synic_message_page); - if (hv_cpu->post_msg_page) - free_page((unsigned long)hv_cpu->post_msg_page); + kfree(hv_cpu->clk_evt); + free_page((unsigned long)hv_cpu->synic_event_page); + free_page((unsigned long)hv_cpu->synic_message_page); + free_page((unsigned long)hv_cpu->post_msg_page); } kfree(hv_context.hv_numa_map); -- GitLab From ca21de151016d69c3415d95731761c9dd0bf1e00 Mon Sep 17 00:00:00 2001 From: Anton Vasilyev Date: Fri, 27 Jul 2018 16:39:31 +0300 Subject: [PATCH 0247/2269] tty: rocket: Fix possible buffer overwrite on register_PCI [ Upstream commit 0419056ec8fd01ddf5460d2dba0491aad22657dd ] If number of isa and pci boards exceed NUM_BOARDS on the path rp_init()->init_PCI()->register_PCI() then buffer overwrite occurs in register_PCI() on assign rcktpt_io_addr[i]. The patch adds check on upper bound for index of registered board in register_PCI. Found by Linux Driver Verification project (linuxtesting.org). Signed-off-by: Anton Vasilyev Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/tty/rocket.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/rocket.c b/drivers/tty/rocket.c index 20d79a6007d50..070733ca94d5e 100644 --- a/drivers/tty/rocket.c +++ b/drivers/tty/rocket.c @@ -1894,7 +1894,7 @@ static __init int register_PCI(int i, struct pci_dev *dev) ByteIO_t UPCIRingInd = 0; if (!dev || !pci_match_id(rocket_pci_ids, dev) || - pci_enable_device(dev)) + pci_enable_device(dev) || i >= NUM_BOARDS) return 0; rcktpt_io_addr[i] = pci_resource_start(dev, 0); -- GitLab From d85e49570227411fd47a915bfefd4b58d64417b4 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 27 Jul 2018 18:15:14 +0800 Subject: [PATCH 0248/2269] f2fs: fix to active page in lru list for read path [ Upstream commit 82cf4f132e6d16dca6fc3bd955019246141bc645 ] If config CONFIG_F2FS_FAULT_INJECTION is on, for both read or write path we will call find_lock_page() to get the page, but for read path, it missed to passing FGP_ACCESSED to allocator to active the page in LRU list, result in being reclaimed in advance incorrectly, fix it. Reported-by: Xianrong Zhou Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/f2fs/f2fs.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 3b34004a71c15..54f8520ad7a2f 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1766,8 +1766,13 @@ static inline struct page *f2fs_grab_cache_page(struct address_space *mapping, pgoff_t index, bool for_write) { #ifdef CONFIG_F2FS_FAULT_INJECTION - struct page *page = find_lock_page(mapping, index); + struct page *page; + if (!for_write) + page = find_get_page_flags(mapping, index, + FGP_LOCK | FGP_ACCESSED); + else + page = find_lock_page(mapping, index); if (page) return page; -- GitLab From e04910746abe90f9fc8e9d8991a611649c536640 Mon Sep 17 00:00:00 2001 From: Yunlong Song Date: Thu, 12 Jul 2018 23:09:26 +0800 Subject: [PATCH 0249/2269] f2fs: do not set free of current section [ Upstream commit 3611ce9911267cb93d364bd71ddea6821278d11f ] For the case when sbi->segs_per_sec > 1, take section:segment = 5 for example, if segment 1 is just used and allocate new segment 2, and the blocks of segment 1 is invalidated, at this time, the previous code will use __set_test_and_free to free the free_secmap and free_sections++, this is not correct since it is still a current section, so fix it. Signed-off-by: Yunlong Song Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/f2fs/segment.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 39ada30889b62..4dfb5080098ff 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -414,6 +414,8 @@ static inline void __set_test_and_free(struct f2fs_sb_info *sbi, if (test_and_clear_bit(segno, free_i->free_segmap)) { free_i->free_segments++; + if (IS_CURSEC(sbi, secno)) + goto skip_free; next = find_next_bit(free_i->free_segmap, start_segno + sbi->segs_per_sec, start_segno); if (next >= start_segno + sbi->segs_per_sec) { @@ -421,6 +423,7 @@ static inline void __set_test_and_free(struct f2fs_sb_info *sbi, free_i->free_sections++; } } +skip_free: spin_unlock(&free_i->segmap_lock); } -- GitLab From e296ac45b5687817e3ab39a421e4ee71507e5103 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 6 Jul 2018 20:50:57 -0700 Subject: [PATCH 0250/2269] f2fs: fix defined but not used build warnings [ Upstream commit cb15d1e43db0a6341c1e26ac6a2c74e61b74f1aa ] Fix build warnings in f2fs when CONFIG_PROC_FS is not enabled by marking the unused functions as __maybe_unused. ../fs/f2fs/sysfs.c:519:12: warning: 'segment_info_seq_show' defined but not used [-Wunused-function] ../fs/f2fs/sysfs.c:546:12: warning: 'segment_bits_seq_show' defined but not used [-Wunused-function] ../fs/f2fs/sysfs.c:570:12: warning: 'iostat_info_seq_show' defined but not used [-Wunused-function] Signed-off-by: Randy Dunlap Cc: Jaegeuk Kim Cc: Chao Yu Cc: linux-f2fs-devel@lists.sourceforge.net Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/f2fs/sysfs.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index e2c258f717cd1..93af9d7dfcdca 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -9,6 +9,7 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ +#include #include #include #include @@ -381,7 +382,8 @@ static struct kobject f2fs_feat = { .kset = &f2fs_kset, }; -static int segment_info_seq_show(struct seq_file *seq, void *offset) +static int __maybe_unused segment_info_seq_show(struct seq_file *seq, + void *offset) { struct super_block *sb = seq->private; struct f2fs_sb_info *sbi = F2FS_SB(sb); @@ -408,7 +410,8 @@ static int segment_info_seq_show(struct seq_file *seq, void *offset) return 0; } -static int segment_bits_seq_show(struct seq_file *seq, void *offset) +static int __maybe_unused segment_bits_seq_show(struct seq_file *seq, + void *offset) { struct super_block *sb = seq->private; struct f2fs_sb_info *sbi = F2FS_SB(sb); @@ -432,7 +435,8 @@ static int segment_bits_seq_show(struct seq_file *seq, void *offset) return 0; } -static int iostat_info_seq_show(struct seq_file *seq, void *offset) +static int __maybe_unused iostat_info_seq_show(struct seq_file *seq, + void *offset) { struct super_block *sb = seq->private; struct f2fs_sb_info *sbi = F2FS_SB(sb); -- GitLab From c39273ce0d85f076a98cc79d151ef83bc75f4049 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 22 Sep 2017 13:20:43 +0200 Subject: [PATCH 0251/2269] perf tools: Allow overriding MAX_NR_CPUS at compile time [ Upstream commit 21b8732eb4479b579bda9ee38e62b2c312c2a0e5 ] After update of kernel, the perf tool doesn't run anymore on my 32MB RAM powerpc board, but still runs on a 128MB RAM board: ~# strace perf execve("/usr/sbin/perf", ["perf"], [/* 12 vars */]) = -1 ENOMEM (Cannot allocate memory) --- SIGSEGV {si_signo=SIGSEGV, si_code=SI_KERNEL, si_addr=0} --- +++ killed by SIGSEGV +++ Segmentation fault objdump -x shows that .bss section has a huge size of 24Mbytes: 27 .bss 016baca8 101cebb8 101cebb8 001cd988 2**3 With especially the following objects having quite big size: 10205f80 l O .bss 00140000 runtime_cycles_stats 10345f80 l O .bss 00140000 runtime_stalled_cycles_front_stats 10485f80 l O .bss 00140000 runtime_stalled_cycles_back_stats 105c5f80 l O .bss 00140000 runtime_branches_stats 10705f80 l O .bss 00140000 runtime_cacherefs_stats 10845f80 l O .bss 00140000 runtime_l1_dcache_stats 10985f80 l O .bss 00140000 runtime_l1_icache_stats 10ac5f80 l O .bss 00140000 runtime_ll_cache_stats 10c05f80 l O .bss 00140000 runtime_itlb_cache_stats 10d45f80 l O .bss 00140000 runtime_dtlb_cache_stats 10e85f80 l O .bss 00140000 runtime_cycles_in_tx_stats 10fc5f80 l O .bss 00140000 runtime_transaction_stats 11105f80 l O .bss 00140000 runtime_elision_stats 11245f80 l O .bss 00140000 runtime_topdown_total_slots 11385f80 l O .bss 00140000 runtime_topdown_slots_retired 114c5f80 l O .bss 00140000 runtime_topdown_slots_issued 11605f80 l O .bss 00140000 runtime_topdown_fetch_bubbles 11745f80 l O .bss 00140000 runtime_topdown_recovery_bubbles This is due to commit 4d255766d28b1 ("perf: Bump max number of cpus to 1024"), because many tables are sized with MAX_NR_CPUS This patch gives the opportunity to redefine MAX_NR_CPUS via $ make EXTRA_CFLAGS=-DMAX_NR_CPUS=1 Signed-off-by: Christophe Leroy Cc: Alexander Shishkin Cc: Peter Zijlstra Cc: linuxppc-dev@lists.ozlabs.org Link: http://lkml.kernel.org/r/20170922112043.8349468C57@po15668-vm-win7.idsi0.si.c-s.fr Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/perf/perf.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/perf/perf.h b/tools/perf/perf.h index 55086389fc06d..96f62dd7e3ed3 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -24,7 +24,9 @@ static inline unsigned long long rdclock(void) return ts.tv_sec * 1000000000ULL + ts.tv_nsec; } +#ifndef MAX_NR_CPUS #define MAX_NR_CPUS 1024 +#endif extern const char *input_name; extern bool perf_host, perf_guest; -- GitLab From 521aedea92cddf019ce5d0a26e90363209d6f023 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Thu, 26 Jul 2018 16:04:47 -0400 Subject: [PATCH 0252/2269] NFSv4.0 fix client reference leak in callback [ Upstream commit 32cd3ee511f4e07ca25d71163b50e704808d22f4 ] If there is an error during processing of a callback message, it leads to refrence leak on the client structure and eventually an unclean superblock. Signed-off-by: Olga Kornievskaia Signed-off-by: Anna Schumaker Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/nfs/callback_xdr.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index 123c069429a7b..57de914630bc9 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -904,16 +904,21 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp) if (hdr_arg.minorversion == 0) { cps.clp = nfs4_find_client_ident(SVC_NET(rqstp), hdr_arg.cb_ident); - if (!cps.clp || !check_gss_callback_principal(cps.clp, rqstp)) + if (!cps.clp || !check_gss_callback_principal(cps.clp, rqstp)) { + if (cps.clp) + nfs_put_client(cps.clp); goto out_invalidcred; + } } cps.minorversion = hdr_arg.minorversion; hdr_res.taglen = hdr_arg.taglen; hdr_res.tag = hdr_arg.tag; - if (encode_compound_hdr_res(&xdr_out, &hdr_res) != 0) + if (encode_compound_hdr_res(&xdr_out, &hdr_res) != 0) { + if (cps.clp) + nfs_put_client(cps.clp); return rpc_system_err; - + } while (status == 0 && nops != hdr_arg.nops) { status = process_op(nops, rqstp, &xdr_in, rqstp->rq_argp, &xdr_out, rqstp->rq_resp, -- GitLab From cc33476b67b8d08268948bf16b17d455ded3c6ee Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 24 Jul 2018 08:20:08 +0200 Subject: [PATCH 0253/2269] perf c2c report: Fix crash for empty browser [ Upstream commit 73978332572ccf5e364c31e9a70ba953f8202b46 ] 'perf c2c' scans read/write accesses and tries to find false sharing cases, so when the events it wants were not asked for or ended up not taking place, we get no histograms. So do not try to display entry details if there's not any. Currently this ends up in crash: $ perf c2c report # then press 'd' perf: Segmentation fault $ Committer testing: Before: Record a perf.data file without events of interest to 'perf c2c report', then call it and press 'd': # perf record sleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.001 MB perf.data (6 samples) ] # perf c2c report perf: Segmentation fault -------- backtrace -------- perf[0x5b1d2a] /lib64/libc.so.6(+0x346df)[0x7fcb566e36df] perf[0x46fcae] perf[0x4a9f1e] perf[0x4aa220] perf(main+0x301)[0x42c561] /lib64/libc.so.6(__libc_start_main+0xe9)[0x7fcb566cff29] perf(_start+0x29)[0x42c999] # After the patch the segfault doesn't take place, a follow up patch to tell the user why nothing changes when 'd' is pressed would be good. Reported-by: rodia@autistici.org Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: David Ahern Cc: Don Zickus Cc: Joe Mario Cc: Namhyung Kim Cc: Peter Zijlstra Fixes: f1c5fd4d0bb9 ("perf c2c report: Add TUI cacheline browser") Link: http://lkml.kernel.org/r/20180724062008.26126-1-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/perf/builtin-c2c.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 3479a1bc7caa1..fb76423022e86 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -2229,6 +2229,9 @@ static int perf_c2c__browse_cacheline(struct hist_entry *he) " s Togle full lenght of symbol and source line columns \n" " q Return back to cacheline list \n"; + if (!he) + return 0; + /* Display compact version first. */ c2c.symbol_full = false; -- GitLab From 968f03158db561485f37334dfc342330ec28497b Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Mon, 9 Jul 2018 07:15:22 -0700 Subject: [PATCH 0254/2269] perf evlist: Fix error out while applying initial delay and LBR [ Upstream commit 95035c5e167ae6e740b1ddd30210ae0eaf39a5db ] 'perf record' will error out if both --delay and LBR are applied. For example: # perf record -D 1000 -a -e cycles -j any -- sleep 2 Error: dummy:HG: PMU Hardware doesn't support sampling/overflow-interrupts. Try 'perf stat' # A dummy event is added implicitly for initial delay, which has the same configurations as real sampling events. The dummy event is a software event. If LBR is configured, perf must error out. The dummy event will only be used to track PERF_RECORD_MMAP while perf waits for the initial delay to enable the real events. The BRANCH_STACK bit can be safely cleared for the dummy event. After applying the patch: # perf record -D 1000 -a -e cycles -j any -- sleep 2 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 1.054 MB perf.data (828 samples) ] # Reported-by: Sunil K Pandey Signed-off-by: Kan Liang Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Andi Kleen Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1531145722-16404-1-git-send-email-kan.liang@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/evsel.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 226a9245d1db7..2227ee92d8e21 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -824,6 +824,12 @@ static void apply_config_terms(struct perf_evsel *evsel, } } +static bool is_dummy_event(struct perf_evsel *evsel) +{ + return (evsel->attr.type == PERF_TYPE_SOFTWARE) && + (evsel->attr.config == PERF_COUNT_SW_DUMMY); +} + /* * The enable_on_exec/disabled value strategy: * @@ -1054,6 +1060,14 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts, else perf_evsel__reset_sample_bit(evsel, PERIOD); } + + /* + * For initial_delay, a dummy event is added implicitly. + * The software event will trigger -EOPNOTSUPP error out, + * if BRANCH_STACK bit is set. + */ + if (opts->initial_delay && is_dummy_event(evsel)) + perf_evsel__reset_sample_bit(evsel, BRANCH_STACK); } static int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads) -- GitLab From 40992419f135b568a9dab8d3c4a827331158b7db Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Mon, 2 Jul 2018 04:21:18 -0400 Subject: [PATCH 0255/2269] macintosh/via-pmu: Add missing mmio accessors [ Upstream commit 576d5290d678a651b9f36050fc1717e0573aca13 ] Add missing in_8() accessors to init_pmu() and pmu_sr_intr(). This fixes several sparse warnings: drivers/macintosh/via-pmu.c:536:29: warning: dereference of noderef expression drivers/macintosh/via-pmu.c:537:33: warning: dereference of noderef expression drivers/macintosh/via-pmu.c:1455:17: warning: dereference of noderef expression drivers/macintosh/via-pmu.c:1456:69: warning: dereference of noderef expression Tested-by: Stan Johnson Signed-off-by: Finn Thain Reviewed-by: Geert Uytterhoeven Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/macintosh/via-pmu.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/macintosh/via-pmu.c b/drivers/macintosh/via-pmu.c index c4c2b3b85ebc0..f6e040fcad9a4 100644 --- a/drivers/macintosh/via-pmu.c +++ b/drivers/macintosh/via-pmu.c @@ -532,8 +532,9 @@ init_pmu(void) int timeout; struct adb_request req; - out_8(&via[B], via[B] | TREQ); /* negate TREQ */ - out_8(&via[DIRB], (via[DIRB] | TREQ) & ~TACK); /* TACK in, TREQ out */ + /* Negate TREQ. Set TACK to input and TREQ to output. */ + out_8(&via[B], in_8(&via[B]) | TREQ); + out_8(&via[DIRB], (in_8(&via[DIRB]) | TREQ) & ~TACK); pmu_request(&req, NULL, 2, PMU_SET_INTR_MASK, pmu_intr_mask); timeout = 100000; @@ -1455,8 +1456,8 @@ pmu_sr_intr(void) struct adb_request *req; int bite = 0; - if (via[B] & TREQ) { - printk(KERN_ERR "PMU: spurious SR intr (%x)\n", via[B]); + if (in_8(&via[B]) & TREQ) { + printk(KERN_ERR "PMU: spurious SR intr (%x)\n", in_8(&via[B])); out_8(&via[IFR], SR_INT); return NULL; } -- GitLab From 473983f319cc88dbecc4651c3e87f2ae131de93a Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Mon, 30 Jul 2018 21:31:23 +0300 Subject: [PATCH 0256/2269] ath9k: report tx status on EOSP [ Upstream commit 36e14a787dd0b459760de3622e9709edb745a6af ] Fixes missed indications of end of U-APSD service period to mac80211 Signed-off-by: Felix Fietkau Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/ath9k/xmit.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath9k/xmit.c b/drivers/net/wireless/ath/ath9k/xmit.c index d8b041f48ca8e..fa64c1cc94aed 100644 --- a/drivers/net/wireless/ath/ath9k/xmit.c +++ b/drivers/net/wireless/ath/ath9k/xmit.c @@ -86,7 +86,8 @@ static void ath_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); struct ieee80211_sta *sta = info->status.status_driver_data[0]; - if (info->flags & IEEE80211_TX_CTL_REQ_TX_STATUS) { + if (info->flags & (IEEE80211_TX_CTL_REQ_TX_STATUS | + IEEE80211_TX_STATUS_EOSP)) { ieee80211_tx_status(hw, skb); return; } -- GitLab From 2dacb8cc67ef4364440b2fbee3a845cc1a99e5c8 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Mon, 30 Jul 2018 21:31:28 +0300 Subject: [PATCH 0257/2269] ath9k_hw: fix channel maximum power level test [ Upstream commit 461d8a6bb9879b0e619752d040292e67aa06f1d2 ] The tx power applied by set_txpower is limited by the CTL (conformance test limit) entries in the EEPROM. These can change based on the user configured regulatory domain. Depending on the EEPROM data this can cause the tx power to become too limited, if the original regdomain CTLs impose lower limits than the CTLs of the user configured regdomain. To fix this issue, set the initial channel limits without any CTL restrictions and only apply the CTL at run time when setting the channel and the real tx power. Signed-off-by: Felix Fietkau Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/ath9k/hw.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/ath/ath9k/hw.c b/drivers/net/wireless/ath/ath9k/hw.c index 8c5c2dd8fa7f1..a7f506eb7b366 100644 --- a/drivers/net/wireless/ath/ath9k/hw.c +++ b/drivers/net/wireless/ath/ath9k/hw.c @@ -2915,16 +2915,19 @@ void ath9k_hw_apply_txpower(struct ath_hw *ah, struct ath9k_channel *chan, struct ath_regulatory *reg = ath9k_hw_regulatory(ah); struct ieee80211_channel *channel; int chan_pwr, new_pwr; + u16 ctl = NO_CTL; if (!chan) return; + if (!test) + ctl = ath9k_regd_get_ctl(reg, chan); + channel = chan->chan; chan_pwr = min_t(int, channel->max_power * 2, MAX_RATE_POWER); new_pwr = min_t(int, chan_pwr, reg->power_limit); - ah->eep_ops->set_txpower(ah, chan, - ath9k_regd_get_ctl(reg, chan), + ah->eep_ops->set_txpower(ah, chan, ctl, get_antenna_gain(ah, chan), new_pwr, test); } -- GitLab From bd21eb8aa705bb97a7eb33f2a2c6dc306dfa3916 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Thu, 26 Jul 2018 15:59:48 +0200 Subject: [PATCH 0258/2269] ath10k: prevent active scans on potential unusable channels [ Upstream commit 3f259111583801013cb605bb4414aa529adccf1c ] The QCA4019 hw1.0 firmware 10.4-3.2.1-00050 and 10.4-3.5.3-00053 (and most likely all other) seem to ignore the WMI_CHAN_FLAG_DFS flag during the scan. This results in transmission (probe requests) on channels which are not "available" for transmissions. Since the firmware is closed source and nothing can be done from our side to fix the problem in it, the driver has to work around this problem. The WMI_CHAN_FLAG_PASSIVE seems to be interpreted by the firmware to not scan actively on a channel unless an AP was detected on it. Simple probe requests will then be transmitted by the STA on the channel. ath10k must therefore also use this flag when it queues a radar channel for scanning. This should reduce the chance of an active scan when the channel might be "unusable" for transmissions. Fixes: e8a50f8ba44b ("ath10k: introduce DFS implementation") Signed-off-by: Sven Eckelmann Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/ath10k/mac.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c index 52ebed1f55a17..6fa9c223ff93b 100644 --- a/drivers/net/wireless/ath/ath10k/mac.c +++ b/drivers/net/wireless/ath/ath10k/mac.c @@ -3074,6 +3074,13 @@ static int ath10k_update_channel_list(struct ath10k *ar) passive = channel->flags & IEEE80211_CHAN_NO_IR; ch->passive = passive; + /* the firmware is ignoring the "radar" flag of the + * channel and is scanning actively using Probe Requests + * on "Radar detection"/DFS channels which are not + * marked as "available" + */ + ch->passive |= ch->chan_radar; + ch->freq = channel->center_freq; ch->band_center_freq1 = channel->center_freq; ch->min_power = 0; -- GitLab From 0e890d1cf491ee34dc82f0841f1126e5bb750b7f Mon Sep 17 00:00:00 2001 From: Loic Poulain Date: Fri, 27 Jul 2018 18:30:23 +0200 Subject: [PATCH 0259/2269] wlcore: Set rx_status boottime_ns field on rx [ Upstream commit 37a634f60fd6dfbda2c312657eec7ef0750546e7 ] When receiving a beacon or probe response, we should update the boottime_ns field which is the timestamp the frame was received at. (cf mac80211.h) This fixes a scanning issue with Android since it relies on this timestamp to determine when the AP has been seen for the last time (via the nl80211 BSS_LAST_SEEN_BOOTTIME parameter). Signed-off-by: Loic Poulain Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ti/wlcore/rx.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/ti/wlcore/rx.c b/drivers/net/wireless/ti/wlcore/rx.c index 0f15696195f88..078a4940bc5c3 100644 --- a/drivers/net/wireless/ti/wlcore/rx.c +++ b/drivers/net/wireless/ti/wlcore/rx.c @@ -59,7 +59,7 @@ static u32 wlcore_rx_get_align_buf_size(struct wl1271 *wl, u32 pkt_len) static void wl1271_rx_status(struct wl1271 *wl, struct wl1271_rx_descriptor *desc, struct ieee80211_rx_status *status, - u8 beacon) + u8 beacon, u8 probe_rsp) { memset(status, 0, sizeof(struct ieee80211_rx_status)); @@ -106,6 +106,9 @@ static void wl1271_rx_status(struct wl1271 *wl, } } + if (beacon || probe_rsp) + status->boottime_ns = ktime_get_boot_ns(); + if (beacon) wlcore_set_pending_regdomain_ch(wl, (u16)desc->channel, status->band); @@ -191,7 +194,8 @@ static int wl1271_rx_handle_data(struct wl1271 *wl, u8 *data, u32 length, if (ieee80211_is_data_present(hdr->frame_control)) is_data = 1; - wl1271_rx_status(wl, desc, IEEE80211_SKB_RXCB(skb), beacon); + wl1271_rx_status(wl, desc, IEEE80211_SKB_RXCB(skb), beacon, + ieee80211_is_probe_resp(hdr->frame_control)); wlcore_hw_set_rx_csum(wl, desc, skb); seq_num = (le16_to_cpu(hdr->seq_ctrl) & IEEE80211_SCTL_SEQ) >> 4; -- GitLab From 1ed3a93072307265d6385031b72929a904b50f87 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Fri, 15 Jun 2018 10:59:39 +0100 Subject: [PATCH 0260/2269] rpmsg: core: add support to power domains for devices [ Upstream commit fe782affd0f440a4e60e2cc81b8f2eccb2923113 ] Some of the rpmsg devices need to switch on power domains to communicate with remote processor. For example on Qualcomm DB820c platform LPASS power domain needs to switched on for any kind of audio services. This patch adds the missing power domain support in rpmsg core. Without this patch attempting to play audio via QDSP on DB820c would reboot the system. Signed-off-by: Srinivas Kandagatla Signed-off-by: Bjorn Andersson Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/rpmsg/rpmsg_core.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/rpmsg/rpmsg_core.c b/drivers/rpmsg/rpmsg_core.c index dffa3aab71782..cec4c3223044c 100644 --- a/drivers/rpmsg/rpmsg_core.c +++ b/drivers/rpmsg/rpmsg_core.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include "rpmsg_internal.h" @@ -418,6 +419,10 @@ static int rpmsg_dev_probe(struct device *dev) struct rpmsg_endpoint *ept = NULL; int err; + err = dev_pm_domain_attach(dev, true); + if (err) + goto out; + if (rpdrv->callback) { strncpy(chinfo.name, rpdev->id.name, RPMSG_NAME_SIZE); chinfo.src = rpdev->src; @@ -459,6 +464,8 @@ static int rpmsg_dev_remove(struct device *dev) rpdrv->remove(rpdev); + dev_pm_domain_detach(dev, true); + if (rpdev->ept) rpmsg_destroy_ept(rpdev->ept); -- GitLab From 3f3d6c0608b7aaf504e5d1f6f85a94c38b91f581 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Fri, 27 Jul 2018 18:23:19 -0700 Subject: [PATCH 0261/2269] MIPS: Fix ISA virt/bus conversion for non-zero PHYS_OFFSET [ Upstream commit 0494d7ffdcebc6935410ea0719b24ab626675351 ] isa_virt_to_bus() & isa_bus_to_virt() claim to treat ISA bus addresses as being identical to physical addresses, but they fail to do so in the presence of a non-zero PHYS_OFFSET. Correct this by having them use virt_to_phys() & phys_to_virt(), which consolidates the calculations to one place & ensures that ISA bus addresses do indeed match physical addresses. Signed-off-by: Paul Burton Patchwork: https://patchwork.linux-mips.org/patch/20047/ Cc: James Hogan Cc: Ralf Baechle Cc: linux-mips@linux-mips.org Cc: Vladimir Kondratiev Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/mips/include/asm/io.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/mips/include/asm/io.h b/arch/mips/include/asm/io.h index cea8ad864b3f6..57b34257be2bf 100644 --- a/arch/mips/include/asm/io.h +++ b/arch/mips/include/asm/io.h @@ -141,14 +141,14 @@ static inline void * phys_to_virt(unsigned long address) /* * ISA I/O bus memory addresses are 1:1 with the physical address. */ -static inline unsigned long isa_virt_to_bus(volatile void * address) +static inline unsigned long isa_virt_to_bus(volatile void *address) { - return (unsigned long)address - PAGE_OFFSET; + return virt_to_phys(address); } -static inline void * isa_bus_to_virt(unsigned long address) +static inline void *isa_bus_to_virt(unsigned long address) { - return (void *)(address + PAGE_OFFSET); + return phys_to_virt(address); } #define isa_page_to_bus page_to_phys -- GitLab From 7cadaaa96c07553dce8e39dae64d8235be7f31eb Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Mon, 2 Jul 2018 12:01:54 -0700 Subject: [PATCH 0262/2269] ata: libahci: Allow reconfigure of DEVSLP register [ Upstream commit 11c291461b6ea8d1195a96d6bba6673a94aacebc ] There are two modes in which DEVSLP can be entered. The OS initiated or hardware autonomous. In hardware autonomous mode, BIOS configures the AHCI controller and the device to enable DEVSLP. But they may not be ideal for all cases. So in this case, OS should be able to reconfigure DEVSLP register. Currently if the DEVSLP is already enabled, we can't set again as it will simply return. There are some systems where the firmware is setting high DITO by default, in this case we can't modify here to correct settings. With the default in several seconds, we are not able to transition to DEVSLP. This change will allow reconfiguration of devslp register if DITO is different. Signed-off-by: Srinivas Pandruvada Reviewed-by: Hans de Goede Signed-off-by: Tejun Heo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/ata/libahci.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c index bc562fd2b0a0d..25edaf8f96292 100644 --- a/drivers/ata/libahci.c +++ b/drivers/ata/libahci.c @@ -2096,7 +2096,7 @@ static void ahci_set_aggressive_devslp(struct ata_port *ap, bool sleep) struct ahci_host_priv *hpriv = ap->host->private_data; void __iomem *port_mmio = ahci_port_base(ap); struct ata_device *dev = ap->link.device; - u32 devslp, dm, dito, mdat, deto; + u32 devslp, dm, dito, mdat, deto, dito_conf; int rc; unsigned int err_mask; @@ -2120,8 +2120,15 @@ static void ahci_set_aggressive_devslp(struct ata_port *ap, bool sleep) return; } - /* device sleep was already enabled */ - if (devslp & PORT_DEVSLP_ADSE) + dm = (devslp & PORT_DEVSLP_DM_MASK) >> PORT_DEVSLP_DM_OFFSET; + dito = devslp_idle_timeout / (dm + 1); + if (dito > 0x3ff) + dito = 0x3ff; + + dito_conf = (devslp >> PORT_DEVSLP_DITO_OFFSET) & 0x3FF; + + /* device sleep was already enabled and same dito */ + if ((devslp & PORT_DEVSLP_ADSE) && (dito_conf == dito)) return; /* set DITO, MDAT, DETO and enable DevSlp, need to stop engine first */ @@ -2129,11 +2136,6 @@ static void ahci_set_aggressive_devslp(struct ata_port *ap, bool sleep) if (rc) return; - dm = (devslp & PORT_DEVSLP_DM_MASK) >> PORT_DEVSLP_DM_OFFSET; - dito = devslp_idle_timeout / (dm + 1); - if (dito > 0x3ff) - dito = 0x3ff; - /* Use the nominal value 10 ms if the read MDAT is zero, * the nominal value of DETO is 20 ms. */ -- GitLab From 62128a8d8489cf0918ca1de0388ab5efa93ed023 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Mon, 2 Jul 2018 12:01:53 -0700 Subject: [PATCH 0263/2269] ata: libahci: Correct setting of DEVSLP register [ Upstream commit 2dbb3ec29a6c069035857a2fc4c24e80e5dfe3cc ] We have seen that on some platforms, SATA device never show any DEVSLP residency. This prevent power gating of SATA IP, which prevent system to transition to low power mode in systems with SLP_S0 aka modern standby systems. The PHY logic is off only in DEVSLP not in slumber. Reference: https://www.intel.com/content/dam/www/public/us/en/documents/datasheets /332995-skylake-i-o-platform-datasheet-volume-1.pdf Section 28.7.6.1 Here driver is trying to do read-modify-write the devslp register. But not resetting the bits for which this driver will modify values (DITO, MDAT and DETO). So simply reset those bits before updating to new values. Signed-off-by: Srinivas Pandruvada Reviewed-by: Rafael J. Wysocki Reviewed-by: Hans de Goede Signed-off-by: Tejun Heo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/ata/libahci.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c index 25edaf8f96292..cda9a0b5bdaaa 100644 --- a/drivers/ata/libahci.c +++ b/drivers/ata/libahci.c @@ -2153,6 +2153,8 @@ static void ahci_set_aggressive_devslp(struct ata_port *ap, bool sleep) deto = 20; } + /* Make dito, mdat, deto bits to 0s */ + devslp &= ~GENMASK_ULL(24, 2); devslp |= ((dito << PORT_DEVSLP_DITO_OFFSET) | (mdat << PORT_DEVSLP_MDAT_OFFSET) | (deto << PORT_DEVSLP_DETO_OFFSET) | -- GitLab From 383195f9fecab79d35e5e27f2e4d24275c95ffcc Mon Sep 17 00:00:00 2001 From: Anton Vasilyev Date: Fri, 27 Jul 2018 16:51:57 +0300 Subject: [PATCH 0264/2269] scsi: 3ware: fix return 0 on the error path of probe [ Upstream commit 4dc98c1995482262e70e83ef029135247fafe0f2 ] tw_probe() returns 0 in case of fail of tw_initialize_device_extension(), pci_resource_start() or tw_reset_sequence() and releases resources. twl_probe() returns 0 in case of fail of twl_initialize_device_extension(), pci_iomap() and twl_reset_sequence(). twa_probe() returns 0 in case of fail of tw_initialize_device_extension(), ioremap() and twa_reset_sequence(). The patch adds retval initialization for these cases. Found by Linux Driver Verification project (linuxtesting.org). Signed-off-by: Anton Vasilyev Acked-by: Adam Radford Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/3w-9xxx.c | 6 +++++- drivers/scsi/3w-sas.c | 3 +++ drivers/scsi/3w-xxxx.c | 2 ++ 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/3w-9xxx.c b/drivers/scsi/3w-9xxx.c index a1388842e17e5..dd342207095af 100644 --- a/drivers/scsi/3w-9xxx.c +++ b/drivers/scsi/3w-9xxx.c @@ -2042,6 +2042,7 @@ static int twa_probe(struct pci_dev *pdev, const struct pci_device_id *dev_id) if (twa_initialize_device_extension(tw_dev)) { TW_PRINTK(tw_dev->host, TW_DRIVER, 0x25, "Failed to initialize device extension"); + retval = -ENOMEM; goto out_free_device_extension; } @@ -2064,6 +2065,7 @@ static int twa_probe(struct pci_dev *pdev, const struct pci_device_id *dev_id) tw_dev->base_addr = ioremap(mem_addr, mem_len); if (!tw_dev->base_addr) { TW_PRINTK(tw_dev->host, TW_DRIVER, 0x35, "Failed to ioremap"); + retval = -ENOMEM; goto out_release_mem_region; } @@ -2071,8 +2073,10 @@ static int twa_probe(struct pci_dev *pdev, const struct pci_device_id *dev_id) TW_DISABLE_INTERRUPTS(tw_dev); /* Initialize the card */ - if (twa_reset_sequence(tw_dev, 0)) + if (twa_reset_sequence(tw_dev, 0)) { + retval = -ENOMEM; goto out_iounmap; + } /* Set host specific parameters */ if ((pdev->device == PCI_DEVICE_ID_3WARE_9650SE) || diff --git a/drivers/scsi/3w-sas.c b/drivers/scsi/3w-sas.c index b150e131b2e76..aa317d6909e8f 100644 --- a/drivers/scsi/3w-sas.c +++ b/drivers/scsi/3w-sas.c @@ -1597,6 +1597,7 @@ static int twl_probe(struct pci_dev *pdev, const struct pci_device_id *dev_id) if (twl_initialize_device_extension(tw_dev)) { TW_PRINTK(tw_dev->host, TW_DRIVER, 0x1a, "Failed to initialize device extension"); + retval = -ENOMEM; goto out_free_device_extension; } @@ -1611,6 +1612,7 @@ static int twl_probe(struct pci_dev *pdev, const struct pci_device_id *dev_id) tw_dev->base_addr = pci_iomap(pdev, 1, 0); if (!tw_dev->base_addr) { TW_PRINTK(tw_dev->host, TW_DRIVER, 0x1c, "Failed to ioremap"); + retval = -ENOMEM; goto out_release_mem_region; } @@ -1620,6 +1622,7 @@ static int twl_probe(struct pci_dev *pdev, const struct pci_device_id *dev_id) /* Initialize the card */ if (twl_reset_sequence(tw_dev, 0)) { TW_PRINTK(tw_dev->host, TW_DRIVER, 0x1d, "Controller reset failed during probe"); + retval = -ENOMEM; goto out_iounmap; } diff --git a/drivers/scsi/3w-xxxx.c b/drivers/scsi/3w-xxxx.c index f6179e3d69539..961ea6f7def87 100644 --- a/drivers/scsi/3w-xxxx.c +++ b/drivers/scsi/3w-xxxx.c @@ -2280,6 +2280,7 @@ static int tw_probe(struct pci_dev *pdev, const struct pci_device_id *dev_id) if (tw_initialize_device_extension(tw_dev)) { printk(KERN_WARNING "3w-xxxx: Failed to initialize device extension."); + retval = -ENOMEM; goto out_free_device_extension; } @@ -2294,6 +2295,7 @@ static int tw_probe(struct pci_dev *pdev, const struct pci_device_id *dev_id) tw_dev->base_addr = pci_resource_start(pdev, 0); if (!tw_dev->base_addr) { printk(KERN_WARNING "3w-xxxx: Failed to get io address."); + retval = -ENOMEM; goto out_release_mem_region; } -- GitLab From 6158c2b70a8a555b6257ea350f99803bf5cf49bf Mon Sep 17 00:00:00 2001 From: Huaisheng Ye Date: Mon, 30 Jul 2018 15:15:45 +0800 Subject: [PATCH 0265/2269] tools/testing/nvdimm: kaddr and pfn can be NULL to ->direct_access() [ Upstream commit 45df5d3dc0c7289c1e67afe6d2ba806ad5174314 ] The mock / test version of pmem_direct_access() needs to check the validity of pointers kaddr and pfn for NULL assignment. If anyone equals to NULL, it doesn't need to calculate the value. If pointer equals to NULL, that is to say callers may have no need for kaddr or pfn, so this patch is prepared for allowing them to pass in NULL instead of having to pass in a local pointer or variable that they then just throw away. Suggested-by: Dan Williams Signed-off-by: Huaisheng Ye Reviewed-by: Ross Zwisler Signed-off-by: Dave Jiang Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/testing/nvdimm/pmem-dax.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/tools/testing/nvdimm/pmem-dax.c b/tools/testing/nvdimm/pmem-dax.c index b53596ad601bb..2e7fd82279691 100644 --- a/tools/testing/nvdimm/pmem-dax.c +++ b/tools/testing/nvdimm/pmem-dax.c @@ -31,17 +31,21 @@ long __pmem_direct_access(struct pmem_device *pmem, pgoff_t pgoff, if (get_nfit_res(pmem->phys_addr + offset)) { struct page *page; - *kaddr = pmem->virt_addr + offset; + if (kaddr) + *kaddr = pmem->virt_addr + offset; page = vmalloc_to_page(pmem->virt_addr + offset); - *pfn = page_to_pfn_t(page); + if (pfn) + *pfn = page_to_pfn_t(page); pr_debug_ratelimited("%s: pmem: %p pgoff: %#lx pfn: %#lx\n", __func__, pmem, pgoff, page_to_pfn(page)); return 1; } - *kaddr = pmem->virt_addr + offset; - *pfn = phys_to_pfn_t(pmem->phys_addr + offset, pmem->pfn_flags); + if (kaddr) + *kaddr = pmem->virt_addr + offset; + if (pfn) + *pfn = phys_to_pfn_t(pmem->phys_addr + offset, pmem->pfn_flags); /* * If badblocks are present, limit known good range to the -- GitLab From 1dac27c707c871f0a9352016d78d880979fa41d8 Mon Sep 17 00:00:00 2001 From: Surabhi Vishnoi Date: Wed, 25 Jul 2018 10:59:41 +0300 Subject: [PATCH 0266/2269] ath10k: disable bundle mgmt tx completion event support [ Upstream commit 673bc519c55843c68c3aecff71a4101e79d28d2b ] The tx completion of multiple mgmt frames can be bundled in a single event and sent by the firmware to host, if this capability is not disabled explicitly by the host. If the host cannot handle the bundled mgmt tx completion, this capability support needs to be disabled in the wmi init cmd, sent to the firmware. Add the host capability indication flag in the wmi ready command, to let firmware know the features supported by the host driver. This field is ignored if it is not supported by firmware. Set the host capability indication flag(i.e. host_capab) to zero, for disabling the support of bundle mgmt tx completion. This will indicate the firmware to send completion event for every mgmt tx completion, instead of bundling them together and sending in a single event. Tested HW: WCN3990 Tested FW: WLAN.HL.2.0-01188-QCAHLSWMTPLZ-1 Signed-off-by: Surabhi Vishnoi Signed-off-by: Rakesh Pillai Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/ath10k/wmi-tlv.c | 5 +++++ drivers/net/wireless/ath/ath10k/wmi-tlv.h | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/drivers/net/wireless/ath/ath10k/wmi-tlv.c b/drivers/net/wireless/ath/ath10k/wmi-tlv.c index 7616c1c4bbd32..baec856af90ff 100644 --- a/drivers/net/wireless/ath/ath10k/wmi-tlv.c +++ b/drivers/net/wireless/ath/ath10k/wmi-tlv.c @@ -1451,6 +1451,11 @@ static struct sk_buff *ath10k_wmi_tlv_op_gen_init(struct ath10k *ar) cfg->keep_alive_pattern_size = __cpu_to_le32(0); cfg->max_tdls_concurrent_sleep_sta = __cpu_to_le32(1); cfg->max_tdls_concurrent_buffer_sta = __cpu_to_le32(1); + cfg->wmi_send_separate = __cpu_to_le32(0); + cfg->num_ocb_vdevs = __cpu_to_le32(0); + cfg->num_ocb_channels = __cpu_to_le32(0); + cfg->num_ocb_schedules = __cpu_to_le32(0); + cfg->host_capab = __cpu_to_le32(0); ath10k_wmi_put_host_mem_chunks(ar, chunks); diff --git a/drivers/net/wireless/ath/ath10k/wmi-tlv.h b/drivers/net/wireless/ath/ath10k/wmi-tlv.h index 22cf011e839af..e75bba0bbf674 100644 --- a/drivers/net/wireless/ath/ath10k/wmi-tlv.h +++ b/drivers/net/wireless/ath/ath10k/wmi-tlv.h @@ -1228,6 +1228,11 @@ struct wmi_tlv_resource_config { __le32 keep_alive_pattern_size; __le32 max_tdls_concurrent_sleep_sta; __le32 max_tdls_concurrent_buffer_sta; + __le32 wmi_send_separate; + __le32 num_ocb_vdevs; + __le32 num_ocb_channels; + __le32 num_ocb_schedules; + __le32 host_capab; } __packed; struct wmi_tlv_init_cmd { -- GitLab From 60deae3d9fc96d4080ccd0a7f406ec20e1364cd2 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Mon, 30 Jul 2018 13:57:41 +0200 Subject: [PATCH 0267/2269] Bluetooth: hidp: Fix handling of strncpy for hid->name information MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit b3cadaa485f0c20add1644a5c877b0765b285c0c ] This fixes two issues with setting hid->name information. CC net/bluetooth/hidp/core.o In function ‘hidp_setup_hid’, inlined from ‘hidp_session_dev_init’ at net/bluetooth/hidp/core.c:815:9, inlined from ‘hidp_session_new’ at net/bluetooth/hidp/core.c:953:8, inlined from ‘hidp_connection_add’ at net/bluetooth/hidp/core.c:1366:8: net/bluetooth/hidp/core.c:778:2: warning: ‘strncpy’ output may be truncated copying 127 bytes from a string of length 127 [-Wstringop-truncation] strncpy(hid->name, req->name, sizeof(req->name) - 1); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ CC net/bluetooth/hidp/core.o net/bluetooth/hidp/core.c: In function ‘hidp_setup_hid’: net/bluetooth/hidp/core.c:778:38: warning: argument to ‘sizeof’ in ‘strncpy’ call is the same expression as the source; did you mean to use the size of the destination? [-Wsizeof-pointer-memaccess] strncpy(hid->name, req->name, sizeof(req->name)); ^ Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/bluetooth/hidp/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index cef3754408d44..b21fcc838784d 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -775,7 +775,7 @@ static int hidp_setup_hid(struct hidp_session *session, hid->version = req->version; hid->country = req->country; - strncpy(hid->name, req->name, sizeof(req->name) - 1); + strncpy(hid->name, req->name, sizeof(hid->name)); snprintf(hid->phys, sizeof(hid->phys), "%pMR", &l2cap_pi(session->ctrl_sock->sk)->chan->src); -- GitLab From 591ee8d9cd2f3c47771a3b363b383669753fb64a Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 25 Jul 2018 17:48:01 +0200 Subject: [PATCH 0268/2269] x86/mm: Remove in_nmi() warning from vmalloc_fault() [ Upstream commit 6863ea0cda8725072522cd78bda332d9a0b73150 ] It is perfectly okay to take page-faults, especially on the vmalloc area while executing an NMI handler. Remove the warning. Signed-off-by: Joerg Roedel Signed-off-by: Thomas Gleixner Tested-by: David H. Gutteridge Cc: "H . Peter Anvin" Cc: linux-mm@kvack.org Cc: Linus Torvalds Cc: Andy Lutomirski Cc: Dave Hansen Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Peter Zijlstra Cc: Borislav Petkov Cc: Jiri Kosina Cc: Boris Ostrovsky Cc: Brian Gerst Cc: David Laight Cc: Denys Vlasenko Cc: Eduardo Valentin Cc: Greg KH Cc: Will Deacon Cc: aliguori@amazon.com Cc: daniel.gruss@iaik.tugraz.at Cc: hughd@google.com Cc: keescook@google.com Cc: Andrea Arcangeli Cc: Waiman Long Cc: Pavel Machek Cc: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Namhyung Kim Cc: joro@8bytes.org Link: https://lkml.kernel.org/r/1532533683-5988-2-git-send-email-joro@8bytes.org Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/x86/mm/fault.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index c2faff548f59a..794c35c4ca736 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -317,8 +317,6 @@ static noinline int vmalloc_fault(unsigned long address) if (!(address >= VMALLOC_START && address < VMALLOC_END)) return -1; - WARN_ON_ONCE(in_nmi()); - /* * Synchronize this task's top level page-table * with the 'reference' page table. -- GitLab From 1fc16c07d63b8113146b1e3d06dcc003eb3fc6ea Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 19 Jul 2018 11:16:48 +0300 Subject: [PATCH 0269/2269] pinctrl: imx: off by one in imx_pinconf_group_dbg_show() [ Upstream commit b4859f3edb47825f62d1b2efdd75fe7945996f49 ] The > should really be >= here. It's harmless because pinctrl_generic_get_group() will return a NULL if group is invalid. Fixes: ae75ff814538 ("pinctrl: pinctrl-imx: add imx pinctrl core driver") Reported-by: Dong Aisheng Signed-off-by: Dan Carpenter Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/freescale/pinctrl-imx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/freescale/pinctrl-imx.c b/drivers/pinctrl/freescale/pinctrl-imx.c index 6e472691d8eed..17f2c5a505b25 100644 --- a/drivers/pinctrl/freescale/pinctrl-imx.c +++ b/drivers/pinctrl/freescale/pinctrl-imx.c @@ -389,7 +389,7 @@ static void imx_pinconf_group_dbg_show(struct pinctrl_dev *pctldev, const char *name; int i, ret; - if (group > pctldev->num_groups) + if (group >= pctldev->num_groups) return; seq_printf(s, "\n"); -- GitLab From acd73639c38c44a24c3892c1e258d5957c58c955 Mon Sep 17 00:00:00 2001 From: Anton Vasilyev Date: Mon, 23 Jul 2018 19:53:30 +0300 Subject: [PATCH 0270/2269] gpio: ml-ioh: Fix buffer underwrite on probe error path [ Upstream commit 4bf4eed44bfe288f459496eaf38089502ef91a79 ] If ioh_gpio_probe() fails on devm_irq_alloc_descs() then chip may point to any element of chip_save array, so reverse iteration from pointer chip may become chip_save[-1] and gpiochip_remove() will operate with wrong memory. The patch fix the error path of ioh_gpio_probe() to correctly bypass chip_save array. Found by Linux Driver Verification project (linuxtesting.org). Signed-off-by: Anton Vasilyev Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpio/gpio-ml-ioh.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-ml-ioh.c b/drivers/gpio/gpio-ml-ioh.c index 4b80e996d9765..1022fe8d09c79 100644 --- a/drivers/gpio/gpio-ml-ioh.c +++ b/drivers/gpio/gpio-ml-ioh.c @@ -497,9 +497,10 @@ static int ioh_gpio_probe(struct pci_dev *pdev, return 0; err_gpiochip_add: + chip = chip_save; while (--i >= 0) { - chip--; gpiochip_remove(&chip->gpio); + chip++; } kfree(chip_save); -- GitLab From 0cfe17c2a90928f6bedd935188c8c7feea252c82 Mon Sep 17 00:00:00 2001 From: Daniel Kurtz Date: Mon, 16 Jul 2018 18:57:18 -0600 Subject: [PATCH 0271/2269] pinctrl/amd: only handle irq if it is pending and unmasked [ Upstream commit 8bbed1eef001fdfc0ee9595f64cc4f769d265af4 ] The AMD pinctrl driver demultiplexes GPIO interrupts and fires off their individual handlers. If one of these GPIO irqs is configured as a level interrupt, and its downstream handler is a threaded ONESHOT interrupt, the GPIO interrupt source is masked by handle_level_irq() until the eventual return of the threaded irq handler. During this time the level GPIO interrupt status will still report as high until the actual gpio source is cleared - both in the individual GPIO interrupt status bit (INTERRUPT_STS_OFF) and in its corresponding "WAKE_INT_STATUS_REG" bit. Thus, if another GPIO interrupt occurs during this time, amd_gpio_irq_handler() will see that the (masked-and-not-yet-cleared) level irq is still pending and incorrectly call its handler again. To fix this, have amd_gpio_irq_handler() check for both interrupts status and mask before calling generic_handle_irq(). Note: Is it possible that this bug was the source of the interrupt storm on Ryzen when using chained interrupts before commit ba714a9c1dea85 ("pinctrl/amd: Use regular interrupt instead of chained")? Signed-off-by: Daniel Kurtz Acked-by: Thomas Gleixner Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/pinctrl-amd.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/pinctrl/pinctrl-amd.c b/drivers/pinctrl/pinctrl-amd.c index 433af328d9817..b78f42abff2f8 100644 --- a/drivers/pinctrl/pinctrl-amd.c +++ b/drivers/pinctrl/pinctrl-amd.c @@ -530,7 +530,8 @@ static irqreturn_t amd_gpio_irq_handler(int irq, void *dev_id) /* Each status bit covers four pins */ for (i = 0; i < 4; i++) { regval = readl(regs + i); - if (!(regval & PIN_IRQ_PENDING)) + if (!(regval & PIN_IRQ_PENDING) || + !(regval & BIT(INTERRUPT_MASK_OFF))) continue; irq = irq_find_mapping(gc->irqdomain, irqnr + i); generic_handle_irq(irq); -- GitLab From fd4e3615936cb2cce9d96c32211fa21f0aaea673 Mon Sep 17 00:00:00 2001 From: Yelena Krivosheev Date: Wed, 18 Jul 2018 18:10:51 +0200 Subject: [PATCH 0272/2269] net: mvneta: fix mtu change on port without link [ Upstream commit 8466baf788ec3e18836bd9c91ba0b1a07af25878 ] It is incorrect to enable TX/RX queues (call by mvneta_port_up()) for port without link. Indeed MTU change for interface without link causes TX queues to stuck. Fixes: c5aff18204da ("net: mvneta: driver for Marvell Armada 370/XP network unit") Signed-off-by: Yelena Krivosheev [gregory.clement: adding Fixes tags and rewording commit log] Signed-off-by: Gregory CLEMENT Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/marvell/mvneta.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 3deaa34133137..074a5b79d6913 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -3195,7 +3195,6 @@ static int mvneta_change_mtu(struct net_device *dev, int mtu) on_each_cpu(mvneta_percpu_enable, pp, true); mvneta_start_dev(pp); - mvneta_port_up(pp); netdev_update_features(dev); -- GitLab From 5f91efc475c8c9fc70e429d8fbb4c69c58e4f9ef Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 4 Jul 2018 18:04:10 +0800 Subject: [PATCH 0273/2269] f2fs: try grabbing node page lock aggressively in sync scenario [ Upstream commit 4b270a8cc5047682f0a3f3f9af3b498408dbd2bc ] In synchronous scenario, like in checkpoint(), we are going to flush dirty node pages to device synchronously, we can easily failed writebacking node page due to trylock_page() failure, especially in condition of intensive lock competition, which can cause long latency of checkpoint(). So let's use lock_page() in synchronous scenario to avoid this issue. Signed-off-by: Yunlei He Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/f2fs/node.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index f623da26159f6..712505ec5de48 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1610,7 +1610,9 @@ next_step: !is_cold_node(page))) continue; lock_node: - if (!trylock_page(page)) + if (wbc->sync_mode == WB_SYNC_ALL) + lock_page(page); + else if (!trylock_page(page)) continue; if (unlikely(page->mapping != NODE_MAPPING(sbi))) { -- GitLab From 7141f97cdd831e88223fb44b0f09e92b95bf7c35 Mon Sep 17 00:00:00 2001 From: Jinbum Park Date: Sat, 28 Jul 2018 13:20:44 +0900 Subject: [PATCH 0274/2269] pktcdvd: Fix possible Spectre-v1 for pkt_devs [ Upstream commit 55690c07b44a82cc3359ce0c233f4ba7d80ba145 ] User controls @dev_minor which to be used as index of pkt_devs. So, It can be exploited via Spectre-like attack. (speculative execution) This kind of attack leaks address of pkt_devs, [1] It leads an attacker to bypass security mechanism such as KASLR. So sanitize @dev_minor before using it to prevent attack. [1] https://github.com/jinb-park/linux-exploit/ tree/master/exploit-remaining-spectre-gadget/leak_pkt_devs.c Signed-off-by: Jinbum Park Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/block/pktcdvd.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index 531a0915066b3..11ec92e47455a 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -67,7 +67,7 @@ #include #include #include - +#include #include #define DRIVER_NAME "pktcdvd" @@ -2231,6 +2231,8 @@ static struct pktcdvd_device *pkt_find_dev_from_minor(unsigned int dev_minor) { if (dev_minor >= MAX_WRITERS) return NULL; + + dev_minor = array_index_nospec(dev_minor, MAX_WRITERS); return pkt_devs[dev_minor]; } -- GitLab From 894b7c6584ed5efa83ee717a657064d80258e685 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 4 Jul 2018 21:20:05 +0800 Subject: [PATCH 0275/2269] f2fs: fix to skip GC if type in SSA and SIT is inconsistent [ Upstream commit 10d255c3540239c7920f52d2eb223756e186af56 ] If segment type in SSA and SIT is inconsistent, we will encounter below BUG_ON during GC, to avoid this panic, let's just skip doing GC on such segment. The bug is triggered with image reported in below link: https://bugzilla.kernel.org/show_bug.cgi?id=200223 [ 388.060262] ------------[ cut here ]------------ [ 388.060268] kernel BUG at /home/y00370721/git/devf2fs/gc.c:989! [ 388.061172] invalid opcode: 0000 [#1] SMP [ 388.061773] Modules linked in: f2fs(O) bluetooth ecdh_generic xt_tcpudp iptable_filter ip_tables x_tables lp ttm drm_kms_helper drm intel_rapl sb_edac crct10dif_pclmul crc32_pclmul ghash_clmulni_intel pcbc aesni_intel fb_sys_fops ppdev aes_x86_64 syscopyarea crypto_simd sysfillrect parport_pc joydev sysimgblt glue_helper parport cryptd i2c_piix4 serio_raw mac_hid btrfs hid_generic usbhid hid raid6_pq psmouse pata_acpi floppy [ 388.064247] CPU: 7 PID: 4151 Comm: f2fs_gc-7:0 Tainted: G O 4.13.0-rc1+ #26 [ 388.065306] Hardware name: Xen HVM domU, BIOS 4.1.2_115-900.260_ 11/06/2015 [ 388.066058] task: ffff880201583b80 task.stack: ffffc90004d7c000 [ 388.069948] RIP: 0010:do_garbage_collect+0xcc8/0xcd0 [f2fs] [ 388.070766] RSP: 0018:ffffc90004d7fc68 EFLAGS: 00010202 [ 388.071783] RAX: ffff8801ed227000 RBX: 0000000000000001 RCX: ffffea0007b489c0 [ 388.072700] RDX: ffff880000000000 RSI: 0000000000000001 RDI: ffffea0007b489c0 [ 388.073607] RBP: ffffc90004d7fd58 R08: 0000000000000003 R09: ffffea0007b489dc [ 388.074619] R10: 0000000000000000 R11: 0052782ab317138d R12: 0000000000000018 [ 388.075625] R13: 0000000000000018 R14: ffff880211ceb000 R15: ffff880211ceb000 [ 388.076687] FS: 0000000000000000(0000) GS:ffff880214fc0000(0000) knlGS:0000000000000000 [ 388.083277] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 388.084536] CR2: 0000000000e18c60 CR3: 00000001ecf2e000 CR4: 00000000001406e0 [ 388.085748] Call Trace: [ 388.086690] ? find_next_bit+0xb/0x10 [ 388.088091] f2fs_gc+0x1a8/0x9d0 [f2fs] [ 388.088888] ? lock_timer_base+0x7d/0xa0 [ 388.090213] ? try_to_del_timer_sync+0x44/0x60 [ 388.091698] gc_thread_func+0x342/0x4b0 [f2fs] [ 388.092892] ? wait_woken+0x80/0x80 [ 388.094098] kthread+0x109/0x140 [ 388.095010] ? f2fs_gc+0x9d0/0x9d0 [f2fs] [ 388.096043] ? kthread_park+0x60/0x60 [ 388.097281] ret_from_fork+0x25/0x30 [ 388.098401] Code: ff ff 48 83 e8 01 48 89 44 24 58 e9 27 f8 ff ff 48 83 e8 01 e9 78 fc ff ff 48 8d 78 ff e9 17 fb ff ff 48 83 ef 01 e9 4d f4 ff ff <0f> 0b 66 0f 1f 44 00 00 0f 1f 44 00 00 55 48 89 e5 41 56 41 55 [ 388.100864] RIP: do_garbage_collect+0xcc8/0xcd0 [f2fs] RSP: ffffc90004d7fc68 [ 388.101810] ---[ end trace 81c73d6e6b7da61d ]--- Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/f2fs/gc.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index f2f897cd23c96..f22884418e92d 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -958,7 +958,13 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, goto next; sum = page_address(sum_page); - f2fs_bug_on(sbi, type != GET_SUM_TYPE((&sum->footer))); + if (type != GET_SUM_TYPE((&sum->footer))) { + f2fs_msg(sbi->sb, KERN_ERR, "Inconsistent segment (%u) " + "type [%d, %d] in SSA and SIT", + segno, type, GET_SUM_TYPE((&sum->footer))); + set_sbi_flag(sbi, SBI_NEED_FSCK); + goto next; + } /* * this is to avoid deadlock: -- GitLab From cf503dbe5c22c6ab9797e1cf864a11597d711c3a Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Fri, 8 Jun 2018 09:09:07 +0200 Subject: [PATCH 0276/2269] tpm_tis_spi: Pass the SPI IRQ down to the driver [ Upstream commit 1a339b658d9dbe1471f67b78237cf8fa08bbbeb5 ] An SPI TPM device managed directly on an embedded board using the SPI bus and some GPIO or similar line as IRQ handler will pass the IRQn from the TPM device associated with the SPI device. This is already handled by the SPI core, so make sure to pass this down to the core as well. (The TPM core habit of using -1 to signal no IRQ is dubious (as IRQ 0 is NO_IRQ) but I do not want to mess with that semantic in this patch.) Cc: Mark Brown Signed-off-by: Linus Walleij Reviewed-by: Jarkko Sakkinen Tested-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/char/tpm/tpm_tis_spi.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/char/tpm/tpm_tis_spi.c b/drivers/char/tpm/tpm_tis_spi.c index 8ab0bd8445f6d..b00388fc41c8d 100644 --- a/drivers/char/tpm/tpm_tis_spi.c +++ b/drivers/char/tpm/tpm_tis_spi.c @@ -188,6 +188,7 @@ static const struct tpm_tis_phy_ops tpm_spi_phy_ops = { static int tpm_tis_spi_probe(struct spi_device *dev) { struct tpm_tis_spi_phy *phy; + int irq; phy = devm_kzalloc(&dev->dev, sizeof(struct tpm_tis_spi_phy), GFP_KERNEL); @@ -200,7 +201,13 @@ static int tpm_tis_spi_probe(struct spi_device *dev) if (!phy->iobuf) return -ENOMEM; - return tpm_tis_core_init(&dev->dev, &phy->priv, -1, &tpm_spi_phy_ops, + /* If the SPI device has an IRQ then use that */ + if (dev->irq > 0) + irq = dev->irq; + else + irq = -1; + + return tpm_tis_core_init(&dev->dev, &phy->priv, irq, &tpm_spi_phy_ops, NULL); } -- GitLab From ee5067c60606cba72a571333411ff0941b6e04f0 Mon Sep 17 00:00:00 2001 From: Peter Rosin Date: Wed, 20 Jun 2018 07:17:54 +0200 Subject: [PATCH 0277/2269] tpm/tpm_i2c_infineon: switch to i2c_lock_bus(..., I2C_LOCK_SEGMENT) [ Upstream commit bb853aac2c478ce78116128263801189408ad2a8 ] Locking the root adapter for __i2c_transfer will deadlock if the device sits behind a mux-locked I2C mux. Switch to the finer-grained i2c_lock_bus with the I2C_LOCK_SEGMENT flag. If the device does not sit behind a mux-locked mux, the two locking variants are equivalent. Signed-off-by: Peter Rosin Reviewed-by: Jarkko Sakkinen Tested-by: Alexander Steffen Signed-off-by: Wolfram Sang Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/char/tpm/tpm_i2c_infineon.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/char/tpm/tpm_i2c_infineon.c b/drivers/char/tpm/tpm_i2c_infineon.c index d5b44cadac566..c619e76ce8276 100644 --- a/drivers/char/tpm/tpm_i2c_infineon.c +++ b/drivers/char/tpm/tpm_i2c_infineon.c @@ -117,7 +117,7 @@ static int iic_tpm_read(u8 addr, u8 *buffer, size_t len) /* Lock the adapter for the duration of the whole sequence. */ if (!tpm_dev.client->adapter->algo->master_xfer) return -EOPNOTSUPP; - i2c_lock_adapter(tpm_dev.client->adapter); + i2c_lock_bus(tpm_dev.client->adapter, I2C_LOCK_SEGMENT); if (tpm_dev.chip_type == SLB9645) { /* use a combined read for newer chips @@ -192,7 +192,7 @@ static int iic_tpm_read(u8 addr, u8 *buffer, size_t len) } out: - i2c_unlock_adapter(tpm_dev.client->adapter); + i2c_unlock_bus(tpm_dev.client->adapter, I2C_LOCK_SEGMENT); /* take care of 'guard time' */ usleep_range(SLEEP_DURATION_LOW, SLEEP_DURATION_HI); @@ -224,7 +224,7 @@ static int iic_tpm_write_generic(u8 addr, u8 *buffer, size_t len, if (!tpm_dev.client->adapter->algo->master_xfer) return -EOPNOTSUPP; - i2c_lock_adapter(tpm_dev.client->adapter); + i2c_lock_bus(tpm_dev.client->adapter, I2C_LOCK_SEGMENT); /* prepend the 'register address' to the buffer */ tpm_dev.buf[0] = addr; @@ -243,7 +243,7 @@ static int iic_tpm_write_generic(u8 addr, u8 *buffer, size_t len, usleep_range(sleep_low, sleep_hi); } - i2c_unlock_adapter(tpm_dev.client->adapter); + i2c_unlock_bus(tpm_dev.client->adapter, I2C_LOCK_SEGMENT); /* take care of 'guard time' */ usleep_range(SLEEP_DURATION_LOW, SLEEP_DURATION_HI); -- GitLab From 7fb2b50ee59689578d5a712633d1e6755fc98933 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 30 Jun 2018 18:13:40 +0800 Subject: [PATCH 0278/2269] f2fs: fix to do sanity check with reserved blkaddr of inline inode [ Upstream commit 4dbe38dc386910c668c75ae616b99b823b59f3eb ] As Wen Xu reported in bugzilla, after image was injected with random data by fuzzing, inline inode would contain invalid reserved blkaddr, then during inline conversion, we will encounter illegal memory accessing reported by KASAN, the root cause of this is when writing out converted inline page, we will use invalid reserved blkaddr to update sit bitmap, result in accessing memory beyond sit bitmap boundary. In order to fix this issue, let's do sanity check with reserved block address of inline inode to avoid above condition. https://bugzilla.kernel.org/show_bug.cgi?id=200179 [ 1428.846352] BUG: KASAN: use-after-free in update_sit_entry+0x80/0x7f0 [ 1428.846618] Read of size 4 at addr ffff880194483540 by task a.out/2741 [ 1428.846855] CPU: 0 PID: 2741 Comm: a.out Tainted: G W 4.17.0+ #1 [ 1428.846858] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 [ 1428.846860] Call Trace: [ 1428.846868] dump_stack+0x71/0xab [ 1428.846875] print_address_description+0x6b/0x290 [ 1428.846881] kasan_report+0x28e/0x390 [ 1428.846888] ? update_sit_entry+0x80/0x7f0 [ 1428.846898] update_sit_entry+0x80/0x7f0 [ 1428.846906] f2fs_allocate_data_block+0x6db/0xc70 [ 1428.846914] ? f2fs_get_node_info+0x14f/0x590 [ 1428.846920] do_write_page+0xc8/0x150 [ 1428.846928] f2fs_outplace_write_data+0xfe/0x210 [ 1428.846935] ? f2fs_do_write_node_page+0x170/0x170 [ 1428.846941] ? radix_tree_tag_clear+0xff/0x130 [ 1428.846946] ? __mod_node_page_state+0x22/0xa0 [ 1428.846951] ? inc_zone_page_state+0x54/0x100 [ 1428.846956] ? __test_set_page_writeback+0x336/0x5d0 [ 1428.846964] f2fs_convert_inline_page+0x407/0x6d0 [ 1428.846971] ? f2fs_read_inline_data+0x3b0/0x3b0 [ 1428.846978] ? __get_node_page+0x335/0x6b0 [ 1428.846987] f2fs_convert_inline_inode+0x41b/0x500 [ 1428.846994] ? f2fs_convert_inline_page+0x6d0/0x6d0 [ 1428.847000] ? kasan_unpoison_shadow+0x31/0x40 [ 1428.847005] ? kasan_kmalloc+0xa6/0xd0 [ 1428.847024] f2fs_file_mmap+0x79/0xc0 [ 1428.847029] mmap_region+0x58b/0x880 [ 1428.847037] ? arch_get_unmapped_area+0x370/0x370 [ 1428.847042] do_mmap+0x55b/0x7a0 [ 1428.847048] vm_mmap_pgoff+0x16f/0x1c0 [ 1428.847055] ? vma_is_stack_for_current+0x50/0x50 [ 1428.847062] ? __fsnotify_update_child_dentry_flags.part.1+0x160/0x160 [ 1428.847068] ? do_sys_open+0x206/0x2a0 [ 1428.847073] ? __fget+0xb4/0x100 [ 1428.847079] ksys_mmap_pgoff+0x278/0x360 [ 1428.847085] ? find_mergeable_anon_vma+0x50/0x50 [ 1428.847091] do_syscall_64+0x73/0x160 [ 1428.847098] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 1428.847102] RIP: 0033:0x7fb1430766ba [ 1428.847103] Code: 89 f5 41 54 49 89 fc 55 53 74 35 49 63 e8 48 63 da 4d 89 f9 49 89 e8 4d 63 d6 48 89 da 4c 89 ee 4c 89 e7 b8 09 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 56 5b 5d 41 5c 41 5d 41 5e 41 5f c3 0f 1f 00 [ 1428.847162] RSP: 002b:00007ffc651d9388 EFLAGS: 00000246 ORIG_RAX: 0000000000000009 [ 1428.847167] RAX: ffffffffffffffda RBX: 0000000000000001 RCX: 00007fb1430766ba [ 1428.847170] RDX: 0000000000000001 RSI: 0000000000001000 RDI: 0000000000000000 [ 1428.847173] RBP: 0000000000000003 R08: 0000000000000003 R09: 0000000000000000 [ 1428.847176] R10: 0000000000008002 R11: 0000000000000246 R12: 0000000000000000 [ 1428.847179] R13: 0000000000001000 R14: 0000000000008002 R15: 0000000000000000 [ 1428.847252] Allocated by task 2683: [ 1428.847372] kasan_kmalloc+0xa6/0xd0 [ 1428.847380] kmem_cache_alloc+0xc8/0x1e0 [ 1428.847385] getname_flags+0x73/0x2b0 [ 1428.847390] user_path_at_empty+0x1d/0x40 [ 1428.847395] vfs_statx+0xc1/0x150 [ 1428.847401] __do_sys_newlstat+0x7e/0xd0 [ 1428.847405] do_syscall_64+0x73/0x160 [ 1428.847411] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 1428.847466] Freed by task 2683: [ 1428.847566] __kasan_slab_free+0x137/0x190 [ 1428.847571] kmem_cache_free+0x85/0x1e0 [ 1428.847575] filename_lookup+0x191/0x280 [ 1428.847580] vfs_statx+0xc1/0x150 [ 1428.847585] __do_sys_newlstat+0x7e/0xd0 [ 1428.847590] do_syscall_64+0x73/0x160 [ 1428.847596] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 1428.847648] The buggy address belongs to the object at ffff880194483300 which belongs to the cache names_cache of size 4096 [ 1428.847946] The buggy address is located 576 bytes inside of 4096-byte region [ffff880194483300, ffff880194484300) [ 1428.848234] The buggy address belongs to the page: [ 1428.848366] page:ffffea0006512000 count:1 mapcount:0 mapping:ffff8801f3586380 index:0x0 compound_mapcount: 0 [ 1428.848606] flags: 0x17fff8000008100(slab|head) [ 1428.848737] raw: 017fff8000008100 dead000000000100 dead000000000200 ffff8801f3586380 [ 1428.848931] raw: 0000000000000000 0000000000070007 00000001ffffffff 0000000000000000 [ 1428.849122] page dumped because: kasan: bad access detected [ 1428.849305] Memory state around the buggy address: [ 1428.849436] ffff880194483400: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 1428.849620] ffff880194483480: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 1428.849804] >ffff880194483500: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 1428.849985] ^ [ 1428.850120] ffff880194483580: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 1428.850303] ffff880194483600: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 1428.850498] ================================================================== Reported-by: Wen Xu Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/f2fs/inline.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 8322e4e7bb3fc..295d5505b9395 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -128,6 +128,16 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page) if (err) return err; + if (unlikely(dn->data_blkaddr != NEW_ADDR)) { + f2fs_put_dnode(dn); + set_sbi_flag(fio.sbi, SBI_NEED_FSCK); + f2fs_msg(fio.sbi->sb, KERN_WARNING, + "%s: corrupted inline inode ino=%lx, i_addr[0]:0x%x, " + "run fsck to fix.", + __func__, dn->inode->i_ino, dn->data_blkaddr); + return -EINVAL; + } + f2fs_bug_on(F2FS_P_SB(page), PageWriteback(page)); read_inline_data(page, dn->inode_page); @@ -365,6 +375,17 @@ static int f2fs_move_inline_dirents(struct inode *dir, struct page *ipage, if (err) goto out; + if (unlikely(dn.data_blkaddr != NEW_ADDR)) { + f2fs_put_dnode(&dn); + set_sbi_flag(F2FS_P_SB(page), SBI_NEED_FSCK); + f2fs_msg(F2FS_P_SB(page)->sb, KERN_WARNING, + "%s: corrupted inline inode ino=%lx, i_addr[0]:0x%x, " + "run fsck to fix.", + __func__, dir->i_ino, dn.data_blkaddr); + err = -EINVAL; + goto out; + } + f2fs_wait_on_page_writeback(page, DATA, true); zero_user_segment(page, MAX_INLINE_DATA(dir), PAGE_SIZE); -- GitLab From e607db7ce9845620173d70a8885390ee224c4787 Mon Sep 17 00:00:00 2001 From: Nicholas Mc Guire Date: Sat, 16 Jun 2018 09:06:33 +0200 Subject: [PATCH 0279/2269] MIPS: Octeon: add missing of_node_put() [ Upstream commit b1259519e618d479ede8a0db5474b3aff99f5056 ] The call to of_find_node_by_name returns a node pointer with refcount incremented thus it must be explicitly decremented here after the last usage. Signed-off-by: Nicholas Mc Guire Signed-off-by: Paul Burton Patchwork: https://patchwork.linux-mips.org/patch/19558/ Cc: Ralf Baechle Cc: James Hogan Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/mips/cavium-octeon/octeon-platform.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/mips/cavium-octeon/octeon-platform.c b/arch/mips/cavium-octeon/octeon-platform.c index 8505db478904b..1d92efb82c372 100644 --- a/arch/mips/cavium-octeon/octeon-platform.c +++ b/arch/mips/cavium-octeon/octeon-platform.c @@ -322,6 +322,7 @@ static int __init octeon_ehci_device_init(void) return 0; pd = of_find_device_by_node(ehci_node); + of_node_put(ehci_node); if (!pd) return 0; @@ -384,6 +385,7 @@ static int __init octeon_ohci_device_init(void) return 0; pd = of_find_device_by_node(ohci_node); + of_node_put(ohci_node); if (!pd) return 0; -- GitLab From 946cf3fe1be660a0fd8520524f7385a2dab47ecc Mon Sep 17 00:00:00 2001 From: Nicholas Mc Guire Date: Wed, 11 Jul 2018 20:32:45 +0200 Subject: [PATCH 0280/2269] MIPS: generic: fix missing of_node_put() [ Upstream commit 28ec2238f37e72a3a40a7eb46893e7651bcc40a6 ] of_find_compatible_node() returns a device_node pointer with refcount incremented and must be decremented explicitly. As this code is using the result only to check presence of the interrupt controller (!NULL) but not actually using the result otherwise the refcount can be decremented here immediately again. Signed-off-by: Nicholas Mc Guire Signed-off-by: Paul Burton Patchwork: https://patchwork.linux-mips.org/patch/19820/ Cc: Ralf Baechle Cc: James Hogan Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/mips/generic/init.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/mips/generic/init.c b/arch/mips/generic/init.c index 5ba6fcc26fa72..94a78dbbc91f7 100644 --- a/arch/mips/generic/init.c +++ b/arch/mips/generic/init.c @@ -204,6 +204,7 @@ void __init arch_init_irq(void) "mti,cpu-interrupt-controller"); if (!cpu_has_veic && !intc_node) mips_cpu_irq_init(); + of_node_put(intc_node); irqchip_init(); } -- GitLab From 94f885db2a18b9c6d36b31f1f4af7c1c820fac60 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Fri, 27 Jul 2018 15:26:55 +0300 Subject: [PATCH 0281/2269] net: dcb: For wild-card lookups, use priority -1, not 0 [ Upstream commit 08193d1a893c802c4b807e4d522865061f4e9f4f ] The function dcb_app_lookup walks the list of specified DCB APP entries, looking for one that matches a given criteria: ifindex, selector, protocol ID and optionally also priority. The "don't care" value for priority is set to 0, because that priority has not been allowed under CEE regime, which predates the IEEE standardization. Under IEEE, 0 is a valid priority number. But because dcb_app_lookup considers zero a wild card, attempts to add an APP entry with priority 0 fail when other entries exist for a given ifindex / selector / PID triplet. Fix by changing the wild-card value to -1. Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/dcb/dcbnl.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c index bae7d78aa0689..fbeacbc2be5da 100644 --- a/net/dcb/dcbnl.c +++ b/net/dcb/dcbnl.c @@ -1765,7 +1765,7 @@ static struct dcb_app_type *dcb_app_lookup(const struct dcb_app *app, if (itr->app.selector == app->selector && itr->app.protocol == app->protocol && itr->ifindex == ifindex && - (!prio || itr->app.priority == prio)) + ((prio == -1) || itr->app.priority == prio)) return itr; } @@ -1800,7 +1800,8 @@ u8 dcb_getapp(struct net_device *dev, struct dcb_app *app) u8 prio = 0; spin_lock_bh(&dcb_lock); - if ((itr = dcb_app_lookup(app, dev->ifindex, 0))) + itr = dcb_app_lookup(app, dev->ifindex, -1); + if (itr) prio = itr->app.priority; spin_unlock_bh(&dcb_lock); @@ -1828,7 +1829,8 @@ int dcb_setapp(struct net_device *dev, struct dcb_app *new) spin_lock_bh(&dcb_lock); /* Search for existing match and replace */ - if ((itr = dcb_app_lookup(new, dev->ifindex, 0))) { + itr = dcb_app_lookup(new, dev->ifindex, -1); + if (itr) { if (new->priority) itr->app.priority = new->priority; else { @@ -1861,7 +1863,8 @@ u8 dcb_ieee_getapp_mask(struct net_device *dev, struct dcb_app *app) u8 prio = 0; spin_lock_bh(&dcb_lock); - if ((itr = dcb_app_lookup(app, dev->ifindex, 0))) + itr = dcb_app_lookup(app, dev->ifindex, -1); + if (itr) prio |= 1 << itr->app.priority; spin_unlock_bh(&dcb_lock); -- GitLab From e85940a5bb5fe04dc5b241dd6f6ba7ce43ab13d9 Mon Sep 17 00:00:00 2001 From: John Pittman Date: Thu, 21 Jun 2018 17:35:33 -0400 Subject: [PATCH 0282/2269] dm cache: only allow a single io_mode cache feature to be requested [ Upstream commit af9313c32c0fa2a0ac3b113669273833d60cc9de ] More than one io_mode feature can be requested when creating a dm cache device (as is: last one wins). The io_mode selections are incompatible with one another, we should force them to be selected exclusively. Add a counter to check for more than one io_mode selection. Fixes: 629d0a8a1a10 ("dm cache metadata: add "metadata2" feature") Signed-off-by: John Pittman Signed-off-by: Mike Snitzer Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-cache-target.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 71c3507df9a0e..a4b7c26980966 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -2330,7 +2330,7 @@ static int parse_features(struct cache_args *ca, struct dm_arg_set *as, {0, 2, "Invalid number of cache feature arguments"}, }; - int r; + int r, mode_ctr = 0; unsigned argc; const char *arg; struct cache_features *cf = &ca->features; @@ -2344,14 +2344,20 @@ static int parse_features(struct cache_args *ca, struct dm_arg_set *as, while (argc--) { arg = dm_shift_arg(as); - if (!strcasecmp(arg, "writeback")) + if (!strcasecmp(arg, "writeback")) { cf->io_mode = CM_IO_WRITEBACK; + mode_ctr++; + } - else if (!strcasecmp(arg, "writethrough")) + else if (!strcasecmp(arg, "writethrough")) { cf->io_mode = CM_IO_WRITETHROUGH; + mode_ctr++; + } - else if (!strcasecmp(arg, "passthrough")) + else if (!strcasecmp(arg, "passthrough")) { cf->io_mode = CM_IO_PASSTHROUGH; + mode_ctr++; + } else if (!strcasecmp(arg, "metadata2")) cf->metadata_version = 2; @@ -2362,6 +2368,11 @@ static int parse_features(struct cache_args *ca, struct dm_arg_set *as, } } + if (mode_ctr > 1) { + *error = "Duplicate cache io_mode features requested"; + return -EINVAL; + } + return 0; } -- GitLab From b0a6faaa6060e842b12e442ec5fdea02cb672fb2 Mon Sep 17 00:00:00 2001 From: Nick Dyer Date: Fri, 27 Jul 2018 11:44:20 -0700 Subject: [PATCH 0283/2269] Input: atmel_mxt_ts - only use first T9 instance [ Upstream commit 36f5d9ef26e52edff046b4b097855db89bf0cd4a ] The driver only registers one input device, which uses the screen parameters from the first T9 instance. The first T63 instance also uses those parameters. It is incorrect to send input reports from the second instances of these objects if they are enabled: the input scaling will be wrong and the positions will be mashed together. This also causes problems on Android if the number of slots exceeds 32. In the future, this could be handled by looking for enabled touch object instances and creating an input device for each one. Signed-off-by: Nick Dyer Acked-by: Benson Leung Acked-by: Yufeng Shen Signed-off-by: Dmitry Torokhov Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/input/touchscreen/atmel_mxt_ts.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/input/touchscreen/atmel_mxt_ts.c b/drivers/input/touchscreen/atmel_mxt_ts.c index fc149ea64be79..59aaac43db91f 100644 --- a/drivers/input/touchscreen/atmel_mxt_ts.c +++ b/drivers/input/touchscreen/atmel_mxt_ts.c @@ -1647,10 +1647,11 @@ static int mxt_parse_object_table(struct mxt_data *data, break; case MXT_TOUCH_MULTI_T9: data->multitouch = MXT_TOUCH_MULTI_T9; + /* Only handle messages from first T9 instance */ data->T9_reportid_min = min_id; - data->T9_reportid_max = max_id; - data->num_touchids = object->num_report_ids - * mxt_obj_instances(object); + data->T9_reportid_max = min_id + + object->num_report_ids - 1; + data->num_touchids = object->num_report_ids; break; case MXT_SPT_MESSAGECOUNT_T44: data->T44_address = object->start_address; -- GitLab From 758289892a132d9497d3a9f9e17823dbadb41ba9 Mon Sep 17 00:00:00 2001 From: Sylwester Nawrocki Date: Tue, 5 Jun 2018 09:33:59 -0400 Subject: [PATCH 0284/2269] media: s5p-mfc: Fix buffer look up in s5p_mfc_handle_frame_{new, copy_time} functions [ Upstream commit 4faeaf9c0f4581667ce5826f9c90c4fd463ef086 ] Look up of buffers in s5p_mfc_handle_frame_new, s5p_mfc_handle_frame_copy_time functions is not working properly for DMA addresses above 2 GiB. As a result flags and timestamp of returned buffers are not set correctly and it breaks operation of GStreamer/OMX plugins which rely on the CAPTURE buffer queue flags. Due to improper return type of the get_dec_y_adr, get_dspl_y_adr callbacks and sign bit extension these callbacks return incorrect address values, e.g. 0xfffffffffefc0000 instead of 0x00000000fefc0000. Then the statement: "if (vb2_dma_contig_plane_dma_addr(&dst_buf->b->vb2_buf, 0) == dec_y_addr)" is always false, which breaks looking up capture queue buffers. To ensure proper matching by address u32 type is used for the DMA addresses. This should work on all related SoCs, since the MFC DMA address width is not larger than 32-bit. Changes done in this patch are minimal as there is a larger patch series pending refactoring the whole driver. Signed-off-by: Sylwester Nawrocki Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/media/platform/s5p-mfc/s5p_mfc.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/drivers/media/platform/s5p-mfc/s5p_mfc.c b/drivers/media/platform/s5p-mfc/s5p_mfc.c index 8e9531f7f83f5..9942932ecbf9c 100644 --- a/drivers/media/platform/s5p-mfc/s5p_mfc.c +++ b/drivers/media/platform/s5p-mfc/s5p_mfc.c @@ -254,24 +254,24 @@ static void s5p_mfc_handle_frame_all_extracted(struct s5p_mfc_ctx *ctx) static void s5p_mfc_handle_frame_copy_time(struct s5p_mfc_ctx *ctx) { struct s5p_mfc_dev *dev = ctx->dev; - struct s5p_mfc_buf *dst_buf, *src_buf; - size_t dec_y_addr; + struct s5p_mfc_buf *dst_buf, *src_buf; + u32 dec_y_addr; unsigned int frame_type; /* Make sure we actually have a new frame before continuing. */ frame_type = s5p_mfc_hw_call(dev->mfc_ops, get_dec_frame_type, dev); if (frame_type == S5P_FIMV_DECODE_FRAME_SKIPPED) return; - dec_y_addr = s5p_mfc_hw_call(dev->mfc_ops, get_dec_y_adr, dev); + dec_y_addr = (u32)s5p_mfc_hw_call(dev->mfc_ops, get_dec_y_adr, dev); /* Copy timestamp / timecode from decoded src to dst and set appropriate flags. */ src_buf = list_entry(ctx->src_queue.next, struct s5p_mfc_buf, list); list_for_each_entry(dst_buf, &ctx->dst_queue, list) { - if (vb2_dma_contig_plane_dma_addr(&dst_buf->b->vb2_buf, 0) - == dec_y_addr) { - dst_buf->b->timecode = - src_buf->b->timecode; + u32 addr = (u32)vb2_dma_contig_plane_dma_addr(&dst_buf->b->vb2_buf, 0); + + if (addr == dec_y_addr) { + dst_buf->b->timecode = src_buf->b->timecode; dst_buf->b->vb2_buf.timestamp = src_buf->b->vb2_buf.timestamp; dst_buf->b->flags &= @@ -307,10 +307,10 @@ static void s5p_mfc_handle_frame_new(struct s5p_mfc_ctx *ctx, unsigned int err) { struct s5p_mfc_dev *dev = ctx->dev; struct s5p_mfc_buf *dst_buf; - size_t dspl_y_addr; + u32 dspl_y_addr; unsigned int frame_type; - dspl_y_addr = s5p_mfc_hw_call(dev->mfc_ops, get_dspl_y_adr, dev); + dspl_y_addr = (u32)s5p_mfc_hw_call(dev->mfc_ops, get_dspl_y_adr, dev); if (IS_MFCV6_PLUS(dev)) frame_type = s5p_mfc_hw_call(dev->mfc_ops, get_disp_frame_type, ctx); @@ -329,9 +329,10 @@ static void s5p_mfc_handle_frame_new(struct s5p_mfc_ctx *ctx, unsigned int err) /* The MFC returns address of the buffer, now we have to * check which videobuf does it correspond to */ list_for_each_entry(dst_buf, &ctx->dst_queue, list) { + u32 addr = (u32)vb2_dma_contig_plane_dma_addr(&dst_buf->b->vb2_buf, 0); + /* Check if this is the buffer we're looking for */ - if (vb2_dma_contig_plane_dma_addr(&dst_buf->b->vb2_buf, 0) - == dspl_y_addr) { + if (addr == dspl_y_addr) { list_del(&dst_buf->list); ctx->dst_queue_cnt--; dst_buf->b->sequence = ctx->sequence; -- GitLab From f3677a5c7d080ccc03a5acbfdc90f22ccf984877 Mon Sep 17 00:00:00 2001 From: Mauricio Faria de Oliveira Date: Wed, 25 Jul 2018 22:46:29 -0300 Subject: [PATCH 0285/2269] partitions/aix: append null character to print data from disk [ Upstream commit d43fdae7bac2def8c4314b5a49822cb7f08a45f1 ] Even if properly initialized, the lvname array (i.e., strings) is read from disk, and might contain corrupt data (e.g., lack the null terminating character for strings). So, make sure the partition name string used in pr_warn() has the null terminating character. Fixes: 6ceea22bbbc8 ("partitions: add aix lvm partition support files") Suggested-by: Daniel J. Axtens Signed-off-by: Mauricio Faria de Oliveira Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- block/partitions/aix.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/block/partitions/aix.c b/block/partitions/aix.c index 007f95eea0e1a..238aca56f552b 100644 --- a/block/partitions/aix.c +++ b/block/partitions/aix.c @@ -282,10 +282,14 @@ int aix_partition(struct parsed_partitions *state) next_lp_ix += 1; } for (i = 0; i < state->limit; i += 1) - if (lvip[i].pps_found && !lvip[i].lv_is_contiguous) + if (lvip[i].pps_found && !lvip[i].lv_is_contiguous) { + char tmp[sizeof(n[i].name) + 1]; // null char + + snprintf(tmp, sizeof(tmp), "%s", n[i].name); pr_warn("partition %s (%u pp's found) is " "not contiguous\n", - n[i].name, lvip[i].pps_found); + tmp, lvip[i].pps_found); + } kfree(pvd); } kfree(n); -- GitLab From 5deea7d63ba14027cee70936881c4d9171d8fa30 Mon Sep 17 00:00:00 2001 From: Mauricio Faria de Oliveira Date: Wed, 25 Jul 2018 22:46:28 -0300 Subject: [PATCH 0286/2269] partitions/aix: fix usage of uninitialized lv_info and lvname structures [ Upstream commit 14cb2c8a6c5dae57ee3e2da10fa3db2b9087e39e ] The if-block that sets a successful return value in aix_partition() uses 'lvip[].pps_per_lv' and 'n[].name' potentially uninitialized. For example, if 'numlvs' is zero or alloc_lvn() fails, neither is initialized, but are used anyway if alloc_pvd() succeeds after it. So, make the alloc_pvd() call conditional on their initialization. This has been hit when attaching an apparently corrupted/stressed AIX LUN, misleading the kernel to pr_warn() invalid data and hang. [...] partition (null) (11 pp's found) is not contiguous [...] partition (null) (2 pp's found) is not contiguous [...] partition (null) (3 pp's found) is not contiguous [...] partition (null) (64 pp's found) is not contiguous Fixes: 6ceea22bbbc8 ("partitions: add aix lvm partition support files") Signed-off-by: Mauricio Faria de Oliveira Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- block/partitions/aix.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/block/partitions/aix.c b/block/partitions/aix.c index 238aca56f552b..903f3ed175d02 100644 --- a/block/partitions/aix.c +++ b/block/partitions/aix.c @@ -178,7 +178,7 @@ int aix_partition(struct parsed_partitions *state) u32 vgda_sector = 0; u32 vgda_len = 0; int numlvs = 0; - struct pvd *pvd; + struct pvd *pvd = NULL; struct lv_info { unsigned short pps_per_lv; unsigned short pps_found; @@ -232,10 +232,11 @@ int aix_partition(struct parsed_partitions *state) if (lvip[i].pps_per_lv) foundlvs += 1; } + /* pvd loops depend on n[].name and lvip[].pps_per_lv */ + pvd = alloc_pvd(state, vgda_sector + 17); } put_dev_sector(sect); } - pvd = alloc_pvd(state, vgda_sector + 17); if (pvd) { int numpps = be16_to_cpu(pvd->pp_count); int psn_part1 = be32_to_cpu(pvd->psn_part1); -- GitLab From 9e850bc7691a46ae2abee995631d7361a7b71012 Mon Sep 17 00:00:00 2001 From: Katsuhiro Suzuki Date: Mon, 28 May 2018 21:09:20 -0400 Subject: [PATCH 0287/2269] media: helene: fix xtal frequency setting at power on [ Upstream commit a00e5f074b3f3cd39d1ccdc53d4d805b014df3f3 ] This patch fixes crystal frequency setting when power on this device. Signed-off-by: Katsuhiro Suzuki Acked-by: Abylay Ospan Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/media/dvb-frontends/helene.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/media/dvb-frontends/helene.c b/drivers/media/dvb-frontends/helene.c index 2ab8d83e55768..fcfe658a43288 100644 --- a/drivers/media/dvb-frontends/helene.c +++ b/drivers/media/dvb-frontends/helene.c @@ -897,7 +897,10 @@ static int helene_x_pon(struct helene_priv *priv) helene_write_regs(priv, 0x99, cdata, sizeof(cdata)); /* 0x81 - 0x94 */ - data[0] = 0x18; /* xtal 24 MHz */ + if (priv->xtal == SONY_HELENE_XTAL_16000) + data[0] = 0x10; /* xtal 16 MHz */ + else + data[0] = 0x18; /* xtal 24 MHz */ data[1] = (uint8_t)(0x80 | (0x04 & 0x1F)); /* 4 x 25 = 100uA */ data[2] = (uint8_t)(0x80 | (0x26 & 0x7F)); /* 38 x 0.25 = 9.5pF */ data[3] = 0x80; /* REFOUT signal output 500mVpp */ -- GitLab From 9d54a48ef29663ede2b012e0b1cda39e1eb36b20 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 21 Jun 2018 22:38:28 +0800 Subject: [PATCH 0288/2269] f2fs: fix to wait on page writeback before updating page [ Upstream commit 6aead1617b3adf2b7e2c56f0f13e4e0ee42ebb4a ] In error path of f2fs_move_rehashed_dirents, inode page could be writeback state, so we should wait on inode page writeback before updating it. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/f2fs/inline.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 295d5505b9395..888a9dc13677f 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -502,6 +502,7 @@ static int f2fs_move_rehashed_dirents(struct inode *dir, struct page *ipage, return 0; recover: lock_page(ipage); + f2fs_wait_on_page_writeback(ipage, NODE, true); memcpy(inline_dentry, backup_dentry, MAX_INLINE_DATA(dir)); f2fs_i_depth_write(dir, 0); f2fs_i_size_write(dir, MAX_INLINE_DATA(dir)); -- GitLab From 1252c1daa9c38ab807548b2703f90192a4ff5eb1 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 20 Jun 2018 13:39:53 +0300 Subject: [PATCH 0289/2269] f2fs: Fix uninitialized return in f2fs_ioc_shutdown() [ Upstream commit 2a96d8ad94ce57cb0072f7a660b1039720c47716 ] "ret" can be uninitialized on the success path when "in == F2FS_GOING_DOWN_FULLSYNC". Fixes: 60b2b4ee2bc0 ("f2fs: Fix deadlock in shutdown ioctl") Signed-off-by: Dan Carpenter Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/f2fs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 87e654c53c317..6f589730782d5 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1803,7 +1803,7 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg) struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct super_block *sb = sbi->sb; __u32 in; - int ret; + int ret = 0; if (!capable(CAP_SYS_ADMIN)) return -EPERM; -- GitLab From b28c14ae3576f69563d063d66f438651a1649a39 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 20 Jul 2018 18:16:59 +0200 Subject: [PATCH 0290/2269] iommu/ipmmu-vmsa: Fix allocation in atomic context [ Upstream commit 46583e8c48c5a094ba28060615b3a7c8c576690f ] When attaching a device to an IOMMU group with CONFIG_DEBUG_ATOMIC_SLEEP=y: BUG: sleeping function called from invalid context at mm/slab.h:421 in_atomic(): 1, irqs_disabled(): 128, pid: 61, name: kworker/1:1 ... Call trace: ... arm_lpae_alloc_pgtable+0x114/0x184 arm_64_lpae_alloc_pgtable_s1+0x2c/0x128 arm_32_lpae_alloc_pgtable_s1+0x40/0x6c alloc_io_pgtable_ops+0x60/0x88 ipmmu_attach_device+0x140/0x334 ipmmu_attach_device() takes a spinlock, while arm_lpae_alloc_pgtable() allocates memory using GFP_KERNEL. Originally, the ipmmu-vmsa driver had its own custom page table allocation implementation using GFP_ATOMIC, hence the spinlock was fine. Fix this by replacing the spinlock by a mutex, like the arm-smmu driver does. Fixes: f20ed39f53145e45 ("iommu/ipmmu-vmsa: Use the ARM LPAE page table allocator") Signed-off-by: Geert Uytterhoeven Reviewed-by: Laurent Pinchart Signed-off-by: Joerg Roedel Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/ipmmu-vmsa.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index 195d6e93ac718..5d0ba5f644c48 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -54,7 +54,7 @@ struct ipmmu_vmsa_domain { struct io_pgtable_ops *iop; unsigned int context_id; - spinlock_t lock; /* Protects mappings */ + struct mutex mutex; /* Protects mappings */ }; struct ipmmu_vmsa_iommu_priv { @@ -523,7 +523,7 @@ static struct iommu_domain *__ipmmu_domain_alloc(unsigned type) if (!domain) return NULL; - spin_lock_init(&domain->lock); + mutex_init(&domain->mutex); return &domain->io_domain; } @@ -548,7 +548,6 @@ static int ipmmu_attach_device(struct iommu_domain *io_domain, struct iommu_fwspec *fwspec = dev->iommu_fwspec; struct ipmmu_vmsa_device *mmu = priv->mmu; struct ipmmu_vmsa_domain *domain = to_vmsa_domain(io_domain); - unsigned long flags; unsigned int i; int ret = 0; @@ -557,7 +556,7 @@ static int ipmmu_attach_device(struct iommu_domain *io_domain, return -ENXIO; } - spin_lock_irqsave(&domain->lock, flags); + mutex_lock(&domain->mutex); if (!domain->mmu) { /* The domain hasn't been used yet, initialize it. */ @@ -574,7 +573,7 @@ static int ipmmu_attach_device(struct iommu_domain *io_domain, } else dev_info(dev, "Reusing IPMMU context %u\n", domain->context_id); - spin_unlock_irqrestore(&domain->lock, flags); + mutex_unlock(&domain->mutex); if (ret < 0) return ret; -- GitLab From 7beff543897c6766b8e131150bdde8947ca7ea66 Mon Sep 17 00:00:00 2001 From: Zumeng Chen Date: Wed, 4 Jul 2018 12:35:29 +0800 Subject: [PATCH 0291/2269] mfd: ti_am335x_tscadc: Fix struct clk memory leak [ Upstream commit c2b1509c77a99a0dcea0a9051ca743cb88385f50 ] Use devm_elk_get() to let Linux manage struct clk memory to avoid the following memory leakage report: unreferenced object 0xdd75efc0 (size 64): comm "systemd-udevd", pid 186, jiffies 4294945126 (age 1195.750s) hex dump (first 32 bytes): 61 64 63 5f 74 73 63 5f 66 63 6b 00 00 00 00 00 adc_tsc_fck..... 00 00 00 00 92 03 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [] kmemleak_alloc+0x40/0x74 [] __kmalloc_track_caller+0x198/0x388 [] kstrdup+0x40/0x5c [] kstrdup_const+0x30/0x3c [] __clk_create_clk+0x60/0xac [] clk_get_sys+0x74/0x144 [] clk_get+0x5c/0x68 [] ti_tscadc_probe+0x260/0x468 [ti_am335x_tscadc] [] platform_drv_probe+0x60/0xac [] driver_probe_device+0x214/0x2dc [] __driver_attach+0x94/0xc0 [] bus_for_each_dev+0x90/0xa0 [] driver_attach+0x28/0x30 [] bus_add_driver+0x184/0x1ec [] driver_register+0xb0/0xf0 [] __platform_driver_register+0x40/0x54 Signed-off-by: Zumeng Chen Signed-off-by: Lee Jones Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/mfd/ti_am335x_tscadc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/mfd/ti_am335x_tscadc.c b/drivers/mfd/ti_am335x_tscadc.c index 0f3fab47fe480..7dc1cbcd2fb89 100644 --- a/drivers/mfd/ti_am335x_tscadc.c +++ b/drivers/mfd/ti_am335x_tscadc.c @@ -210,14 +210,13 @@ static int ti_tscadc_probe(struct platform_device *pdev) * The TSC_ADC_SS controller design assumes the OCP clock is * at least 6x faster than the ADC clock. */ - clk = clk_get(&pdev->dev, "adc_tsc_fck"); + clk = devm_clk_get(&pdev->dev, "adc_tsc_fck"); if (IS_ERR(clk)) { dev_err(&pdev->dev, "failed to get TSC fck\n"); err = PTR_ERR(clk); goto err_disable_clk; } clock_rate = clk_get_rate(clk); - clk_put(clk); tscadc->clk_div = clock_rate / ADC_CLK; /* TSCADC_CLKDIV needs to be configured to the value minus 1 */ -- GitLab From 0983ef553d8fbda2d7bd1fb4783b567f7d563151 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 23 Jun 2018 11:25:19 +0800 Subject: [PATCH 0292/2269] f2fs: fix to do sanity check with {sit,nat}_ver_bitmap_bytesize [ Upstream commit c77ec61ca0a49544ca81881cc5d5529858f7e196 ] This patch adds to do sanity check with {sit,nat}_ver_bitmap_bytesize during mount, in order to avoid accessing across cache boundary with this abnormal bitmap size. - Overview buffer overrun in build_sit_info() when mounting a crafted f2fs image - Reproduce - Kernel message [ 548.580867] F2FS-fs (loop0): Invalid log blocks per segment (8201) [ 548.580877] F2FS-fs (loop0): Can't find valid F2FS filesystem in 1th superblock [ 548.584979] ================================================================== [ 548.586568] BUG: KASAN: use-after-free in kmemdup+0x36/0x50 [ 548.587715] Read of size 64 at addr ffff8801e9c265ff by task mount/1295 [ 548.589428] CPU: 1 PID: 1295 Comm: mount Not tainted 4.18.0-rc1+ #4 [ 548.589432] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 [ 548.589438] Call Trace: [ 548.589474] dump_stack+0x7b/0xb5 [ 548.589487] print_address_description+0x70/0x290 [ 548.589492] kasan_report+0x291/0x390 [ 548.589496] ? kmemdup+0x36/0x50 [ 548.589509] check_memory_region+0x139/0x190 [ 548.589514] memcpy+0x23/0x50 [ 548.589518] kmemdup+0x36/0x50 [ 548.589545] f2fs_build_segment_manager+0x8fa/0x3410 [ 548.589551] ? __asan_loadN+0xf/0x20 [ 548.589560] ? f2fs_sanity_check_ckpt+0x1be/0x240 [ 548.589566] ? f2fs_flush_sit_entries+0x10c0/0x10c0 [ 548.589587] ? __put_user_ns+0x40/0x40 [ 548.589604] ? find_next_bit+0x57/0x90 [ 548.589610] f2fs_fill_super+0x194b/0x2b40 [ 548.589617] ? f2fs_commit_super+0x1b0/0x1b0 [ 548.589637] ? set_blocksize+0x90/0x140 [ 548.589651] mount_bdev+0x1c5/0x210 [ 548.589655] ? f2fs_commit_super+0x1b0/0x1b0 [ 548.589667] f2fs_mount+0x15/0x20 [ 548.589672] mount_fs+0x60/0x1a0 [ 548.589683] ? alloc_vfsmnt+0x309/0x360 [ 548.589688] vfs_kern_mount+0x6b/0x1a0 [ 548.589699] do_mount+0x34a/0x18c0 [ 548.589710] ? lockref_put_or_lock+0xcf/0x160 [ 548.589716] ? copy_mount_string+0x20/0x20 [ 548.589728] ? memcg_kmem_put_cache+0x1b/0xa0 [ 548.589734] ? kasan_check_write+0x14/0x20 [ 548.589740] ? _copy_from_user+0x6a/0x90 [ 548.589744] ? memdup_user+0x42/0x60 [ 548.589750] ksys_mount+0x83/0xd0 [ 548.589755] __x64_sys_mount+0x67/0x80 [ 548.589781] do_syscall_64+0x78/0x170 [ 548.589797] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 548.589820] RIP: 0033:0x7f76fc331b9a [ 548.589821] Code: 48 8b 0d 01 c3 2b 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 49 89 ca b8 a5 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d ce c2 2b 00 f7 d8 64 89 01 48 [ 548.589880] RSP: 002b:00007ffd4f0a0e48 EFLAGS: 00000206 ORIG_RAX: 00000000000000a5 [ 548.589890] RAX: ffffffffffffffda RBX: 000000000146c030 RCX: 00007f76fc331b9a [ 548.589892] RDX: 000000000146c210 RSI: 000000000146df30 RDI: 0000000001474ec0 [ 548.589895] RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000013 [ 548.589897] R10: 00000000c0ed0000 R11: 0000000000000206 R12: 0000000001474ec0 [ 548.589900] R13: 000000000146c210 R14: 0000000000000000 R15: 0000000000000003 [ 548.590242] The buggy address belongs to the page: [ 548.591243] page:ffffea0007a70980 count:0 mapcount:0 mapping:0000000000000000 index:0x0 [ 548.592886] flags: 0x2ffff0000000000() [ 548.593665] raw: 02ffff0000000000 dead000000000100 dead000000000200 0000000000000000 [ 548.595258] raw: 0000000000000000 0000000000000000 00000000ffffffff 0000000000000000 [ 548.603713] page dumped because: kasan: bad access detected [ 548.605203] Memory state around the buggy address: [ 548.606198] ffff8801e9c26480: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff [ 548.607676] ffff8801e9c26500: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff [ 548.609157] >ffff8801e9c26580: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff [ 548.610629] ^ [ 548.612088] ffff8801e9c26600: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff [ 548.613674] ffff8801e9c26680: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff [ 548.615141] ================================================================== [ 548.616613] Disabling lock debugging due to kernel taint [ 548.622871] WARNING: CPU: 1 PID: 1295 at mm/page_alloc.c:4065 __alloc_pages_slowpath+0xe4a/0x1420 [ 548.622878] Modules linked in: snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hwdep snd_hda_core snd_pcm snd_timer snd mac_hid i2c_piix4 soundcore ib_iser rdma_cm iw_cm ib_cm ib_core iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx raid1 raid0 multipath linear 8139too crct10dif_pclmul crc32_pclmul qxl drm_kms_helper syscopyarea aesni_intel sysfillrect sysimgblt fb_sys_fops ttm drm aes_x86_64 crypto_simd cryptd 8139cp glue_helper mii pata_acpi floppy [ 548.623217] CPU: 1 PID: 1295 Comm: mount Tainted: G B 4.18.0-rc1+ #4 [ 548.623219] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 [ 548.623226] RIP: 0010:__alloc_pages_slowpath+0xe4a/0x1420 [ 548.623227] Code: ff ff 01 89 85 c8 fe ff ff e9 91 fc ff ff 41 89 c5 e9 5c fc ff ff 0f 0b 89 f8 25 ff ff f7 ff 89 85 8c fe ff ff e9 d5 f2 ff ff <0f> 0b e9 65 f2 ff ff 65 8b 05 38 81 d2 47 f6 c4 01 74 1c 65 48 8b [ 548.623281] RSP: 0018:ffff8801f28c7678 EFLAGS: 00010246 [ 548.623284] RAX: 0000000000000000 RBX: 00000000006040c0 RCX: ffffffffb82f73b7 [ 548.623287] RDX: 1ffff1003e518eeb RSI: 000000000000000c RDI: 0000000000000000 [ 548.623290] RBP: ffff8801f28c7880 R08: 0000000000000000 R09: ffffed0047fff2c5 [ 548.623292] R10: 0000000000000001 R11: ffffed0047fff2c4 R12: ffff8801e88de040 [ 548.623295] R13: 00000000006040c0 R14: 000000000000000c R15: ffff8801f28c7938 [ 548.623299] FS: 00007f76fca51840(0000) GS:ffff8801f6f00000(0000) knlGS:0000000000000000 [ 548.623302] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 548.623304] CR2: 00007f19b9171760 CR3: 00000001ed952000 CR4: 00000000000006e0 [ 548.623317] Call Trace: [ 548.623325] ? kasan_check_read+0x11/0x20 [ 548.623330] ? __zone_watermark_ok+0x92/0x240 [ 548.623336] ? get_page_from_freelist+0x1c3/0x1d90 [ 548.623347] ? _raw_spin_lock_irqsave+0x2a/0x60 [ 548.623353] ? warn_alloc+0x250/0x250 [ 548.623358] ? save_stack+0x46/0xd0 [ 548.623361] ? kasan_kmalloc+0xad/0xe0 [ 548.623366] ? __isolate_free_page+0x2a0/0x2a0 [ 548.623370] ? mount_fs+0x60/0x1a0 [ 548.623374] ? vfs_kern_mount+0x6b/0x1a0 [ 548.623378] ? do_mount+0x34a/0x18c0 [ 548.623383] ? ksys_mount+0x83/0xd0 [ 548.623387] ? __x64_sys_mount+0x67/0x80 [ 548.623391] ? do_syscall_64+0x78/0x170 [ 548.623396] ? entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 548.623401] __alloc_pages_nodemask+0x3c5/0x400 [ 548.623407] ? __alloc_pages_slowpath+0x1420/0x1420 [ 548.623412] ? __mutex_lock_slowpath+0x20/0x20 [ 548.623417] ? kvmalloc_node+0x31/0x80 [ 548.623424] alloc_pages_current+0x75/0x110 [ 548.623436] kmalloc_order+0x24/0x60 [ 548.623442] kmalloc_order_trace+0x24/0xb0 [ 548.623448] __kmalloc_track_caller+0x207/0x220 [ 548.623455] ? f2fs_build_node_manager+0x399/0xbb0 [ 548.623460] kmemdup+0x20/0x50 [ 548.623465] f2fs_build_node_manager+0x399/0xbb0 [ 548.623470] f2fs_fill_super+0x195e/0x2b40 [ 548.623477] ? f2fs_commit_super+0x1b0/0x1b0 [ 548.623481] ? set_blocksize+0x90/0x140 [ 548.623486] mount_bdev+0x1c5/0x210 [ 548.623489] ? f2fs_commit_super+0x1b0/0x1b0 [ 548.623495] f2fs_mount+0x15/0x20 [ 548.623498] mount_fs+0x60/0x1a0 [ 548.623503] ? alloc_vfsmnt+0x309/0x360 [ 548.623508] vfs_kern_mount+0x6b/0x1a0 [ 548.623513] do_mount+0x34a/0x18c0 [ 548.623518] ? lockref_put_or_lock+0xcf/0x160 [ 548.623523] ? copy_mount_string+0x20/0x20 [ 548.623528] ? memcg_kmem_put_cache+0x1b/0xa0 [ 548.623533] ? kasan_check_write+0x14/0x20 [ 548.623537] ? _copy_from_user+0x6a/0x90 [ 548.623542] ? memdup_user+0x42/0x60 [ 548.623547] ksys_mount+0x83/0xd0 [ 548.623552] __x64_sys_mount+0x67/0x80 [ 548.623557] do_syscall_64+0x78/0x170 [ 548.623562] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 548.623566] RIP: 0033:0x7f76fc331b9a [ 548.623567] Code: 48 8b 0d 01 c3 2b 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 49 89 ca b8 a5 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d ce c2 2b 00 f7 d8 64 89 01 48 [ 548.623632] RSP: 002b:00007ffd4f0a0e48 EFLAGS: 00000206 ORIG_RAX: 00000000000000a5 [ 548.623636] RAX: ffffffffffffffda RBX: 000000000146c030 RCX: 00007f76fc331b9a [ 548.623639] RDX: 000000000146c210 RSI: 000000000146df30 RDI: 0000000001474ec0 [ 548.623641] RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000013 [ 548.623643] R10: 00000000c0ed0000 R11: 0000000000000206 R12: 0000000001474ec0 [ 548.623646] R13: 000000000146c210 R14: 0000000000000000 R15: 0000000000000003 [ 548.623650] ---[ end trace 4ce02f25ff7d3df5 ]--- [ 548.623656] F2FS-fs (loop0): Failed to initialize F2FS node manager [ 548.627936] F2FS-fs (loop0): Invalid log blocks per segment (8201) [ 548.627940] F2FS-fs (loop0): Can't find valid F2FS filesystem in 1th superblock [ 548.635835] F2FS-fs (loop0): Failed to initialize F2FS node manager - Location https://elixir.bootlin.com/linux/v4.18-rc1/source/fs/f2fs/segment.c#L3578 sit_i->sit_bitmap = kmemdup(src_bitmap, bitmap_size, GFP_KERNEL); Buffer overrun happens when doing memcpy. I suspect there is missing (inconsistent) checks on bitmap_size. Reported by Wen Xu (wen.xu@gatech.edu) from SSLab, Gatech. Reported-by: Wen Xu Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/f2fs/super.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 400c00058bad9..eae35909fa51a 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1883,12 +1883,17 @@ int sanity_check_ckpt(struct f2fs_sb_info *sbi) struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); unsigned int ovp_segments, reserved_segments; unsigned int main_segs, blocks_per_seg; + unsigned int sit_segs, nat_segs; + unsigned int sit_bitmap_size, nat_bitmap_size; + unsigned int log_blocks_per_seg; int i; total = le32_to_cpu(raw_super->segment_count); fsmeta = le32_to_cpu(raw_super->segment_count_ckpt); - fsmeta += le32_to_cpu(raw_super->segment_count_sit); - fsmeta += le32_to_cpu(raw_super->segment_count_nat); + sit_segs = le32_to_cpu(raw_super->segment_count_sit); + fsmeta += sit_segs; + nat_segs = le32_to_cpu(raw_super->segment_count_nat); + fsmeta += nat_segs; fsmeta += le32_to_cpu(ckpt->rsvd_segment_count); fsmeta += le32_to_cpu(raw_super->segment_count_ssa); @@ -1919,6 +1924,18 @@ int sanity_check_ckpt(struct f2fs_sb_info *sbi) return 1; } + sit_bitmap_size = le32_to_cpu(ckpt->sit_ver_bitmap_bytesize); + nat_bitmap_size = le32_to_cpu(ckpt->nat_ver_bitmap_bytesize); + log_blocks_per_seg = le32_to_cpu(raw_super->log_blocks_per_seg); + + if (sit_bitmap_size != ((sit_segs / 2) << log_blocks_per_seg) / 8 || + nat_bitmap_size != ((nat_segs / 2) << log_blocks_per_seg) / 8) { + f2fs_msg(sbi->sb, KERN_ERR, + "Wrong bitmap size: sit: %u, nat:%u", + sit_bitmap_size, nat_bitmap_size); + return 1; + } + if (unlikely(f2fs_cp_error(sbi))) { f2fs_msg(sbi->sb, KERN_ERR, "A bug case: need to run fsck"); return 1; -- GitLab From 1181e8687a8d075841b1e8876ec6cc503219e207 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 12 Jul 2018 14:19:03 -0400 Subject: [PATCH 0293/2269] NFSv4.1: Fix a potential layoutget/layoutrecall deadlock [ Upstream commit bd3d16a887b0c19a2a20d35ffed499e3a3637feb ] If the client is sending a layoutget, but the server issues a callback to recall what it thinks may be an outstanding layout, then we may find an uninitialised layout attached to the inode due to the layoutget. In that case, it is appropriate to return NFS4ERR_NOMATCHING_LAYOUT rather than NFS4ERR_DELAY, as the latter can end up deadlocking. Signed-off-by: Trond Myklebust Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/nfs/callback_proc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 2c3f398995f6f..b8d55da2f04d5 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -213,9 +213,9 @@ static u32 pnfs_check_callback_stateid(struct pnfs_layout_hdr *lo, { u32 oldseq, newseq; - /* Is the stateid still not initialised? */ + /* Is the stateid not initialised? */ if (!pnfs_layout_is_valid(lo)) - return NFS4ERR_DELAY; + return NFS4ERR_NOMATCHING_LAYOUT; /* Mismatched stateid? */ if (!nfs4_stateid_match_other(&lo->plh_stateid, new)) -- GitLab From 0d1d365d1d4423b55e3a18e1d1ad2036aa1708ea Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Fri, 25 Nov 2016 18:46:09 +0000 Subject: [PATCH 0294/2269] MIPS: WARN_ON invalid DMA cache maintenance, not BUG_ON [ Upstream commit d4da0e97baea8768b3d66ccef3967bebd50dfc3b ] If a driver causes DMA cache maintenance with a zero length then we currently BUG and kill the kernel. As this is a scenario that we may well be able to recover from, WARN & return in the condition instead. Signed-off-by: Paul Burton Acked-by: Florian Fainelli Patchwork: https://patchwork.linux-mips.org/patch/14623/ Cc: Ralf Baechle Cc: linux-mips@linux-mips.org Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/mips/mm/c-r4k.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c index e12dfa48b478d..a5893b2cdc0e0 100644 --- a/arch/mips/mm/c-r4k.c +++ b/arch/mips/mm/c-r4k.c @@ -835,7 +835,8 @@ static void r4k_flush_icache_user_range(unsigned long start, unsigned long end) static void r4k_dma_cache_wback_inv(unsigned long addr, unsigned long size) { /* Catch bad driver code */ - BUG_ON(size == 0); + if (WARN_ON(size == 0)) + return; preempt_disable(); if (cpu_has_inclusive_pcaches) { @@ -871,7 +872,8 @@ static void r4k_dma_cache_wback_inv(unsigned long addr, unsigned long size) static void r4k_dma_cache_inv(unsigned long addr, unsigned long size) { /* Catch bad driver code */ - BUG_ON(size == 0); + if (WARN_ON(size == 0)) + return; preempt_disable(); if (cpu_has_inclusive_pcaches) { -- GitLab From bdbf6e0b93261127196a6dbfc180aabf44e1a296 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Mon, 16 Jul 2018 11:50:13 +0300 Subject: [PATCH 0295/2269] RDMA/cma: Do not ignore net namespace for unbound cm_id [ Upstream commit 643d213a9a034fa04f5575a40dfc8548e33ce04f ] Currently if the cm_id is not bound to any netdevice, than for such cm_id, net namespace is ignored; which is incorrect. Regardless of cm_id bound to a netdevice or not, net namespace must match. When a cm_id is bound to a netdevice, in such case net namespace and netdevice both must match. Fixes: 4c21b5bcef73 ("IB/cma: Add net_dev and private data checks to RDMA CM") Signed-off-by: Parav Pandit Reviewed-by: Daniel Jurgens Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/core/cma.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 79843a3ca9dcd..752dbc388c271 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -1459,9 +1459,16 @@ static bool cma_match_net_dev(const struct rdma_cm_id *id, (addr->src_addr.ss_family == AF_IB || cma_protocol_roce_dev_port(id->device, port_num)); - return !addr->dev_addr.bound_dev_if || - (net_eq(dev_net(net_dev), addr->dev_addr.net) && - addr->dev_addr.bound_dev_if == net_dev->ifindex); + /* + * Net namespaces must match, and if the listner is listening + * on a specific netdevice than netdevice must match as well. + */ + if (net_eq(dev_net(net_dev), addr->dev_addr.net) && + (!!addr->dev_addr.bound_dev_if == + (addr->dev_addr.bound_dev_if == net_dev->ifindex))) + return true; + else + return false; } static struct rdma_id_private *cma_find_listener( -- GitLab From 6f7bf899b92ddb5e6a4dd3b0bc2762aec4836685 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 14 Aug 2018 09:00:01 +0300 Subject: [PATCH 0296/2269] drm/i915: set DP Main Stream Attribute for color range on DDI platforms MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 6209c285e7a5e68dbcdf8fd2456c6dd68433806b upstream. Since Haswell we have no color range indication either in the pipe or port registers for DP. Instead, there's a separate register for setting the DP Main Stream Attributes (MSA) directly. The MSA register definition makes no references to colorimetry, just a vague reference to the DP spec. The connection to the color range was lost. Apparently we've failed to set the proper MSA bit for limited, or CEA, range ever since the first DDI platforms. We've started setting other MSA parameters since commit dae847991a43 ("drm/i915: add intel_ddi_set_pipe_settings"). Without the crucial bit of information, the DP sink has no way of knowing the source is actually transmitting limited range RGB, leading to "washed out" colors. With the colorimetry information, compliant sinks should be able to handle the limited range properly. Native (i.e. non-LSPCON) HDMI was not affected because we do pass the color range via AVI infoframes. Though not the root cause, the problem was made worse for DDI platforms with commit 55bc60db5988 ("drm/i915: Add "Automatic" mode for the "Broadcast RGB" property"), which selects limited range RGB automatically based on the mode, as per the DP, HDMI and CEA specs. After all these years, the fix boils down to flipping one bit. [Per testing reports, this fixes DP sinks, but not the LSPCON. My educated guess is that the LSPCON fails to turn the CEA range MSA into AVI infoframes for HDMI.] Reported-by: Michał Kopeć Reported-by: N. W. Reported-by: Nicholas Stommel Reported-by: Tom Yan Tested-by: Nicholas Stommel References: https://bugs.freedesktop.org/show_bug.cgi?id=100023 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=107476 Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=94921 Cc: Paulo Zanoni Cc: Rodrigo Vivi Cc: Ville Syrjälä Cc: # v3.9+ Reviewed-by: Rodrigo Vivi Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20180814060001.18224-1-jani.nikula@intel.com (cherry picked from commit dc5977da99ea28094b8fa4e9bacbd29bedc41de5) Signed-off-by: Rodrigo Vivi Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/i915_reg.h | 1 + drivers/gpu/drm/i915/intel_ddi.c | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index be813b2738c15..2e706f1abe64f 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -8462,6 +8462,7 @@ enum skl_power_gate { #define TRANS_MSA_10_BPC (2<<5) #define TRANS_MSA_12_BPC (3<<5) #define TRANS_MSA_16_BPC (4<<5) +#define TRANS_MSA_CEA_RANGE (1<<3) /* LCPLL Control */ #define LCPLL_CTL _MMIO(0x130040) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 5e5fe03b638cb..3a4a581345c43 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -1396,6 +1396,10 @@ void intel_ddi_set_pipe_settings(const struct intel_crtc_state *crtc_state) WARN_ON(transcoder_is_dsi(cpu_transcoder)); temp = TRANS_MSA_SYNC_CLK; + + if (crtc_state->limited_color_range) + temp |= TRANS_MSA_CEA_RANGE; + switch (crtc_state->pipe_bpp) { case 18: temp |= TRANS_MSA_6_BPC; -- GitLab From 6093d5abcf5ada86d2bb61bd5154bc144bf5a3aa Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 13 Sep 2018 07:58:33 -0700 Subject: [PATCH 0297/2269] inet: frags: change inet_frags_init_net() return value We will soon initialize one rhashtable per struct netns_frags in inet_frags_init_net(). This patch changes the return value to eventually propagate an error. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller (cherry picked from commit 787bea7748a76130566f881c2342a0be4127d182) Signed-off-by: Greg Kroah-Hartman --- include/net/inet_frag.h | 3 ++- net/ieee802154/6lowpan/reassembly.c | 11 ++++++++--- net/ipv4/ip_fragment.c | 12 +++++++++--- net/ipv6/netfilter/nf_conntrack_reasm.c | 12 +++++++++--- net/ipv6/reassembly.c | 11 +++++++++-- 5 files changed, 37 insertions(+), 12 deletions(-) diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index a6e4edd8d4a25..2ad894e446acd 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -104,9 +104,10 @@ struct inet_frags { int inet_frags_init(struct inet_frags *); void inet_frags_fini(struct inet_frags *); -static inline void inet_frags_init_net(struct netns_frags *nf) +static inline int inet_frags_init_net(struct netns_frags *nf) { atomic_set(&nf->mem, 0); + return 0; } void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f); diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c index f85b08baff160..9757ce6c077ae 100644 --- a/net/ieee802154/6lowpan/reassembly.c +++ b/net/ieee802154/6lowpan/reassembly.c @@ -580,14 +580,19 @@ static int __net_init lowpan_frags_init_net(struct net *net) { struct netns_ieee802154_lowpan *ieee802154_lowpan = net_ieee802154_lowpan(net); + int res; ieee802154_lowpan->frags.high_thresh = IPV6_FRAG_HIGH_THRESH; ieee802154_lowpan->frags.low_thresh = IPV6_FRAG_LOW_THRESH; ieee802154_lowpan->frags.timeout = IPV6_FRAG_TIMEOUT; - inet_frags_init_net(&ieee802154_lowpan->frags); - - return lowpan_frags_ns_sysctl_register(net); + res = inet_frags_init_net(&ieee802154_lowpan->frags); + if (res < 0) + return res; + res = lowpan_frags_ns_sysctl_register(net); + if (res < 0) + inet_frags_exit_net(&ieee802154_lowpan->frags, &lowpan_frags); + return res; } static void __net_exit lowpan_frags_exit_net(struct net *net) diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 4cb1befc39494..916def2e2afed 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -850,6 +850,8 @@ static void __init ip4_frags_ctl_register(void) static int __net_init ipv4_frags_init_net(struct net *net) { + int res; + /* Fragment cache limits. * * The fragment memory accounting code, (tries to) account for @@ -875,9 +877,13 @@ static int __net_init ipv4_frags_init_net(struct net *net) net->ipv4.frags.max_dist = 64; - inet_frags_init_net(&net->ipv4.frags); - - return ip4_frags_ns_ctl_register(net); + res = inet_frags_init_net(&net->ipv4.frags); + if (res < 0) + return res; + res = ip4_frags_ns_ctl_register(net); + if (res < 0) + inet_frags_exit_net(&net->ipv4.frags, &ip4_frags); + return res; } static void __net_exit ipv4_frags_exit_net(struct net *net) diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index ee33a6743f3bf..afa9ea76155d8 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -630,12 +630,18 @@ EXPORT_SYMBOL_GPL(nf_ct_frag6_gather); static int nf_ct_net_init(struct net *net) { + int res; + net->nf_frag.frags.high_thresh = IPV6_FRAG_HIGH_THRESH; net->nf_frag.frags.low_thresh = IPV6_FRAG_LOW_THRESH; net->nf_frag.frags.timeout = IPV6_FRAG_TIMEOUT; - inet_frags_init_net(&net->nf_frag.frags); - - return nf_ct_frag6_sysctl_register(net); + res = inet_frags_init_net(&net->nf_frag.frags); + if (res < 0) + return res; + res = nf_ct_frag6_sysctl_register(net); + if (res < 0) + inet_frags_exit_net(&net->nf_frag.frags, &nf_frags); + return res; } static void nf_ct_net_exit(struct net *net) diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 846012eae5268..38bf38a9717f0 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -714,13 +714,20 @@ static void ip6_frags_sysctl_unregister(void) static int __net_init ipv6_frags_init_net(struct net *net) { + int res; + net->ipv6.frags.high_thresh = IPV6_FRAG_HIGH_THRESH; net->ipv6.frags.low_thresh = IPV6_FRAG_LOW_THRESH; net->ipv6.frags.timeout = IPV6_FRAG_TIMEOUT; - inet_frags_init_net(&net->ipv6.frags); + res = inet_frags_init_net(&net->ipv6.frags); + if (res < 0) + return res; - return ip6_frags_ns_sysctl_register(net); + res = ip6_frags_ns_sysctl_register(net); + if (res < 0) + inet_frags_exit_net(&net->ipv6.frags, &ip6_frags); + return res; } static void __net_exit ipv6_frags_exit_net(struct net *net) -- GitLab From 673220d6417de8812b20bfeb4d2f809e05a82463 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 13 Sep 2018 07:58:34 -0700 Subject: [PATCH 0298/2269] inet: frags: add a pointer to struct netns_frags In order to simplify the API, add a pointer to struct inet_frags. This will allow us to make things less complex. These functions no longer have a struct inet_frags parameter : inet_frag_destroy(struct inet_frag_queue *q /*, struct inet_frags *f */) inet_frag_put(struct inet_frag_queue *q /*, struct inet_frags *f */) inet_frag_kill(struct inet_frag_queue *q /*, struct inet_frags *f */) inet_frags_exit_net(struct netns_frags *nf /*, struct inet_frags *f */) ip6_expire_frag_queue(struct net *net, struct frag_queue *fq) Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller (cherry picked from commit 093ba72914b696521e4885756a68a3332782c8de) Signed-off-by: Greg Kroah-Hartman --- include/net/inet_frag.h | 11 ++++++----- include/net/ipv6.h | 3 +-- net/ieee802154/6lowpan/reassembly.c | 13 +++++++------ net/ipv4/inet_fragment.c | 17 ++++++++++------- net/ipv4/ip_fragment.c | 9 +++++---- net/ipv6/netfilter/nf_conntrack_reasm.c | 16 +++++++++------- net/ipv6/reassembly.c | 20 ++++++++++---------- 7 files changed, 48 insertions(+), 41 deletions(-) diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index 2ad894e446acd..fd338293a0957 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -10,6 +10,7 @@ struct netns_frags { int high_thresh; int low_thresh; int max_dist; + struct inet_frags *f; }; /** @@ -109,20 +110,20 @@ static inline int inet_frags_init_net(struct netns_frags *nf) atomic_set(&nf->mem, 0); return 0; } -void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f); +void inet_frags_exit_net(struct netns_frags *nf); -void inet_frag_kill(struct inet_frag_queue *q, struct inet_frags *f); -void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f); +void inet_frag_kill(struct inet_frag_queue *q); +void inet_frag_destroy(struct inet_frag_queue *q); struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, struct inet_frags *f, void *key, unsigned int hash); void inet_frag_maybe_warn_overflow(struct inet_frag_queue *q, const char *prefix); -static inline void inet_frag_put(struct inet_frag_queue *q, struct inet_frags *f) +static inline void inet_frag_put(struct inet_frag_queue *q) { if (refcount_dec_and_test(&q->refcnt)) - inet_frag_destroy(q, f); + inet_frag_destroy(q); } static inline bool inet_frag_evicting(struct inet_frag_queue *q) diff --git a/include/net/ipv6.h b/include/net/ipv6.h index f280c61e019a1..ff8407b19d050 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -560,8 +560,7 @@ struct frag_queue { u8 ecn; }; -void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq, - struct inet_frags *frags); +void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq); static inline bool ipv6_addr_any(const struct in6_addr *a) { diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c index 9757ce6c077ae..9ccb8458b5c37 100644 --- a/net/ieee802154/6lowpan/reassembly.c +++ b/net/ieee802154/6lowpan/reassembly.c @@ -93,10 +93,10 @@ static void lowpan_frag_expire(unsigned long data) if (fq->q.flags & INET_FRAG_COMPLETE) goto out; - inet_frag_kill(&fq->q, &lowpan_frags); + inet_frag_kill(&fq->q); out: spin_unlock(&fq->q.lock); - inet_frag_put(&fq->q, &lowpan_frags); + inet_frag_put(&fq->q); } static inline struct lowpan_frag_queue * @@ -229,7 +229,7 @@ static int lowpan_frag_reasm(struct lowpan_frag_queue *fq, struct sk_buff *prev, struct sk_buff *fp, *head = fq->q.fragments; int sum_truesize; - inet_frag_kill(&fq->q, &lowpan_frags); + inet_frag_kill(&fq->q); /* Make the one we just received the head. */ if (prev) { @@ -437,7 +437,7 @@ int lowpan_frag_rcv(struct sk_buff *skb, u8 frag_type) ret = lowpan_frag_queue(fq, skb, frag_type); spin_unlock(&fq->q.lock); - inet_frag_put(&fq->q, &lowpan_frags); + inet_frag_put(&fq->q); return ret; } @@ -585,13 +585,14 @@ static int __net_init lowpan_frags_init_net(struct net *net) ieee802154_lowpan->frags.high_thresh = IPV6_FRAG_HIGH_THRESH; ieee802154_lowpan->frags.low_thresh = IPV6_FRAG_LOW_THRESH; ieee802154_lowpan->frags.timeout = IPV6_FRAG_TIMEOUT; + ieee802154_lowpan->frags.f = &lowpan_frags; res = inet_frags_init_net(&ieee802154_lowpan->frags); if (res < 0) return res; res = lowpan_frags_ns_sysctl_register(net); if (res < 0) - inet_frags_exit_net(&ieee802154_lowpan->frags, &lowpan_frags); + inet_frags_exit_net(&ieee802154_lowpan->frags); return res; } @@ -601,7 +602,7 @@ static void __net_exit lowpan_frags_exit_net(struct net *net) net_ieee802154_lowpan(net); lowpan_frags_ns_sysctl_unregister(net); - inet_frags_exit_net(&ieee802154_lowpan->frags, &lowpan_frags); + inet_frags_exit_net(&ieee802154_lowpan->frags); } static struct pernet_operations lowpan_frags_ops = { diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index ba4454ecdf0f6..4b44f973c37f9 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -219,8 +219,9 @@ void inet_frags_fini(struct inet_frags *f) } EXPORT_SYMBOL(inet_frags_fini); -void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f) +void inet_frags_exit_net(struct netns_frags *nf) { + struct inet_frags *f =nf->f; unsigned int seq; int i; @@ -264,33 +265,34 @@ __acquires(hb->chain_lock) return hb; } -static inline void fq_unlink(struct inet_frag_queue *fq, struct inet_frags *f) +static inline void fq_unlink(struct inet_frag_queue *fq) { struct inet_frag_bucket *hb; - hb = get_frag_bucket_locked(fq, f); + hb = get_frag_bucket_locked(fq, fq->net->f); hlist_del(&fq->list); fq->flags |= INET_FRAG_COMPLETE; spin_unlock(&hb->chain_lock); } -void inet_frag_kill(struct inet_frag_queue *fq, struct inet_frags *f) +void inet_frag_kill(struct inet_frag_queue *fq) { if (del_timer(&fq->timer)) refcount_dec(&fq->refcnt); if (!(fq->flags & INET_FRAG_COMPLETE)) { - fq_unlink(fq, f); + fq_unlink(fq); refcount_dec(&fq->refcnt); } } EXPORT_SYMBOL(inet_frag_kill); -void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f) +void inet_frag_destroy(struct inet_frag_queue *q) { struct sk_buff *fp; struct netns_frags *nf; unsigned int sum, sum_truesize = 0; + struct inet_frags *f; WARN_ON(!(q->flags & INET_FRAG_COMPLETE)); WARN_ON(del_timer(&q->timer) != 0); @@ -298,6 +300,7 @@ void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f) /* Release all fragment data. */ fp = q->fragments; nf = q->net; + f = nf->f; while (fp) { struct sk_buff *xp = fp->next; @@ -333,7 +336,7 @@ static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf, refcount_inc(&qp->refcnt); spin_unlock(&hb->chain_lock); qp_in->flags |= INET_FRAG_COMPLETE; - inet_frag_put(qp_in, f); + inet_frag_put(qp_in); return qp; } } diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 916def2e2afed..c32718b00761e 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -168,7 +168,7 @@ static void ip4_frag_free(struct inet_frag_queue *q) static void ipq_put(struct ipq *ipq) { - inet_frag_put(&ipq->q, &ip4_frags); + inet_frag_put(&ipq->q); } /* Kill ipq entry. It is not destroyed immediately, @@ -176,7 +176,7 @@ static void ipq_put(struct ipq *ipq) */ static void ipq_kill(struct ipq *ipq) { - inet_frag_kill(&ipq->q, &ip4_frags); + inet_frag_kill(&ipq->q); } static bool frag_expire_skip_icmp(u32 user) @@ -876,20 +876,21 @@ static int __net_init ipv4_frags_init_net(struct net *net) net->ipv4.frags.timeout = IP_FRAG_TIME; net->ipv4.frags.max_dist = 64; + net->ipv4.frags.f = &ip4_frags; res = inet_frags_init_net(&net->ipv4.frags); if (res < 0) return res; res = ip4_frags_ns_ctl_register(net); if (res < 0) - inet_frags_exit_net(&net->ipv4.frags, &ip4_frags); + inet_frags_exit_net(&net->ipv4.frags); return res; } static void __net_exit ipv4_frags_exit_net(struct net *net) { ip4_frags_ns_ctl_unregister(net); - inet_frags_exit_net(&net->ipv4.frags, &ip4_frags); + inet_frags_exit_net(&net->ipv4.frags); } static struct pernet_operations ip4_frags_ops = { diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index afa9ea76155d8..7ea2b44906724 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -177,7 +177,7 @@ static void nf_ct_frag6_expire(unsigned long data) fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q); net = container_of(fq->q.net, struct net, nf_frag.frags); - ip6_expire_frag_queue(net, fq, &nf_frags); + ip6_expire_frag_queue(net, fq); } /* Creation primitives. */ @@ -263,7 +263,7 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb, * this case. -DaveM */ pr_debug("end of fragment not rounded to 8 bytes.\n"); - inet_frag_kill(&fq->q, &nf_frags); + inet_frag_kill(&fq->q); return -EPROTO; } if (end > fq->q.len) { @@ -356,7 +356,7 @@ found: return 0; discard_fq: - inet_frag_kill(&fq->q, &nf_frags); + inet_frag_kill(&fq->q); err: return -EINVAL; } @@ -378,7 +378,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *prev, struct net_devic int payload_len; u8 ecn; - inet_frag_kill(&fq->q, &nf_frags); + inet_frag_kill(&fq->q); WARN_ON(head == NULL); WARN_ON(NFCT_FRAG6_CB(head)->offset != 0); @@ -623,7 +623,7 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user) out_unlock: spin_unlock_bh(&fq->q.lock); - inet_frag_put(&fq->q, &nf_frags); + inet_frag_put(&fq->q); return ret; } EXPORT_SYMBOL_GPL(nf_ct_frag6_gather); @@ -635,19 +635,21 @@ static int nf_ct_net_init(struct net *net) net->nf_frag.frags.high_thresh = IPV6_FRAG_HIGH_THRESH; net->nf_frag.frags.low_thresh = IPV6_FRAG_LOW_THRESH; net->nf_frag.frags.timeout = IPV6_FRAG_TIMEOUT; + net->nf_frag.frags.f = &nf_frags; + res = inet_frags_init_net(&net->nf_frag.frags); if (res < 0) return res; res = nf_ct_frag6_sysctl_register(net); if (res < 0) - inet_frags_exit_net(&net->nf_frag.frags, &nf_frags); + inet_frags_exit_net(&net->nf_frag.frags); return res; } static void nf_ct_net_exit(struct net *net) { nf_ct_frags6_sysctl_unregister(net); - inet_frags_exit_net(&net->nf_frag.frags, &nf_frags); + inet_frags_exit_net(&net->nf_frag.frags); } static struct pernet_operations nf_ct_net_ops = { diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 38bf38a9717f0..26f737c3fc7b4 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -128,8 +128,7 @@ void ip6_frag_init(struct inet_frag_queue *q, const void *a) } EXPORT_SYMBOL(ip6_frag_init); -void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq, - struct inet_frags *frags) +void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq) { struct net_device *dev = NULL; @@ -138,7 +137,7 @@ void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq, if (fq->q.flags & INET_FRAG_COMPLETE) goto out; - inet_frag_kill(&fq->q, frags); + inet_frag_kill(&fq->q); rcu_read_lock(); dev = dev_get_by_index_rcu(net, fq->iif); @@ -166,7 +165,7 @@ out_rcu_unlock: rcu_read_unlock(); out: spin_unlock(&fq->q.lock); - inet_frag_put(&fq->q, frags); + inet_frag_put(&fq->q); } EXPORT_SYMBOL(ip6_expire_frag_queue); @@ -178,7 +177,7 @@ static void ip6_frag_expire(unsigned long data) fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q); net = container_of(fq->q.net, struct net, ipv6.frags); - ip6_expire_frag_queue(net, fq, &ip6_frags); + ip6_expire_frag_queue(net, fq); } static struct frag_queue * @@ -363,7 +362,7 @@ found: return -1; discard_fq: - inet_frag_kill(&fq->q, &ip6_frags); + inet_frag_kill(&fq->q); err: __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMFAILS); @@ -390,7 +389,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, int sum_truesize; u8 ecn; - inet_frag_kill(&fq->q, &ip6_frags); + inet_frag_kill(&fq->q); ecn = ip_frag_ecn_table[fq->ecn]; if (unlikely(ecn == 0xff)) @@ -568,7 +567,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb) ret = ip6_frag_queue(fq, skb, fhdr, IP6CB(skb)->nhoff); spin_unlock(&fq->q.lock); - inet_frag_put(&fq->q, &ip6_frags); + inet_frag_put(&fq->q); return ret; } @@ -719,6 +718,7 @@ static int __net_init ipv6_frags_init_net(struct net *net) net->ipv6.frags.high_thresh = IPV6_FRAG_HIGH_THRESH; net->ipv6.frags.low_thresh = IPV6_FRAG_LOW_THRESH; net->ipv6.frags.timeout = IPV6_FRAG_TIMEOUT; + net->ipv6.frags.f = &ip6_frags; res = inet_frags_init_net(&net->ipv6.frags); if (res < 0) @@ -726,14 +726,14 @@ static int __net_init ipv6_frags_init_net(struct net *net) res = ip6_frags_ns_sysctl_register(net); if (res < 0) - inet_frags_exit_net(&net->ipv6.frags, &ip6_frags); + inet_frags_exit_net(&net->ipv6.frags); return res; } static void __net_exit ipv6_frags_exit_net(struct net *net) { ip6_frags_ns_sysctl_unregister(net); - inet_frags_exit_net(&net->ipv6.frags, &ip6_frags); + inet_frags_exit_net(&net->ipv6.frags); } static struct pernet_operations ip6_frags_ops = { -- GitLab From 0cbf74b9519d8f73dd27cc6fad6e03851788f956 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 13 Sep 2018 07:58:35 -0700 Subject: [PATCH 0299/2269] inet: frags: refactor ipfrag_init() We need to call inet_frags_init() before register_pernet_subsys(), as a prereq for following patch ("inet: frags: use rhashtables for reassembly units") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller (cherry picked from commit 483a6e4fa055123142d8956866fe2aa9c98d546d) Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ip_fragment.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index c32718b00761e..9d0b08c8ee005 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -900,8 +900,6 @@ static struct pernet_operations ip4_frags_ops = { void __init ipfrag_init(void) { - ip4_frags_ctl_register(); - register_pernet_subsys(&ip4_frags_ops); ip4_frags.hashfn = ip4_hashfn; ip4_frags.constructor = ip4_frag_init; ip4_frags.destructor = ip4_frag_free; @@ -911,4 +909,6 @@ void __init ipfrag_init(void) ip4_frags.frags_cache_name = ip_frag_cache_name; if (inet_frags_init(&ip4_frags)) panic("IP: failed to allocate ip4_frags cache\n"); + ip4_frags_ctl_register(); + register_pernet_subsys(&ip4_frags_ops); } -- GitLab From 0512f7e93504386cd1223990d9692c20a878d2d9 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 13 Sep 2018 07:58:36 -0700 Subject: [PATCH 0300/2269] inet: frags: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. Cc: Alexander Aring Cc: Stefan Schmidt Cc: "David S. Miller" Cc: Alexey Kuznetsov Cc: Hideaki YOSHIFUJI Cc: Pablo Neira Ayuso Cc: Jozsef Kadlecsik Cc: Florian Westphal Cc: linux-wpan@vger.kernel.org Cc: netdev@vger.kernel.org Cc: netfilter-devel@vger.kernel.org Cc: coreteam@netfilter.org Signed-off-by: Kees Cook Acked-by: Stefan Schmidt # for ieee802154 Signed-off-by: David S. Miller (cherry picked from commit 78802011fbe34331bdef6f2dfb1634011f0e4c32) Signed-off-by: Greg Kroah-Hartman --- include/net/inet_frag.h | 2 +- net/ieee802154/6lowpan/reassembly.c | 5 +++-- net/ipv4/inet_fragment.c | 4 ++-- net/ipv4/ip_fragment.c | 5 +++-- net/ipv6/netfilter/nf_conntrack_reasm.c | 5 +++-- net/ipv6/reassembly.c | 5 +++-- 6 files changed, 15 insertions(+), 11 deletions(-) diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index fd338293a0957..69e531ed81894 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -97,7 +97,7 @@ struct inet_frags { void (*constructor)(struct inet_frag_queue *q, const void *arg); void (*destructor)(struct inet_frag_queue *); - void (*frag_expire)(unsigned long data); + void (*frag_expire)(struct timer_list *t); struct kmem_cache *frags_cachep; const char *frags_cache_name; }; diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c index 9ccb8458b5c37..6badc055555b7 100644 --- a/net/ieee802154/6lowpan/reassembly.c +++ b/net/ieee802154/6lowpan/reassembly.c @@ -80,12 +80,13 @@ static void lowpan_frag_init(struct inet_frag_queue *q, const void *a) fq->daddr = *arg->dst; } -static void lowpan_frag_expire(unsigned long data) +static void lowpan_frag_expire(struct timer_list *t) { + struct inet_frag_queue *frag = from_timer(frag, t, timer); struct frag_queue *fq; struct net *net; - fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q); + fq = container_of(frag, struct frag_queue, q); net = container_of(fq->q.net, struct net, ieee802154_lowpan.frags); spin_lock(&fq->q.lock); diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 4b44f973c37f9..97e747b1e9a0e 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -150,7 +150,7 @@ inet_evict_bucket(struct inet_frags *f, struct inet_frag_bucket *hb) spin_unlock(&hb->chain_lock); hlist_for_each_entry_safe(fq, n, &expired, list_evictor) - f->frag_expire((unsigned long) fq); + f->frag_expire(&fq->timer); return evicted; } @@ -367,7 +367,7 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf, f->constructor(q, arg); add_frag_mem_limit(nf, f->qsize); - setup_timer(&q->timer, f->frag_expire, (unsigned long)q); + timer_setup(&q->timer, f->frag_expire, 0); spin_lock_init(&q->lock); refcount_set(&q->refcnt, 1); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 9d0b08c8ee005..5171c8cc0eb6b 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -191,12 +191,13 @@ static bool frag_expire_skip_icmp(u32 user) /* * Oops, a fragment queue timed out. Kill it and send an ICMP reply. */ -static void ip_expire(unsigned long arg) +static void ip_expire(struct timer_list *t) { + struct inet_frag_queue *frag = from_timer(frag, t, timer); struct ipq *qp; struct net *net; - qp = container_of((struct inet_frag_queue *) arg, struct ipq, q); + qp = container_of(frag, struct ipq, q); net = container_of(qp->q.net, struct net, ipv4.frags); rcu_read_lock(); diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 7ea2b44906724..bc776ef392ea2 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -169,12 +169,13 @@ static unsigned int nf_hashfn(const struct inet_frag_queue *q) return nf_hash_frag(nq->id, &nq->saddr, &nq->daddr); } -static void nf_ct_frag6_expire(unsigned long data) +static void nf_ct_frag6_expire(struct timer_list *t) { + struct inet_frag_queue *frag = from_timer(frag, t, timer); struct frag_queue *fq; struct net *net; - fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q); + fq = container_of(frag, struct frag_queue, q); net = container_of(fq->q.net, struct net, nf_frag.frags); ip6_expire_frag_queue(net, fq); diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 26f737c3fc7b4..b85ef051b75cd 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -169,12 +169,13 @@ out: } EXPORT_SYMBOL(ip6_expire_frag_queue); -static void ip6_frag_expire(unsigned long data) +static void ip6_frag_expire(struct timer_list *t) { + struct inet_frag_queue *frag = from_timer(frag, t, timer); struct frag_queue *fq; struct net *net; - fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q); + fq = container_of(frag, struct frag_queue, q); net = container_of(fq->q.net, struct net, ipv6.frags); ip6_expire_frag_queue(net, fq); -- GitLab From eb1686ae5e20b4455b80be571e7e41f1c9d7b2ac Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 13 Sep 2018 07:58:37 -0700 Subject: [PATCH 0301/2269] inet: frags: refactor ipv6_frag_init() We want to call inet_frags_init() earlier. This is a prereq to "inet: frags: use rhashtables for reassembly units" Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller (cherry picked from commit 5b975bab23615cd0fdf67af6c9298eb01c4b9f61) Signed-off-by: Greg Kroah-Hartman --- net/ipv6/reassembly.c | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index b85ef051b75cd..42b6b2ba447af 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -746,10 +746,21 @@ int __init ipv6_frag_init(void) { int ret; - ret = inet6_add_protocol(&frag_protocol, IPPROTO_FRAGMENT); + ip6_frags.hashfn = ip6_hashfn; + ip6_frags.constructor = ip6_frag_init; + ip6_frags.destructor = NULL; + ip6_frags.qsize = sizeof(struct frag_queue); + ip6_frags.match = ip6_frag_match; + ip6_frags.frag_expire = ip6_frag_expire; + ip6_frags.frags_cache_name = ip6_frag_cache_name; + ret = inet_frags_init(&ip6_frags); if (ret) goto out; + ret = inet6_add_protocol(&frag_protocol, IPPROTO_FRAGMENT); + if (ret) + goto err_protocol; + ret = ip6_frags_sysctl_register(); if (ret) goto err_sysctl; @@ -758,16 +769,6 @@ int __init ipv6_frag_init(void) if (ret) goto err_pernet; - ip6_frags.hashfn = ip6_hashfn; - ip6_frags.constructor = ip6_frag_init; - ip6_frags.destructor = NULL; - ip6_frags.qsize = sizeof(struct frag_queue); - ip6_frags.match = ip6_frag_match; - ip6_frags.frag_expire = ip6_frag_expire; - ip6_frags.frags_cache_name = ip6_frag_cache_name; - ret = inet_frags_init(&ip6_frags); - if (ret) - goto err_pernet; out: return ret; @@ -775,6 +776,8 @@ err_pernet: ip6_frags_sysctl_unregister(); err_sysctl: inet6_del_protocol(&frag_protocol, IPPROTO_FRAGMENT); +err_protocol: + inet_frags_fini(&ip6_frags); goto out; } -- GitLab From 266da0fb83f32e26470017d3eb32cb092b2210ed Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 13 Sep 2018 07:58:38 -0700 Subject: [PATCH 0302/2269] inet: frags: refactor lowpan_net_frag_init() We want to call lowpan_net_frag_init() earlier. Similar to commit "inet: frags: refactor ipv6_frag_init()" This is a prereq to "inet: frags: use rhashtables for reassembly units" Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller (cherry picked from commit 807f1844df4ac23594268fa9f41902d0549e92aa) Signed-off-by: Greg Kroah-Hartman --- net/ieee802154/6lowpan/reassembly.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c index 6badc055555b7..ddada12a044de 100644 --- a/net/ieee802154/6lowpan/reassembly.c +++ b/net/ieee802154/6lowpan/reassembly.c @@ -615,14 +615,6 @@ int __init lowpan_net_frag_init(void) { int ret; - ret = lowpan_frags_sysctl_register(); - if (ret) - return ret; - - ret = register_pernet_subsys(&lowpan_frags_ops); - if (ret) - goto err_pernet; - lowpan_frags.hashfn = lowpan_hashfn; lowpan_frags.constructor = lowpan_frag_init; lowpan_frags.destructor = NULL; @@ -632,11 +624,21 @@ int __init lowpan_net_frag_init(void) lowpan_frags.frags_cache_name = lowpan_frags_cache_name; ret = inet_frags_init(&lowpan_frags); if (ret) - goto err_pernet; + goto out; + ret = lowpan_frags_sysctl_register(); + if (ret) + goto err_sysctl; + + ret = register_pernet_subsys(&lowpan_frags_ops); + if (ret) + goto err_pernet; +out: return ret; err_pernet: lowpan_frags_sysctl_unregister(); +err_sysctl: + inet_frags_fini(&lowpan_frags); return ret; } -- GitLab From 11be675bf0aaf4a6dcde817126168b9cbd8ba90e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 13 Sep 2018 07:58:39 -0700 Subject: [PATCH 0303/2269] ipv6: export ip6 fragments sysctl to unprivileged users IPv4 was changed in commit 52a773d645e9 ("net: Export ip fragment sysctl to unprivileged users") The only sysctl that is not per-netns is not used : ip6frag_secret_interval Signed-off-by: Eric Dumazet Cc: Nikolay Borisov Signed-off-by: David S. Miller (cherry picked from commit 18dcbe12fe9fca0ab825f7eff993060525ac2503) Signed-off-by: Greg Kroah-Hartman --- net/ipv6/reassembly.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 42b6b2ba447af..f0071b113a92f 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -649,10 +649,6 @@ static int __net_init ip6_frags_ns_sysctl_register(struct net *net) table[1].data = &net->ipv6.frags.low_thresh; table[1].extra2 = &net->ipv6.frags.high_thresh; table[2].data = &net->ipv6.frags.timeout; - - /* Don't export sysctls to unprivileged users */ - if (net->user_ns != &init_user_ns) - table[0].procname = NULL; } hdr = register_net_sysctl(net, "net/ipv6", table); -- GitLab From 33dc9f7c5d127bfc203a17f7a31d4dcc754376df Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 13 Sep 2018 07:58:40 -0700 Subject: [PATCH 0304/2269] rhashtable: add schedule points Rehashing and destroying large hash table takes a lot of time, and happens in process context. It is safe to add cond_resched() in rhashtable_rehash_table() and rhashtable_free_and_destroy() Signed-off-by: Eric Dumazet Acked-by: Herbert Xu Signed-off-by: David S. Miller (cherry picked from commit ae6da1f503abb5a5081f9f6c4a6881de97830f3e) Signed-off-by: Greg Kroah-Hartman --- lib/rhashtable.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 39215c724fc72..cebbcec877d71 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -364,6 +364,7 @@ static int rhashtable_rehash_table(struct rhashtable *ht) err = rhashtable_rehash_chain(ht, old_hash); if (err) return err; + cond_resched(); } /* Publish the new table pointer. */ @@ -1073,6 +1074,7 @@ void rhashtable_free_and_destroy(struct rhashtable *ht, for (i = 0; i < tbl->size; i++) { struct rhash_head *pos, *next; + cond_resched(); for (pos = rht_dereference(*rht_bucket(tbl, i), ht), next = !rht_is_a_nulls(pos) ? rht_dereference(pos->next, ht) : NULL; -- GitLab From 9aee41eff751e4c789ff785c561d7bf7ad72c286 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 13 Sep 2018 07:58:41 -0700 Subject: [PATCH 0305/2269] inet: frags: use rhashtables for reassembly units Some applications still rely on IP fragmentation, and to be fair linux reassembly unit is not working under any serious load. It uses static hash tables of 1024 buckets, and up to 128 items per bucket (!!!) A work queue is supposed to garbage collect items when host is under memory pressure, and doing a hash rebuild, changing seed used in hash computations. This work queue blocks softirqs for up to 25 ms when doing a hash rebuild, occurring every 5 seconds if host is under fire. Then there is the problem of sharing this hash table for all netns. It is time to switch to rhashtables, and allocate one of them per netns to speedup netns dismantle, since this is a critical metric these days. Lookup is now using RCU. A followup patch will even remove the refcount hold/release left from prior implementation and save a couple of atomic operations. Before this patch, 16 cpus (16 RX queue NIC) could not handle more than 1 Mpps frags DDOS. After the patch, I reach 9 Mpps without any tuning, and can use up to 2GB of storage for the fragments (exact number depends on frags being evicted after timeout) $ grep FRAG /proc/net/sockstat FRAG: inuse 1966916 memory 2140004608 A followup patch will change the limits for 64bit arches. Signed-off-by: Eric Dumazet Cc: Kirill Tkhai Cc: Herbert Xu Cc: Florian Westphal Cc: Jesper Dangaard Brouer Cc: Alexander Aring Cc: Stefan Schmidt Signed-off-by: David S. Miller (cherry picked from commit 648700f76b03b7e8149d13cc2bdb3355035258a9) Signed-off-by: Greg Kroah-Hartman --- Documentation/networking/ip-sysctl.txt | 7 +- include/net/inet_frag.h | 81 +++--- include/net/ipv6.h | 16 +- net/ieee802154/6lowpan/6lowpan_i.h | 26 +- net/ieee802154/6lowpan/reassembly.c | 91 +++---- net/ipv4/inet_fragment.c | 346 +++++------------------- net/ipv4/ip_fragment.c | 112 ++++---- net/ipv6/netfilter/nf_conntrack_reasm.c | 51 +--- net/ipv6/reassembly.c | 110 ++++---- 9 files changed, 266 insertions(+), 574 deletions(-) diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index d499676890d8e..f23582a3c661c 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -134,13 +134,10 @@ min_adv_mss - INTEGER IP Fragmentation: ipfrag_high_thresh - INTEGER - Maximum memory used to reassemble IP fragments. When - ipfrag_high_thresh bytes of memory is allocated for this purpose, - the fragment handler will toss packets until ipfrag_low_thresh - is reached. This also serves as a maximum limit to namespaces - different from the initial one. + Maximum memory used to reassemble IP fragments. ipfrag_low_thresh - INTEGER + (Obsolete since linux-4.17) Maximum memory used to reassemble IP fragments before the kernel begins to remove incomplete fragment queues to free up resources. The kernel still accepts new fragments for defragmentation. diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index 69e531ed81894..3fec0d3a0d018 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -2,7 +2,11 @@ #ifndef __NET_FRAG_H__ #define __NET_FRAG_H__ +#include + struct netns_frags { + struct rhashtable rhashtable ____cacheline_aligned_in_smp; + /* Keep atomic mem on separate cachelines in structs that include it */ atomic_t mem ____cacheline_aligned_in_smp; /* sysctls */ @@ -26,12 +30,30 @@ enum { INET_FRAG_COMPLETE = BIT(2), }; +struct frag_v4_compare_key { + __be32 saddr; + __be32 daddr; + u32 user; + u32 vif; + __be16 id; + u16 protocol; +}; + +struct frag_v6_compare_key { + struct in6_addr saddr; + struct in6_addr daddr; + u32 user; + __be32 id; + u32 iif; +}; + /** * struct inet_frag_queue - fragment queue * - * @lock: spinlock protecting the queue + * @node: rhash node + * @key: keys identifying this frag. * @timer: queue expiration timer - * @list: hash bucket list + * @lock: spinlock protecting this frag * @refcnt: reference count of the queue * @fragments: received fragments head * @fragments_tail: received fragments tail @@ -41,12 +63,16 @@ enum { * @flags: fragment queue flags * @max_size: maximum received fragment size * @net: namespace that this frag belongs to - * @list_evictor: list of queues to forcefully evict (e.g. due to low memory) + * @rcu: rcu head for freeing deferall */ struct inet_frag_queue { - spinlock_t lock; + struct rhash_head node; + union { + struct frag_v4_compare_key v4; + struct frag_v6_compare_key v6; + } key; struct timer_list timer; - struct hlist_node list; + spinlock_t lock; refcount_t refcnt; struct sk_buff *fragments; struct sk_buff *fragments_tail; @@ -55,51 +81,20 @@ struct inet_frag_queue { int meat; __u8 flags; u16 max_size; - struct netns_frags *net; - struct hlist_node list_evictor; -}; - -#define INETFRAGS_HASHSZ 1024 - -/* averaged: - * max_depth = default ipfrag_high_thresh / INETFRAGS_HASHSZ / - * rounded up (SKB_TRUELEN(0) + sizeof(struct ipq or - * struct frag_queue)) - */ -#define INETFRAGS_MAXDEPTH 128 - -struct inet_frag_bucket { - struct hlist_head chain; - spinlock_t chain_lock; + struct netns_frags *net; + struct rcu_head rcu; }; struct inet_frags { - struct inet_frag_bucket hash[INETFRAGS_HASHSZ]; - - struct work_struct frags_work; - unsigned int next_bucket; - unsigned long last_rebuild_jiffies; - bool rebuild; - - /* The first call to hashfn is responsible to initialize - * rnd. This is best done with net_get_random_once. - * - * rnd_seqlock is used to let hash insertion detect - * when it needs to re-lookup the hash chain to use. - */ - u32 rnd; - seqlock_t rnd_seqlock; unsigned int qsize; - unsigned int (*hashfn)(const struct inet_frag_queue *); - bool (*match)(const struct inet_frag_queue *q, - const void *arg); void (*constructor)(struct inet_frag_queue *q, const void *arg); void (*destructor)(struct inet_frag_queue *); void (*frag_expire)(struct timer_list *t); struct kmem_cache *frags_cachep; const char *frags_cache_name; + struct rhashtable_params rhash_params; }; int inet_frags_init(struct inet_frags *); @@ -108,15 +103,13 @@ void inet_frags_fini(struct inet_frags *); static inline int inet_frags_init_net(struct netns_frags *nf) { atomic_set(&nf->mem, 0); - return 0; + return rhashtable_init(&nf->rhashtable, &nf->f->rhash_params); } void inet_frags_exit_net(struct netns_frags *nf); void inet_frag_kill(struct inet_frag_queue *q); void inet_frag_destroy(struct inet_frag_queue *q); -struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, - struct inet_frags *f, void *key, unsigned int hash); - +struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, void *key); void inet_frag_maybe_warn_overflow(struct inet_frag_queue *q, const char *prefix); @@ -128,7 +121,7 @@ static inline void inet_frag_put(struct inet_frag_queue *q) static inline bool inet_frag_evicting(struct inet_frag_queue *q) { - return !hlist_unhashed(&q->list_evictor); + return false; } /* Memory Tracking Functions. */ diff --git a/include/net/ipv6.h b/include/net/ipv6.h index ff8407b19d050..a271611d341d8 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -531,17 +531,8 @@ enum ip6_defrag_users { __IP6_DEFRAG_CONNTRACK_BRIDGE_IN = IP6_DEFRAG_CONNTRACK_BRIDGE_IN + USHRT_MAX, }; -struct ip6_create_arg { - __be32 id; - u32 user; - const struct in6_addr *src; - const struct in6_addr *dst; - int iif; - u8 ecn; -}; - void ip6_frag_init(struct inet_frag_queue *q, const void *a); -bool ip6_frag_match(const struct inet_frag_queue *q, const void *a); +extern const struct rhashtable_params ip6_rhash_params; /* * Equivalent of ipv4 struct ip @@ -549,11 +540,6 @@ bool ip6_frag_match(const struct inet_frag_queue *q, const void *a); struct frag_queue { struct inet_frag_queue q; - __be32 id; /* fragment id */ - u32 user; - struct in6_addr saddr; - struct in6_addr daddr; - int iif; unsigned int csum; __u16 nhoffset; diff --git a/net/ieee802154/6lowpan/6lowpan_i.h b/net/ieee802154/6lowpan/6lowpan_i.h index d8de3bcfb1032..b8d95cb71c25d 100644 --- a/net/ieee802154/6lowpan/6lowpan_i.h +++ b/net/ieee802154/6lowpan/6lowpan_i.h @@ -17,37 +17,19 @@ typedef unsigned __bitwise lowpan_rx_result; #define LOWPAN_DISPATCH_FRAG1 0xc0 #define LOWPAN_DISPATCH_FRAGN 0xe0 -struct lowpan_create_arg { +struct frag_lowpan_compare_key { u16 tag; u16 d_size; - const struct ieee802154_addr *src; - const struct ieee802154_addr *dst; + const struct ieee802154_addr src; + const struct ieee802154_addr dst; }; -/* Equivalent of ipv4 struct ip +/* Equivalent of ipv4 struct ipq */ struct lowpan_frag_queue { struct inet_frag_queue q; - - u16 tag; - u16 d_size; - struct ieee802154_addr saddr; - struct ieee802154_addr daddr; }; -static inline u32 ieee802154_addr_hash(const struct ieee802154_addr *a) -{ - switch (a->mode) { - case IEEE802154_ADDR_LONG: - return (((__force u64)a->extended_addr) >> 32) ^ - (((__force u64)a->extended_addr) & 0xffffffff); - case IEEE802154_ADDR_SHORT: - return (__force u32)(a->short_addr + (a->pan_id << 16)); - default: - return 0; - } -} - int lowpan_frag_rcv(struct sk_buff *skb, const u8 frag_type); void lowpan_net_frag_exit(void); int lowpan_net_frag_init(void); diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c index ddada12a044de..0fa0121f85d45 100644 --- a/net/ieee802154/6lowpan/reassembly.c +++ b/net/ieee802154/6lowpan/reassembly.c @@ -37,47 +37,15 @@ static struct inet_frags lowpan_frags; static int lowpan_frag_reasm(struct lowpan_frag_queue *fq, struct sk_buff *prev, struct net_device *ldev); -static unsigned int lowpan_hash_frag(u16 tag, u16 d_size, - const struct ieee802154_addr *saddr, - const struct ieee802154_addr *daddr) -{ - net_get_random_once(&lowpan_frags.rnd, sizeof(lowpan_frags.rnd)); - return jhash_3words(ieee802154_addr_hash(saddr), - ieee802154_addr_hash(daddr), - (__force u32)(tag + (d_size << 16)), - lowpan_frags.rnd); -} - -static unsigned int lowpan_hashfn(const struct inet_frag_queue *q) -{ - const struct lowpan_frag_queue *fq; - - fq = container_of(q, struct lowpan_frag_queue, q); - return lowpan_hash_frag(fq->tag, fq->d_size, &fq->saddr, &fq->daddr); -} - -static bool lowpan_frag_match(const struct inet_frag_queue *q, const void *a) -{ - const struct lowpan_frag_queue *fq; - const struct lowpan_create_arg *arg = a; - - fq = container_of(q, struct lowpan_frag_queue, q); - return fq->tag == arg->tag && fq->d_size == arg->d_size && - ieee802154_addr_equal(&fq->saddr, arg->src) && - ieee802154_addr_equal(&fq->daddr, arg->dst); -} - static void lowpan_frag_init(struct inet_frag_queue *q, const void *a) { - const struct lowpan_create_arg *arg = a; + const struct frag_lowpan_compare_key *key = a; struct lowpan_frag_queue *fq; fq = container_of(q, struct lowpan_frag_queue, q); - fq->tag = arg->tag; - fq->d_size = arg->d_size; - fq->saddr = *arg->src; - fq->daddr = *arg->dst; + BUILD_BUG_ON(sizeof(*key) > sizeof(q->key)); + memcpy(&q->key, key, sizeof(*key)); } static void lowpan_frag_expire(struct timer_list *t) @@ -105,21 +73,17 @@ fq_find(struct net *net, const struct lowpan_802154_cb *cb, const struct ieee802154_addr *src, const struct ieee802154_addr *dst) { - struct inet_frag_queue *q; - struct lowpan_create_arg arg; - unsigned int hash; struct netns_ieee802154_lowpan *ieee802154_lowpan = net_ieee802154_lowpan(net); + struct frag_lowpan_compare_key key = { + .tag = cb->d_tag, + .d_size = cb->d_size, + .src = *src, + .dst = *dst, + }; + struct inet_frag_queue *q; - arg.tag = cb->d_tag; - arg.d_size = cb->d_size; - arg.src = src; - arg.dst = dst; - - hash = lowpan_hash_frag(cb->d_tag, cb->d_size, src, dst); - - q = inet_frag_find(&ieee802154_lowpan->frags, - &lowpan_frags, &arg, hash); + q = inet_frag_find(&ieee802154_lowpan->frags, &key); if (IS_ERR_OR_NULL(q)) { inet_frag_maybe_warn_overflow(q, pr_fmt()); return NULL; @@ -611,17 +575,46 @@ static struct pernet_operations lowpan_frags_ops = { .exit = lowpan_frags_exit_net, }; +static u32 lowpan_key_hashfn(const void *data, u32 len, u32 seed) +{ + return jhash2(data, + sizeof(struct frag_lowpan_compare_key) / sizeof(u32), seed); +} + +static u32 lowpan_obj_hashfn(const void *data, u32 len, u32 seed) +{ + const struct inet_frag_queue *fq = data; + + return jhash2((const u32 *)&fq->key, + sizeof(struct frag_lowpan_compare_key) / sizeof(u32), seed); +} + +static int lowpan_obj_cmpfn(struct rhashtable_compare_arg *arg, const void *ptr) +{ + const struct frag_lowpan_compare_key *key = arg->key; + const struct inet_frag_queue *fq = ptr; + + return !!memcmp(&fq->key, key, sizeof(*key)); +} + +static const struct rhashtable_params lowpan_rhash_params = { + .head_offset = offsetof(struct inet_frag_queue, node), + .hashfn = lowpan_key_hashfn, + .obj_hashfn = lowpan_obj_hashfn, + .obj_cmpfn = lowpan_obj_cmpfn, + .automatic_shrinking = true, +}; + int __init lowpan_net_frag_init(void) { int ret; - lowpan_frags.hashfn = lowpan_hashfn; lowpan_frags.constructor = lowpan_frag_init; lowpan_frags.destructor = NULL; lowpan_frags.qsize = sizeof(struct frag_queue); - lowpan_frags.match = lowpan_frag_match; lowpan_frags.frag_expire = lowpan_frag_expire; lowpan_frags.frags_cache_name = lowpan_frags_cache_name; + lowpan_frags.rhash_params = lowpan_rhash_params; ret = inet_frags_init(&lowpan_frags); if (ret) goto out; diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 97e747b1e9a0e..ebb8f411e0db1 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -25,12 +25,6 @@ #include #include -#define INETFRAGS_EVICT_BUCKETS 128 -#define INETFRAGS_EVICT_MAX 512 - -/* don't rebuild inetfrag table with new secret more often than this */ -#define INETFRAGS_MIN_REBUILD_INTERVAL (5 * HZ) - /* Given the OR values of all fragments, apply RFC 3168 5.3 requirements * Value : 0xff if frame should be dropped. * 0 or INET_ECN_CE value, to be ORed in to final iph->tos field @@ -52,157 +46,8 @@ const u8 ip_frag_ecn_table[16] = { }; EXPORT_SYMBOL(ip_frag_ecn_table); -static unsigned int -inet_frag_hashfn(const struct inet_frags *f, const struct inet_frag_queue *q) -{ - return f->hashfn(q) & (INETFRAGS_HASHSZ - 1); -} - -static bool inet_frag_may_rebuild(struct inet_frags *f) -{ - return time_after(jiffies, - f->last_rebuild_jiffies + INETFRAGS_MIN_REBUILD_INTERVAL); -} - -static void inet_frag_secret_rebuild(struct inet_frags *f) -{ - int i; - - write_seqlock_bh(&f->rnd_seqlock); - - if (!inet_frag_may_rebuild(f)) - goto out; - - get_random_bytes(&f->rnd, sizeof(u32)); - - for (i = 0; i < INETFRAGS_HASHSZ; i++) { - struct inet_frag_bucket *hb; - struct inet_frag_queue *q; - struct hlist_node *n; - - hb = &f->hash[i]; - spin_lock(&hb->chain_lock); - - hlist_for_each_entry_safe(q, n, &hb->chain, list) { - unsigned int hval = inet_frag_hashfn(f, q); - - if (hval != i) { - struct inet_frag_bucket *hb_dest; - - hlist_del(&q->list); - - /* Relink to new hash chain. */ - hb_dest = &f->hash[hval]; - - /* This is the only place where we take - * another chain_lock while already holding - * one. As this will not run concurrently, - * we cannot deadlock on hb_dest lock below, if its - * already locked it will be released soon since - * other caller cannot be waiting for hb lock - * that we've taken above. - */ - spin_lock_nested(&hb_dest->chain_lock, - SINGLE_DEPTH_NESTING); - hlist_add_head(&q->list, &hb_dest->chain); - spin_unlock(&hb_dest->chain_lock); - } - } - spin_unlock(&hb->chain_lock); - } - - f->rebuild = false; - f->last_rebuild_jiffies = jiffies; -out: - write_sequnlock_bh(&f->rnd_seqlock); -} - -static bool inet_fragq_should_evict(const struct inet_frag_queue *q) -{ - if (!hlist_unhashed(&q->list_evictor)) - return false; - - return q->net->low_thresh == 0 || - frag_mem_limit(q->net) >= q->net->low_thresh; -} - -static unsigned int -inet_evict_bucket(struct inet_frags *f, struct inet_frag_bucket *hb) -{ - struct inet_frag_queue *fq; - struct hlist_node *n; - unsigned int evicted = 0; - HLIST_HEAD(expired); - - spin_lock(&hb->chain_lock); - - hlist_for_each_entry_safe(fq, n, &hb->chain, list) { - if (!inet_fragq_should_evict(fq)) - continue; - - if (!del_timer(&fq->timer)) - continue; - - hlist_add_head(&fq->list_evictor, &expired); - ++evicted; - } - - spin_unlock(&hb->chain_lock); - - hlist_for_each_entry_safe(fq, n, &expired, list_evictor) - f->frag_expire(&fq->timer); - - return evicted; -} - -static void inet_frag_worker(struct work_struct *work) -{ - unsigned int budget = INETFRAGS_EVICT_BUCKETS; - unsigned int i, evicted = 0; - struct inet_frags *f; - - f = container_of(work, struct inet_frags, frags_work); - - BUILD_BUG_ON(INETFRAGS_EVICT_BUCKETS >= INETFRAGS_HASHSZ); - - local_bh_disable(); - - for (i = ACCESS_ONCE(f->next_bucket); budget; --budget) { - evicted += inet_evict_bucket(f, &f->hash[i]); - i = (i + 1) & (INETFRAGS_HASHSZ - 1); - if (evicted > INETFRAGS_EVICT_MAX) - break; - } - - f->next_bucket = i; - - local_bh_enable(); - - if (f->rebuild && inet_frag_may_rebuild(f)) - inet_frag_secret_rebuild(f); -} - -static void inet_frag_schedule_worker(struct inet_frags *f) -{ - if (unlikely(!work_pending(&f->frags_work))) - schedule_work(&f->frags_work); -} - int inet_frags_init(struct inet_frags *f) { - int i; - - INIT_WORK(&f->frags_work, inet_frag_worker); - - for (i = 0; i < INETFRAGS_HASHSZ; i++) { - struct inet_frag_bucket *hb = &f->hash[i]; - - spin_lock_init(&hb->chain_lock); - INIT_HLIST_HEAD(&hb->chain); - } - - seqlock_init(&f->rnd_seqlock); - f->last_rebuild_jiffies = 0; f->frags_cachep = kmem_cache_create(f->frags_cache_name, f->qsize, 0, 0, NULL); if (!f->frags_cachep) @@ -214,66 +59,42 @@ EXPORT_SYMBOL(inet_frags_init); void inet_frags_fini(struct inet_frags *f) { - cancel_work_sync(&f->frags_work); + /* We must wait that all inet_frag_destroy_rcu() have completed. */ + rcu_barrier(); + kmem_cache_destroy(f->frags_cachep); + f->frags_cachep = NULL; } EXPORT_SYMBOL(inet_frags_fini); -void inet_frags_exit_net(struct netns_frags *nf) -{ - struct inet_frags *f =nf->f; - unsigned int seq; - int i; - - nf->low_thresh = 0; - -evict_again: - local_bh_disable(); - seq = read_seqbegin(&f->rnd_seqlock); - - for (i = 0; i < INETFRAGS_HASHSZ ; i++) - inet_evict_bucket(f, &f->hash[i]); - - local_bh_enable(); - cond_resched(); - - if (read_seqretry(&f->rnd_seqlock, seq) || - sum_frag_mem_limit(nf)) - goto evict_again; -} -EXPORT_SYMBOL(inet_frags_exit_net); - -static struct inet_frag_bucket * -get_frag_bucket_locked(struct inet_frag_queue *fq, struct inet_frags *f) -__acquires(hb->chain_lock) +static void inet_frags_free_cb(void *ptr, void *arg) { - struct inet_frag_bucket *hb; - unsigned int seq, hash; - - restart: - seq = read_seqbegin(&f->rnd_seqlock); + struct inet_frag_queue *fq = ptr; - hash = inet_frag_hashfn(f, fq); - hb = &f->hash[hash]; + /* If we can not cancel the timer, it means this frag_queue + * is already disappearing, we have nothing to do. + * Otherwise, we own a refcount until the end of this function. + */ + if (!del_timer(&fq->timer)) + return; - spin_lock(&hb->chain_lock); - if (read_seqretry(&f->rnd_seqlock, seq)) { - spin_unlock(&hb->chain_lock); - goto restart; + spin_lock_bh(&fq->lock); + if (!(fq->flags & INET_FRAG_COMPLETE)) { + fq->flags |= INET_FRAG_COMPLETE; + refcount_dec(&fq->refcnt); } + spin_unlock_bh(&fq->lock); - return hb; + inet_frag_put(fq); } -static inline void fq_unlink(struct inet_frag_queue *fq) +void inet_frags_exit_net(struct netns_frags *nf) { - struct inet_frag_bucket *hb; + nf->low_thresh = 0; /* prevent creation of new frags */ - hb = get_frag_bucket_locked(fq, fq->net->f); - hlist_del(&fq->list); - fq->flags |= INET_FRAG_COMPLETE; - spin_unlock(&hb->chain_lock); + rhashtable_free_and_destroy(&nf->rhashtable, inet_frags_free_cb, NULL); } +EXPORT_SYMBOL(inet_frags_exit_net); void inet_frag_kill(struct inet_frag_queue *fq) { @@ -281,12 +102,26 @@ void inet_frag_kill(struct inet_frag_queue *fq) refcount_dec(&fq->refcnt); if (!(fq->flags & INET_FRAG_COMPLETE)) { - fq_unlink(fq); + struct netns_frags *nf = fq->net; + + fq->flags |= INET_FRAG_COMPLETE; + rhashtable_remove_fast(&nf->rhashtable, &fq->node, nf->f->rhash_params); refcount_dec(&fq->refcnt); } } EXPORT_SYMBOL(inet_frag_kill); +static void inet_frag_destroy_rcu(struct rcu_head *head) +{ + struct inet_frag_queue *q = container_of(head, struct inet_frag_queue, + rcu); + struct inet_frags *f = q->net->f; + + if (f->destructor) + f->destructor(q); + kmem_cache_free(f->frags_cachep, q); +} + void inet_frag_destroy(struct inet_frag_queue *q) { struct sk_buff *fp; @@ -310,55 +145,21 @@ void inet_frag_destroy(struct inet_frag_queue *q) } sum = sum_truesize + f->qsize; - if (f->destructor) - f->destructor(q); - kmem_cache_free(f->frags_cachep, q); + call_rcu(&q->rcu, inet_frag_destroy_rcu); sub_frag_mem_limit(nf, sum); } EXPORT_SYMBOL(inet_frag_destroy); -static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf, - struct inet_frag_queue *qp_in, - struct inet_frags *f, - void *arg) -{ - struct inet_frag_bucket *hb = get_frag_bucket_locked(qp_in, f); - struct inet_frag_queue *qp; - -#ifdef CONFIG_SMP - /* With SMP race we have to recheck hash table, because - * such entry could have been created on other cpu before - * we acquired hash bucket lock. - */ - hlist_for_each_entry(qp, &hb->chain, list) { - if (qp->net == nf && f->match(qp, arg)) { - refcount_inc(&qp->refcnt); - spin_unlock(&hb->chain_lock); - qp_in->flags |= INET_FRAG_COMPLETE; - inet_frag_put(qp_in); - return qp; - } - } -#endif - qp = qp_in; - if (!mod_timer(&qp->timer, jiffies + nf->timeout)) - refcount_inc(&qp->refcnt); - - refcount_inc(&qp->refcnt); - hlist_add_head(&qp->list, &hb->chain); - - spin_unlock(&hb->chain_lock); - - return qp; -} - static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf, struct inet_frags *f, void *arg) { struct inet_frag_queue *q; + if (!nf->high_thresh || frag_mem_limit(nf) > nf->high_thresh) + return NULL; + q = kmem_cache_zalloc(f->frags_cachep, GFP_ATOMIC); if (!q) return NULL; @@ -369,64 +170,52 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf, timer_setup(&q->timer, f->frag_expire, 0); spin_lock_init(&q->lock); - refcount_set(&q->refcnt, 1); + refcount_set(&q->refcnt, 3); return q; } static struct inet_frag_queue *inet_frag_create(struct netns_frags *nf, - struct inet_frags *f, void *arg) { + struct inet_frags *f = nf->f; struct inet_frag_queue *q; + int err; q = inet_frag_alloc(nf, f, arg); if (!q) return NULL; - return inet_frag_intern(nf, q, f, arg); -} + mod_timer(&q->timer, jiffies + nf->timeout); -struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, - struct inet_frags *f, void *key, - unsigned int hash) -{ - struct inet_frag_bucket *hb; - struct inet_frag_queue *q; - int depth = 0; - - if (!nf->high_thresh || frag_mem_limit(nf) > nf->high_thresh) { - inet_frag_schedule_worker(f); + err = rhashtable_insert_fast(&nf->rhashtable, &q->node, + f->rhash_params); + if (err < 0) { + q->flags |= INET_FRAG_COMPLETE; + inet_frag_kill(q); + inet_frag_destroy(q); return NULL; } + return q; +} - if (frag_mem_limit(nf) > nf->low_thresh) - inet_frag_schedule_worker(f); - - hash &= (INETFRAGS_HASHSZ - 1); - hb = &f->hash[hash]; - - spin_lock(&hb->chain_lock); - hlist_for_each_entry(q, &hb->chain, list) { - if (q->net == nf && f->match(q, key)) { - refcount_inc(&q->refcnt); - spin_unlock(&hb->chain_lock); - return q; - } - depth++; - } - spin_unlock(&hb->chain_lock); +/* TODO : call from rcu_read_lock() and no longer use refcount_inc_not_zero() */ +struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, void *key) +{ + struct inet_frag_queue *fq; - if (depth <= INETFRAGS_MAXDEPTH) - return inet_frag_create(nf, f, key); + rcu_read_lock(); - if (inet_frag_may_rebuild(f)) { - if (!f->rebuild) - f->rebuild = true; - inet_frag_schedule_worker(f); + fq = rhashtable_lookup(&nf->rhashtable, key, nf->f->rhash_params); + if (fq) { + if (!refcount_inc_not_zero(&fq->refcnt)) + fq = NULL; + rcu_read_unlock(); + return fq; } + rcu_read_unlock(); - return ERR_PTR(-ENOBUFS); + return inet_frag_create(nf, key); } EXPORT_SYMBOL(inet_frag_find); @@ -434,8 +223,7 @@ void inet_frag_maybe_warn_overflow(struct inet_frag_queue *q, const char *prefix) { static const char msg[] = "inet_frag_find: Fragment hash bucket" - " list length grew over limit " __stringify(INETFRAGS_MAXDEPTH) - ". Dropping fragment.\n"; + " list length grew over limit. Dropping fragment.\n"; if (PTR_ERR(q) == -ENOBUFS) net_dbg_ratelimited("%s%s", prefix, msg); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 5171c8cc0eb6b..1d3b82e96c453 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -69,15 +69,9 @@ struct ipfrag_skb_cb struct ipq { struct inet_frag_queue q; - u32 user; - __be32 saddr; - __be32 daddr; - __be16 id; - u8 protocol; u8 ecn; /* RFC3168 support */ u16 max_df_size; /* largest frag with DF set seen */ int iif; - int vif; /* L3 master device index */ unsigned int rid; struct inet_peer *peer; }; @@ -97,41 +91,6 @@ int ip_frag_mem(struct net *net) static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, struct net_device *dev); -struct ip4_create_arg { - struct iphdr *iph; - u32 user; - int vif; -}; - -static unsigned int ipqhashfn(__be16 id, __be32 saddr, __be32 daddr, u8 prot) -{ - net_get_random_once(&ip4_frags.rnd, sizeof(ip4_frags.rnd)); - return jhash_3words((__force u32)id << 16 | prot, - (__force u32)saddr, (__force u32)daddr, - ip4_frags.rnd); -} - -static unsigned int ip4_hashfn(const struct inet_frag_queue *q) -{ - const struct ipq *ipq; - - ipq = container_of(q, struct ipq, q); - return ipqhashfn(ipq->id, ipq->saddr, ipq->daddr, ipq->protocol); -} - -static bool ip4_frag_match(const struct inet_frag_queue *q, const void *a) -{ - const struct ipq *qp; - const struct ip4_create_arg *arg = a; - - qp = container_of(q, struct ipq, q); - return qp->id == arg->iph->id && - qp->saddr == arg->iph->saddr && - qp->daddr == arg->iph->daddr && - qp->protocol == arg->iph->protocol && - qp->user == arg->user && - qp->vif == arg->vif; -} static void ip4_frag_init(struct inet_frag_queue *q, const void *a) { @@ -140,17 +99,12 @@ static void ip4_frag_init(struct inet_frag_queue *q, const void *a) frags); struct net *net = container_of(ipv4, struct net, ipv4); - const struct ip4_create_arg *arg = a; + const struct frag_v4_compare_key *key = a; - qp->protocol = arg->iph->protocol; - qp->id = arg->iph->id; - qp->ecn = ip4_frag_ecn(arg->iph->tos); - qp->saddr = arg->iph->saddr; - qp->daddr = arg->iph->daddr; - qp->vif = arg->vif; - qp->user = arg->user; + q->key.v4 = *key; + qp->ecn = 0; qp->peer = q->net->max_dist ? - inet_getpeer_v4(net->ipv4.peers, arg->iph->saddr, arg->vif, 1) : + inet_getpeer_v4(net->ipv4.peers, key->saddr, key->vif, 1) : NULL; } @@ -234,7 +188,7 @@ static void ip_expire(struct timer_list *t) /* Only an end host needs to send an ICMP * "Fragment Reassembly Timeout" message, per RFC792. */ - if (frag_expire_skip_icmp(qp->user) && + if (frag_expire_skip_icmp(qp->q.key.v4.user) && (skb_rtable(head)->rt_type != RTN_LOCAL)) goto out; @@ -262,17 +216,17 @@ out_rcu_unlock: static struct ipq *ip_find(struct net *net, struct iphdr *iph, u32 user, int vif) { + struct frag_v4_compare_key key = { + .saddr = iph->saddr, + .daddr = iph->daddr, + .user = user, + .vif = vif, + .id = iph->id, + .protocol = iph->protocol, + }; struct inet_frag_queue *q; - struct ip4_create_arg arg; - unsigned int hash; - - arg.iph = iph; - arg.user = user; - arg.vif = vif; - hash = ipqhashfn(iph->id, iph->saddr, iph->daddr, iph->protocol); - - q = inet_frag_find(&net->ipv4.frags, &ip4_frags, &arg, hash); + q = inet_frag_find(&net->ipv4.frags, &key); if (IS_ERR_OR_NULL(q)) { inet_frag_maybe_warn_overflow(q, pr_fmt()); return NULL; @@ -661,7 +615,7 @@ out_nomem: err = -ENOMEM; goto out_fail; out_oversize: - net_info_ratelimited("Oversized IP packet from %pI4\n", &qp->saddr); + net_info_ratelimited("Oversized IP packet from %pI4\n", &qp->q.key.v4.saddr); out_fail: __IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS); return err; @@ -899,15 +853,47 @@ static struct pernet_operations ip4_frags_ops = { .exit = ipv4_frags_exit_net, }; + +static u32 ip4_key_hashfn(const void *data, u32 len, u32 seed) +{ + return jhash2(data, + sizeof(struct frag_v4_compare_key) / sizeof(u32), seed); +} + +static u32 ip4_obj_hashfn(const void *data, u32 len, u32 seed) +{ + const struct inet_frag_queue *fq = data; + + return jhash2((const u32 *)&fq->key.v4, + sizeof(struct frag_v4_compare_key) / sizeof(u32), seed); +} + +static int ip4_obj_cmpfn(struct rhashtable_compare_arg *arg, const void *ptr) +{ + const struct frag_v4_compare_key *key = arg->key; + const struct inet_frag_queue *fq = ptr; + + return !!memcmp(&fq->key, key, sizeof(*key)); +} + +static const struct rhashtable_params ip4_rhash_params = { + .head_offset = offsetof(struct inet_frag_queue, node), + .key_offset = offsetof(struct inet_frag_queue, key), + .key_len = sizeof(struct frag_v4_compare_key), + .hashfn = ip4_key_hashfn, + .obj_hashfn = ip4_obj_hashfn, + .obj_cmpfn = ip4_obj_cmpfn, + .automatic_shrinking = true, +}; + void __init ipfrag_init(void) { - ip4_frags.hashfn = ip4_hashfn; ip4_frags.constructor = ip4_frag_init; ip4_frags.destructor = ip4_frag_free; ip4_frags.qsize = sizeof(struct ipq); - ip4_frags.match = ip4_frag_match; ip4_frags.frag_expire = ip_expire; ip4_frags.frags_cache_name = ip_frag_cache_name; + ip4_frags.rhash_params = ip4_rhash_params; if (inet_frags_init(&ip4_frags)) panic("IP: failed to allocate ip4_frags cache\n"); ip4_frags_ctl_register(); diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index bc776ef392ea2..8b12431ae2961 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -152,23 +152,6 @@ static inline u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h) return 1 << (ipv6_get_dsfield(ipv6h) & INET_ECN_MASK); } -static unsigned int nf_hash_frag(__be32 id, const struct in6_addr *saddr, - const struct in6_addr *daddr) -{ - net_get_random_once(&nf_frags.rnd, sizeof(nf_frags.rnd)); - return jhash_3words(ipv6_addr_hash(saddr), ipv6_addr_hash(daddr), - (__force u32)id, nf_frags.rnd); -} - - -static unsigned int nf_hashfn(const struct inet_frag_queue *q) -{ - const struct frag_queue *nq; - - nq = container_of(q, struct frag_queue, q); - return nf_hash_frag(nq->id, &nq->saddr, &nq->daddr); -} - static void nf_ct_frag6_expire(struct timer_list *t) { struct inet_frag_queue *frag = from_timer(frag, t, timer); @@ -182,26 +165,19 @@ static void nf_ct_frag6_expire(struct timer_list *t) } /* Creation primitives. */ -static inline struct frag_queue *fq_find(struct net *net, __be32 id, - u32 user, struct in6_addr *src, - struct in6_addr *dst, int iif, u8 ecn) +static struct frag_queue *fq_find(struct net *net, __be32 id, u32 user, + const struct ipv6hdr *hdr, int iif) { + struct frag_v6_compare_key key = { + .id = id, + .saddr = hdr->saddr, + .daddr = hdr->daddr, + .user = user, + .iif = iif, + }; struct inet_frag_queue *q; - struct ip6_create_arg arg; - unsigned int hash; - - arg.id = id; - arg.user = user; - arg.src = src; - arg.dst = dst; - arg.iif = iif; - arg.ecn = ecn; - - local_bh_disable(); - hash = nf_hash_frag(id, src, dst); - q = inet_frag_find(&net->nf_frag.frags, &nf_frags, &arg, hash); - local_bh_enable(); + q = inet_frag_find(&net->nf_frag.frags, &key); if (IS_ERR_OR_NULL(q)) { inet_frag_maybe_warn_overflow(q, pr_fmt()); return NULL; @@ -593,8 +569,8 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user) fhdr = (struct frag_hdr *)skb_transport_header(skb); skb_orphan(skb); - fq = fq_find(net, fhdr->identification, user, &hdr->saddr, &hdr->daddr, - skb->dev ? skb->dev->ifindex : 0, ip6_frag_ecn(hdr)); + fq = fq_find(net, fhdr->identification, user, hdr, + skb->dev ? skb->dev->ifindex : 0); if (fq == NULL) { pr_debug("Can't find and can't create new queue\n"); return -ENOMEM; @@ -662,13 +638,12 @@ int nf_ct_frag6_init(void) { int ret = 0; - nf_frags.hashfn = nf_hashfn; nf_frags.constructor = ip6_frag_init; nf_frags.destructor = NULL; nf_frags.qsize = sizeof(struct frag_queue); - nf_frags.match = ip6_frag_match; nf_frags.frag_expire = nf_ct_frag6_expire; nf_frags.frags_cache_name = nf_frags_cache_name; + nf_frags.rhash_params = ip6_rhash_params; ret = inet_frags_init(&nf_frags); if (ret) goto out; diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index f0071b113a92f..3fc853e4492ab 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -79,52 +79,13 @@ static struct inet_frags ip6_frags; static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, struct net_device *dev); -/* - * callers should be careful not to use the hash value outside the ipfrag_lock - * as doing so could race with ipfrag_hash_rnd being recalculated. - */ -static unsigned int inet6_hash_frag(__be32 id, const struct in6_addr *saddr, - const struct in6_addr *daddr) -{ - net_get_random_once(&ip6_frags.rnd, sizeof(ip6_frags.rnd)); - return jhash_3words(ipv6_addr_hash(saddr), ipv6_addr_hash(daddr), - (__force u32)id, ip6_frags.rnd); -} - -static unsigned int ip6_hashfn(const struct inet_frag_queue *q) -{ - const struct frag_queue *fq; - - fq = container_of(q, struct frag_queue, q); - return inet6_hash_frag(fq->id, &fq->saddr, &fq->daddr); -} - -bool ip6_frag_match(const struct inet_frag_queue *q, const void *a) -{ - const struct frag_queue *fq; - const struct ip6_create_arg *arg = a; - - fq = container_of(q, struct frag_queue, q); - return fq->id == arg->id && - fq->user == arg->user && - ipv6_addr_equal(&fq->saddr, arg->src) && - ipv6_addr_equal(&fq->daddr, arg->dst) && - (arg->iif == fq->iif || - !(ipv6_addr_type(arg->dst) & (IPV6_ADDR_MULTICAST | - IPV6_ADDR_LINKLOCAL))); -} -EXPORT_SYMBOL(ip6_frag_match); - void ip6_frag_init(struct inet_frag_queue *q, const void *a) { struct frag_queue *fq = container_of(q, struct frag_queue, q); - const struct ip6_create_arg *arg = a; + const struct frag_v6_compare_key *key = a; - fq->id = arg->id; - fq->user = arg->user; - fq->saddr = *arg->src; - fq->daddr = *arg->dst; - fq->ecn = arg->ecn; + q->key.v6 = *key; + fq->ecn = 0; } EXPORT_SYMBOL(ip6_frag_init); @@ -182,23 +143,22 @@ static void ip6_frag_expire(struct timer_list *t) } static struct frag_queue * -fq_find(struct net *net, __be32 id, const struct in6_addr *src, - const struct in6_addr *dst, int iif, u8 ecn) +fq_find(struct net *net, __be32 id, const struct ipv6hdr *hdr, int iif) { + struct frag_v6_compare_key key = { + .id = id, + .saddr = hdr->saddr, + .daddr = hdr->daddr, + .user = IP6_DEFRAG_LOCAL_DELIVER, + .iif = iif, + }; struct inet_frag_queue *q; - struct ip6_create_arg arg; - unsigned int hash; - - arg.id = id; - arg.user = IP6_DEFRAG_LOCAL_DELIVER; - arg.src = src; - arg.dst = dst; - arg.iif = iif; - arg.ecn = ecn; - hash = inet6_hash_frag(id, src, dst); + if (!(ipv6_addr_type(&hdr->daddr) & (IPV6_ADDR_MULTICAST | + IPV6_ADDR_LINKLOCAL))) + key.iif = 0; - q = inet_frag_find(&net->ipv6.frags, &ip6_frags, &arg, hash); + q = inet_frag_find(&net->ipv6.frags, &key); if (IS_ERR_OR_NULL(q)) { inet_frag_maybe_warn_overflow(q, pr_fmt()); return NULL; @@ -530,6 +490,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb) struct frag_queue *fq; const struct ipv6hdr *hdr = ipv6_hdr(skb); struct net *net = dev_net(skb_dst(skb)->dev); + int iif; if (IP6CB(skb)->flags & IP6SKB_FRAGMENTED) goto fail_hdr; @@ -558,13 +519,14 @@ static int ipv6_frag_rcv(struct sk_buff *skb) return 1; } - fq = fq_find(net, fhdr->identification, &hdr->saddr, &hdr->daddr, - skb->dev ? skb->dev->ifindex : 0, ip6_frag_ecn(hdr)); + iif = skb->dev ? skb->dev->ifindex : 0; + fq = fq_find(net, fhdr->identification, hdr, iif); if (fq) { int ret; spin_lock(&fq->q.lock); + fq->iif = iif; ret = ip6_frag_queue(fq, skb, fhdr, IP6CB(skb)->nhoff); spin_unlock(&fq->q.lock); @@ -738,17 +700,47 @@ static struct pernet_operations ip6_frags_ops = { .exit = ipv6_frags_exit_net, }; +static u32 ip6_key_hashfn(const void *data, u32 len, u32 seed) +{ + return jhash2(data, + sizeof(struct frag_v6_compare_key) / sizeof(u32), seed); +} + +static u32 ip6_obj_hashfn(const void *data, u32 len, u32 seed) +{ + const struct inet_frag_queue *fq = data; + + return jhash2((const u32 *)&fq->key.v6, + sizeof(struct frag_v6_compare_key) / sizeof(u32), seed); +} + +static int ip6_obj_cmpfn(struct rhashtable_compare_arg *arg, const void *ptr) +{ + const struct frag_v6_compare_key *key = arg->key; + const struct inet_frag_queue *fq = ptr; + + return !!memcmp(&fq->key, key, sizeof(*key)); +} + +const struct rhashtable_params ip6_rhash_params = { + .head_offset = offsetof(struct inet_frag_queue, node), + .hashfn = ip6_key_hashfn, + .obj_hashfn = ip6_obj_hashfn, + .obj_cmpfn = ip6_obj_cmpfn, + .automatic_shrinking = true, +}; +EXPORT_SYMBOL(ip6_rhash_params); + int __init ipv6_frag_init(void) { int ret; - ip6_frags.hashfn = ip6_hashfn; ip6_frags.constructor = ip6_frag_init; ip6_frags.destructor = NULL; ip6_frags.qsize = sizeof(struct frag_queue); - ip6_frags.match = ip6_frag_match; ip6_frags.frag_expire = ip6_frag_expire; ip6_frags.frags_cache_name = ip6_frag_cache_name; + ip6_frags.rhash_params = ip6_rhash_params; ret = inet_frags_init(&ip6_frags); if (ret) goto out; -- GitLab From bd3df633f17d64523828d0ef5d74e4f1a768683c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 13 Sep 2018 07:58:42 -0700 Subject: [PATCH 0306/2269] inet: frags: remove some helpers Remove sum_frag_mem_limit(), ip_frag_mem() & ip6_frag_mem() Also since we use rhashtable we can bring back the number of fragments in "grep FRAG /proc/net/sockstat /proc/net/sockstat6" that was removed in commit 434d305405ab ("inet: frag: don't account number of fragment queues") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller (cherry picked from commit 6befe4a78b1553edb6eed3a78b4bcd9748526672) Signed-off-by: Greg Kroah-Hartman --- include/net/inet_frag.h | 5 ----- include/net/ip.h | 1 - include/net/ipv6.h | 7 ------- net/ipv4/ip_fragment.c | 5 ----- net/ipv4/proc.c | 6 +++--- net/ipv6/proc.c | 5 +++-- 6 files changed, 6 insertions(+), 23 deletions(-) diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index 3fec0d3a0d018..4b5449df0aadf 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -141,11 +141,6 @@ static inline void add_frag_mem_limit(struct netns_frags *nf, int i) atomic_add(i, &nf->mem); } -static inline int sum_frag_mem_limit(struct netns_frags *nf) -{ - return atomic_read(&nf->mem); -} - /* RFC 3168 support : * We want to check ECN values of all fragments, do detect invalid combinations. * In ipq->ecn, we store the OR value of each ip4_frag_ecn() fragment value. diff --git a/include/net/ip.h b/include/net/ip.h index 81da1123fc8ea..7c430343176af 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -570,7 +570,6 @@ static inline struct sk_buff *ip_check_defrag(struct net *net, struct sk_buff *s return skb; } #endif -int ip_frag_mem(struct net *net); /* * Functions provided by ip_forward.c diff --git a/include/net/ipv6.h b/include/net/ipv6.h index a271611d341d8..fa87a62e9bd3b 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -331,13 +331,6 @@ static inline bool ipv6_accept_ra(struct inet6_dev *idev) idev->cnf.accept_ra; } -#if IS_ENABLED(CONFIG_IPV6) -static inline int ip6_frag_mem(struct net *net) -{ - return sum_frag_mem_limit(&net->ipv6.frags); -} -#endif - #define IPV6_FRAG_HIGH_THRESH (4 * 1024*1024) /* 4194304 */ #define IPV6_FRAG_LOW_THRESH (3 * 1024*1024) /* 3145728 */ #define IPV6_FRAG_TIMEOUT (60 * HZ) /* 60 seconds */ diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 1d3b82e96c453..94451fad99947 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -83,11 +83,6 @@ static u8 ip4_frag_ecn(u8 tos) static struct inet_frags ip4_frags; -int ip_frag_mem(struct net *net) -{ - return sum_frag_mem_limit(&net->ipv4.frags); -} - static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, struct net_device *dev); diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 127153f1ed8a7..01a337c3a36be 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -54,7 +54,6 @@ static int sockstat_seq_show(struct seq_file *seq, void *v) { struct net *net = seq->private; - unsigned int frag_mem; int orphans, sockets; orphans = percpu_counter_sum_positive(&tcp_orphan_count); @@ -72,8 +71,9 @@ static int sockstat_seq_show(struct seq_file *seq, void *v) sock_prot_inuse_get(net, &udplite_prot)); seq_printf(seq, "RAW: inuse %d\n", sock_prot_inuse_get(net, &raw_prot)); - frag_mem = ip_frag_mem(net); - seq_printf(seq, "FRAG: inuse %u memory %u\n", !!frag_mem, frag_mem); + seq_printf(seq, "FRAG: inuse %u memory %u\n", + atomic_read(&net->ipv4.frags.rhashtable.nelems), + frag_mem_limit(&net->ipv4.frags)); return 0; } diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index e88bcb8ff0fd7..5704ec3d31780 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -38,7 +38,6 @@ static int sockstat6_seq_show(struct seq_file *seq, void *v) { struct net *net = seq->private; - unsigned int frag_mem = ip6_frag_mem(net); seq_printf(seq, "TCP6: inuse %d\n", sock_prot_inuse_get(net, &tcpv6_prot)); @@ -48,7 +47,9 @@ static int sockstat6_seq_show(struct seq_file *seq, void *v) sock_prot_inuse_get(net, &udplitev6_prot)); seq_printf(seq, "RAW6: inuse %d\n", sock_prot_inuse_get(net, &rawv6_prot)); - seq_printf(seq, "FRAG6: inuse %u memory %u\n", !!frag_mem, frag_mem); + seq_printf(seq, "FRAG6: inuse %u memory %u\n", + atomic_read(&net->ipv6.frags.rhashtable.nelems), + frag_mem_limit(&net->ipv6.frags)); return 0; } -- GitLab From 5b1b3ad46dd100932925e979562aeefaf9ef189e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 13 Sep 2018 07:58:43 -0700 Subject: [PATCH 0307/2269] inet: frags: get rif of inet_frag_evicting() This refactors ip_expire() since one indentation level is removed. Note: in the future, we should try hard to avoid the skb_clone() since this is a serious performance cost. Under DDOS, the ICMP message wont be sent because of rate limits. Fact that ip6_expire_frag_queue() does not use skb_clone() is disturbing too. Presumably IPv6 should have the same issue than the one we fixed in commit ec4fbd64751d ("inet: frag: release spinlock before calling icmp_send()") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller (cherry picked from commit 399d1404be660d355192ff4df5ccc3f4159ec1e4) Signed-off-by: Greg Kroah-Hartman --- include/net/inet_frag.h | 5 ---- net/ipv4/ip_fragment.c | 65 ++++++++++++++++++++--------------------- net/ipv6/reassembly.c | 4 --- 3 files changed, 32 insertions(+), 42 deletions(-) diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index 4b5449df0aadf..0e8e159d88f7f 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -119,11 +119,6 @@ static inline void inet_frag_put(struct inet_frag_queue *q) inet_frag_destroy(q); } -static inline bool inet_frag_evicting(struct inet_frag_queue *q) -{ - return false; -} - /* Memory Tracking Functions. */ static inline int frag_mem_limit(struct netns_frags *nf) diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 94451fad99947..1222aee3e5eea 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -143,8 +143,11 @@ static bool frag_expire_skip_icmp(u32 user) static void ip_expire(struct timer_list *t) { struct inet_frag_queue *frag = from_timer(frag, t, timer); - struct ipq *qp; + struct sk_buff *clone, *head; + const struct iphdr *iph; struct net *net; + struct ipq *qp; + int err; qp = container_of(frag, struct ipq, q); net = container_of(qp->q.net, struct net, ipv4.frags); @@ -158,45 +161,41 @@ static void ip_expire(struct timer_list *t) ipq_kill(qp); __IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS); - if (!inet_frag_evicting(&qp->q)) { - struct sk_buff *clone, *head = qp->q.fragments; - const struct iphdr *iph; - int err; + head = qp->q.fragments; - __IP_INC_STATS(net, IPSTATS_MIB_REASMTIMEOUT); + __IP_INC_STATS(net, IPSTATS_MIB_REASMTIMEOUT); - if (!(qp->q.flags & INET_FRAG_FIRST_IN) || !qp->q.fragments) - goto out; + if (!(qp->q.flags & INET_FRAG_FIRST_IN) || !head) + goto out; - head->dev = dev_get_by_index_rcu(net, qp->iif); - if (!head->dev) - goto out; + head->dev = dev_get_by_index_rcu(net, qp->iif); + if (!head->dev) + goto out; - /* skb has no dst, perform route lookup again */ - iph = ip_hdr(head); - err = ip_route_input_noref(head, iph->daddr, iph->saddr, + /* skb has no dst, perform route lookup again */ + iph = ip_hdr(head); + err = ip_route_input_noref(head, iph->daddr, iph->saddr, iph->tos, head->dev); - if (err) - goto out; + if (err) + goto out; - /* Only an end host needs to send an ICMP - * "Fragment Reassembly Timeout" message, per RFC792. - */ - if (frag_expire_skip_icmp(qp->q.key.v4.user) && - (skb_rtable(head)->rt_type != RTN_LOCAL)) - goto out; - - clone = skb_clone(head, GFP_ATOMIC); - - /* Send an ICMP "Fragment Reassembly Timeout" message. */ - if (clone) { - spin_unlock(&qp->q.lock); - icmp_send(clone, ICMP_TIME_EXCEEDED, - ICMP_EXC_FRAGTIME, 0); - consume_skb(clone); - goto out_rcu_unlock; - } + /* Only an end host needs to send an ICMP + * "Fragment Reassembly Timeout" message, per RFC792. + */ + if (frag_expire_skip_icmp(qp->q.key.v4.user) && + (skb_rtable(head)->rt_type != RTN_LOCAL)) + goto out; + + clone = skb_clone(head, GFP_ATOMIC); + + /* Send an ICMP "Fragment Reassembly Timeout" message. */ + if (clone) { + spin_unlock(&qp->q.lock); + icmp_send(clone, ICMP_TIME_EXCEEDED, + ICMP_EXC_FRAGTIME, 0); + consume_skb(clone); + goto out_rcu_unlock; } out: spin_unlock(&qp->q.lock); diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 3fc853e4492ab..70acad126d044 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -106,10 +106,6 @@ void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq) goto out_rcu_unlock; __IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS); - - if (inet_frag_evicting(&fq->q)) - goto out_rcu_unlock; - __IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT); /* Don't send error if the first segment did not arrive. */ -- GitLab From caa4249eca082c5954ea377aa3ef86b5fc5c1ac1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 13 Sep 2018 07:58:44 -0700 Subject: [PATCH 0308/2269] inet: frags: remove inet_frag_maybe_warn_overflow() This function is obsolete, after rhashtable addition to inet defrag. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller (cherry picked from commit 2d44ed22e607f9a285b049de2263e3840673a260) Signed-off-by: Greg Kroah-Hartman --- include/net/inet_frag.h | 2 -- net/ieee802154/6lowpan/reassembly.c | 5 ++--- net/ipv4/inet_fragment.c | 11 ----------- net/ipv4/ip_fragment.c | 5 ++--- net/ipv6/netfilter/nf_conntrack_reasm.c | 5 ++--- net/ipv6/reassembly.c | 5 ++--- 6 files changed, 8 insertions(+), 25 deletions(-) diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index 0e8e159d88f7f..95e353e3305b4 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -110,8 +110,6 @@ void inet_frags_exit_net(struct netns_frags *nf); void inet_frag_kill(struct inet_frag_queue *q); void inet_frag_destroy(struct inet_frag_queue *q); struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, void *key); -void inet_frag_maybe_warn_overflow(struct inet_frag_queue *q, - const char *prefix); static inline void inet_frag_put(struct inet_frag_queue *q) { diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c index 0fa0121f85d45..1aec71a3f9047 100644 --- a/net/ieee802154/6lowpan/reassembly.c +++ b/net/ieee802154/6lowpan/reassembly.c @@ -84,10 +84,9 @@ fq_find(struct net *net, const struct lowpan_802154_cb *cb, struct inet_frag_queue *q; q = inet_frag_find(&ieee802154_lowpan->frags, &key); - if (IS_ERR_OR_NULL(q)) { - inet_frag_maybe_warn_overflow(q, pr_fmt()); + if (!q) return NULL; - } + return container_of(q, struct lowpan_frag_queue, q); } diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index ebb8f411e0db1..c9e35b81d0931 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -218,14 +218,3 @@ struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, void *key) return inet_frag_create(nf, key); } EXPORT_SYMBOL(inet_frag_find); - -void inet_frag_maybe_warn_overflow(struct inet_frag_queue *q, - const char *prefix) -{ - static const char msg[] = "inet_frag_find: Fragment hash bucket" - " list length grew over limit. Dropping fragment.\n"; - - if (PTR_ERR(q) == -ENOBUFS) - net_dbg_ratelimited("%s%s", prefix, msg); -} -EXPORT_SYMBOL(inet_frag_maybe_warn_overflow); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 1222aee3e5eea..38cbf56bb48e9 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -221,10 +221,9 @@ static struct ipq *ip_find(struct net *net, struct iphdr *iph, struct inet_frag_queue *q; q = inet_frag_find(&net->ipv4.frags, &key); - if (IS_ERR_OR_NULL(q)) { - inet_frag_maybe_warn_overflow(q, pr_fmt()); + if (!q) return NULL; - } + return container_of(q, struct ipq, q); } diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 8b12431ae2961..54ce1d2a9a9dd 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -178,10 +178,9 @@ static struct frag_queue *fq_find(struct net *net, __be32 id, u32 user, struct inet_frag_queue *q; q = inet_frag_find(&net->nf_frag.frags, &key); - if (IS_ERR_OR_NULL(q)) { - inet_frag_maybe_warn_overflow(q, pr_fmt()); + if (!q) return NULL; - } + return container_of(q, struct frag_queue, q); } diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 70acad126d044..2a77fda5e3bca 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -155,10 +155,9 @@ fq_find(struct net *net, __be32 id, const struct ipv6hdr *hdr, int iif) key.iif = 0; q = inet_frag_find(&net->ipv6.frags, &key); - if (IS_ERR_OR_NULL(q)) { - inet_frag_maybe_warn_overflow(q, pr_fmt()); + if (!q) return NULL; - } + return container_of(q, struct frag_queue, q); } -- GitLab From 990204ddc5f67530b2ac616767a5c6937c9fc2af Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 13 Sep 2018 07:58:45 -0700 Subject: [PATCH 0309/2269] inet: frags: break the 2GB limit for frags storage Some users are willing to provision huge amounts of memory to be able to perform reassembly reasonnably well under pressure. Current memory tracking is using one atomic_t and integers. Switch to atomic_long_t so that 64bit arches can use more than 2GB, without any cost for 32bit arches. Note that this patch avoids an overflow error, if high_thresh was set to ~2GB, since this test in inet_frag_alloc() was never true : if (... || frag_mem_limit(nf) > nf->high_thresh) Tested: $ echo 16000000000 >/proc/sys/net/ipv4/ipfrag_high_thresh $ grep FRAG /proc/net/sockstat FRAG: inuse 14705885 memory 16000002880 $ nstat -n ; sleep 1 ; nstat | grep Reas IpReasmReqds 3317150 0.0 IpReasmFails 3317112 0.0 Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller (cherry picked from commit 3e67f106f619dcfaf6f4e2039599bdb69848c714) Signed-off-by: Greg Kroah-Hartman --- Documentation/networking/ip-sysctl.txt | 4 ++-- include/net/inet_frag.h | 20 ++++++++++---------- net/ieee802154/6lowpan/reassembly.c | 10 +++++----- net/ipv4/ip_fragment.c | 10 +++++----- net/ipv4/proc.c | 2 +- net/ipv6/netfilter/nf_conntrack_reasm.c | 10 +++++----- net/ipv6/proc.c | 2 +- net/ipv6/reassembly.c | 6 +++--- 8 files changed, 32 insertions(+), 32 deletions(-) diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index f23582a3c661c..a054b5ad410ad 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -133,10 +133,10 @@ min_adv_mss - INTEGER IP Fragmentation: -ipfrag_high_thresh - INTEGER +ipfrag_high_thresh - LONG INTEGER Maximum memory used to reassemble IP fragments. -ipfrag_low_thresh - INTEGER +ipfrag_low_thresh - LONG INTEGER (Obsolete since linux-4.17) Maximum memory used to reassemble IP fragments before the kernel begins to remove incomplete fragment queues to free up resources. diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index 95e353e3305b4..a52e7273e7a59 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -8,11 +8,11 @@ struct netns_frags { struct rhashtable rhashtable ____cacheline_aligned_in_smp; /* Keep atomic mem on separate cachelines in structs that include it */ - atomic_t mem ____cacheline_aligned_in_smp; + atomic_long_t mem ____cacheline_aligned_in_smp; /* sysctls */ + long high_thresh; + long low_thresh; int timeout; - int high_thresh; - int low_thresh; int max_dist; struct inet_frags *f; }; @@ -102,7 +102,7 @@ void inet_frags_fini(struct inet_frags *); static inline int inet_frags_init_net(struct netns_frags *nf) { - atomic_set(&nf->mem, 0); + atomic_long_set(&nf->mem, 0); return rhashtable_init(&nf->rhashtable, &nf->f->rhash_params); } void inet_frags_exit_net(struct netns_frags *nf); @@ -119,19 +119,19 @@ static inline void inet_frag_put(struct inet_frag_queue *q) /* Memory Tracking Functions. */ -static inline int frag_mem_limit(struct netns_frags *nf) +static inline long frag_mem_limit(const struct netns_frags *nf) { - return atomic_read(&nf->mem); + return atomic_long_read(&nf->mem); } -static inline void sub_frag_mem_limit(struct netns_frags *nf, int i) +static inline void sub_frag_mem_limit(struct netns_frags *nf, long val) { - atomic_sub(i, &nf->mem); + atomic_long_sub(val, &nf->mem); } -static inline void add_frag_mem_limit(struct netns_frags *nf, int i) +static inline void add_frag_mem_limit(struct netns_frags *nf, long val) { - atomic_add(i, &nf->mem); + atomic_long_add(val, &nf->mem); } /* RFC 3168 support : diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c index 1aec71a3f9047..44f148a6bb57b 100644 --- a/net/ieee802154/6lowpan/reassembly.c +++ b/net/ieee802154/6lowpan/reassembly.c @@ -411,23 +411,23 @@ err: } #ifdef CONFIG_SYSCTL -static int zero; +static long zero; static struct ctl_table lowpan_frags_ns_ctl_table[] = { { .procname = "6lowpanfrag_high_thresh", .data = &init_net.ieee802154_lowpan.frags.high_thresh, - .maxlen = sizeof(int), + .maxlen = sizeof(unsigned long), .mode = 0644, - .proc_handler = proc_dointvec_minmax, + .proc_handler = proc_doulongvec_minmax, .extra1 = &init_net.ieee802154_lowpan.frags.low_thresh }, { .procname = "6lowpanfrag_low_thresh", .data = &init_net.ieee802154_lowpan.frags.low_thresh, - .maxlen = sizeof(int), + .maxlen = sizeof(unsigned long), .mode = 0644, - .proc_handler = proc_dointvec_minmax, + .proc_handler = proc_doulongvec_minmax, .extra1 = &zero, .extra2 = &init_net.ieee802154_lowpan.frags.high_thresh }, diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 38cbf56bb48e9..dc3ed0ac4c58c 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -683,23 +683,23 @@ struct sk_buff *ip_check_defrag(struct net *net, struct sk_buff *skb, u32 user) EXPORT_SYMBOL(ip_check_defrag); #ifdef CONFIG_SYSCTL -static int zero; +static long zero; static struct ctl_table ip4_frags_ns_ctl_table[] = { { .procname = "ipfrag_high_thresh", .data = &init_net.ipv4.frags.high_thresh, - .maxlen = sizeof(int), + .maxlen = sizeof(unsigned long), .mode = 0644, - .proc_handler = proc_dointvec_minmax, + .proc_handler = proc_doulongvec_minmax, .extra1 = &init_net.ipv4.frags.low_thresh }, { .procname = "ipfrag_low_thresh", .data = &init_net.ipv4.frags.low_thresh, - .maxlen = sizeof(int), + .maxlen = sizeof(unsigned long), .mode = 0644, - .proc_handler = proc_dointvec_minmax, + .proc_handler = proc_doulongvec_minmax, .extra1 = &zero, .extra2 = &init_net.ipv4.frags.high_thresh }, diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 01a337c3a36be..8fbd5633e5448 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -71,7 +71,7 @@ static int sockstat_seq_show(struct seq_file *seq, void *v) sock_prot_inuse_get(net, &udplite_prot)); seq_printf(seq, "RAW: inuse %d\n", sock_prot_inuse_get(net, &raw_prot)); - seq_printf(seq, "FRAG: inuse %u memory %u\n", + seq_printf(seq, "FRAG: inuse %u memory %lu\n", atomic_read(&net->ipv4.frags.rhashtable.nelems), frag_mem_limit(&net->ipv4.frags)); return 0; diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 54ce1d2a9a9dd..6613f81e553ae 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -63,7 +63,7 @@ struct nf_ct_frag6_skb_cb static struct inet_frags nf_frags; #ifdef CONFIG_SYSCTL -static int zero; +static long zero; static struct ctl_table nf_ct_frag6_sysctl_table[] = { { @@ -76,18 +76,18 @@ static struct ctl_table nf_ct_frag6_sysctl_table[] = { { .procname = "nf_conntrack_frag6_low_thresh", .data = &init_net.nf_frag.frags.low_thresh, - .maxlen = sizeof(unsigned int), + .maxlen = sizeof(unsigned long), .mode = 0644, - .proc_handler = proc_dointvec_minmax, + .proc_handler = proc_doulongvec_minmax, .extra1 = &zero, .extra2 = &init_net.nf_frag.frags.high_thresh }, { .procname = "nf_conntrack_frag6_high_thresh", .data = &init_net.nf_frag.frags.high_thresh, - .maxlen = sizeof(unsigned int), + .maxlen = sizeof(unsigned long), .mode = 0644, - .proc_handler = proc_dointvec_minmax, + .proc_handler = proc_doulongvec_minmax, .extra1 = &init_net.nf_frag.frags.low_thresh }, { } diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index 5704ec3d31780..dc04c024986ce 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -47,7 +47,7 @@ static int sockstat6_seq_show(struct seq_file *seq, void *v) sock_prot_inuse_get(net, &udplitev6_prot)); seq_printf(seq, "RAW6: inuse %d\n", sock_prot_inuse_get(net, &rawv6_prot)); - seq_printf(seq, "FRAG6: inuse %u memory %u\n", + seq_printf(seq, "FRAG6: inuse %u memory %lu\n", atomic_read(&net->ipv6.frags.rhashtable.nelems), frag_mem_limit(&net->ipv6.frags)); return 0; diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 2a77fda5e3bca..905a8aee2671f 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -552,15 +552,15 @@ static struct ctl_table ip6_frags_ns_ctl_table[] = { { .procname = "ip6frag_high_thresh", .data = &init_net.ipv6.frags.high_thresh, - .maxlen = sizeof(int), + .maxlen = sizeof(unsigned long), .mode = 0644, - .proc_handler = proc_dointvec_minmax, + .proc_handler = proc_doulongvec_minmax, .extra1 = &init_net.ipv6.frags.low_thresh }, { .procname = "ip6frag_low_thresh", .data = &init_net.ipv6.frags.low_thresh, - .maxlen = sizeof(int), + .maxlen = sizeof(unsigned long), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = &zero, -- GitLab From 085a0147447a4f82138825b6a3a329b997c2fb13 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 13 Sep 2018 07:58:46 -0700 Subject: [PATCH 0310/2269] inet: frags: do not clone skb in ip_expire() An skb_clone() was added in commit ec4fbd64751d ("inet: frag: release spinlock before calling icmp_send()") While fixing the bug at that time, it also added a very high cost for DDOS frags, as the ICMP rate limit is applied after this expensive operation (skb_clone() + consume_skb(), implying memory allocations, copy, and freeing) We can use skb_get(head) here, all we want is to make sure skb wont be freed by another cpu. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller (cherry picked from commit 1eec5d5670084ee644597bd26c25e22c69b9f748) Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ip_fragment.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index dc3ed0ac4c58c..88fa8ffc55580 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -143,8 +143,8 @@ static bool frag_expire_skip_icmp(u32 user) static void ip_expire(struct timer_list *t) { struct inet_frag_queue *frag = from_timer(frag, t, timer); - struct sk_buff *clone, *head; const struct iphdr *iph; + struct sk_buff *head; struct net *net; struct ipq *qp; int err; @@ -187,16 +187,12 @@ static void ip_expire(struct timer_list *t) (skb_rtable(head)->rt_type != RTN_LOCAL)) goto out; - clone = skb_clone(head, GFP_ATOMIC); + skb_get(head); + spin_unlock(&qp->q.lock); + icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0); + kfree_skb(head); + goto out_rcu_unlock; - /* Send an ICMP "Fragment Reassembly Timeout" message. */ - if (clone) { - spin_unlock(&qp->q.lock); - icmp_send(clone, ICMP_TIME_EXCEEDED, - ICMP_EXC_FRAGTIME, 0); - consume_skb(clone); - goto out_rcu_unlock; - } out: spin_unlock(&qp->q.lock); out_rcu_unlock: -- GitLab From 3226bdcb044862084c3bfc3278d148948600ebc4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 13 Sep 2018 07:58:47 -0700 Subject: [PATCH 0311/2269] ipv6: frags: rewrite ip6_expire_frag_queue() Make it similar to IPv4 ip_expire(), and release the lock before calling icmp functions. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller (cherry picked from commit 05c0b86b9696802fd0ce5676a92a63f1b455bdf3) Signed-off-by: Greg Kroah-Hartman --- net/ipv6/reassembly.c | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 905a8aee2671f..2127da130dc2c 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -92,7 +92,9 @@ EXPORT_SYMBOL(ip6_frag_init); void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq) { struct net_device *dev = NULL; + struct sk_buff *head; + rcu_read_lock(); spin_lock(&fq->q.lock); if (fq->q.flags & INET_FRAG_COMPLETE) @@ -100,28 +102,34 @@ void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq) inet_frag_kill(&fq->q); - rcu_read_lock(); dev = dev_get_by_index_rcu(net, fq->iif); if (!dev) - goto out_rcu_unlock; + goto out; __IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS); __IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT); /* Don't send error if the first segment did not arrive. */ - if (!(fq->q.flags & INET_FRAG_FIRST_IN) || !fq->q.fragments) - goto out_rcu_unlock; + head = fq->q.fragments; + if (!(fq->q.flags & INET_FRAG_FIRST_IN) || !head) + goto out; /* But use as source device on which LAST ARRIVED * segment was received. And do not use fq->dev * pointer directly, device might already disappeared. */ - fq->q.fragments->dev = dev; - icmpv6_send(fq->q.fragments, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0); -out_rcu_unlock: - rcu_read_unlock(); + head->dev = dev; + skb_get(head); + spin_unlock(&fq->q.lock); + + icmpv6_send(head, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0); + kfree_skb(head); + goto out_rcu_unlock; + out: spin_unlock(&fq->q.lock); +out_rcu_unlock: + rcu_read_unlock(); inet_frag_put(&fq->q); } EXPORT_SYMBOL(ip6_expire_frag_queue); -- GitLab From bd946fb5226e205530bea2581d867642e4b457ed Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 13 Sep 2018 07:58:48 -0700 Subject: [PATCH 0312/2269] rhashtable: reorganize struct rhashtable layout While under frags DDOS I noticed unfortunate false sharing between @nelems and @params.automatic_shrinking Move @nelems at the end of struct rhashtable so that first cache line is shared between all cpus, because almost never dirtied. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller (cherry picked from commit e5d672a0780d9e7118caad4c171ec88b8299398d) Signed-off-by: Greg Kroah-Hartman --- include/linux/rhashtable.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 7fd514f36e74a..a4be6388a9803 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -152,25 +152,25 @@ struct rhashtable_params { /** * struct rhashtable - Hash table handle * @tbl: Bucket table - * @nelems: Number of elements in table * @key_len: Key length for hashfn - * @p: Configuration parameters * @max_elems: Maximum number of elements in table + * @p: Configuration parameters * @rhlist: True if this is an rhltable * @run_work: Deferred worker to expand/shrink asynchronously * @mutex: Mutex to protect current/future table swapping * @lock: Spin lock to protect walker list + * @nelems: Number of elements in table */ struct rhashtable { struct bucket_table __rcu *tbl; - atomic_t nelems; unsigned int key_len; - struct rhashtable_params p; unsigned int max_elems; + struct rhashtable_params p; bool rhlist; struct work_struct run_work; struct mutex mutex; spinlock_t lock; + atomic_t nelems; }; /** -- GitLab From 8291cd943a9b4e2d764a4a294999bbb2f94f153c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 13 Sep 2018 07:58:49 -0700 Subject: [PATCH 0313/2269] inet: frags: reorganize struct netns_frags Put the read-mostly fields in a separate cache line at the beginning of struct netns_frags, to reduce false sharing noticed in inet_frag_kill() Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller (cherry picked from commit c2615cf5a761b32bf74e85bddc223dfff3d9b9f0) Signed-off-by: Greg Kroah-Hartman --- include/net/inet_frag.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index a52e7273e7a59..ed07e3786d986 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -5,16 +5,17 @@ #include struct netns_frags { - struct rhashtable rhashtable ____cacheline_aligned_in_smp; - - /* Keep atomic mem on separate cachelines in structs that include it */ - atomic_long_t mem ____cacheline_aligned_in_smp; /* sysctls */ long high_thresh; long low_thresh; int timeout; int max_dist; struct inet_frags *f; + + struct rhashtable rhashtable ____cacheline_aligned_in_smp; + + /* Keep atomic mem on separate cachelines in structs that include it */ + atomic_long_t mem ____cacheline_aligned_in_smp; }; /** -- GitLab From 48c2afc16888873da727f9ed7102a620a178fad8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 13 Sep 2018 07:58:50 -0700 Subject: [PATCH 0314/2269] inet: frags: get rid of ipfrag_skb_cb/FRAG_CB ip_defrag uses skb->cb[] to store the fragment offset, and unfortunately this integer is currently in a different cache line than skb->next, meaning that we use two cache lines per skb when finding the insertion point. By aliasing skb->ip_defrag_offset and skb->dev, we pack all the fields in a single cache line and save precious memory bandwidth. Note that after the fast path added by Changli Gao in commit d6bebca92c66 ("fragment: add fast path for in-order fragments") this change wont help the fast path, since we still need to access prev->len (2nd cache line), but will show great benefits when slow path is entered, since we perform a linear scan of a potentially long list. Also, note that this potential long list is an attack vector, we might consider also using an rb-tree there eventually. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller (cherry picked from commit bf66337140c64c27fa37222b7abca7e49d63fb57) Signed-off-by: Greg Kroah-Hartman --- include/linux/skbuff.h | 1 + net/ipv4/ip_fragment.c | 35 ++++++++++++++--------------------- 2 files changed, 15 insertions(+), 21 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 6dd77767fd5b8..f4749678b7eef 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -678,6 +678,7 @@ struct sk_buff { * UDP receive path is one user. */ unsigned long dev_scratch; + int ip_defrag_offset; }; /* * This is the control buffer. It is free to use for every diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 88fa8ffc55580..5331a0d683740 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -57,14 +57,6 @@ */ static const char ip_frag_cache_name[] = "ip4-frags"; -struct ipfrag_skb_cb -{ - struct inet_skb_parm h; - int offset; -}; - -#define FRAG_CB(skb) ((struct ipfrag_skb_cb *)((skb)->cb)) - /* Describe an entry in the "incomplete datagrams" queue. */ struct ipq { struct inet_frag_queue q; @@ -353,13 +345,13 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) * this fragment, right? */ prev = qp->q.fragments_tail; - if (!prev || FRAG_CB(prev)->offset < offset) { + if (!prev || prev->ip_defrag_offset < offset) { next = NULL; goto found; } prev = NULL; for (next = qp->q.fragments; next != NULL; next = next->next) { - if (FRAG_CB(next)->offset >= offset) + if (next->ip_defrag_offset >= offset) break; /* bingo! */ prev = next; } @@ -370,7 +362,7 @@ found: * any overlaps are eliminated. */ if (prev) { - int i = (FRAG_CB(prev)->offset + prev->len) - offset; + int i = (prev->ip_defrag_offset + prev->len) - offset; if (i > 0) { offset += i; @@ -387,8 +379,8 @@ found: err = -ENOMEM; - while (next && FRAG_CB(next)->offset < end) { - int i = end - FRAG_CB(next)->offset; /* overlap is 'i' bytes */ + while (next && next->ip_defrag_offset < end) { + int i = end - next->ip_defrag_offset; /* overlap is 'i' bytes */ if (i < next->len) { int delta = -next->truesize; @@ -401,7 +393,7 @@ found: delta += next->truesize; if (delta) add_frag_mem_limit(qp->q.net, delta); - FRAG_CB(next)->offset += i; + next->ip_defrag_offset += i; qp->q.meat -= i; if (next->ip_summed != CHECKSUM_UNNECESSARY) next->ip_summed = CHECKSUM_NONE; @@ -425,7 +417,13 @@ found: } } - FRAG_CB(skb)->offset = offset; + /* Note : skb->ip_defrag_offset and skb->dev share the same location */ + dev = skb->dev; + if (dev) + qp->iif = dev->ifindex; + /* Makes sure compiler wont do silly aliasing games */ + barrier(); + skb->ip_defrag_offset = offset; /* Insert this fragment in the chain of fragments. */ skb->next = next; @@ -436,11 +434,6 @@ found: else qp->q.fragments = skb; - dev = skb->dev; - if (dev) { - qp->iif = dev->ifindex; - skb->dev = NULL; - } qp->q.stamp = skb->tstamp; qp->q.meat += skb->len; qp->ecn |= ecn; @@ -516,7 +509,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, } WARN_ON(!head); - WARN_ON(FRAG_CB(head)->offset != 0); + WARN_ON(head->ip_defrag_offset != 0); /* Allocate a new buffer for the datagram. */ ihlen = ip_hdrlen(head); -- GitLab From 5fff99e88a1f4b4e62fd07bf3eb87305c88f3400 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 13 Sep 2018 07:58:51 -0700 Subject: [PATCH 0315/2269] inet: frags: fix ip6frag_low_thresh boundary MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Giving an integer to proc_doulongvec_minmax() is dangerous on 64bit arches, since linker might place next to it a non zero value preventing a change to ip6frag_low_thresh. ip6frag_low_thresh is not used anymore in the kernel, but we do not want to prematuraly break user scripts wanting to change it. Since specifying a minimal value of 0 for proc_doulongvec_minmax() is moot, let's remove these zero values in all defrag units. Fixes: 6e00f7dd5e4e ("ipv6: frags: fix /proc/sys/net/ipv6/ip6frag_low_thresh") Signed-off-by: Eric Dumazet Reported-by: Maciej Żenczykowski Signed-off-by: David S. Miller (cherry picked from commit 3d23401283e80ceb03f765842787e0e79ff598b7) Signed-off-by: Greg Kroah-Hartman --- net/ieee802154/6lowpan/reassembly.c | 2 -- net/ipv4/ip_fragment.c | 5 ++--- net/ipv6/netfilter/nf_conntrack_reasm.c | 2 -- net/ipv6/reassembly.c | 4 +--- 4 files changed, 3 insertions(+), 10 deletions(-) diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c index 44f148a6bb57b..1790b65944b3e 100644 --- a/net/ieee802154/6lowpan/reassembly.c +++ b/net/ieee802154/6lowpan/reassembly.c @@ -411,7 +411,6 @@ err: } #ifdef CONFIG_SYSCTL -static long zero; static struct ctl_table lowpan_frags_ns_ctl_table[] = { { @@ -428,7 +427,6 @@ static struct ctl_table lowpan_frags_ns_ctl_table[] = { .maxlen = sizeof(unsigned long), .mode = 0644, .proc_handler = proc_doulongvec_minmax, - .extra1 = &zero, .extra2 = &init_net.ieee802154_lowpan.frags.high_thresh }, { diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 5331a0d683740..d14d741fb05e5 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -672,7 +672,7 @@ struct sk_buff *ip_check_defrag(struct net *net, struct sk_buff *skb, u32 user) EXPORT_SYMBOL(ip_check_defrag); #ifdef CONFIG_SYSCTL -static long zero; +static int dist_min; static struct ctl_table ip4_frags_ns_ctl_table[] = { { @@ -689,7 +689,6 @@ static struct ctl_table ip4_frags_ns_ctl_table[] = { .maxlen = sizeof(unsigned long), .mode = 0644, .proc_handler = proc_doulongvec_minmax, - .extra1 = &zero, .extra2 = &init_net.ipv4.frags.high_thresh }, { @@ -705,7 +704,7 @@ static struct ctl_table ip4_frags_ns_ctl_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero + .extra1 = &dist_min, }, { } }; diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 6613f81e553ae..a1dc0d6a59498 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -63,7 +63,6 @@ struct nf_ct_frag6_skb_cb static struct inet_frags nf_frags; #ifdef CONFIG_SYSCTL -static long zero; static struct ctl_table nf_ct_frag6_sysctl_table[] = { { @@ -79,7 +78,6 @@ static struct ctl_table nf_ct_frag6_sysctl_table[] = { .maxlen = sizeof(unsigned long), .mode = 0644, .proc_handler = proc_doulongvec_minmax, - .extra1 = &zero, .extra2 = &init_net.nf_frag.frags.high_thresh }, { diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 2127da130dc2c..e1c5fa5e3873f 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -554,7 +554,6 @@ static const struct inet6_protocol frag_protocol = { }; #ifdef CONFIG_SYSCTL -static int zero; static struct ctl_table ip6_frags_ns_ctl_table[] = { { @@ -570,8 +569,7 @@ static struct ctl_table ip6_frags_ns_ctl_table[] = { .data = &init_net.ipv6.frags.low_thresh, .maxlen = sizeof(unsigned long), .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, + .proc_handler = proc_doulongvec_minmax, .extra2 = &init_net.ipv6.frags.high_thresh }, { -- GitLab From 1c44969111cc68f361638b6e54f5a176609aa05a Mon Sep 17 00:00:00 2001 From: Peter Oskolkov Date: Thu, 13 Sep 2018 07:58:52 -0700 Subject: [PATCH 0316/2269] ip: discard IPv4 datagrams with overlapping segments. This behavior is required in IPv6, and there is little need to tolerate overlapping fragments in IPv4. This change simplifies the code and eliminates potential DDoS attack vectors. Tested: ran ip_defrag selftest (not yet available uptream). Suggested-by: David S. Miller Signed-off-by: Peter Oskolkov Signed-off-by: Eric Dumazet Cc: Florian Westphal Acked-by: Stephen Hemminger Signed-off-by: David S. Miller (cherry picked from commit 7969e5c40dfd04799d4341f1b7cd266b6e47f227) Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/snmp.h | 1 + net/ipv4/ip_fragment.c | 75 ++++++++++----------------------------- net/ipv4/proc.c | 1 + 3 files changed, 21 insertions(+), 56 deletions(-) diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h index 0d941cdd8e8c0..f5d753e60836e 100644 --- a/include/uapi/linux/snmp.h +++ b/include/uapi/linux/snmp.h @@ -56,6 +56,7 @@ enum IPSTATS_MIB_ECT1PKTS, /* InECT1Pkts */ IPSTATS_MIB_ECT0PKTS, /* InECT0Pkts */ IPSTATS_MIB_CEPKTS, /* InCEPkts */ + IPSTATS_MIB_REASM_OVERLAPS, /* ReasmOverlaps */ __IPSTATS_MIB_MAX }; diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index d14d741fb05e5..960bf5eab59fc 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -277,6 +277,7 @@ static int ip_frag_reinit(struct ipq *qp) /* Add new segment to existing queue. */ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) { + struct net *net = container_of(qp->q.net, struct net, ipv4.frags); struct sk_buff *prev, *next; struct net_device *dev; unsigned int fragsize; @@ -357,65 +358,23 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) } found: - /* We found where to put this one. Check for overlap with - * preceding fragment, and, if needed, align things so that - * any overlaps are eliminated. + /* RFC5722, Section 4, amended by Errata ID : 3089 + * When reassembling an IPv6 datagram, if + * one or more its constituent fragments is determined to be an + * overlapping fragment, the entire datagram (and any constituent + * fragments) MUST be silently discarded. + * + * We do the same here for IPv4. */ - if (prev) { - int i = (prev->ip_defrag_offset + prev->len) - offset; - if (i > 0) { - offset += i; - err = -EINVAL; - if (end <= offset) - goto err; - err = -ENOMEM; - if (!pskb_pull(skb, i)) - goto err; - if (skb->ip_summed != CHECKSUM_UNNECESSARY) - skb->ip_summed = CHECKSUM_NONE; - } - } + /* Is there an overlap with the previous fragment? */ + if (prev && + (prev->ip_defrag_offset + prev->len) > offset) + goto discard_qp; - err = -ENOMEM; - - while (next && next->ip_defrag_offset < end) { - int i = end - next->ip_defrag_offset; /* overlap is 'i' bytes */ - - if (i < next->len) { - int delta = -next->truesize; - - /* Eat head of the next overlapped fragment - * and leave the loop. The next ones cannot overlap. - */ - if (!pskb_pull(next, i)) - goto err; - delta += next->truesize; - if (delta) - add_frag_mem_limit(qp->q.net, delta); - next->ip_defrag_offset += i; - qp->q.meat -= i; - if (next->ip_summed != CHECKSUM_UNNECESSARY) - next->ip_summed = CHECKSUM_NONE; - break; - } else { - struct sk_buff *free_it = next; - - /* Old fragment is completely overridden with - * new one drop it. - */ - next = next->next; - - if (prev) - prev->next = next; - else - qp->q.fragments = next; - - qp->q.meat -= free_it->len; - sub_frag_mem_limit(qp->q.net, free_it->truesize); - kfree_skb(free_it); - } - } + /* Is there an overlap with the next fragment? */ + if (next && next->ip_defrag_offset < end) + goto discard_qp; /* Note : skb->ip_defrag_offset and skb->dev share the same location */ dev = skb->dev; @@ -463,6 +422,10 @@ found: skb_dst_drop(skb); return -EINPROGRESS; +discard_qp: + inet_frag_kill(&qp->q); + err = -EINVAL; + __IP_INC_STATS(net, IPSTATS_MIB_REASM_OVERLAPS); err: kfree_skb(skb); return err; diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 8fbd5633e5448..3fbf688a1943a 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -132,6 +132,7 @@ static const struct snmp_mib snmp4_ipextstats_list[] = { SNMP_MIB_ITEM("InECT1Pkts", IPSTATS_MIB_ECT1PKTS), SNMP_MIB_ITEM("InECT0Pkts", IPSTATS_MIB_ECT0PKTS), SNMP_MIB_ITEM("InCEPkts", IPSTATS_MIB_CEPKTS), + SNMP_MIB_ITEM("ReasmOverlaps", IPSTATS_MIB_REASM_OVERLAPS), SNMP_MIB_SENTINEL }; -- GitLab From 7750c414b89bd8204901855bda21c512e269be35 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 13 Sep 2018 07:58:53 -0700 Subject: [PATCH 0317/2269] net: speed up skb_rbtree_purge() As measured in my prior patch ("sch_netem: faster rb tree removal"), rbtree_postorder_for_each_entry_safe() is nice looking but much slower than using rb_next() directly, except when tree is small enough to fit in CPU caches (then the cost is the same) Also note that there is not even an increase of text size : $ size net/core/skbuff.o.before net/core/skbuff.o text data bss dec hex filename 40711 1298 0 42009 a419 net/core/skbuff.o.before 40711 1298 0 42009 a419 net/core/skbuff.o From: Eric Dumazet Signed-off-by: David S. Miller (cherry picked from commit 7c90584c66cc4b033a3b684b0e0950f79e7b7166) Signed-off-by: Greg Kroah-Hartman --- net/core/skbuff.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 2e5eeba97de9e..c7c5f05f2af16 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2850,12 +2850,15 @@ EXPORT_SYMBOL(skb_queue_purge); */ void skb_rbtree_purge(struct rb_root *root) { - struct sk_buff *skb, *next; + struct rb_node *p = rb_first(root); - rbtree_postorder_for_each_entry_safe(skb, next, root, rbnode) - kfree_skb(skb); + while (p) { + struct sk_buff *skb = rb_entry(p, struct sk_buff, rbnode); - *root = RB_ROOT; + p = rb_next(p); + rb_erase(&skb->rbnode, root); + kfree_skb(skb); + } } /** -- GitLab From 3bde783eca23d5d3019c220752f5a29083dea27c Mon Sep 17 00:00:00 2001 From: Peter Oskolkov Date: Thu, 13 Sep 2018 07:58:54 -0700 Subject: [PATCH 0318/2269] net: modify skb_rbtree_purge to return the truesize of all purged skbs. Tested: see the next patch is the series. Suggested-by: Eric Dumazet Signed-off-by: Peter Oskolkov Signed-off-by: Eric Dumazet Cc: Florian Westphal Signed-off-by: David S. Miller (cherry picked from commit 385114dec8a49b5e5945e77ba7de6356106713f4) Signed-off-by: Greg Kroah-Hartman --- include/linux/skbuff.h | 2 +- net/core/skbuff.c | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index f4749678b7eef..9c8457375aee0 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2581,7 +2581,7 @@ static inline void __skb_queue_purge(struct sk_buff_head *list) kfree_skb(skb); } -void skb_rbtree_purge(struct rb_root *root); +unsigned int skb_rbtree_purge(struct rb_root *root); void *netdev_alloc_frag(unsigned int fragsz); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index c7c5f05f2af16..8fd690def5c11 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2842,23 +2842,27 @@ EXPORT_SYMBOL(skb_queue_purge); /** * skb_rbtree_purge - empty a skb rbtree * @root: root of the rbtree to empty + * Return value: the sum of truesizes of all purged skbs. * * Delete all buffers on an &sk_buff rbtree. Each buffer is removed from * the list and one reference dropped. This function does not take * any lock. Synchronization should be handled by the caller (e.g., TCP * out-of-order queue is protected by the socket lock). */ -void skb_rbtree_purge(struct rb_root *root) +unsigned int skb_rbtree_purge(struct rb_root *root) { struct rb_node *p = rb_first(root); + unsigned int sum = 0; while (p) { struct sk_buff *skb = rb_entry(p, struct sk_buff, rbnode); p = rb_next(p); rb_erase(&skb->rbnode, root); + sum += skb->truesize; kfree_skb(skb); } + return sum; } /** -- GitLab From 5123ffdad65954b3c308e055b388db08987a13ff Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 13 Sep 2018 07:58:55 -0700 Subject: [PATCH 0319/2269] ipv6: defrag: drop non-last frags smaller than min mtu don't bother with pathological cases, they only waste cycles. IPv6 requires a minimum MTU of 1280 so we should never see fragments smaller than this (except last frag). v3: don't use awkward "-offset + len" v2: drop IPv4 part, which added same check w. IPV4_MIN_MTU (68). There were concerns that there could be even smaller frags generated by intermediate nodes, e.g. on radio networks. Cc: Peter Oskolkov Cc: Eric Dumazet Signed-off-by: Florian Westphal Signed-off-by: David S. Miller (cherry picked from commit 0ed4229b08c13c84a3c301a08defdc9e7f4467e6) Signed-off-by: Greg Kroah-Hartman --- net/ipv6/netfilter/nf_conntrack_reasm.c | 4 ++++ net/ipv6/reassembly.c | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index a1dc0d6a59498..1d2f07cde01a5 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -565,6 +565,10 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user) hdr = ipv6_hdr(skb); fhdr = (struct frag_hdr *)skb_transport_header(skb); + if (skb->len - skb_network_offset(skb) < IPV6_MIN_MTU && + fhdr->frag_off & htons(IP6_MF)) + return -EINVAL; + skb_orphan(skb); fq = fq_find(net, fhdr->identification, user, hdr, skb->dev ? skb->dev->ifindex : 0); diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index e1c5fa5e3873f..afaad60dc2ac1 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -522,6 +522,10 @@ static int ipv6_frag_rcv(struct sk_buff *skb) return 1; } + if (skb->len - skb_network_offset(skb) < IPV6_MIN_MTU && + fhdr->frag_off & htons(IP6_MF)) + goto fail_hdr; + iif = skb->dev ? skb->dev->ifindex : 0; fq = fq_find(net, fhdr->identification, hdr, iif); if (fq) { -- GitLab From 6bf32cda46ebfbaf13da3c48a0a009adae925703 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 13 Sep 2018 07:58:56 -0700 Subject: [PATCH 0320/2269] net: pskb_trim_rcsum() and CHECKSUM_COMPLETE are friends After working on IP defragmentation lately, I found that some large packets defeat CHECKSUM_COMPLETE optimization because of NIC adding zero paddings on the last (small) fragment. While removing the padding with pskb_trim_rcsum(), we set skb->ip_summed to CHECKSUM_NONE, forcing a full csum validation, even if all prior fragments had CHECKSUM_COMPLETE set. We can instead compute the checksum of the part we are trimming, usually smaller than the part we keep. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller (cherry picked from commit 88078d98d1bb085d72af8437707279e203524fa5) Signed-off-by: Greg Kroah-Hartman --- include/linux/skbuff.h | 5 ++--- net/core/skbuff.c | 14 ++++++++++++++ 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 9c8457375aee0..758084b434c86 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3135,6 +3135,7 @@ static inline void *skb_push_rcsum(struct sk_buff *skb, unsigned int len) return skb->data; } +int pskb_trim_rcsum_slow(struct sk_buff *skb, unsigned int len); /** * pskb_trim_rcsum - trim received skb and update checksum * @skb: buffer to trim @@ -3148,9 +3149,7 @@ static inline int pskb_trim_rcsum(struct sk_buff *skb, unsigned int len) { if (likely(len >= skb->len)) return 0; - if (skb->ip_summed == CHECKSUM_COMPLETE) - skb->ip_summed = CHECKSUM_NONE; - return __pskb_trim(skb, len); + return pskb_trim_rcsum_slow(skb, len); } static inline int __skb_trim_rcsum(struct sk_buff *skb, unsigned int len) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 8fd690def5c11..168a3e8883d4b 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -1839,6 +1839,20 @@ done: } EXPORT_SYMBOL(___pskb_trim); +/* Note : use pskb_trim_rcsum() instead of calling this directly + */ +int pskb_trim_rcsum_slow(struct sk_buff *skb, unsigned int len) +{ + if (skb->ip_summed == CHECKSUM_COMPLETE) { + int delta = skb->len - len; + + skb->csum = csum_sub(skb->csum, + skb_checksum(skb, len, delta, 0)); + } + return __pskb_trim(skb, len); +} +EXPORT_SYMBOL(pskb_trim_rcsum_slow); + /** * __pskb_pull_tail - advance tail of skb header * @skb: buffer to reallocate -- GitLab From 37c7cc80b1d7de36a6ed54796ae30ee091d05eab Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 13 Sep 2018 07:58:57 -0700 Subject: [PATCH 0321/2269] net: add rb_to_skb() and other rb tree helpers Geeralize private netem_rb_to_skb() TCP rtx queue will soon be converted to rb-tree, so we will need skb_rbtree_walk() helpers. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller (cherry picked from commit 18a4c0eab2623cc95be98a1e6af1ad18e7695977) Signed-off-by: Greg Kroah-Hartman --- include/linux/skbuff.h | 18 ++++++++++++++++++ net/ipv4/tcp_fastopen.c | 8 +++----- net/ipv4/tcp_input.c | 33 ++++++++++++--------------------- net/sched/sch_netem.c | 14 ++++---------- 4 files changed, 37 insertions(+), 36 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 758084b434c86..2837e55df03e1 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3169,6 +3169,12 @@ static inline int __skb_grow_rcsum(struct sk_buff *skb, unsigned int len) #define rb_to_skb(rb) rb_entry_safe(rb, struct sk_buff, rbnode) +#define rb_to_skb(rb) rb_entry_safe(rb, struct sk_buff, rbnode) +#define skb_rb_first(root) rb_to_skb(rb_first(root)) +#define skb_rb_last(root) rb_to_skb(rb_last(root)) +#define skb_rb_next(skb) rb_to_skb(rb_next(&(skb)->rbnode)) +#define skb_rb_prev(skb) rb_to_skb(rb_prev(&(skb)->rbnode)) + #define skb_queue_walk(queue, skb) \ for (skb = (queue)->next; \ skb != (struct sk_buff *)(queue); \ @@ -3183,6 +3189,18 @@ static inline int __skb_grow_rcsum(struct sk_buff *skb, unsigned int len) for (; skb != (struct sk_buff *)(queue); \ skb = skb->next) +#define skb_rbtree_walk(skb, root) \ + for (skb = skb_rb_first(root); skb != NULL; \ + skb = skb_rb_next(skb)) + +#define skb_rbtree_walk_from(skb) \ + for (; skb != NULL; \ + skb = skb_rb_next(skb)) + +#define skb_rbtree_walk_from_safe(skb, tmp) \ + for (; tmp = skb ? skb_rb_next(skb) : NULL, (skb != NULL); \ + skb = tmp) + #define skb_queue_walk_from_safe(queue, skb, tmp) \ for (tmp = skb->next; \ skb != (struct sk_buff *)(queue); \ diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index fbbeda6477740..0567edb76522c 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -458,17 +458,15 @@ bool tcp_fastopen_active_should_disable(struct sock *sk) void tcp_fastopen_active_disable_ofo_check(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); - struct rb_node *p; - struct sk_buff *skb; struct dst_entry *dst; + struct sk_buff *skb; if (!tp->syn_fastopen) return; if (!tp->data_segs_in) { - p = rb_first(&tp->out_of_order_queue); - if (p && !rb_next(p)) { - skb = rb_entry(p, struct sk_buff, rbnode); + skb = skb_rb_first(&tp->out_of_order_queue); + if (skb && !skb_rb_next(skb)) { if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) { tcp_fastopen_active_disable(sk); return; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index bdabd748f4bcf..991f382afc1b0 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4372,7 +4372,7 @@ static void tcp_ofo_queue(struct sock *sk) p = rb_first(&tp->out_of_order_queue); while (p) { - skb = rb_entry(p, struct sk_buff, rbnode); + skb = rb_to_skb(p); if (after(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) break; @@ -4440,7 +4440,7 @@ static int tcp_try_rmem_schedule(struct sock *sk, struct sk_buff *skb, static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); - struct rb_node **p, *q, *parent; + struct rb_node **p, *parent; struct sk_buff *skb1; u32 seq, end_seq; bool fragstolen; @@ -4503,7 +4503,7 @@ coalesce_done: parent = NULL; while (*p) { parent = *p; - skb1 = rb_entry(parent, struct sk_buff, rbnode); + skb1 = rb_to_skb(parent); if (before(seq, TCP_SKB_CB(skb1)->seq)) { p = &parent->rb_left; continue; @@ -4548,9 +4548,7 @@ insert: merge_right: /* Remove other segments covered by skb. */ - while ((q = rb_next(&skb->rbnode)) != NULL) { - skb1 = rb_entry(q, struct sk_buff, rbnode); - + while ((skb1 = skb_rb_next(skb)) != NULL) { if (!after(end_seq, TCP_SKB_CB(skb1)->seq)) break; if (before(end_seq, TCP_SKB_CB(skb1)->end_seq)) { @@ -4565,7 +4563,7 @@ merge_right: tcp_drop(sk, skb1); } /* If there is no skb after us, we are the last_skb ! */ - if (!q) + if (!skb1) tp->ooo_last_skb = skb; add_sack: @@ -4749,7 +4747,7 @@ static struct sk_buff *tcp_skb_next(struct sk_buff *skb, struct sk_buff_head *li if (list) return !skb_queue_is_last(list, skb) ? skb->next : NULL; - return rb_entry_safe(rb_next(&skb->rbnode), struct sk_buff, rbnode); + return skb_rb_next(skb); } static struct sk_buff *tcp_collapse_one(struct sock *sk, struct sk_buff *skb, @@ -4778,7 +4776,7 @@ static void tcp_rbtree_insert(struct rb_root *root, struct sk_buff *skb) while (*p) { parent = *p; - skb1 = rb_entry(parent, struct sk_buff, rbnode); + skb1 = rb_to_skb(parent); if (before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb1)->seq)) p = &parent->rb_left; else @@ -4898,19 +4896,12 @@ static void tcp_collapse_ofo_queue(struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); u32 range_truesize, sum_tiny = 0; struct sk_buff *skb, *head; - struct rb_node *p; u32 start, end; - p = rb_first(&tp->out_of_order_queue); - skb = rb_entry_safe(p, struct sk_buff, rbnode); + skb = skb_rb_first(&tp->out_of_order_queue); new_range: if (!skb) { - p = rb_last(&tp->out_of_order_queue); - /* Note: This is possible p is NULL here. We do not - * use rb_entry_safe(), as ooo_last_skb is valid only - * if rbtree is not empty. - */ - tp->ooo_last_skb = rb_entry(p, struct sk_buff, rbnode); + tp->ooo_last_skb = skb_rb_last(&tp->out_of_order_queue); return; } start = TCP_SKB_CB(skb)->seq; @@ -4918,7 +4909,7 @@ new_range: range_truesize = skb->truesize; for (head = skb;;) { - skb = tcp_skb_next(skb, NULL); + skb = skb_rb_next(skb); /* Range is terminated when we see a gap or when * we are at the queue end. @@ -4974,7 +4965,7 @@ static bool tcp_prune_ofo_queue(struct sock *sk) prev = rb_prev(node); rb_erase(node, &tp->out_of_order_queue); goal -= rb_to_skb(node)->truesize; - tcp_drop(sk, rb_entry(node, struct sk_buff, rbnode)); + tcp_drop(sk, rb_to_skb(node)); if (!prev || goal <= 0) { sk_mem_reclaim(sk); if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf && @@ -4984,7 +4975,7 @@ static bool tcp_prune_ofo_queue(struct sock *sk) } node = prev; } while (node); - tp->ooo_last_skb = rb_entry(prev, struct sk_buff, rbnode); + tp->ooo_last_skb = rb_to_skb(prev); /* Reset SACK state. A conforming SACK implementation will * do the same at a timeout based retransmit. When a connection diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 8c8df75dbeadd..2a2ab6bfe5d85 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -149,12 +149,6 @@ struct netem_skb_cb { ktime_t tstamp_save; }; - -static struct sk_buff *netem_rb_to_skb(struct rb_node *rb) -{ - return rb_entry(rb, struct sk_buff, rbnode); -} - static inline struct netem_skb_cb *netem_skb_cb(struct sk_buff *skb) { /* we assume we can use skb next/prev/tstamp as storage for rb_node */ @@ -365,7 +359,7 @@ static void tfifo_reset(struct Qdisc *sch) struct rb_node *p; while ((p = rb_first(&q->t_root))) { - struct sk_buff *skb = netem_rb_to_skb(p); + struct sk_buff *skb = rb_to_skb(p); rb_erase(p, &q->t_root); rtnl_kfree_skbs(skb, skb); @@ -382,7 +376,7 @@ static void tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch) struct sk_buff *skb; parent = *p; - skb = netem_rb_to_skb(parent); + skb = rb_to_skb(parent); if (tnext >= netem_skb_cb(skb)->time_to_send) p = &parent->rb_right; else @@ -538,7 +532,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff *t_skb; struct netem_skb_cb *t_last; - t_skb = netem_rb_to_skb(rb_last(&q->t_root)); + t_skb = skb_rb_last(&q->t_root); t_last = netem_skb_cb(t_skb); if (!last || t_last->time_to_send > last->time_to_send) { @@ -618,7 +612,7 @@ deliver: if (p) { psched_time_t time_to_send; - skb = netem_rb_to_skb(p); + skb = rb_to_skb(p); /* if more time remaining? */ time_to_send = netem_skb_cb(skb)->time_to_send; -- GitLab From 6b921536f1707a240e6f53843f1f26231016fda5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 13 Sep 2018 07:58:58 -0700 Subject: [PATCH 0322/2269] net: sk_buff rbnode reorg commit bffa72cf7f9df842f0016ba03586039296b4caaf upstream skb->rbnode shares space with skb->next, skb->prev and skb->tstamp Current uses (TCP receive ofo queue and netem) need to save/restore tstamp, while skb->dev is either NULL (TCP) or a constant for a given queue (netem). Since we plan using an RB tree for TCP retransmit queue to speedup SACK processing with large BDP, this patch exchanges skb->dev and skb->tstamp. This saves some overhead in both TCP and netem. v2: removes the swtstamp field from struct tcp_skb_cb Signed-off-by: Eric Dumazet Cc: Soheil Hassas Yeganeh Cc: Wei Wang Cc: Willem de Bruijn Acked-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/linux/skbuff.h | 24 ++-- include/net/inet_frag.h | 3 +- net/ipv4/inet_fragment.c | 16 ++- net/ipv4/ip_fragment.c | 182 +++++++++++++----------- net/ipv6/netfilter/nf_conntrack_reasm.c | 1 + net/ipv6/reassembly.c | 1 + 6 files changed, 129 insertions(+), 98 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 2837e55df03e1..f64e884440826 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -663,23 +663,27 @@ struct sk_buff { struct sk_buff *prev; union { - ktime_t tstamp; - u64 skb_mstamp; + struct net_device *dev; + /* Some protocols might use this space to store information, + * while device pointer would be NULL. + * UDP receive path is one user. + */ + unsigned long dev_scratch; }; }; - struct rb_node rbnode; /* used in netem & tcp stack */ + struct rb_node rbnode; /* used in netem, ip4 defrag, and tcp stack */ + struct list_head list; }; - struct sock *sk; union { - struct net_device *dev; - /* Some protocols might use this space to store information, - * while device pointer would be NULL. - * UDP receive path is one user. - */ - unsigned long dev_scratch; + struct sock *sk; int ip_defrag_offset; }; + + union { + ktime_t tstamp; + u64 skb_mstamp; + }; /* * This is the control buffer. It is free to use for every * layer. Please put your private variables there. If you diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index ed07e3786d986..e4c71a7644be0 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -75,7 +75,8 @@ struct inet_frag_queue { struct timer_list timer; spinlock_t lock; refcount_t refcnt; - struct sk_buff *fragments; + struct sk_buff *fragments; /* Used in IPv6. */ + struct rb_root rb_fragments; /* Used in IPv4. */ struct sk_buff *fragments_tail; ktime_t stamp; int len; diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index c9e35b81d0931..6904cbb7de1a4 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -136,12 +136,16 @@ void inet_frag_destroy(struct inet_frag_queue *q) fp = q->fragments; nf = q->net; f = nf->f; - while (fp) { - struct sk_buff *xp = fp->next; - - sum_truesize += fp->truesize; - kfree_skb(fp); - fp = xp; + if (fp) { + do { + struct sk_buff *xp = fp->next; + + sum_truesize += fp->truesize; + kfree_skb(fp); + fp = xp; + } while (fp); + } else { + sum_truesize = skb_rbtree_purge(&q->rb_fragments); } sum = sum_truesize + f->qsize; diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 960bf5eab59fc..0e8f8de77e710 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -136,7 +136,7 @@ static void ip_expire(struct timer_list *t) { struct inet_frag_queue *frag = from_timer(frag, t, timer); const struct iphdr *iph; - struct sk_buff *head; + struct sk_buff *head = NULL; struct net *net; struct ipq *qp; int err; @@ -152,14 +152,31 @@ static void ip_expire(struct timer_list *t) ipq_kill(qp); __IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS); - - head = qp->q.fragments; - __IP_INC_STATS(net, IPSTATS_MIB_REASMTIMEOUT); - if (!(qp->q.flags & INET_FRAG_FIRST_IN) || !head) + if (!qp->q.flags & INET_FRAG_FIRST_IN) goto out; + /* sk_buff::dev and sk_buff::rbnode are unionized. So we + * pull the head out of the tree in order to be able to + * deal with head->dev. + */ + if (qp->q.fragments) { + head = qp->q.fragments; + qp->q.fragments = head->next; + } else { + head = skb_rb_first(&qp->q.rb_fragments); + if (!head) + goto out; + rb_erase(&head->rbnode, &qp->q.rb_fragments); + memset(&head->rbnode, 0, sizeof(head->rbnode)); + barrier(); + } + if (head == qp->q.fragments_tail) + qp->q.fragments_tail = NULL; + + sub_frag_mem_limit(qp->q.net, head->truesize); + head->dev = dev_get_by_index_rcu(net, qp->iif); if (!head->dev) goto out; @@ -179,16 +196,16 @@ static void ip_expire(struct timer_list *t) (skb_rtable(head)->rt_type != RTN_LOCAL)) goto out; - skb_get(head); spin_unlock(&qp->q.lock); icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0); - kfree_skb(head); goto out_rcu_unlock; out: spin_unlock(&qp->q.lock); out_rcu_unlock: rcu_read_unlock(); + if (head) + kfree_skb(head); ipq_put(qp); } @@ -231,7 +248,7 @@ static int ip_frag_too_far(struct ipq *qp) end = atomic_inc_return(&peer->rid); qp->rid = end; - rc = qp->q.fragments && (end - start) > max; + rc = qp->q.fragments_tail && (end - start) > max; if (rc) { struct net *net; @@ -245,7 +262,6 @@ static int ip_frag_too_far(struct ipq *qp) static int ip_frag_reinit(struct ipq *qp) { - struct sk_buff *fp; unsigned int sum_truesize = 0; if (!mod_timer(&qp->q.timer, jiffies + qp->q.net->timeout)) { @@ -253,20 +269,14 @@ static int ip_frag_reinit(struct ipq *qp) return -ETIMEDOUT; } - fp = qp->q.fragments; - do { - struct sk_buff *xp = fp->next; - - sum_truesize += fp->truesize; - kfree_skb(fp); - fp = xp; - } while (fp); + sum_truesize = skb_rbtree_purge(&qp->q.rb_fragments); sub_frag_mem_limit(qp->q.net, sum_truesize); qp->q.flags = 0; qp->q.len = 0; qp->q.meat = 0; qp->q.fragments = NULL; + qp->q.rb_fragments = RB_ROOT; qp->q.fragments_tail = NULL; qp->iif = 0; qp->ecn = 0; @@ -278,7 +288,8 @@ static int ip_frag_reinit(struct ipq *qp) static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) { struct net *net = container_of(qp->q.net, struct net, ipv4.frags); - struct sk_buff *prev, *next; + struct rb_node **rbn, *parent; + struct sk_buff *skb1; struct net_device *dev; unsigned int fragsize; int flags, offset; @@ -341,58 +352,58 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) if (err) goto err; - /* Find out which fragments are in front and at the back of us - * in the chain of fragments so far. We must know where to put - * this fragment, right? - */ - prev = qp->q.fragments_tail; - if (!prev || prev->ip_defrag_offset < offset) { - next = NULL; - goto found; - } - prev = NULL; - for (next = qp->q.fragments; next != NULL; next = next->next) { - if (next->ip_defrag_offset >= offset) - break; /* bingo! */ - prev = next; - } + /* Note : skb->rbnode and skb->dev share the same location. */ + dev = skb->dev; + /* Makes sure compiler wont do silly aliasing games */ + barrier(); -found: /* RFC5722, Section 4, amended by Errata ID : 3089 * When reassembling an IPv6 datagram, if * one or more its constituent fragments is determined to be an * overlapping fragment, the entire datagram (and any constituent * fragments) MUST be silently discarded. * - * We do the same here for IPv4. + * We do the same here for IPv4 (and increment an snmp counter). */ - /* Is there an overlap with the previous fragment? */ - if (prev && - (prev->ip_defrag_offset + prev->len) > offset) - goto discard_qp; - - /* Is there an overlap with the next fragment? */ - if (next && next->ip_defrag_offset < end) - goto discard_qp; + /* Find out where to put this fragment. */ + skb1 = qp->q.fragments_tail; + if (!skb1) { + /* This is the first fragment we've received. */ + rb_link_node(&skb->rbnode, NULL, &qp->q.rb_fragments.rb_node); + qp->q.fragments_tail = skb; + } else if ((skb1->ip_defrag_offset + skb1->len) < end) { + /* This is the common/special case: skb goes to the end. */ + /* Detect and discard overlaps. */ + if (offset < (skb1->ip_defrag_offset + skb1->len)) + goto discard_qp; + /* Insert after skb1. */ + rb_link_node(&skb->rbnode, &skb1->rbnode, &skb1->rbnode.rb_right); + qp->q.fragments_tail = skb; + } else { + /* Binary search. Note that skb can become the first fragment, but + * not the last (covered above). */ + rbn = &qp->q.rb_fragments.rb_node; + do { + parent = *rbn; + skb1 = rb_to_skb(parent); + if (end <= skb1->ip_defrag_offset) + rbn = &parent->rb_left; + else if (offset >= skb1->ip_defrag_offset + skb1->len) + rbn = &parent->rb_right; + else /* Found an overlap with skb1. */ + goto discard_qp; + } while (*rbn); + /* Here we have parent properly set, and rbn pointing to + * one of its NULL left/right children. Insert skb. */ + rb_link_node(&skb->rbnode, parent, rbn); + } + rb_insert_color(&skb->rbnode, &qp->q.rb_fragments); - /* Note : skb->ip_defrag_offset and skb->dev share the same location */ - dev = skb->dev; if (dev) qp->iif = dev->ifindex; - /* Makes sure compiler wont do silly aliasing games */ - barrier(); skb->ip_defrag_offset = offset; - /* Insert this fragment in the chain of fragments. */ - skb->next = next; - if (!next) - qp->q.fragments_tail = skb; - if (prev) - prev->next = skb; - else - qp->q.fragments = skb; - qp->q.stamp = skb->tstamp; qp->q.meat += skb->len; qp->ecn |= ecn; @@ -414,7 +425,7 @@ found: unsigned long orefdst = skb->_skb_refdst; skb->_skb_refdst = 0UL; - err = ip_frag_reasm(qp, prev, dev); + err = ip_frag_reasm(qp, skb, dev); skb->_skb_refdst = orefdst; return err; } @@ -431,15 +442,15 @@ err: return err; } - /* Build a new IP datagram from all its fragments. */ - -static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, +static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb, struct net_device *dev) { struct net *net = container_of(qp->q.net, struct net, ipv4.frags); struct iphdr *iph; - struct sk_buff *fp, *head = qp->q.fragments; + struct sk_buff *fp, *head = skb_rb_first(&qp->q.rb_fragments); + struct sk_buff **nextp; /* To build frag_list. */ + struct rb_node *rbn; int len; int ihlen; int err; @@ -453,25 +464,20 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, goto out_fail; } /* Make the one we just received the head. */ - if (prev) { - head = prev->next; - fp = skb_clone(head, GFP_ATOMIC); + if (head != skb) { + fp = skb_clone(skb, GFP_ATOMIC); if (!fp) goto out_nomem; - - fp->next = head->next; - if (!fp->next) + rb_replace_node(&skb->rbnode, &fp->rbnode, &qp->q.rb_fragments); + if (qp->q.fragments_tail == skb) qp->q.fragments_tail = fp; - prev->next = fp; - - skb_morph(head, qp->q.fragments); - head->next = qp->q.fragments->next; - - consume_skb(qp->q.fragments); - qp->q.fragments = head; + skb_morph(skb, head); + rb_replace_node(&head->rbnode, &skb->rbnode, + &qp->q.rb_fragments); + consume_skb(head); + head = skb; } - WARN_ON(!head); WARN_ON(head->ip_defrag_offset != 0); /* Allocate a new buffer for the datagram. */ @@ -496,24 +502,35 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, clone = alloc_skb(0, GFP_ATOMIC); if (!clone) goto out_nomem; - clone->next = head->next; - head->next = clone; skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list; skb_frag_list_init(head); for (i = 0; i < skb_shinfo(head)->nr_frags; i++) plen += skb_frag_size(&skb_shinfo(head)->frags[i]); clone->len = clone->data_len = head->data_len - plen; - head->data_len -= clone->len; - head->len -= clone->len; + skb->truesize += clone->truesize; clone->csum = 0; clone->ip_summed = head->ip_summed; add_frag_mem_limit(qp->q.net, clone->truesize); + skb_shinfo(head)->frag_list = clone; + nextp = &clone->next; + } else { + nextp = &skb_shinfo(head)->frag_list; } - skb_shinfo(head)->frag_list = head->next; skb_push(head, head->data - skb_network_header(head)); - for (fp=head->next; fp; fp = fp->next) { + /* Traverse the tree in order, to build frag_list. */ + rbn = rb_next(&head->rbnode); + rb_erase(&head->rbnode, &qp->q.rb_fragments); + while (rbn) { + struct rb_node *rbnext = rb_next(rbn); + fp = rb_to_skb(rbn); + rb_erase(rbn, &qp->q.rb_fragments); + rbn = rbnext; + *nextp = fp; + nextp = &fp->next; + fp->prev = NULL; + memset(&fp->rbnode, 0, sizeof(fp->rbnode)); head->data_len += fp->len; head->len += fp->len; if (head->ip_summed != fp->ip_summed) @@ -524,7 +541,9 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, } sub_frag_mem_limit(qp->q.net, head->truesize); + *nextp = NULL; head->next = NULL; + head->prev = NULL; head->dev = dev; head->tstamp = qp->q.stamp; IPCB(head)->frag_max_size = max(qp->max_df_size, qp->q.max_size); @@ -552,6 +571,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, __IP_INC_STATS(net, IPSTATS_MIB_REASMOKS); qp->q.fragments = NULL; + qp->q.rb_fragments = RB_ROOT; qp->q.fragments_tail = NULL; return 0; diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 1d2f07cde01a5..82ce0d0f54bf9 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -471,6 +471,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *prev, struct net_devic head->csum); fq->q.fragments = NULL; + fq->q.rb_fragments = RB_ROOT; fq->q.fragments_tail = NULL; return true; diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index afaad60dc2ac1..ede0061b6f5df 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -472,6 +472,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, __IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMOKS); rcu_read_unlock(); fq->q.fragments = NULL; + fq->q.rb_fragments = RB_ROOT; fq->q.fragments_tail = NULL; return 1; -- GitLab From 04b28f406e86512a3592664553b5e17efe663ece Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 13 Sep 2018 07:58:59 -0700 Subject: [PATCH 0323/2269] ipv4: frags: precedence bug in ip_expire() We accidentally removed the parentheses here, but they are required because '!' has higher precedence than '&'. Fixes: fa0f527358bd ("ip: use rb trees for IP frag queue.") Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller (cherry picked from commit 70837ffe3085c9a91488b52ca13ac84424da1042) Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ip_fragment.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 0e8f8de77e710..7cb7ed761d8cf 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -154,7 +154,7 @@ static void ip_expire(struct timer_list *t) __IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS); __IP_INC_STATS(net, IPSTATS_MIB_REASMTIMEOUT); - if (!qp->q.flags & INET_FRAG_FIRST_IN) + if (!(qp->q.flags & INET_FRAG_FIRST_IN)) goto out; /* sk_buff::dev and sk_buff::rbnode are unionized. So we -- GitLab From c91f27fb571666a176e1446646726f78d4657ddb Mon Sep 17 00:00:00 2001 From: Peter Oskolkov Date: Thu, 13 Sep 2018 07:59:00 -0700 Subject: [PATCH 0324/2269] ip: add helpers to process in-order fragments faster. This patch introduces several helper functions/macros that will be used in the follow-up patch. No runtime changes yet. The new logic (fully implemented in the second patch) is as follows: * Nodes in the rb-tree will now contain not single fragments, but lists of consecutive fragments ("runs"). * At each point in time, the current "active" run at the tail is maintained/tracked. Fragments that arrive in-order, adjacent to the previous tail fragment, are added to this tail run without triggering the re-balancing of the rb-tree. * If a fragment arrives out of order with the offset _before_ the tail run, it is inserted into the rb-tree as a single fragment. * If a fragment arrives after the current tail fragment (with a gap), it starts a new "tail" run, as is inserted into the rb-tree at the end as the head of the new run. skb->cb is used to store additional information needed here (suggested by Eric Dumazet). Reported-by: Willem de Bruijn Signed-off-by: Peter Oskolkov Cc: Eric Dumazet Cc: Florian Westphal Signed-off-by: David S. Miller (cherry picked from commit 353c9cb360874e737fb000545f783df756c06f9a) Signed-off-by: Greg Kroah-Hartman --- include/net/inet_frag.h | 6 ++++ net/ipv4/ip_fragment.c | 73 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 79 insertions(+) diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index e4c71a7644be0..335cf7851f129 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -57,7 +57,9 @@ struct frag_v6_compare_key { * @lock: spinlock protecting this frag * @refcnt: reference count of the queue * @fragments: received fragments head + * @rb_fragments: received fragments rb-tree root * @fragments_tail: received fragments tail + * @last_run_head: the head of the last "run". see ip_fragment.c * @stamp: timestamp of the last received fragment * @len: total length of the original datagram * @meat: length of received fragments so far @@ -78,6 +80,7 @@ struct inet_frag_queue { struct sk_buff *fragments; /* Used in IPv6. */ struct rb_root rb_fragments; /* Used in IPv4. */ struct sk_buff *fragments_tail; + struct sk_buff *last_run_head; ktime_t stamp; int len; int meat; @@ -113,6 +116,9 @@ void inet_frag_kill(struct inet_frag_queue *q); void inet_frag_destroy(struct inet_frag_queue *q); struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, void *key); +/* Free all skbs in the queue; return the sum of their truesizes. */ +unsigned int inet_frag_rbtree_purge(struct rb_root *root); + static inline void inet_frag_put(struct inet_frag_queue *q) { if (refcount_dec_and_test(&q->refcnt)) diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 7cb7ed761d8cf..26ace9d2d9767 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -57,6 +57,57 @@ */ static const char ip_frag_cache_name[] = "ip4-frags"; +/* Use skb->cb to track consecutive/adjacent fragments coming at + * the end of the queue. Nodes in the rb-tree queue will + * contain "runs" of one or more adjacent fragments. + * + * Invariants: + * - next_frag is NULL at the tail of a "run"; + * - the head of a "run" has the sum of all fragment lengths in frag_run_len. + */ +struct ipfrag_skb_cb { + struct inet_skb_parm h; + struct sk_buff *next_frag; + int frag_run_len; +}; + +#define FRAG_CB(skb) ((struct ipfrag_skb_cb *)((skb)->cb)) + +static void ip4_frag_init_run(struct sk_buff *skb) +{ + BUILD_BUG_ON(sizeof(struct ipfrag_skb_cb) > sizeof(skb->cb)); + + FRAG_CB(skb)->next_frag = NULL; + FRAG_CB(skb)->frag_run_len = skb->len; +} + +/* Append skb to the last "run". */ +static void ip4_frag_append_to_last_run(struct inet_frag_queue *q, + struct sk_buff *skb) +{ + RB_CLEAR_NODE(&skb->rbnode); + FRAG_CB(skb)->next_frag = NULL; + + FRAG_CB(q->last_run_head)->frag_run_len += skb->len; + FRAG_CB(q->fragments_tail)->next_frag = skb; + q->fragments_tail = skb; +} + +/* Create a new "run" with the skb. */ +static void ip4_frag_create_run(struct inet_frag_queue *q, struct sk_buff *skb) +{ + if (q->last_run_head) + rb_link_node(&skb->rbnode, &q->last_run_head->rbnode, + &q->last_run_head->rbnode.rb_right); + else + rb_link_node(&skb->rbnode, NULL, &q->rb_fragments.rb_node); + rb_insert_color(&skb->rbnode, &q->rb_fragments); + + ip4_frag_init_run(skb); + q->fragments_tail = skb; + q->last_run_head = skb; +} + /* Describe an entry in the "incomplete datagrams" queue. */ struct ipq { struct inet_frag_queue q; @@ -654,6 +705,28 @@ struct sk_buff *ip_check_defrag(struct net *net, struct sk_buff *skb, u32 user) } EXPORT_SYMBOL(ip_check_defrag); +unsigned int inet_frag_rbtree_purge(struct rb_root *root) +{ + struct rb_node *p = rb_first(root); + unsigned int sum = 0; + + while (p) { + struct sk_buff *skb = rb_entry(p, struct sk_buff, rbnode); + + p = rb_next(p); + rb_erase(&skb->rbnode, root); + while (skb) { + struct sk_buff *next = FRAG_CB(skb)->next_frag; + + sum += skb->truesize; + kfree_skb(skb); + skb = next; + } + } + return sum; +} +EXPORT_SYMBOL(inet_frag_rbtree_purge); + #ifdef CONFIG_SYSCTL static int dist_min; -- GitLab From b3a0c61b73699b3764a6568e85c67f599158c541 Mon Sep 17 00:00:00 2001 From: Peter Oskolkov Date: Thu, 13 Sep 2018 07:59:01 -0700 Subject: [PATCH 0325/2269] ip: process in-order fragments efficiently This patch changes the runtime behavior of IP defrag queue: incoming in-order fragments are added to the end of the current list/"run" of in-order fragments at the tail. On some workloads, UDP stream performance is substantially improved: RX: ./udp_stream -F 10 -T 2 -l 60 TX: ./udp_stream -c -H -F 10 -T 5 -l 60 with this patchset applied on a 10Gbps receiver: throughput=9524.18 throughput_units=Mbit/s upstream (net-next): throughput=4608.93 throughput_units=Mbit/s Reported-by: Willem de Bruijn Signed-off-by: Peter Oskolkov Cc: Eric Dumazet Cc: Florian Westphal Signed-off-by: David S. Miller (cherry picked from commit a4fd284a1f8fd4b6c59aa59db2185b1e17c5c11c) Signed-off-by: Greg Kroah-Hartman --- net/ipv4/inet_fragment.c | 2 +- net/ipv4/ip_fragment.c | 110 ++++++++++++++++++++++++--------------- 2 files changed, 70 insertions(+), 42 deletions(-) diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 6904cbb7de1a4..f6764537148ce 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -145,7 +145,7 @@ void inet_frag_destroy(struct inet_frag_queue *q) fp = xp; } while (fp); } else { - sum_truesize = skb_rbtree_purge(&q->rb_fragments); + sum_truesize = inet_frag_rbtree_purge(&q->rb_fragments); } sum = sum_truesize + f->qsize; diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 26ace9d2d9767..88281fbce88ce 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -126,8 +126,8 @@ static u8 ip4_frag_ecn(u8 tos) static struct inet_frags ip4_frags; -static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, - struct net_device *dev); +static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb, + struct sk_buff *prev_tail, struct net_device *dev); static void ip4_frag_init(struct inet_frag_queue *q, const void *a) @@ -219,7 +219,12 @@ static void ip_expire(struct timer_list *t) head = skb_rb_first(&qp->q.rb_fragments); if (!head) goto out; - rb_erase(&head->rbnode, &qp->q.rb_fragments); + if (FRAG_CB(head)->next_frag) + rb_replace_node(&head->rbnode, + &FRAG_CB(head)->next_frag->rbnode, + &qp->q.rb_fragments); + else + rb_erase(&head->rbnode, &qp->q.rb_fragments); memset(&head->rbnode, 0, sizeof(head->rbnode)); barrier(); } @@ -320,7 +325,7 @@ static int ip_frag_reinit(struct ipq *qp) return -ETIMEDOUT; } - sum_truesize = skb_rbtree_purge(&qp->q.rb_fragments); + sum_truesize = inet_frag_rbtree_purge(&qp->q.rb_fragments); sub_frag_mem_limit(qp->q.net, sum_truesize); qp->q.flags = 0; @@ -329,6 +334,7 @@ static int ip_frag_reinit(struct ipq *qp) qp->q.fragments = NULL; qp->q.rb_fragments = RB_ROOT; qp->q.fragments_tail = NULL; + qp->q.last_run_head = NULL; qp->iif = 0; qp->ecn = 0; @@ -340,7 +346,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) { struct net *net = container_of(qp->q.net, struct net, ipv4.frags); struct rb_node **rbn, *parent; - struct sk_buff *skb1; + struct sk_buff *skb1, *prev_tail; struct net_device *dev; unsigned int fragsize; int flags, offset; @@ -418,38 +424,41 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) */ /* Find out where to put this fragment. */ - skb1 = qp->q.fragments_tail; - if (!skb1) { - /* This is the first fragment we've received. */ - rb_link_node(&skb->rbnode, NULL, &qp->q.rb_fragments.rb_node); - qp->q.fragments_tail = skb; - } else if ((skb1->ip_defrag_offset + skb1->len) < end) { - /* This is the common/special case: skb goes to the end. */ + prev_tail = qp->q.fragments_tail; + if (!prev_tail) + ip4_frag_create_run(&qp->q, skb); /* First fragment. */ + else if (prev_tail->ip_defrag_offset + prev_tail->len < end) { + /* This is the common case: skb goes to the end. */ /* Detect and discard overlaps. */ - if (offset < (skb1->ip_defrag_offset + skb1->len)) + if (offset < prev_tail->ip_defrag_offset + prev_tail->len) goto discard_qp; - /* Insert after skb1. */ - rb_link_node(&skb->rbnode, &skb1->rbnode, &skb1->rbnode.rb_right); - qp->q.fragments_tail = skb; + if (offset == prev_tail->ip_defrag_offset + prev_tail->len) + ip4_frag_append_to_last_run(&qp->q, skb); + else + ip4_frag_create_run(&qp->q, skb); } else { - /* Binary search. Note that skb can become the first fragment, but - * not the last (covered above). */ + /* Binary search. Note that skb can become the first fragment, + * but not the last (covered above). + */ rbn = &qp->q.rb_fragments.rb_node; do { parent = *rbn; skb1 = rb_to_skb(parent); if (end <= skb1->ip_defrag_offset) rbn = &parent->rb_left; - else if (offset >= skb1->ip_defrag_offset + skb1->len) + else if (offset >= skb1->ip_defrag_offset + + FRAG_CB(skb1)->frag_run_len) rbn = &parent->rb_right; else /* Found an overlap with skb1. */ goto discard_qp; } while (*rbn); /* Here we have parent properly set, and rbn pointing to - * one of its NULL left/right children. Insert skb. */ + * one of its NULL left/right children. Insert skb. + */ + ip4_frag_init_run(skb); rb_link_node(&skb->rbnode, parent, rbn); + rb_insert_color(&skb->rbnode, &qp->q.rb_fragments); } - rb_insert_color(&skb->rbnode, &qp->q.rb_fragments); if (dev) qp->iif = dev->ifindex; @@ -476,7 +485,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) unsigned long orefdst = skb->_skb_refdst; skb->_skb_refdst = 0UL; - err = ip_frag_reasm(qp, skb, dev); + err = ip_frag_reasm(qp, skb, prev_tail, dev); skb->_skb_refdst = orefdst; return err; } @@ -495,7 +504,7 @@ err: /* Build a new IP datagram from all its fragments. */ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb, - struct net_device *dev) + struct sk_buff *prev_tail, struct net_device *dev) { struct net *net = container_of(qp->q.net, struct net, ipv4.frags); struct iphdr *iph; @@ -519,10 +528,16 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb, fp = skb_clone(skb, GFP_ATOMIC); if (!fp) goto out_nomem; - rb_replace_node(&skb->rbnode, &fp->rbnode, &qp->q.rb_fragments); + FRAG_CB(fp)->next_frag = FRAG_CB(skb)->next_frag; + if (RB_EMPTY_NODE(&skb->rbnode)) + FRAG_CB(prev_tail)->next_frag = fp; + else + rb_replace_node(&skb->rbnode, &fp->rbnode, + &qp->q.rb_fragments); if (qp->q.fragments_tail == skb) qp->q.fragments_tail = fp; skb_morph(skb, head); + FRAG_CB(skb)->next_frag = FRAG_CB(head)->next_frag; rb_replace_node(&head->rbnode, &skb->rbnode, &qp->q.rb_fragments); consume_skb(head); @@ -558,7 +573,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb, for (i = 0; i < skb_shinfo(head)->nr_frags; i++) plen += skb_frag_size(&skb_shinfo(head)->frags[i]); clone->len = clone->data_len = head->data_len - plen; - skb->truesize += clone->truesize; + head->truesize += clone->truesize; clone->csum = 0; clone->ip_summed = head->ip_summed; add_frag_mem_limit(qp->q.net, clone->truesize); @@ -571,24 +586,36 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb, skb_push(head, head->data - skb_network_header(head)); /* Traverse the tree in order, to build frag_list. */ + fp = FRAG_CB(head)->next_frag; rbn = rb_next(&head->rbnode); rb_erase(&head->rbnode, &qp->q.rb_fragments); - while (rbn) { - struct rb_node *rbnext = rb_next(rbn); - fp = rb_to_skb(rbn); - rb_erase(rbn, &qp->q.rb_fragments); - rbn = rbnext; - *nextp = fp; - nextp = &fp->next; - fp->prev = NULL; - memset(&fp->rbnode, 0, sizeof(fp->rbnode)); - head->data_len += fp->len; - head->len += fp->len; - if (head->ip_summed != fp->ip_summed) - head->ip_summed = CHECKSUM_NONE; - else if (head->ip_summed == CHECKSUM_COMPLETE) - head->csum = csum_add(head->csum, fp->csum); - head->truesize += fp->truesize; + while (rbn || fp) { + /* fp points to the next sk_buff in the current run; + * rbn points to the next run. + */ + /* Go through the current run. */ + while (fp) { + *nextp = fp; + nextp = &fp->next; + fp->prev = NULL; + memset(&fp->rbnode, 0, sizeof(fp->rbnode)); + head->data_len += fp->len; + head->len += fp->len; + if (head->ip_summed != fp->ip_summed) + head->ip_summed = CHECKSUM_NONE; + else if (head->ip_summed == CHECKSUM_COMPLETE) + head->csum = csum_add(head->csum, fp->csum); + head->truesize += fp->truesize; + fp = FRAG_CB(fp)->next_frag; + } + /* Move to the next run. */ + if (rbn) { + struct rb_node *rbnext = rb_next(rbn); + + fp = rb_to_skb(rbn); + rb_erase(rbn, &qp->q.rb_fragments); + rbn = rbnext; + } } sub_frag_mem_limit(qp->q.net, head->truesize); @@ -624,6 +651,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb, qp->q.fragments = NULL; qp->q.rb_fragments = RB_ROOT; qp->q.fragments_tail = NULL; + qp->q.last_run_head = NULL; return 0; out_nomem: -- GitLab From 08fb833b40e361ce927c64d40e348af96996d9eb Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Thu, 13 Sep 2018 07:59:02 -0700 Subject: [PATCH 0326/2269] ip: frags: fix crash in ip_do_fragment() commit 5d407b071dc369c26a38398326ee2be53651cfe4 upstream A kernel crash occurrs when defragmented packet is fragmented in ip_do_fragment(). In defragment routine, skb_orphan() is called and skb->ip_defrag_offset is set. but skb->sk and skb->ip_defrag_offset are same union member. so that frag->sk is not NULL. Hence crash occurrs in skb->sk check routine in ip_do_fragment() when defragmented packet is fragmented. test commands: %iptables -t nat -I POSTROUTING -j MASQUERADE %hping3 192.168.4.2 -s 1000 -p 2000 -d 60000 splat looks like: [ 261.069429] kernel BUG at net/ipv4/ip_output.c:636! [ 261.075753] invalid opcode: 0000 [#1] SMP DEBUG_PAGEALLOC KASAN PTI [ 261.083854] CPU: 1 PID: 1349 Comm: hping3 Not tainted 4.19.0-rc2+ #3 [ 261.100977] RIP: 0010:ip_do_fragment+0x1613/0x2600 [ 261.106945] Code: e8 e2 38 e3 fe 4c 8b 44 24 18 48 8b 74 24 08 e9 92 f6 ff ff 80 3c 02 00 0f 85 da 07 00 00 48 8b b5 d0 00 00 00 e9 25 f6 ff ff <0f> 0b 0f 0b 44 8b 54 24 58 4c 8b 4c 24 18 4c 8b 5c 24 60 4c 8b 6c [ 261.127015] RSP: 0018:ffff8801031cf2c0 EFLAGS: 00010202 [ 261.134156] RAX: 1ffff1002297537b RBX: ffffed0020639e6e RCX: 0000000000000004 [ 261.142156] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff880114ba9bd8 [ 261.150157] RBP: ffff880114ba8a40 R08: ffffed0022975395 R09: ffffed0022975395 [ 261.158157] R10: 0000000000000001 R11: ffffed0022975394 R12: ffff880114ba9ca4 [ 261.166159] R13: 0000000000000010 R14: ffff880114ba9bc0 R15: dffffc0000000000 [ 261.174169] FS: 00007fbae2199700(0000) GS:ffff88011b400000(0000) knlGS:0000000000000000 [ 261.183012] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 261.189013] CR2: 00005579244fe000 CR3: 0000000119bf4000 CR4: 00000000001006e0 [ 261.198158] Call Trace: [ 261.199018] ? dst_output+0x180/0x180 [ 261.205011] ? save_trace+0x300/0x300 [ 261.209018] ? ip_copy_metadata+0xb00/0xb00 [ 261.213034] ? sched_clock_local+0xd4/0x140 [ 261.218158] ? kill_l4proto+0x120/0x120 [nf_conntrack] [ 261.223014] ? rt_cpu_seq_stop+0x10/0x10 [ 261.227014] ? find_held_lock+0x39/0x1c0 [ 261.233008] ip_finish_output+0x51d/0xb50 [ 261.237006] ? ip_fragment.constprop.56+0x220/0x220 [ 261.243011] ? nf_ct_l4proto_register_one+0x5b0/0x5b0 [nf_conntrack] [ 261.250152] ? rcu_is_watching+0x77/0x120 [ 261.255010] ? nf_nat_ipv4_out+0x1e/0x2b0 [nf_nat_ipv4] [ 261.261033] ? nf_hook_slow+0xb1/0x160 [ 261.265007] ip_output+0x1c7/0x710 [ 261.269005] ? ip_mc_output+0x13f0/0x13f0 [ 261.273002] ? __local_bh_enable_ip+0xe9/0x1b0 [ 261.278152] ? ip_fragment.constprop.56+0x220/0x220 [ 261.282996] ? nf_hook_slow+0xb1/0x160 [ 261.287007] raw_sendmsg+0x21f9/0x4420 [ 261.291008] ? dst_output+0x180/0x180 [ 261.297003] ? sched_clock_cpu+0x126/0x170 [ 261.301003] ? find_held_lock+0x39/0x1c0 [ 261.306155] ? stop_critical_timings+0x420/0x420 [ 261.311004] ? check_flags.part.36+0x450/0x450 [ 261.315005] ? _raw_spin_unlock_irq+0x29/0x40 [ 261.320995] ? _raw_spin_unlock_irq+0x29/0x40 [ 261.326142] ? cyc2ns_read_end+0x10/0x10 [ 261.330139] ? raw_bind+0x280/0x280 [ 261.334138] ? sched_clock_cpu+0x126/0x170 [ 261.338995] ? check_flags.part.36+0x450/0x450 [ 261.342991] ? __lock_acquire+0x4500/0x4500 [ 261.348994] ? inet_sendmsg+0x11c/0x500 [ 261.352989] ? dst_output+0x180/0x180 [ 261.357012] inet_sendmsg+0x11c/0x500 [ ... ] v2: - clear skb->sk at reassembly routine.(Eric Dumarzet) Fixes: fa0f527358bd ("ip: use rb trees for IP frag queue.") Suggested-by: Eric Dumazet Signed-off-by: Taehee Yoo Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ip_fragment.c | 1 + net/ipv6/netfilter/nf_conntrack_reasm.c | 1 + 2 files changed, 2 insertions(+) diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 88281fbce88ce..e7227128df2c8 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -599,6 +599,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb, nextp = &fp->next; fp->prev = NULL; memset(&fp->rbnode, 0, sizeof(fp->rbnode)); + fp->sk = NULL; head->data_len += fp->len; head->len += fp->len; if (head->ip_summed != fp->ip_summed) diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 82ce0d0f54bf9..2ed8536e10b64 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -453,6 +453,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *prev, struct net_devic else if (head->ip_summed == CHECKSUM_COMPLETE) head->csum = csum_add(head->csum, fp->csum); head->truesize += fp->truesize; + fp->sk = NULL; } sub_frag_mem_limit(fq->q.net, head->truesize); -- GitLab From 8626c40a30937960926727a8606c6d0d8207425c Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Thu, 18 Jan 2018 14:05:05 +0000 Subject: [PATCH 0327/2269] mtd: ubi: wl: Fix error return code in ubi_wl_init() commit 7233982ade15eeac05c6f351e8d347406e6bcd2f upstream. Fix to return error code -ENOMEM from the kmem_cache_alloc() error handling case instead of 0, as done elsewhere in this function. Fixes: f78e5623f45b ("ubi: fastmap: Erase outdated anchor PEBs during attach") Signed-off-by: Wei Yongjun Reviewed-by: Boris Brezillon Signed-off-by: Richard Weinberger Cc: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/ubi/wl.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c index 23a6986d512b4..a8f74d9bba4ff 100644 --- a/drivers/mtd/ubi/wl.c +++ b/drivers/mtd/ubi/wl.c @@ -1615,8 +1615,10 @@ int ubi_wl_init(struct ubi_device *ubi, struct ubi_attach_info *ai) cond_resched(); e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL); - if (!e) + if (!e) { + err = -ENOMEM; goto out_free; + } e->pnum = aeb->pnum; e->ec = aeb->ec; @@ -1635,8 +1637,10 @@ int ubi_wl_init(struct ubi_device *ubi, struct ubi_attach_info *ai) cond_resched(); e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL); - if (!e) + if (!e) { + err = -ENOMEM; goto out_free; + } e->pnum = aeb->pnum; e->ec = aeb->ec; -- GitLab From ab75811f71815092dc4f66e283b3851dbc989b2e Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Fri, 11 May 2018 10:49:25 +0800 Subject: [PATCH 0328/2269] tun: fix use after free for ptr_ring commit b196d88aba8ac72b775137854121097f4c4c6862 upstream. We used to initialize ptr_ring during TUNSETIFF, this is because its size depends on the tx_queue_len of netdevice. And we try to clean it up when socket were detached from netdevice. A race were spotted when trying to do uninit during a read which will lead a use after free for pointer ring. Solving this by always initialize a zero size ptr_ring in open() and do resizing during TUNSETIFF, and then we can safely do cleanup during close(). With this, there's no need for the workaround that was introduced by commit 4df0bfc79904 ("tun: fix a memory leak for tfile->tx_array"). Backport Note :- Comparison with the upstream patch: [1] A "semantic revert" of the changes made in 4df0bfc799("tun: fix a memory leak for tfile->tx_array"). 4df0bfc799 was applied upstream, and then skb array was changed to use ptr_ring. The upstream patch then removes the changes introduced by 4df0bfc799. This backport does the same; "revert" the changes made by 4df0bfc799. [2] xdp_rxq_info_unreg() being called in relevant locations As xdp_rxq_info related patches are not present in 4.14, these changes are not needed in the backport. [3] An instance of ptr_ring_init needs to be replaced by skb_array_init Inside tun_attach() [4] ptr_ring_cleanup needs to be replaced by skb_array_cleanup Inside tun_chr_close() Note that the backport for 7063efd33b ("tuntap: fix use after free during release") needs to be applied on top of this patch. Reported-by: syzbot+e8b902c3c3fadf0a9dba@syzkaller.appspotmail.com Cc: Eric Dumazet Cc: Cong Wang Cc: Michael S. Tsirkin Fixes: 1576d9860599 ("tun: switch to use skb array for tx") Signed-off-by: Jason Wang Acked-by: Michael S. Tsirkin Signed-off-by: David S. Miller Signed-off-by: Zubin Mithra Signed-off-by: Greg Kroah-Hartman --- drivers/net/tun.c | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index cb17ffadfc30e..87a989be1cef8 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -534,14 +534,6 @@ static void tun_queue_purge(struct tun_file *tfile) skb_queue_purge(&tfile->sk.sk_error_queue); } -static void tun_cleanup_tx_array(struct tun_file *tfile) -{ - if (tfile->tx_array.ring.queue) { - skb_array_cleanup(&tfile->tx_array); - memset(&tfile->tx_array, 0, sizeof(tfile->tx_array)); - } -} - static void __tun_detach(struct tun_file *tfile, bool clean) { struct tun_file *ntfile; @@ -583,7 +575,6 @@ static void __tun_detach(struct tun_file *tfile, bool clean) tun->dev->reg_state == NETREG_REGISTERED) unregister_netdevice(tun->dev); } - tun_cleanup_tx_array(tfile); sock_put(&tfile->sk); } } @@ -623,13 +614,11 @@ static void tun_detach_all(struct net_device *dev) /* Drop read queue */ tun_queue_purge(tfile); sock_put(&tfile->sk); - tun_cleanup_tx_array(tfile); } list_for_each_entry_safe(tfile, tmp, &tun->disabled, next) { tun_enable_queue(tfile); tun_queue_purge(tfile); sock_put(&tfile->sk); - tun_cleanup_tx_array(tfile); } BUG_ON(tun->numdisabled != 0); @@ -675,7 +664,7 @@ static int tun_attach(struct tun_struct *tun, struct file *file, bool skip_filte } if (!tfile->detached && - skb_array_init(&tfile->tx_array, dev->tx_queue_len, GFP_KERNEL)) { + skb_array_resize(&tfile->tx_array, dev->tx_queue_len, GFP_KERNEL)) { err = -ENOMEM; goto out; } @@ -2624,6 +2613,11 @@ static int tun_chr_open(struct inode *inode, struct file * file) &tun_proto, 0); if (!tfile) return -ENOMEM; + if (skb_array_init(&tfile->tx_array, 0, GFP_KERNEL)) { + sk_free(&tfile->sk); + return -ENOMEM; + } + RCU_INIT_POINTER(tfile->tun, NULL); tfile->flags = 0; tfile->ifindex = 0; @@ -2644,8 +2638,6 @@ static int tun_chr_open(struct inode *inode, struct file * file) sock_set_flag(&tfile->sk, SOCK_ZEROCOPY); - memset(&tfile->tx_array, 0, sizeof(tfile->tx_array)); - return 0; } @@ -2654,6 +2646,7 @@ static int tun_chr_close(struct inode *inode, struct file *file) struct tun_file *tfile = file->private_data; tun_detach(tfile, true); + skb_array_cleanup(&tfile->tx_array); return 0; } -- GitLab From daf0ca743b28050664c7bd03b6fb46a95d94d248 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Wed, 16 May 2018 20:39:33 +0800 Subject: [PATCH 0329/2269] tuntap: fix use after free during release commit 7063efd33bb15abc0160347f89eb5aba6b7d000e upstream. After commit b196d88aba8a ("tun: fix use after free for ptr_ring") we need clean up tx ring during release(). But unfortunately, it tries to do the cleanup blindly after socket were destroyed which will lead another use-after-free. Fix this by doing the cleanup before dropping the last reference of the socket in __tun_detach(). Backport Note :- Upstream commit moves the ptr_ring_cleanup call from tun_chr_close to __tun_detach. Upstream applied that patch after replacing skb_array with ptr_ring. This patch moves the skb_array_cleanup call from tun_chr_close to __tun_detach. Reported-by: Andrei Vagin Acked-by: Andrei Vagin Fixes: b196d88aba8a ("tun: fix use after free for ptr_ring") Signed-off-by: Jason Wang Signed-off-by: David S. Miller Signed-off-by: Zubin Mithra Signed-off-by: Greg Kroah-Hartman --- drivers/net/tun.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 87a989be1cef8..e0baea2dfd3cb 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -575,6 +575,7 @@ static void __tun_detach(struct tun_file *tfile, bool clean) tun->dev->reg_state == NETREG_REGISTERED) unregister_netdevice(tun->dev); } + skb_array_cleanup(&tfile->tx_array); sock_put(&tfile->sk); } } @@ -2646,7 +2647,6 @@ static int tun_chr_close(struct inode *inode, struct file *file) struct tun_file *tfile = file->private_data; tun_detach(tfile, true); - skb_array_cleanup(&tfile->tx_array); return 0; } -- GitLab From 8b34a7b14ee73ed625c1d32ae3f5e59ee22fc709 Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Tue, 21 Aug 2018 21:51:45 -0700 Subject: [PATCH 0330/2269] autofs: fix autofs_sbi() does not check super block type commit 0633da48f0793aeba27f82d30605624416723a91 upstream. autofs_sbi() does not check the superblock magic number to verify it has been given an autofs super block. Backport Note: autofs4 has been renamed to autofs upstream. As a result the upstream patch does not apply cleanly onto 4.14.y. Link: http://lkml.kernel.org/r/153475422934.17131.7563724552005298277.stgit@pluto.themaw.net Reported-by: Signed-off-by: Ian Kent Reviewed-by: Andrew Morton Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Zubin Mithra Signed-off-by: Greg Kroah-Hartman --- fs/autofs4/autofs_i.h | 4 +++- fs/autofs4/inode.c | 1 - 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h index 4737615f0eaaa..ce696d6c46412 100644 --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h @@ -26,6 +26,7 @@ #include #include #include +#include /* This is the range of ioctl() numbers we claim as ours */ #define AUTOFS_IOC_FIRST AUTOFS_IOC_READY @@ -124,7 +125,8 @@ struct autofs_sb_info { static inline struct autofs_sb_info *autofs4_sbi(struct super_block *sb) { - return (struct autofs_sb_info *)(sb->s_fs_info); + return sb->s_magic != AUTOFS_SUPER_MAGIC ? + NULL : (struct autofs_sb_info *)(sb->s_fs_info); } static inline struct autofs_info *autofs4_dentry_ino(struct dentry *dentry) diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c index 09e7d68dff02d..3c7e727612fa3 100644 --- a/fs/autofs4/inode.c +++ b/fs/autofs4/inode.c @@ -14,7 +14,6 @@ #include #include #include -#include #include "autofs_i.h" #include -- GitLab From 06274364edb4407b386a996a7ff46c3ca3459b70 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 12 Sep 2018 23:57:48 -1000 Subject: [PATCH 0331/2269] mm: get rid of vmacache_flush_all() entirely commit 7a9cdebdcc17e426fb5287e4a82db1dfe86339b2 upstream. Jann Horn points out that the vmacache_flush_all() function is not only potentially expensive, it's buggy too. It also happens to be entirely unnecessary, because the sequence number overflow case can be avoided by simply making the sequence number be 64-bit. That doesn't even grow the data structures in question, because the other adjacent fields are already 64-bit. So simplify the whole thing by just making the sequence number overflow case go away entirely, which gets rid of all the complications and makes the code faster too. Win-win. [ Oleg Nesterov points out that the VMACACHE_FULL_FLUSHES statistics also just goes away entirely with this ] Reported-by: Jann Horn Suggested-by: Will Deacon Acked-by: Davidlohr Bueso Cc: Oleg Nesterov Cc: stable@kernel.org Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- include/linux/mm_types.h | 2 +- include/linux/mm_types_task.h | 2 +- include/linux/vm_event_item.h | 1 - include/linux/vmacache.h | 5 ----- mm/debug.c | 4 ++-- mm/vmacache.c | 38 ----------------------------------- 6 files changed, 4 insertions(+), 48 deletions(-) diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 9f0bb908e2b5f..e41ef532c4ce0 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -354,7 +354,7 @@ struct kioctx_table; struct mm_struct { struct vm_area_struct *mmap; /* list of VMAs */ struct rb_root mm_rb; - u32 vmacache_seqnum; /* per-thread vmacache */ + u64 vmacache_seqnum; /* per-thread vmacache */ #ifdef CONFIG_MMU unsigned long (*get_unmapped_area) (struct file *filp, unsigned long addr, unsigned long len, diff --git a/include/linux/mm_types_task.h b/include/linux/mm_types_task.h index 5fe87687664c7..d7016dcb245ee 100644 --- a/include/linux/mm_types_task.h +++ b/include/linux/mm_types_task.h @@ -32,7 +32,7 @@ #define VMACACHE_MASK (VMACACHE_SIZE - 1) struct vmacache { - u32 seqnum; + u64 seqnum; struct vm_area_struct *vmas[VMACACHE_SIZE]; }; diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index 5c7f010676a74..47a3441cf4c4a 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -105,7 +105,6 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, #ifdef CONFIG_DEBUG_VM_VMACACHE VMACACHE_FIND_CALLS, VMACACHE_FIND_HITS, - VMACACHE_FULL_FLUSHES, #endif #ifdef CONFIG_SWAP SWAP_RA, diff --git a/include/linux/vmacache.h b/include/linux/vmacache.h index a5b3aa8d281f8..a09b28f76460b 100644 --- a/include/linux/vmacache.h +++ b/include/linux/vmacache.h @@ -16,7 +16,6 @@ static inline void vmacache_flush(struct task_struct *tsk) memset(tsk->vmacache.vmas, 0, sizeof(tsk->vmacache.vmas)); } -extern void vmacache_flush_all(struct mm_struct *mm); extern void vmacache_update(unsigned long addr, struct vm_area_struct *newvma); extern struct vm_area_struct *vmacache_find(struct mm_struct *mm, unsigned long addr); @@ -30,10 +29,6 @@ extern struct vm_area_struct *vmacache_find_exact(struct mm_struct *mm, static inline void vmacache_invalidate(struct mm_struct *mm) { mm->vmacache_seqnum++; - - /* deal with overflows */ - if (unlikely(mm->vmacache_seqnum == 0)) - vmacache_flush_all(mm); } #endif /* __LINUX_VMACACHE_H */ diff --git a/mm/debug.c b/mm/debug.c index 6726bec731c95..c55abc893fdcf 100644 --- a/mm/debug.c +++ b/mm/debug.c @@ -100,7 +100,7 @@ EXPORT_SYMBOL(dump_vma); void dump_mm(const struct mm_struct *mm) { - pr_emerg("mm %p mmap %p seqnum %d task_size %lu\n" + pr_emerg("mm %p mmap %p seqnum %llu task_size %lu\n" #ifdef CONFIG_MMU "get_unmapped_area %p\n" #endif @@ -128,7 +128,7 @@ void dump_mm(const struct mm_struct *mm) "tlb_flush_pending %d\n" "def_flags: %#lx(%pGv)\n", - mm, mm->mmap, mm->vmacache_seqnum, mm->task_size, + mm, mm->mmap, (long long) mm->vmacache_seqnum, mm->task_size, #ifdef CONFIG_MMU mm->get_unmapped_area, #endif diff --git a/mm/vmacache.c b/mm/vmacache.c index db7596eb6132e..f1729617dc85c 100644 --- a/mm/vmacache.c +++ b/mm/vmacache.c @@ -7,44 +7,6 @@ #include #include -/* - * Flush vma caches for threads that share a given mm. - * - * The operation is safe because the caller holds the mmap_sem - * exclusively and other threads accessing the vma cache will - * have mmap_sem held at least for read, so no extra locking - * is required to maintain the vma cache. - */ -void vmacache_flush_all(struct mm_struct *mm) -{ - struct task_struct *g, *p; - - count_vm_vmacache_event(VMACACHE_FULL_FLUSHES); - - /* - * Single threaded tasks need not iterate the entire - * list of process. We can avoid the flushing as well - * since the mm's seqnum was increased and don't have - * to worry about other threads' seqnum. Current's - * flush will occur upon the next lookup. - */ - if (atomic_read(&mm->mm_users) == 1) - return; - - rcu_read_lock(); - for_each_process_thread(g, p) { - /* - * Only flush the vmacache pointers as the - * mm seqnum is already set and curr's will - * be set upon invalidation when the next - * lookup is done. - */ - if (mm == p->mm) - vmacache_flush(p); - } - rcu_read_unlock(); -} - /* * This task may be accessing a foreign mm via (for example) * get_user_pages()->find_vma(). The vmacache is task-local and this -- GitLab From 1244bbb3e92135d247e2dddfa6fe5e3e171a9635 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 19 Sep 2018 22:43:49 +0200 Subject: [PATCH 0332/2269] Linux 4.14.71 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index aa458afa7fa2c..dd4eaeeb2050d 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 70 +SUBLEVEL = 71 EXTRAVERSION = NAME = Petit Gorille -- GitLab From 666c420fa3ea13f407550576c08f592d0f7d8202 Mon Sep 17 00:00:00 2001 From: Martijn Coenen Date: Sat, 25 Aug 2018 13:50:56 -0700 Subject: [PATCH 0333/2269] FROMLIST: ANDROID: binder: Add BINDER_GET_NODE_INFO_FOR_REF ioctl. This allows the context manager to retrieve information about nodes that it holds a reference to, such as the current number of references to those nodes. Such information can for example be used to determine whether the servicemanager is the only process holding a reference to a node. This information can then be passed on to the process holding the node, which can in turn decide whether it wants to shut down to reduce resource usage. Signed-off-by: Martijn Coenen --- drivers/android/binder.c | 55 +++++++++++++++++++++++++++++ include/uapi/linux/android/binder.h | 10 ++++++ 2 files changed, 65 insertions(+) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 9c06e7f46d7f0..dcd75ed81d003 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -4712,6 +4712,42 @@ out: return ret; } +static int binder_ioctl_get_node_info_for_ref(struct binder_proc *proc, + struct binder_node_info_for_ref *info) +{ + struct binder_node *node; + struct binder_context *context = proc->context; + __u32 handle = info->handle; + + if (info->strong_count || info->weak_count || info->reserved1 || + info->reserved2 || info->reserved3) { + binder_user_error("%d BINDER_GET_NODE_INFO_FOR_REF: only handle may be non-zero.", + proc->pid); + return -EINVAL; + } + + /* This ioctl may only be used by the context manager */ + mutex_lock(&context->context_mgr_node_lock); + if (!context->binder_context_mgr_node || + context->binder_context_mgr_node->proc != proc) { + mutex_unlock(&context->context_mgr_node_lock); + return -EPERM; + } + mutex_unlock(&context->context_mgr_node_lock); + + node = binder_get_node_from_ref(proc, handle, true, NULL); + if (!node) + return -EINVAL; + + info->strong_count = node->local_strong_refs + + node->internal_strong_refs; + info->weak_count = node->local_weak_refs; + + binder_put_node(node); + + return 0; +} + static int binder_ioctl_get_node_debug_info(struct binder_proc *proc, struct binder_node_debug_info *info) { @@ -4806,6 +4842,25 @@ static long binder_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) } break; } + case BINDER_GET_NODE_INFO_FOR_REF: { + struct binder_node_info_for_ref info; + + if (copy_from_user(&info, ubuf, sizeof(info))) { + ret = -EFAULT; + goto err; + } + + ret = binder_ioctl_get_node_info_for_ref(proc, &info); + if (ret < 0) + goto err; + + if (copy_to_user(ubuf, &info, sizeof(info))) { + ret = -EFAULT; + goto err; + } + + break; + } case BINDER_GET_NODE_DEBUG_INFO: { struct binder_node_debug_info info; diff --git a/include/uapi/linux/android/binder.h b/include/uapi/linux/android/binder.h index b4723e36b6cf2..4a1c285b97b19 100644 --- a/include/uapi/linux/android/binder.h +++ b/include/uapi/linux/android/binder.h @@ -247,6 +247,15 @@ struct binder_node_debug_info { __u32 has_weak_ref; }; +struct binder_node_info_for_ref { + __u32 handle; + __u32 strong_count; + __u32 weak_count; + __u32 reserved1; + __u32 reserved2; + __u32 reserved3; +}; + #define BINDER_WRITE_READ _IOWR('b', 1, struct binder_write_read) #define BINDER_SET_IDLE_TIMEOUT _IOW('b', 3, __s64) #define BINDER_SET_MAX_THREADS _IOW('b', 5, __u32) @@ -255,6 +264,7 @@ struct binder_node_debug_info { #define BINDER_THREAD_EXIT _IOW('b', 8, __s32) #define BINDER_VERSION _IOWR('b', 9, struct binder_version) #define BINDER_GET_NODE_DEBUG_INFO _IOWR('b', 11, struct binder_node_debug_info) +#define BINDER_GET_NODE_INFO_FOR_REF _IOWR('b', 12, struct binder_node_info_for_ref) /* * NOTE: Two special error codes you should check for when calling -- GitLab From 799d8f1f0de86caf67af23324bfc54997a2d277f Mon Sep 17 00:00:00 2001 From: Petr Oros Date: Wed, 5 Sep 2018 14:37:45 +0200 Subject: [PATCH 0334/2269] be2net: Fix memory leak in be_cmd_get_profile_config() [ Upstream commit 9d7f19dc4673fbafebfcbf30eb90e09fa7d1c037 ] DMA allocated memory is lost in be_cmd_get_profile_config() when we call it with non-NULL port_res parameter. Signed-off-by: Petr Oros Reviewed-by: Ivan Vecera Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/emulex/benet/be_cmds.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c index 02dd5246dfae9..1589a568bfe0a 100644 --- a/drivers/net/ethernet/emulex/benet/be_cmds.c +++ b/drivers/net/ethernet/emulex/benet/be_cmds.c @@ -4500,7 +4500,7 @@ int be_cmd_get_profile_config(struct be_adapter *adapter, port_res->max_vfs += le16_to_cpu(pcie->num_vfs); } } - return status; + goto err; } pcie = be_get_pcie_desc(resp->func_param, desc_count, -- GitLab From 47f74ff0024397550a2fe5ecef7db3638223bafb Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Sun, 5 Aug 2018 09:19:33 +0300 Subject: [PATCH 0335/2269] net/mlx5: Fix use-after-free in self-healing flow [ Upstream commit 76d5581c870454be5f1f1a106c57985902e7ea20 ] When the mlx5 health mechanism detects a problem while the driver is in the middle of init_one or remove_one, the driver needs to prevent the health mechanism from scheduling future work; if future work is scheduled, there is a problem with use-after-free: the system WQ tries to run the work item (which has been freed) at the scheduled future time. Prevent this by disabling work item scheduling in the health mechanism when the driver is in the middle of init_one() or remove_one(). Fixes: e126ba97dba9 ("mlx5: Add driver for Mellanox Connect-IB adapters") Signed-off-by: Jack Morgenstein Reviewed-by: Feras Daoud Signed-off-by: Saeed Mahameed Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx5/core/health.c | 10 +++++++++- drivers/net/ethernet/mellanox/mlx5/core/main.c | 6 +++--- include/linux/mlx5/driver.h | 2 +- 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index db86e1506c8b6..61f284966a8c9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -333,9 +333,17 @@ void mlx5_start_health_poll(struct mlx5_core_dev *dev) add_timer(&health->timer); } -void mlx5_stop_health_poll(struct mlx5_core_dev *dev) +void mlx5_stop_health_poll(struct mlx5_core_dev *dev, bool disable_health) { struct mlx5_core_health *health = &dev->priv.health; + unsigned long flags; + + if (disable_health) { + spin_lock_irqsave(&health->wq_lock, flags); + set_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags); + set_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags); + spin_unlock_irqrestore(&health->wq_lock, flags); + } del_timer_sync(&health->timer); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 4ddd632d10f99..68e48b1884588 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1227,7 +1227,7 @@ err_cleanup_once: mlx5_cleanup_once(dev); err_stop_poll: - mlx5_stop_health_poll(dev); + mlx5_stop_health_poll(dev, boot); if (mlx5_cmd_teardown_hca(dev)) { dev_err(&dev->pdev->dev, "tear_down_hca failed, skip cleanup\n"); goto out_err; @@ -1286,7 +1286,7 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, mlx5_free_irq_vectors(dev); if (cleanup) mlx5_cleanup_once(dev); - mlx5_stop_health_poll(dev); + mlx5_stop_health_poll(dev, cleanup); err = mlx5_cmd_teardown_hca(dev); if (err) { dev_err(&dev->pdev->dev, "tear_down_hca failed, skip cleanup\n"); @@ -1548,7 +1548,7 @@ static int mlx5_try_fast_unload(struct mlx5_core_dev *dev) * with the HCA, so the health polll is no longer needed. */ mlx5_drain_health_wq(dev); - mlx5_stop_health_poll(dev); + mlx5_stop_health_poll(dev, false); ret = mlx5_cmd_force_teardown_hca(dev); if (ret) { diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index f2f9e957bf1b5..c4d19e77fea8b 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -950,7 +950,7 @@ int mlx5_cmd_free_uar(struct mlx5_core_dev *dev, u32 uarn); void mlx5_health_cleanup(struct mlx5_core_dev *dev); int mlx5_health_init(struct mlx5_core_dev *dev); void mlx5_start_health_poll(struct mlx5_core_dev *dev); -void mlx5_stop_health_poll(struct mlx5_core_dev *dev); +void mlx5_stop_health_poll(struct mlx5_core_dev *dev, bool disable_health); void mlx5_drain_health_wq(struct mlx5_core_dev *dev); void mlx5_trigger_health_work(struct mlx5_core_dev *dev); void mlx5_drain_health_recovery(struct mlx5_core_dev *dev); -- GitLab From e4df3c97c379a7b0d067a954c9f84559d0a73c8e Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Wed, 5 Sep 2018 15:23:18 +0200 Subject: [PATCH 0336/2269] net: qca_spi: Fix race condition in spi transfers [ Upstream commit e65a9e480e91ddf9e15155454d370cead64689c8 ] With performance optimization the spi transfer and messages of basic register operations like qcaspi_read_register moved into the private driver structure. But they weren't protected against mutual access (e.g. between driver kthread and ethtool). So dumping the QCA7000 registers via ethtool during network traffic could make spi_sync hang forever, because the completion in spi_message is overwritten. So revert the optimization completely. Fixes: 291ab06ecf676 ("net: qualcomm: new Ethernet over SPI driver for QCA700") Signed-off-by: Stefan Wahren Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/qualcomm/qca_7k.c | 76 ++++++++-------- drivers/net/ethernet/qualcomm/qca_spi.c | 110 ++++++++++++------------ drivers/net/ethernet/qualcomm/qca_spi.h | 5 -- 3 files changed, 93 insertions(+), 98 deletions(-) diff --git a/drivers/net/ethernet/qualcomm/qca_7k.c b/drivers/net/ethernet/qualcomm/qca_7k.c index ffe7a16bdfc84..6c8543fb90c0a 100644 --- a/drivers/net/ethernet/qualcomm/qca_7k.c +++ b/drivers/net/ethernet/qualcomm/qca_7k.c @@ -45,34 +45,33 @@ qcaspi_read_register(struct qcaspi *qca, u16 reg, u16 *result) { __be16 rx_data; __be16 tx_data; - struct spi_transfer *transfer; - struct spi_message *msg; + struct spi_transfer transfer[2]; + struct spi_message msg; int ret; + memset(transfer, 0, sizeof(transfer)); + + spi_message_init(&msg); + tx_data = cpu_to_be16(QCA7K_SPI_READ | QCA7K_SPI_INTERNAL | reg); + *result = 0; + + transfer[0].tx_buf = &tx_data; + transfer[0].len = QCASPI_CMD_LEN; + transfer[1].rx_buf = &rx_data; + transfer[1].len = QCASPI_CMD_LEN; + + spi_message_add_tail(&transfer[0], &msg); if (qca->legacy_mode) { - msg = &qca->spi_msg1; - transfer = &qca->spi_xfer1; - transfer->tx_buf = &tx_data; - transfer->rx_buf = NULL; - transfer->len = QCASPI_CMD_LEN; - spi_sync(qca->spi_dev, msg); - } else { - msg = &qca->spi_msg2; - transfer = &qca->spi_xfer2[0]; - transfer->tx_buf = &tx_data; - transfer->rx_buf = NULL; - transfer->len = QCASPI_CMD_LEN; - transfer = &qca->spi_xfer2[1]; + spi_sync(qca->spi_dev, &msg); + spi_message_init(&msg); } - transfer->tx_buf = NULL; - transfer->rx_buf = &rx_data; - transfer->len = QCASPI_CMD_LEN; - ret = spi_sync(qca->spi_dev, msg); + spi_message_add_tail(&transfer[1], &msg); + ret = spi_sync(qca->spi_dev, &msg); if (!ret) - ret = msg->status; + ret = msg.status; if (ret) qcaspi_spi_error(qca); @@ -86,35 +85,32 @@ int qcaspi_write_register(struct qcaspi *qca, u16 reg, u16 value) { __be16 tx_data[2]; - struct spi_transfer *transfer; - struct spi_message *msg; + struct spi_transfer transfer[2]; + struct spi_message msg; int ret; + memset(&transfer, 0, sizeof(transfer)); + + spi_message_init(&msg); + tx_data[0] = cpu_to_be16(QCA7K_SPI_WRITE | QCA7K_SPI_INTERNAL | reg); tx_data[1] = cpu_to_be16(value); + transfer[0].tx_buf = &tx_data[0]; + transfer[0].len = QCASPI_CMD_LEN; + transfer[1].tx_buf = &tx_data[1]; + transfer[1].len = QCASPI_CMD_LEN; + + spi_message_add_tail(&transfer[0], &msg); if (qca->legacy_mode) { - msg = &qca->spi_msg1; - transfer = &qca->spi_xfer1; - transfer->tx_buf = &tx_data[0]; - transfer->rx_buf = NULL; - transfer->len = QCASPI_CMD_LEN; - spi_sync(qca->spi_dev, msg); - } else { - msg = &qca->spi_msg2; - transfer = &qca->spi_xfer2[0]; - transfer->tx_buf = &tx_data[0]; - transfer->rx_buf = NULL; - transfer->len = QCASPI_CMD_LEN; - transfer = &qca->spi_xfer2[1]; + spi_sync(qca->spi_dev, &msg); + spi_message_init(&msg); } - transfer->tx_buf = &tx_data[1]; - transfer->rx_buf = NULL; - transfer->len = QCASPI_CMD_LEN; - ret = spi_sync(qca->spi_dev, msg); + spi_message_add_tail(&transfer[1], &msg); + ret = spi_sync(qca->spi_dev, &msg); if (!ret) - ret = msg->status; + ret = msg.status; if (ret) qcaspi_spi_error(qca); diff --git a/drivers/net/ethernet/qualcomm/qca_spi.c b/drivers/net/ethernet/qualcomm/qca_spi.c index b1f5f0b8e546e..275fc6f154a71 100644 --- a/drivers/net/ethernet/qualcomm/qca_spi.c +++ b/drivers/net/ethernet/qualcomm/qca_spi.c @@ -99,22 +99,24 @@ static u32 qcaspi_write_burst(struct qcaspi *qca, u8 *src, u32 len) { __be16 cmd; - struct spi_message *msg = &qca->spi_msg2; - struct spi_transfer *transfer = &qca->spi_xfer2[0]; + struct spi_message msg; + struct spi_transfer transfer[2]; int ret; + memset(&transfer, 0, sizeof(transfer)); + spi_message_init(&msg); + cmd = cpu_to_be16(QCA7K_SPI_WRITE | QCA7K_SPI_EXTERNAL); - transfer->tx_buf = &cmd; - transfer->rx_buf = NULL; - transfer->len = QCASPI_CMD_LEN; - transfer = &qca->spi_xfer2[1]; - transfer->tx_buf = src; - transfer->rx_buf = NULL; - transfer->len = len; + transfer[0].tx_buf = &cmd; + transfer[0].len = QCASPI_CMD_LEN; + transfer[1].tx_buf = src; + transfer[1].len = len; - ret = spi_sync(qca->spi_dev, msg); + spi_message_add_tail(&transfer[0], &msg); + spi_message_add_tail(&transfer[1], &msg); + ret = spi_sync(qca->spi_dev, &msg); - if (ret || (msg->actual_length != QCASPI_CMD_LEN + len)) { + if (ret || (msg.actual_length != QCASPI_CMD_LEN + len)) { qcaspi_spi_error(qca); return 0; } @@ -125,17 +127,20 @@ qcaspi_write_burst(struct qcaspi *qca, u8 *src, u32 len) static u32 qcaspi_write_legacy(struct qcaspi *qca, u8 *src, u32 len) { - struct spi_message *msg = &qca->spi_msg1; - struct spi_transfer *transfer = &qca->spi_xfer1; + struct spi_message msg; + struct spi_transfer transfer; int ret; - transfer->tx_buf = src; - transfer->rx_buf = NULL; - transfer->len = len; + memset(&transfer, 0, sizeof(transfer)); + spi_message_init(&msg); + + transfer.tx_buf = src; + transfer.len = len; - ret = spi_sync(qca->spi_dev, msg); + spi_message_add_tail(&transfer, &msg); + ret = spi_sync(qca->spi_dev, &msg); - if (ret || (msg->actual_length != len)) { + if (ret || (msg.actual_length != len)) { qcaspi_spi_error(qca); return 0; } @@ -146,23 +151,25 @@ qcaspi_write_legacy(struct qcaspi *qca, u8 *src, u32 len) static u32 qcaspi_read_burst(struct qcaspi *qca, u8 *dst, u32 len) { - struct spi_message *msg = &qca->spi_msg2; + struct spi_message msg; __be16 cmd; - struct spi_transfer *transfer = &qca->spi_xfer2[0]; + struct spi_transfer transfer[2]; int ret; + memset(&transfer, 0, sizeof(transfer)); + spi_message_init(&msg); + cmd = cpu_to_be16(QCA7K_SPI_READ | QCA7K_SPI_EXTERNAL); - transfer->tx_buf = &cmd; - transfer->rx_buf = NULL; - transfer->len = QCASPI_CMD_LEN; - transfer = &qca->spi_xfer2[1]; - transfer->tx_buf = NULL; - transfer->rx_buf = dst; - transfer->len = len; + transfer[0].tx_buf = &cmd; + transfer[0].len = QCASPI_CMD_LEN; + transfer[1].rx_buf = dst; + transfer[1].len = len; - ret = spi_sync(qca->spi_dev, msg); + spi_message_add_tail(&transfer[0], &msg); + spi_message_add_tail(&transfer[1], &msg); + ret = spi_sync(qca->spi_dev, &msg); - if (ret || (msg->actual_length != QCASPI_CMD_LEN + len)) { + if (ret || (msg.actual_length != QCASPI_CMD_LEN + len)) { qcaspi_spi_error(qca); return 0; } @@ -173,17 +180,20 @@ qcaspi_read_burst(struct qcaspi *qca, u8 *dst, u32 len) static u32 qcaspi_read_legacy(struct qcaspi *qca, u8 *dst, u32 len) { - struct spi_message *msg = &qca->spi_msg1; - struct spi_transfer *transfer = &qca->spi_xfer1; + struct spi_message msg; + struct spi_transfer transfer; int ret; - transfer->tx_buf = NULL; - transfer->rx_buf = dst; - transfer->len = len; + memset(&transfer, 0, sizeof(transfer)); + spi_message_init(&msg); - ret = spi_sync(qca->spi_dev, msg); + transfer.rx_buf = dst; + transfer.len = len; - if (ret || (msg->actual_length != len)) { + spi_message_add_tail(&transfer, &msg); + ret = spi_sync(qca->spi_dev, &msg); + + if (ret || (msg.actual_length != len)) { qcaspi_spi_error(qca); return 0; } @@ -195,19 +205,23 @@ static int qcaspi_tx_cmd(struct qcaspi *qca, u16 cmd) { __be16 tx_data; - struct spi_message *msg = &qca->spi_msg1; - struct spi_transfer *transfer = &qca->spi_xfer1; + struct spi_message msg; + struct spi_transfer transfer; int ret; + memset(&transfer, 0, sizeof(transfer)); + + spi_message_init(&msg); + tx_data = cpu_to_be16(cmd); - transfer->len = sizeof(tx_data); - transfer->tx_buf = &tx_data; - transfer->rx_buf = NULL; + transfer.len = sizeof(cmd); + transfer.tx_buf = &tx_data; + spi_message_add_tail(&transfer, &msg); - ret = spi_sync(qca->spi_dev, msg); + ret = spi_sync(qca->spi_dev, &msg); if (!ret) - ret = msg->status; + ret = msg.status; if (ret) qcaspi_spi_error(qca); @@ -836,16 +850,6 @@ qcaspi_netdev_setup(struct net_device *dev) qca = netdev_priv(dev); memset(qca, 0, sizeof(struct qcaspi)); - memset(&qca->spi_xfer1, 0, sizeof(struct spi_transfer)); - memset(&qca->spi_xfer2, 0, sizeof(struct spi_transfer) * 2); - - spi_message_init(&qca->spi_msg1); - spi_message_add_tail(&qca->spi_xfer1, &qca->spi_msg1); - - spi_message_init(&qca->spi_msg2); - spi_message_add_tail(&qca->spi_xfer2[0], &qca->spi_msg2); - spi_message_add_tail(&qca->spi_xfer2[1], &qca->spi_msg2); - memset(&qca->txr, 0, sizeof(qca->txr)); qca->txr.count = TX_RING_MAX_LEN; } diff --git a/drivers/net/ethernet/qualcomm/qca_spi.h b/drivers/net/ethernet/qualcomm/qca_spi.h index fc4beb1b32d1a..fc0e98726b361 100644 --- a/drivers/net/ethernet/qualcomm/qca_spi.h +++ b/drivers/net/ethernet/qualcomm/qca_spi.h @@ -83,11 +83,6 @@ struct qcaspi { struct tx_ring txr; struct qcaspi_stats stats; - struct spi_message spi_msg1; - struct spi_message spi_msg2; - struct spi_transfer spi_xfer1; - struct spi_transfer spi_xfer2[2]; - u8 *rx_buffer; u32 buffer_size; u8 sync; -- GitLab From 5ff9c51cbd66e53b70ce50c6ff59908e54576f51 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 10 Sep 2018 18:27:26 -0700 Subject: [PATCH 0337/2269] rds: fix two RCU related problems [ Upstream commit cc4dfb7f70a344f24c1c71e298deea0771dadcb2 ] When a rds sock is bound, it is inserted into the bind_hash_table which is protected by RCU. But when releasing rds sock, after it is removed from this hash table, it is freed immediately without respecting RCU grace period. This could cause some use-after-free as reported by syzbot. Mark the rds sock with SOCK_RCU_FREE before inserting it into the bind_hash_table, so that it would be always freed after a RCU grace period. The other problem is in rds_find_bound(), the rds sock could be freed in between rhashtable_lookup_fast() and rds_sock_addref(), so we need to extend RCU read lock protection in rds_find_bound() to close this race condition. Reported-and-tested-by: syzbot+8967084bcac563795dc6@syzkaller.appspotmail.com Reported-by: syzbot+93a5839deb355537440f@syzkaller.appspotmail.com Cc: Sowmini Varadhan Cc: Santosh Shilimkar Cc: rds-devel@oss.oracle.com Signed-off-by: Cong Wang Acked-by: Santosh Shilimkar Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/rds/bind.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/rds/bind.c b/net/rds/bind.c index 5aa3a64aa4f0e..48257d3a42015 100644 --- a/net/rds/bind.c +++ b/net/rds/bind.c @@ -60,11 +60,13 @@ struct rds_sock *rds_find_bound(__be32 addr, __be16 port) u64 key = ((u64)addr << 32) | port; struct rds_sock *rs; - rs = rhashtable_lookup_fast(&bind_hash_table, &key, ht_parms); + rcu_read_lock(); + rs = rhashtable_lookup(&bind_hash_table, &key, ht_parms); if (rs && !sock_flag(rds_rs_to_sk(rs), SOCK_DEAD)) rds_sock_addref(rs); else rs = NULL; + rcu_read_unlock(); rdsdebug("returning rs %p for %pI4:%u\n", rs, &addr, ntohs(port)); @@ -157,6 +159,7 @@ int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) goto out; } + sock_set_flag(sk, SOCK_RCU_FREE); ret = rds_add_bound(rs, sin->sin_addr.s_addr, &sin->sin_port); if (ret) goto out; -- GitLab From 571f1f6862bf8dfdffe2e833089122a75a19a7b8 Mon Sep 17 00:00:00 2001 From: Huy Nguyen Date: Wed, 15 Aug 2018 11:08:48 -0500 Subject: [PATCH 0338/2269] net/mlx5: Check for error in mlx5_attach_interface [ Upstream commit 47bc94b82291e007da61ee1b3d18c77871f3e158 ] Currently, mlx5_attach_interface does not check for error after calling intf->attach or intf->add. When these two calls fails, the client is not initialized and will cause issues such as kernel panic on invalid address in the teardown path (mlx5_detach_interface) Fixes: 737a234bb638 ("net/mlx5: Introduce attach/detach to interface API") Signed-off-by: Huy Nguyen Signed-off-by: Saeed Mahameed Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx5/core/dev.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c index 17b723218b0c0..9f9c9ff107354 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c @@ -132,11 +132,11 @@ void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv) delayed_event_start(priv); dev_ctx->context = intf->add(dev); - set_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state); - if (intf->attach) - set_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state); - if (dev_ctx->context) { + set_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state); + if (intf->attach) + set_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state); + spin_lock_irq(&priv->ctx_lock); list_add_tail(&dev_ctx->list, &priv->ctx_list); @@ -211,12 +211,17 @@ static void mlx5_attach_interface(struct mlx5_interface *intf, struct mlx5_priv if (intf->attach) { if (test_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state)) goto out; - intf->attach(dev, dev_ctx->context); + if (intf->attach(dev, dev_ctx->context)) + goto out; + set_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state); } else { if (test_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state)) goto out; dev_ctx->context = intf->add(dev); + if (!dev_ctx->context) + goto out; + set_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state); } -- GitLab From f8ce022004ca10d218c50f7822726fcd651431d5 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Tue, 7 Aug 2018 09:59:03 +0300 Subject: [PATCH 0339/2269] net/mlx5: Fix debugfs cleanup in the device init/remove flow [ Upstream commit 5df816e7f43f1297c40021ef17ec6e722b45c82f ] When initializing the device (procedure init_one), the driver calls mlx5_pci_init to perform pci initialization. As part of this initialization, mlx5_pci_init creates a debugfs directory. If this creation fails, init_one aborts, returning failure to the caller (which is the probe method caller). The main reason for such a failure to occur is if the debugfs directory already exists. This can happen if the last time mlx5_pci_close was called, debugfs_remove (silently) failed due to the debugfs directory not being empty. Guarantee that such a debugfs_remove failure will not occur by instead calling debugfs_remove_recursive in procedure mlx5_pci_close. Fixes: 59211bd3b632 ("net/mlx5: Split the load/unload flow into hardware and software flows") Signed-off-by: Jack Morgenstein Reviewed-by: Daniel Jurgens Signed-off-by: Saeed Mahameed Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 68e48b1884588..e99f1382a4f0c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -857,8 +857,10 @@ static int mlx5_pci_init(struct mlx5_core_dev *dev, struct mlx5_priv *priv) priv->numa_node = dev_to_node(&dev->pdev->dev); priv->dbg_root = debugfs_create_dir(dev_name(&pdev->dev), mlx5_debugfs_root); - if (!priv->dbg_root) + if (!priv->dbg_root) { + dev_err(&pdev->dev, "Cannot create debugfs dir, aborting\n"); return -ENOMEM; + } err = mlx5_pci_enable_device(dev); if (err) { @@ -907,7 +909,7 @@ static void mlx5_pci_close(struct mlx5_core_dev *dev, struct mlx5_priv *priv) pci_clear_master(dev->pdev); release_bar(dev->pdev); mlx5_pci_disable_device(dev); - debugfs_remove(priv->dbg_root); + debugfs_remove_recursive(priv->dbg_root); } static int mlx5_init_once(struct mlx5_core_dev *dev, struct mlx5_priv *priv) -- GitLab From 1de5a956681d09a1c64d569bf5c799ba1e645522 Mon Sep 17 00:00:00 2001 From: Raed Salem Date: Tue, 21 Aug 2018 15:22:42 +0300 Subject: [PATCH 0340/2269] net/mlx5: E-Switch, Fix memory leak when creating switchdev mode FDB tables [ Upstream commit c88a026e01219488e745f4f0267fd76c2bb68421 ] The memory allocated for the slow path table flow group input structure was not freed upon successful return, fix that. Fixes: 1967ce6ea5c8 ("net/mlx5: E-Switch, Refactor fast path FDB table creation in switchdev mode") Signed-off-by: Raed Salem Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index c699055c0ffde..4b52b722135d9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -557,6 +557,7 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports) if (err) goto miss_rule_err; + kvfree(flow_group_in); return 0; miss_rule_err: -- GitLab From 04f625fc5a68905f47349ec54538eff5523f29a5 Mon Sep 17 00:00:00 2001 From: Vakul Garg Date: Thu, 6 Sep 2018 21:41:40 +0530 Subject: [PATCH 0341/2269] net/tls: Set count of SG entries if sk_alloc_sg returns -ENOSPC [ Upstream commit 52ea992cfac357b73180d5c051dca43bc8d20c2a ] tls_sw_sendmsg() allocates plaintext and encrypted SG entries using function sk_alloc_sg(). In case the number of SG entries hit MAX_SKB_FRAGS, sk_alloc_sg() returns -ENOSPC and sets the variable for current SG index to '0'. This leads to calling of function tls_push_record() with 'sg_encrypted_num_elem = 0' and later causes kernel crash. To fix this, set the number of SG elements to the number of elements in plaintext/encrypted SG arrays in case sk_alloc_sg() returns -ENOSPC. Fixes: 3c4d7559159b ("tls: kernel TLS support") Signed-off-by: Vakul Garg Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/tls/tls_sw.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index fb79caf56d0e8..b81aa6d7dc45e 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -170,6 +170,9 @@ static int alloc_encrypted_sg(struct sock *sk, int len) rc = alloc_sg(sk, len, ctx->sg_encrypted_data, &ctx->sg_encrypted_num_elem, &ctx->sg_encrypted_size, 0); + if (rc == -ENOSPC) + ctx->sg_encrypted_num_elem = ARRAY_SIZE(ctx->sg_encrypted_data); + return rc; } @@ -183,6 +186,9 @@ static int alloc_plaintext_sg(struct sock *sk, int len) &ctx->sg_plaintext_num_elem, &ctx->sg_plaintext_size, tls_ctx->pending_open_record_frags); + if (rc == -ENOSPC) + ctx->sg_plaintext_num_elem = ARRAY_SIZE(ctx->sg_plaintext_data); + return rc; } -- GitLab From 456191a8554ac6a16d6f497d1bf64d53ca173a68 Mon Sep 17 00:00:00 2001 From: Haishuang Yan Date: Mon, 10 Sep 2018 22:19:48 +0800 Subject: [PATCH 0342/2269] erspan: fix error handling for erspan tunnel [ Upstream commit 51dc63e3911fbb1f0a7a32da2fe56253e2040ea4 ] When processing icmp unreachable message for erspan tunnel, tunnel id should be erspan_net_id instead of ipgre_net_id. Fixes: 84e54fe0a5ea ("gre: introduce native tunnel support for ERSPAN") Cc: William Tu Signed-off-by: Haishuang Yan Acked-by: William Tu Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ip_gre.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 2459e9cc22a69..d40ef41a12b2b 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -177,6 +177,8 @@ static void ipgre_err(struct sk_buff *skb, u32 info, if (tpi->proto == htons(ETH_P_TEB)) itn = net_generic(net, gre_tap_net_id); + else if (tpi->proto == htons(ETH_P_ERSPAN)) + itn = net_generic(net, erspan_net_id); else itn = net_generic(net, ipgre_net_id); -- GitLab From 1beb52cea6cbfe24137587087e7818b620324714 Mon Sep 17 00:00:00 2001 From: Haishuang Yan Date: Mon, 10 Sep 2018 22:19:47 +0800 Subject: [PATCH 0343/2269] erspan: return PACKET_REJECT when the appropriate tunnel is not found [ Upstream commit 5a64506b5c2c3cdb29d817723205330378075448 ] If erspan tunnel hasn't been established, we'd better send icmp port unreachable message after receive erspan packets. Fixes: 84e54fe0a5ea ("gre: introduce native tunnel support for ERSPAN") Cc: William Tu Signed-off-by: Haishuang Yan Acked-by: William Tu Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ip_gre.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index d40ef41a12b2b..dd3bcf22fe8b4 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -322,6 +322,8 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi, ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error); return PACKET_RCVD; } + return PACKET_REJECT; + drop: kfree_skb(skb); return PACKET_RCVD; -- GitLab From effa7afc5283ecbbdbb4af86eb1be4e710edb136 Mon Sep 17 00:00:00 2001 From: Vincent Whitchurch Date: Thu, 6 Sep 2018 15:54:59 +0200 Subject: [PATCH 0344/2269] tcp: really ignore MSG_ZEROCOPY if no SO_ZEROCOPY [ Upstream commit 5cf4a8532c992bb22a9ecd5f6d93f873f4eaccc2 ] According to the documentation in msg_zerocopy.rst, the SO_ZEROCOPY flag was introduced because send(2) ignores unknown message flags and any legacy application which was accidentally passing the equivalent of MSG_ZEROCOPY earlier should not see any new behaviour. Before commit f214f915e7db ("tcp: enable MSG_ZEROCOPY"), a send(2) call which passed the equivalent of MSG_ZEROCOPY without setting SO_ZEROCOPY would succeed. However, after that commit, it fails with -ENOBUFS. So it appears that the SO_ZEROCOPY flag fails to fulfill its intended purpose. Fix it. Fixes: f214f915e7db ("tcp: enable MSG_ZEROCOPY") Signed-off-by: Vincent Whitchurch Acked-by: Willem de Bruijn Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/skbuff.c | 3 --- net/ipv4/tcp.c | 2 +- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 168a3e8883d4b..9f80b947f53b1 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -937,9 +937,6 @@ struct ubuf_info *sock_zerocopy_alloc(struct sock *sk, size_t size) WARN_ON_ONCE(!in_task()); - if (!sock_flag(sk, SOCK_ZEROCOPY)) - return NULL; - skb = sock_omalloc(sk, 0, GFP_KERNEL); if (!skb) return NULL; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 7462ec7587ced..f9c985460faa3 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1177,7 +1177,7 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size) flags = msg->msg_flags; - if (flags & MSG_ZEROCOPY && size) { + if (flags & MSG_ZEROCOPY && size && sock_flag(sk, SOCK_ZEROCOPY)) { if (sk->sk_state != TCP_ESTABLISHED) { err = -EINVAL; goto out_err; -- GitLab From 4b2a6ecd21859d5097af6ef009582c4a416480c5 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 14 Aug 2018 19:10:50 +0200 Subject: [PATCH 0345/2269] hv/netvsc: Fix NULL dereference at single queue mode fallback commit b19b46346f483ae055fa027cb2d5c2ca91484b91 upstream. The recent commit 916c5e1413be ("hv/netvsc: fix handling of fallback to single queue mode") tried to fix the fallback behavior to a single queue mode, but it changed the function to return zero incorrectly, while the function should return an object pointer. Eventually this leads to a NULL dereference at the callers that expect non-NULL value. Fix it by returning the proper net_device object. Fixes: 916c5e1413be ("hv/netvsc: fix handling of fallback to single queue mode") Signed-off-by: Takashi Iwai Reviewed-by: Stephen Hemminger Signed-off-by: David S. Miller Cc: Alakesh Haloi Signed-off-by: Greg Kroah-Hartman --- drivers/net/hyperv/rndis_filter.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c index 328c37e9096d1..17025d46bdac8 100644 --- a/drivers/net/hyperv/rndis_filter.c +++ b/drivers/net/hyperv/rndis_filter.c @@ -1299,7 +1299,7 @@ out: /* setting up multiple channels failed */ net_device->max_chn = 1; net_device->num_chn = 1; - return 0; + return net_device; err_dev_remv: rndis_filter_device_remove(dev, net_device); -- GitLab From a574b059c0dff9fb6560c5b6fa113314e32b0bcd Mon Sep 17 00:00:00 2001 From: "Erich E. Hoover" Date: Thu, 19 Jul 2018 17:26:24 -0600 Subject: [PATCH 0346/2269] usb: dwc3: change stream event enable bit back to 13 [ Upstream commit 9a7faac3650216112e034b157289bf1a48a99e2d ] Commit ff3f0789b3dc ("usb: dwc3: use BIT() macro where possible") changed DWC3_DEPCFG_STREAM_EVENT_EN from bit 13 to bit 12. Spotted this cleanup typo while looking at diffs between 4.9.35 and 4.14.16 for a separate issue. Fixes: ff3f0789b3dc ("usb: dwc3: use BIT() macro where possible") Signed-off-by: Erich E. Hoover Signed-off-by: Felipe Balbi Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/gadget.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/dwc3/gadget.h b/drivers/usb/dwc3/gadget.h index 4a3227543255e..5cc9fd7306560 100644 --- a/drivers/usb/dwc3/gadget.h +++ b/drivers/usb/dwc3/gadget.h @@ -33,7 +33,7 @@ struct dwc3; #define DWC3_DEPCFG_XFER_IN_PROGRESS_EN BIT(9) #define DWC3_DEPCFG_XFER_NOT_READY_EN BIT(10) #define DWC3_DEPCFG_FIFO_ERROR_EN BIT(11) -#define DWC3_DEPCFG_STREAM_EVENT_EN BIT(12) +#define DWC3_DEPCFG_STREAM_EVENT_EN BIT(13) #define DWC3_DEPCFG_BINTERVAL_M1(n) (((n) & 0xff) << 16) #define DWC3_DEPCFG_STREAM_CAPABLE BIT(24) #define DWC3_DEPCFG_EP_NUMBER(n) (((n) & 0x1f) << 25) -- GitLab From ea4b3539ab5dfb449a951a38605b553762d34c89 Mon Sep 17 00:00:00 2001 From: Miao Zhong Date: Mon, 23 Jul 2018 20:56:58 +0800 Subject: [PATCH 0347/2269] iommu/arm-smmu-v3: sync the OVACKFLG to PRIQ consumer register [ Upstream commit 0d535967ac658966c6ade8f82b5799092f7d5441 ] When PRI queue occurs overflow, driver should update the OVACKFLG to the PRIQ consumer register, otherwise subsequent PRI requests will not be processed. Cc: Will Deacon Cc: Robin Murphy Signed-off-by: Miao Zhong Signed-off-by: Will Deacon Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/arm-smmu-v3.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index 8f7a3c00b6cf3..26e99c03390fa 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -1272,6 +1272,7 @@ static irqreturn_t arm_smmu_priq_thread(int irq, void *dev) /* Sync our overflow flag, as we believe we're up to speed */ q->cons = Q_OVF(q, q->prod) | Q_WRP(q, q->cons) | Q_IDX(q, q->cons); + writel(q->cons, q->cons_reg); return IRQ_HANDLED; } -- GitLab From 918cad16b45810da7b0d0fa605377630fa9ebedd Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Tue, 19 Jun 2018 13:52:24 +0100 Subject: [PATCH 0348/2269] iommu/io-pgtable-arm-v7s: Abort allocation when table address overflows the PTE [ Upstream commit 29859aeb8a6ea17ba207933a81b6b77b4d4df81a ] When run on a 64-bit system in selftest, the v7s driver may obtain page table with physical addresses larger than 32-bit. Level-2 tables are 1KB and are are allocated with slab, which doesn't accept the GFP_DMA32 flag. Currently map() truncates the address written in the PTE, causing iova_to_phys() or unmap() to access invalid memory. Kasan reports it as a use-after-free. To avoid any nasty surprise, test if the physical address fits in a PTE before returning a new table. 32-bit systems, which are the main users of this page table format, shouldn't see any difference. Signed-off-by: Jean-Philippe Brucker Signed-off-by: Will Deacon Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/io-pgtable-arm-v7s.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c index 6961fc393f0b2..29b7a6755fcda 100644 --- a/drivers/iommu/io-pgtable-arm-v7s.c +++ b/drivers/iommu/io-pgtable-arm-v7s.c @@ -192,6 +192,7 @@ static void *__arm_v7s_alloc_table(int lvl, gfp_t gfp, { struct io_pgtable_cfg *cfg = &data->iop.cfg; struct device *dev = cfg->iommu_dev; + phys_addr_t phys; dma_addr_t dma; size_t size = ARM_V7S_TABLE_SIZE(lvl); void *table = NULL; @@ -200,6 +201,10 @@ static void *__arm_v7s_alloc_table(int lvl, gfp_t gfp, table = (void *)__get_dma_pages(__GFP_ZERO, get_order(size)); else if (lvl == 2) table = kmem_cache_zalloc(data->l2_tables, gfp | GFP_DMA); + phys = virt_to_phys(table); + if (phys != (arm_v7s_iopte)phys) + /* Doesn't fit in PTE */ + goto out_free; if (table && !(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA)) { dma = dma_map_single(dev, table, size, DMA_TO_DEVICE); if (dma_mapping_error(dev, dma)) @@ -209,7 +214,7 @@ static void *__arm_v7s_alloc_table(int lvl, gfp_t gfp, * address directly, so if the DMA layer suggests otherwise by * translating or truncating them, that bodes very badly... */ - if (dma != virt_to_phys(table)) + if (dma != phys) goto out_unmap; } kmemleak_ignore(table); -- GitLab From f402334e5d328c9480f32ded3fe3bfeba53ac7fb Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 25 Jul 2018 23:00:48 +0200 Subject: [PATCH 0349/2269] ALSA: msnd: Fix the default sample sizes [ Upstream commit 7c500f9ea139d0c9b80fdea5a9c911db3166ea54 ] The default sample sizes set by msnd driver are bogus; it sets ALSA PCM format, not the actual bit width. Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- sound/isa/msnd/msnd_pinnacle.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/isa/msnd/msnd_pinnacle.c b/sound/isa/msnd/msnd_pinnacle.c index 45e561c425bfb..a3cf837c0ffdf 100644 --- a/sound/isa/msnd/msnd_pinnacle.c +++ b/sound/isa/msnd/msnd_pinnacle.c @@ -82,10 +82,10 @@ static void set_default_audio_parameters(struct snd_msnd *chip) { - chip->play_sample_size = DEFSAMPLESIZE; + chip->play_sample_size = snd_pcm_format_width(DEFSAMPLESIZE); chip->play_sample_rate = DEFSAMPLERATE; chip->play_channels = DEFCHANNELS; - chip->capture_sample_size = DEFSAMPLESIZE; + chip->capture_sample_size = snd_pcm_format_width(DEFSAMPLESIZE); chip->capture_sample_rate = DEFSAMPLERATE; chip->capture_channels = DEFCHANNELS; } -- GitLab From a51e519d5b4b20d63af112301e515160c588e039 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 25 Jul 2018 23:00:46 +0200 Subject: [PATCH 0350/2269] ALSA: usb-audio: Fix multiple definitions in AU0828_DEVICE() macro [ Upstream commit bd1cd0eb2ce9141100628d476ead4de485501b29 ] AU0828_DEVICE() macro in quirks-table.h uses USB_DEVICE_VENDOR_SPEC() for expanding idVendor and idProduct fields. However, the latter macro adds also match_flags and bInterfaceClass, which are different from the values AU0828_DEVICE() macro sets after that. For fixing them, just expand idVendor and idProduct fields manually in AU0828_DEVICE(). This fixes sparse warnings like: sound/usb/quirks-table.h:2892:1: warning: Initializer entry defined twice Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- sound/usb/quirks-table.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/usb/quirks-table.h b/sound/usb/quirks-table.h index 69bf5cf1e91ef..15cbe25657037 100644 --- a/sound/usb/quirks-table.h +++ b/sound/usb/quirks-table.h @@ -2875,7 +2875,8 @@ YAMAHA_DEVICE(0x7010, "UB99"), */ #define AU0828_DEVICE(vid, pid, vname, pname) { \ - USB_DEVICE_VENDOR_SPEC(vid, pid), \ + .idVendor = vid, \ + .idProduct = pid, \ .match_flags = USB_DEVICE_ID_MATCH_DEVICE | \ USB_DEVICE_ID_MATCH_INT_CLASS | \ USB_DEVICE_ID_MATCH_INT_SUBCLASS, \ -- GitLab From 318f224d1214178870fd096304607cc8c8aadd1b Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Wed, 25 Jul 2018 16:54:33 +0800 Subject: [PATCH 0351/2269] xfrm: fix 'passing zero to ERR_PTR()' warning [ Upstream commit 934ffce1343f22ed5e2d0bd6da4440f4848074de ] Fix a static code checker warning: net/xfrm/xfrm_policy.c:1836 xfrm_resolve_and_create_bundle() warn: passing zero to 'ERR_PTR' xfrm_tmpl_resolve return 0 just means no xdst found, return NULL instead of passing zero to ERR_PTR. Fixes: d809ec895505 ("xfrm: do not assume that template resolving always returns xfrms") Signed-off-by: YueHaibing Signed-off-by: Steffen Klassert Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/xfrm/xfrm_policy.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index a6c0027cadb5b..2fb7a78308e1a 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1831,7 +1831,10 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols, /* Try to instantiate a bundle */ err = xfrm_tmpl_resolve(pols, num_pols, fl, xfrm, family); if (err <= 0) { - if (err != 0 && err != -EAGAIN) + if (err == 0) + return NULL; + + if (err != -EAGAIN) XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR); return ERR_PTR(err); } -- GitLab From 36eb78a6cec7ac9902a84bdb9e11209f415f75ca Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Thu, 26 Jul 2018 09:51:27 +0800 Subject: [PATCH 0352/2269] amd-xgbe: use dma_mapping_error to check map errors [ Upstream commit b24dbfe9ce03d9f83306616f22fb0e04e8960abe ] The dma_mapping_error() returns true or false, but we want to return -ENOMEM if there was an error. Fixes: 174fd2597b0b ("amd-xgbe: Implement split header receive support") Signed-off-by: YueHaibing Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/amd/xgbe/xgbe-desc.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-desc.c b/drivers/net/ethernet/amd/xgbe/xgbe-desc.c index 45d92304068eb..a5eaf174d914c 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-desc.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-desc.c @@ -289,7 +289,7 @@ static int xgbe_alloc_pages(struct xgbe_prv_data *pdata, struct page *pages = NULL; dma_addr_t pages_dma; gfp_t gfp; - int order, ret; + int order; again: order = alloc_order; @@ -316,10 +316,9 @@ again: /* Map the pages */ pages_dma = dma_map_page(pdata->dev, pages, 0, PAGE_SIZE << order, DMA_FROM_DEVICE); - ret = dma_mapping_error(pdata->dev, pages_dma); - if (ret) { + if (dma_mapping_error(pdata->dev, pages_dma)) { put_page(pages); - return ret; + return -ENOMEM; } pa->pages = pages; -- GitLab From 0fe570942c0d50abb19f3f533ff964ea0cb94ace Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 25 Jul 2018 18:45:08 +0100 Subject: [PATCH 0353/2269] gfs2: Special-case rindex for gfs2_grow [ Upstream commit 776125785a87ff05d49938bd5b9f336f2a05bff6 ] To speed up the common case of appending to a file, gfs2_write_alloc_required presumes that writing beyond the end of a file will always require additional blocks to be allocated. This assumption is incorrect for preallocates files, but there are no negative consequences as long as *some* space is still left on the filesystem. One special file that always has some space preallocated beyond the end of the file is the rindex: when growing a filesystem, gfs2_grow adds one or more new resource groups and appends records describing those resource groups to the rindex; the preallocated space ensures that this is always possible. However, when a filesystem is completely full, gfs2_write_alloc_required will indicate that an additional allocation is required, and appending the next record to the rindex will fail even though space for that record has already been preallocated. To fix that, skip the incorrect optimization in gfs2_write_alloc_required, but for the rindex only. Other writes to preallocated space beyond the end of the file are still allowed to fail on completely full filesystems. Signed-off-by: Andreas Gruenbacher Reviewed-by: Bob Peterson Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/gfs2/bmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 3dd0cceefa435..bc8787718febb 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -1680,7 +1680,7 @@ int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset, end_of_file = (i_size_read(&ip->i_inode) + sdp->sd_sb.sb_bsize - 1) >> shift; lblock = offset >> shift; lblock_stop = (offset + len + sdp->sd_sb.sb_bsize - 1) >> shift; - if (lblock_stop > end_of_file) + if (lblock_stop > end_of_file && ip != GFS2_I(sdp->sd_rindex)) return 1; size = (lblock_stop - lblock) << shift; -- GitLab From 45c800f555b08ce80d3f178907fde3ea8658c0ab Mon Sep 17 00:00:00 2001 From: Nicholas Mc Guire Date: Fri, 13 Jul 2018 13:13:20 +0200 Subject: [PATCH 0354/2269] clk: imx6ul: fix missing of_node_put() [ Upstream commit 11177e7a7aaef95935592072985526ebf0a3df43 ] of_find_compatible_node() is returning a device node with refcount incremented and must be explicitly decremented after the last use which is right after the us in of_iomap() here. Signed-off-by: Nicholas Mc Guire Fixes: 787b4271a6a0 ("clk: imx: add imx6ul clk tree support") Signed-off-by: Stephen Boyd Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/clk/imx/clk-imx6ul.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/clk/imx/clk-imx6ul.c b/drivers/clk/imx/clk-imx6ul.c index 41c08fc892b97..5cc5ff1b4e1f5 100644 --- a/drivers/clk/imx/clk-imx6ul.c +++ b/drivers/clk/imx/clk-imx6ul.c @@ -135,6 +135,7 @@ static void __init imx6ul_clocks_init(struct device_node *ccm_node) np = of_find_compatible_node(NULL, NULL, "fsl,imx6ul-anatop"); base = of_iomap(np, 0); + of_node_put(np); WARN_ON(!base); clks[IMX6UL_PLL1_BYPASS_SRC] = imx_clk_mux("pll1_bypass_src", base + 0x00, 14, 1, pll_bypass_src_sels, ARRAY_SIZE(pll_bypass_src_sels)); -- GitLab From d8e7792fae4f54cab45f9877ac3c19966e0f2ce3 Mon Sep 17 00:00:00 2001 From: Mikko Perttunen Date: Wed, 11 Jul 2018 11:21:04 +0300 Subject: [PATCH 0355/2269] clk: core: Potentially free connection id [ Upstream commit 365f7a89c881e84f1ebc925f65f899d5d7ce547e ] Patch "clk: core: Copy connection id" made it so that the connector id 'con_id' is kstrdup_const()ed to cater to drivers that pass non-constant connection ids. The patch added the corresponding kfree_const to __clk_free_clk(), but struct clk's can be freed also via __clk_put(). Add the kfree_const call to __clk_put() and add comments to both functions to remind that the logic in them should be kept in sync. Fixes: 253160a8ad06 ("clk: core: Copy connection id") Signed-off-by: Mikko Perttunen Reviewed-by: Leonard Crestez Signed-off-by: Stephen Boyd Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/clk/clk.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index 6f4c98ca6e508..a3f52f6782115 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -2557,6 +2557,7 @@ struct clk *__clk_create_clk(struct clk_hw *hw, const char *dev_id, return clk; } +/* keep in sync with __clk_put */ void __clk_free_clk(struct clk *clk) { clk_prepare_lock(); @@ -2922,6 +2923,7 @@ int __clk_get(struct clk *clk) return 1; } +/* keep in sync with __clk_free_clk */ void __clk_put(struct clk *clk) { struct module *owner; @@ -2943,6 +2945,7 @@ void __clk_put(struct clk *clk) module_put(owner); + kfree_const(clk->con_id); kfree(clk); } -- GitLab From 62e442fdbcb25ef7adbcad5145137806df327a3f Mon Sep 17 00:00:00 2001 From: Rajan Vaja Date: Tue, 17 Jul 2018 06:17:00 -0700 Subject: [PATCH 0356/2269] clk: clk-fixed-factor: Clear OF_POPULATED flag in case of failure [ Upstream commit f6dab4233d6b64d719109040503b567f71fbfa01 ] Fixed factor clock has two initializations at of_clk_init() time and during platform driver probe. Before of_clk_init() call, node is marked as populated and so its probe never gets called. During of_clk_init() fixed factor clock registration may fail if any of its parent clock is not registered. In this case, it doesn't get chance to retry registration from probe. Clear OF_POPULATED flag if fixed factor clock registration fails so that clock registration is attempted again from probe. Signed-off-by: Rajan Vaja Signed-off-by: Stephen Boyd Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/clk/clk-fixed-factor.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/clk/clk-fixed-factor.c b/drivers/clk/clk-fixed-factor.c index a5d402de55841..20724abd38bd1 100644 --- a/drivers/clk/clk-fixed-factor.c +++ b/drivers/clk/clk-fixed-factor.c @@ -177,8 +177,15 @@ static struct clk *_of_fixed_factor_clk_setup(struct device_node *node) clk = clk_register_fixed_factor(NULL, clk_name, parent_name, flags, mult, div); - if (IS_ERR(clk)) + if (IS_ERR(clk)) { + /* + * If parent clock is not registered, registration would fail. + * Clear OF_POPULATED flag so that clock registration can be + * attempted again from probe function. + */ + of_node_clear_flag(node, OF_POPULATED); return clk; + } ret = of_clk_add_provider(node, of_clk_src_simple_get, clk); if (ret) { -- GitLab From ee83ce188e0b11a7cac40b2625cd2f9889eaa025 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Fri, 20 Jul 2018 16:46:33 +0900 Subject: [PATCH 0357/2269] kbuild: add .DELETE_ON_ERROR special target MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 9c2af1c7377a8a6ef86e5cabf80978f3dbbb25c0 ] If Make gets a fatal signal while a shell is executing, it may delete the target file that the recipe was supposed to update. This is needed to make sure that it is remade from scratch when Make is next run; if Make is interrupted after the recipe has begun to write the target file, it results in an incomplete file whose time stamp is newer than that of the prerequisites files. Make automatically deletes the incomplete file on interrupt unless the target is marked .PRECIOUS. The situation is just the same as when the shell fails for some reasons. Usually when a recipe line fails, if it has changed the target file at all, the file is corrupted, or at least it is not completely updated. Yet the file’s time stamp says that it is now up to date, so the next time Make runs, it will not try to update that file. However, Make does not cater to delete the incomplete target file in this case. We need to add .DELETE_ON_ERROR somewhere in the Makefile to request it. scripts/Kbuild.include seems a suitable place to add it because it is included from almost all sub-makes. Please note .DELETE_ON_ERROR is not effective for phony targets. The external module building should never ever touch the kernel tree. The following recipe fails if include/generated/autoconf.h is missing. However, include/config/auto.conf is not deleted since it is a phony target. PHONY += include/config/auto.conf include/config/auto.conf: $(Q)test -e include/generated/autoconf.h -a -e $@ || ( \ echo >&2; \ echo >&2 " ERROR: Kernel configuration is invalid."; \ echo >&2 " include/generated/autoconf.h or $@ are missing.";\ echo >&2 " Run 'make oldconfig && make prepare' on kernel src to fix it."; \ echo >&2 ; \ /bin/false) Signed-off-by: Masahiro Yamada Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- scripts/Kbuild.include | 3 +++ 1 file changed, 3 insertions(+) diff --git a/scripts/Kbuild.include b/scripts/Kbuild.include index fcbbecf923957..a0ad87e869f97 100644 --- a/scripts/Kbuild.include +++ b/scripts/Kbuild.include @@ -403,3 +403,6 @@ endif endef # ############################################################################### + +# delete partially updated (i.e. corrupted) files on error +.DELETE_ON_ERROR: -- GitLab From 58119f9bd94df9f52304d95296245daf3373e382 Mon Sep 17 00:00:00 2001 From: Krzysztof Ha?asa Date: Thu, 28 Jun 2018 17:45:07 -0400 Subject: [PATCH 0358/2269] media: tw686x: Fix oops on buffer alloc failure [ Upstream commit 5a1a2f63d840dc2631505b607e11ff65ac1b7d3c ] The error path currently calls tw686x_video_free() which requires vc->dev to be initialized, causing a NULL dereference on uninitizalized channels. Fix this by setting the vc->dev fields for all the channels first. Fixes: f8afaa8dbc0d ("[media] tw686x: Introduce an interface to support multiple DMA modes") Signed-off-by: Krzysztof Ha?asa Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/media/pci/tw686x/tw686x-video.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/media/pci/tw686x/tw686x-video.c b/drivers/media/pci/tw686x/tw686x-video.c index 0ea8dd44026c3..3a06c000f97bb 100644 --- a/drivers/media/pci/tw686x/tw686x-video.c +++ b/drivers/media/pci/tw686x/tw686x-video.c @@ -1190,6 +1190,14 @@ int tw686x_video_init(struct tw686x_dev *dev) return err; } + /* Initialize vc->dev and vc->ch for the error path */ + for (ch = 0; ch < max_channels(dev); ch++) { + struct tw686x_video_channel *vc = &dev->video_channels[ch]; + + vc->dev = dev; + vc->ch = ch; + } + for (ch = 0; ch < max_channels(dev); ch++) { struct tw686x_video_channel *vc = &dev->video_channels[ch]; struct video_device *vdev; @@ -1198,9 +1206,6 @@ int tw686x_video_init(struct tw686x_dev *dev) spin_lock_init(&vc->qlock); INIT_LIST_HEAD(&vc->vidq_queued); - vc->dev = dev; - vc->ch = ch; - /* default settings */ err = tw686x_set_standard(vc, V4L2_STD_NTSC); if (err) -- GitLab From e1cfd4533ffb2d882ef7a49e6b8d9ef033a95cd1 Mon Sep 17 00:00:00 2001 From: John Keeping Date: Tue, 17 Jul 2018 11:48:16 +0100 Subject: [PATCH 0359/2269] dmaengine: pl330: fix irq race with terminate_all [ Upstream commit e49756544a21f5625b379b3871d27d8500764670 ] In pl330_update() when checking if a channel has been aborted, the channel's lock is not taken, only the overall pl330_dmac lock. But in pl330_terminate_all() the aborted flag (req_running==-1) is set under the channel lock and not the pl330_dmac lock. With threaded interrupts, this leads to a potential race: pl330_terminate_all pl330_update ------------------- ------------ lock channel entry lock pl330 _stop channel unlock pl330 lock pl330 check req_running != -1 req_running = -1 _start channel Signed-off-by: John Keeping Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/dma/pl330.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c index d19862f4dc9a2..6afd42cfbf5d5 100644 --- a/drivers/dma/pl330.c +++ b/drivers/dma/pl330.c @@ -2142,13 +2142,14 @@ static int pl330_terminate_all(struct dma_chan *chan) pm_runtime_get_sync(pl330->ddma.dev); spin_lock_irqsave(&pch->lock, flags); + spin_lock(&pl330->lock); _stop(pch->thread); - spin_unlock(&pl330->lock); - pch->thread->req[0].desc = NULL; pch->thread->req[1].desc = NULL; pch->thread->req_running = -1; + spin_unlock(&pl330->lock); + power_down = pch->active; pch->active = false; -- GitLab From 9b4328303638947bd06604ed5dc906bf4c279cb8 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Fri, 20 Jul 2018 13:58:22 +0200 Subject: [PATCH 0360/2269] MIPS: ath79: fix system restart [ Upstream commit f8a7bfe1cb2c1ebfa07775c9c8ac0ad3ba8e5ff5 ] This patch disables irq on reboot to fix hang issues that were observed due to pending interrupts. Signed-off-by: Felix Fietkau Signed-off-by: John Crispin Signed-off-by: Paul Burton Patchwork: https://patchwork.linux-mips.org/patch/19913/ Cc: James Hogan Cc: Ralf Baechle Cc: linux-mips@linux-mips.org Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/mips/ath79/setup.c | 1 + arch/mips/include/asm/mach-ath79/ath79.h | 1 + 2 files changed, 2 insertions(+) diff --git a/arch/mips/ath79/setup.c b/arch/mips/ath79/setup.c index f206dafbb0a35..26a058d58d37b 100644 --- a/arch/mips/ath79/setup.c +++ b/arch/mips/ath79/setup.c @@ -40,6 +40,7 @@ static char ath79_sys_type[ATH79_SYS_TYPE_LEN]; static void ath79_restart(char *command) { + local_irq_disable(); ath79_device_reset_set(AR71XX_RESET_FULL_CHIP); for (;;) if (cpu_wait) diff --git a/arch/mips/include/asm/mach-ath79/ath79.h b/arch/mips/include/asm/mach-ath79/ath79.h index 441faa92c3cd4..6e6c0fead776d 100644 --- a/arch/mips/include/asm/mach-ath79/ath79.h +++ b/arch/mips/include/asm/mach-ath79/ath79.h @@ -134,6 +134,7 @@ static inline u32 ath79_pll_rr(unsigned reg) static inline void ath79_reset_wr(unsigned reg, u32 val) { __raw_writel(val, ath79_reset_base + reg); + (void) __raw_readl(ath79_reset_base + reg); /* flush */ } static inline u32 ath79_reset_rr(unsigned reg) -- GitLab From 5b253f742006e6edfea85df31c69bf3e29fbf536 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Thu, 5 Jul 2018 04:25:19 -0400 Subject: [PATCH 0361/2269] media: videobuf2-core: check for q->error in vb2_core_qbuf() [ Upstream commit b509d733d337417bcb7fa4a35be3b9a49332b724 ] The vb2_core_qbuf() function didn't check if q->error was set. It is checked in __buf_prepare(), but that function isn't called if the buffer was already prepared before with VIDIOC_PREPARE_BUF. So check it at the start of vb2_core_qbuf() as well. Signed-off-by: Hans Verkuil Acked-by: Sakari Ailus Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/media/v4l2-core/videobuf2-core.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/media/v4l2-core/videobuf2-core.c b/drivers/media/v4l2-core/videobuf2-core.c index 2dbf632c10de3..43522a09b11d5 100644 --- a/drivers/media/v4l2-core/videobuf2-core.c +++ b/drivers/media/v4l2-core/videobuf2-core.c @@ -1373,6 +1373,11 @@ int vb2_core_qbuf(struct vb2_queue *q, unsigned int index, void *pb) struct vb2_buffer *vb; int ret; + if (q->error) { + dprintk(1, "fatal error occurred on queue\n"); + return -EIO; + } + vb = q->bufs[index]; switch (vb->state) { -- GitLab From 394df591433deffef83b794bf95bd7ab12d56497 Mon Sep 17 00:00:00 2001 From: Zhu Yanjun Date: Fri, 13 Jul 2018 03:10:20 -0400 Subject: [PATCH 0362/2269] IB/rxe: Drop QP0 silently [ Upstream commit 536ca245c512aedfd84cde072d7b3ca14b6e1792 ] According to "Annex A16: RDMA over Converged Ethernet (RoCE)": A16.4.3 MANAGEMENT INTERFACES As defined in the base specification, a special Queue Pair, QP0 is defined solely for communication between subnet manager(s) and subnet management agents. Since such an IB-defined subnet management architecture is outside the scope of this annex, it follows that there is also no requirement that a port which conforms to this annex be associated with a QP0. Thus, for end nodes designed to conform to this annex, the concept of QP0 is undefined and unused for any port connected to an Ethernet network. CA16-8: A packet arriving at a RoCE port containing a BTH with the destination QP field set to QP0 shall be silently dropped. Signed-off-by: Zhu Yanjun Acked-by: Moni Shoua Reviewed-by: Yuval Shaia Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/sw/rxe/rxe_recv.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c index fb8c83e055e14..83412df726a51 100644 --- a/drivers/infiniband/sw/rxe/rxe_recv.c +++ b/drivers/infiniband/sw/rxe/rxe_recv.c @@ -225,9 +225,14 @@ static int hdr_check(struct rxe_pkt_info *pkt) goto err1; } + if (unlikely(qpn == 0)) { + pr_warn_once("QP 0 not supported"); + goto err1; + } + if (qpn != IB_MULTICAST_QPN) { - index = (qpn == 0) ? port->qp_smi_index : - ((qpn == 1) ? port->qp_gsi_index : qpn); + index = (qpn == 1) ? port->qp_gsi_index : qpn; + qp = rxe_pool_get_index(&rxe->qp_pool, index); if (unlikely(!qp)) { pr_warn_ratelimited("no qp matches qpn 0x%x\n", qpn); -- GitLab From 030f2ad6ce16f3c49e4d9c65a4cd36f477f8e7b9 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Fri, 20 Jul 2018 14:57:38 -0400 Subject: [PATCH 0363/2269] block: allow max_discard_segments to be stacked [ Upstream commit 42c9cdfe1e11e083dceb0f0c4977b758cf7403b9 ] Set max_discard_segments to USHRT_MAX in blk_set_stacking_limits() so that blk_stack_limits() can stack up this limit for stacked devices. before: $ cat /sys/block/nvme0n1/queue/max_discard_segments 256 $ cat /sys/block/dm-0/queue/max_discard_segments 1 after: $ cat /sys/block/nvme0n1/queue/max_discard_segments 256 $ cat /sys/block/dm-0/queue/max_discard_segments 256 Fixes: 1e739730c5b9e ("block: optionally merge discontiguous discard bios into a single request") Reviewed-by: Christoph Hellwig Signed-off-by: Mike Snitzer Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- block/blk-settings.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/blk-settings.c b/block/blk-settings.c index 8559e9563c525..474b0b95fcd16 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -128,7 +128,7 @@ void blk_set_stacking_limits(struct queue_limits *lim) /* Inherit limits from component devices */ lim->max_segments = USHRT_MAX; - lim->max_discard_segments = 1; + lim->max_discard_segments = USHRT_MAX; lim->max_hw_sectors = UINT_MAX; lim->max_segment_size = UINT_MAX; lim->max_sectors = UINT_MAX; -- GitLab From 745cb5eb3cef672b8a5acf0d63d8c9c2a1a3bf19 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 11 Jul 2018 13:15:42 +0000 Subject: [PATCH 0364/2269] IB/ipoib: Fix error return code in ipoib_dev_init() [ Upstream commit 99a7e2bf704d64c966dfacede1ba2d9b47cb676e ] Fix to return a negative error code from the ipoib_neigh_hash_init() error handling case instead of 0, as done elsewhere in this function. Fixes: 515ed4f3aab4 ("IB/IPoIB: Separate control and data related initializations") Signed-off-by: Wei Yongjun Reviewed-by: Yuval Shaia Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/ulp/ipoib/ipoib_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 6bc9a768f7219..e6ff16b27acdb 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -1752,7 +1752,8 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port) goto out_free_pd; } - if (ipoib_neigh_hash_init(priv) < 0) { + ret = ipoib_neigh_hash_init(priv); + if (ret) { pr_warn("%s failed to init neigh hash\n", dev->name); goto out_dev_uninit; } -- GitLab From 28b6561183ed02dcb3a5f2d261c4a5b46976fa5c Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 24 Jul 2018 11:29:01 -0700 Subject: [PATCH 0365/2269] mtd/maps: fix solutionengine.c printk format warnings [ Upstream commit 1d25e3eeed1d987404e2d2e451eebac8c15cecc1 ] Fix 2 printk format warnings (this driver is currently only used by arch/sh/) by using "%pap" instead of "%lx". Fixes these build warnings: ../drivers/mtd/maps/solutionengine.c: In function 'init_soleng_maps': ../include/linux/kern_levels.h:5:18: warning: format '%lx' expects argument of type 'long unsigned int', but argument 2 has type 'resource_size_t' {aka 'unsigned int'} [-Wformat=] ../drivers/mtd/maps/solutionengine.c:62:54: note: format string is defined here printk(KERN_NOTICE "Solution Engine: Flash at 0x%08lx, EPROM at 0x%08lx\n", ~~~~^ %08x ../include/linux/kern_levels.h:5:18: warning: format '%lx' expects argument of type 'long unsigned int', but argument 3 has type 'resource_size_t' {aka 'unsigned int'} [-Wformat=] ../drivers/mtd/maps/solutionengine.c:62:72: note: format string is defined here printk(KERN_NOTICE "Solution Engine: Flash at 0x%08lx, EPROM at 0x%08lx\n", ~~~~^ %08x Cc: David Woodhouse Cc: Brian Norris Cc: Boris Brezillon Cc: Marek Vasut Cc: Richard Weinberger Cc: linux-mtd@lists.infradead.org Cc: Yoshinori Sato Cc: Rich Felker Cc: linux-sh@vger.kernel.org Cc: Sergei Shtylyov Signed-off-by: Randy Dunlap Signed-off-by: Boris Brezillon Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/maps/solutionengine.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/mtd/maps/solutionengine.c b/drivers/mtd/maps/solutionengine.c index bb580bc164459..c07f21b204632 100644 --- a/drivers/mtd/maps/solutionengine.c +++ b/drivers/mtd/maps/solutionengine.c @@ -59,9 +59,9 @@ static int __init init_soleng_maps(void) return -ENXIO; } } - printk(KERN_NOTICE "Solution Engine: Flash at 0x%08lx, EPROM at 0x%08lx\n", - soleng_flash_map.phys & 0x1fffffff, - soleng_eprom_map.phys & 0x1fffffff); + printk(KERN_NOTICE "Solution Engine: Flash at 0x%pap, EPROM at 0x%pap\n", + &soleng_flash_map.phys, + &soleng_eprom_map.phys); flash_mtd->owner = THIS_MODULE; eprom_mtd = do_map_probe("map_rom", &soleng_eprom_map); -- GitLab From f86f6ebc1bf5ab2394a2866ea66ebed6ae3ba978 Mon Sep 17 00:00:00 2001 From: Todor Tomov Date: Mon, 18 Jun 2018 04:06:58 -0400 Subject: [PATCH 0366/2269] media: ov5645: Supported external clock is 24MHz [ Upstream commit 4adb0a0432f489c5eb802b33dae7737f69e6fd7a ] The external clock frequency was set to 23.88MHz by mistake because of a platform which cannot get closer to 24MHz. The supported by the driver external clock is 24MHz so set it correctly and also fix the values of the pixel clock and link clock. However allow 1% tolerance to the external clock as this difference is small enough to be insignificant. Signed-off-by: Todor Tomov Signed-off-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/media/i2c/ov5645.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/media/i2c/ov5645.c b/drivers/media/i2c/ov5645.c index a31fe18c71d6d..2d96c18497593 100644 --- a/drivers/media/i2c/ov5645.c +++ b/drivers/media/i2c/ov5645.c @@ -510,8 +510,8 @@ static const struct reg_value ov5645_setting_full[] = { }; static const s64 link_freq[] = { - 222880000, - 334320000 + 224000000, + 336000000 }; static const struct ov5645_mode_info ov5645_mode_info_data[] = { @@ -520,7 +520,7 @@ static const struct ov5645_mode_info ov5645_mode_info_data[] = { .height = 960, .data = ov5645_setting_sxga, .data_size = ARRAY_SIZE(ov5645_setting_sxga), - .pixel_clock = 111440000, + .pixel_clock = 112000000, .link_freq = 0 /* an index in link_freq[] */ }, { @@ -528,7 +528,7 @@ static const struct ov5645_mode_info ov5645_mode_info_data[] = { .height = 1080, .data = ov5645_setting_1080p, .data_size = ARRAY_SIZE(ov5645_setting_1080p), - .pixel_clock = 167160000, + .pixel_clock = 168000000, .link_freq = 1 /* an index in link_freq[] */ }, { @@ -536,7 +536,7 @@ static const struct ov5645_mode_info ov5645_mode_info_data[] = { .height = 1944, .data = ov5645_setting_full, .data_size = ARRAY_SIZE(ov5645_setting_full), - .pixel_clock = 167160000, + .pixel_clock = 168000000, .link_freq = 1 /* an index in link_freq[] */ }, }; @@ -1157,7 +1157,8 @@ static int ov5645_probe(struct i2c_client *client, return ret; } - if (xclk_freq != 23880000) { + /* external clock must be 24MHz, allow 1% tolerance */ + if (xclk_freq < 23760000 || xclk_freq > 24240000) { dev_err(dev, "external clock frequency %u is not supported\n", xclk_freq); return -EINVAL; -- GitLab From b435dd667b9a225513165d78b7ec142635a060be Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Tue, 24 Jul 2018 15:48:58 +0200 Subject: [PATCH 0367/2269] perf test: Fix subtest number when showing results [ Upstream commit 9ef0112442bdddef5fb55adf20b3a5464b33de75 ] Perf test 40 for example has several subtests numbered 1-4 when displaying the start of the subtest. When the subtest results are displayed the subtests are numbered 0-3. Use this command to generate trace output: [root@s35lp76 perf]# ./perf test -Fv 40 2>/tmp/bpf1 Fix this by adjusting the subtest number when show the subtest result. Output before: [root@s35lp76 perf]# egrep '(^40\.[0-4]| subtest [0-4]:)' /tmp/bpf1 40.1: Basic BPF filtering : BPF filter subtest 0: Ok 40.2: BPF pinning : BPF filter subtest 1: Ok 40.3: BPF prologue generation : BPF filter subtest 2: Ok 40.4: BPF relocation checker : BPF filter subtest 3: Ok [root@s35lp76 perf]# Output after: root@s35lp76 ~]# egrep '(^40\.[0-4]| subtest [0-4]:)' /tmp/bpf1 40.1: Basic BPF filtering : BPF filter subtest 1: Ok 40.2: BPF pinning : BPF filter subtest 2: Ok 40.3: BPF prologue generation : BPF filter subtest 3: Ok 40.4: BPF relocation checker : BPF filter subtest 4: Ok [root@s35lp76 ~]# Signed-off-by: Thomas Richter Reviewed-by: Hendrik Brueckner Cc: Heiko Carstens Cc: Martin Schwidefsky Link: http://lkml.kernel.org/r/20180724134858.100644-1-tmricht@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/perf/tests/builtin-test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index 53d06f37406a2..5966f1f9b160e 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -589,7 +589,7 @@ static int __cmd_test(int argc, const char *argv[], struct intlist *skiplist) for (subi = 0; subi < subn; subi++) { pr_info("%2d.%1d: %-*s:", i, subi + 1, subw, t->subtest.get_desc(subi)); - err = test_and_print(t, skip, subi); + err = test_and_print(t, skip, subi + 1); if (err != TEST_OK && t->subtest.skip_if_fail) skip = true; } -- GitLab From d074912d2edeae8eb08d3f85a3f0dab0b3606089 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Mon, 18 Jun 2018 13:24:13 -0500 Subject: [PATCH 0368/2269] gfs2: Don't reject a supposedly full bitmap if we have blocks reserved [ Upstream commit e79e0e1428188b24c3b57309ffa54a33c4ae40c4 ] Before this patch, you could get into situations like this: 1. Process 1 searches for X free blocks, finds them, makes a reservation 2. Process 2 searches for free blocks in the same rgrp, but now the bitmap is full because process 1's reservation is skipped over. So it marks the bitmap as GBF_FULL. 3. Process 1 tries to allocate blocks from its own reservation, but since the GBF_FULL bit is set, it skips over the rgrp and searches elsewhere, thus not using its own reservation. This patch adds an additional check to allow processes to use their own reservations. Signed-off-by: Bob Peterson Signed-off-by: Andreas Gruenbacher Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/gfs2/rgrp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 95b2a57ded330..5fe033131f036 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -1665,7 +1665,8 @@ static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 *minext, while(1) { bi = rbm_bi(rbm); - if (test_bit(GBF_FULL, &bi->bi_flags) && + if ((ip == NULL || !gfs2_rs_active(&ip->i_res)) && + test_bit(GBF_FULL, &bi->bi_flags) && (state == GFS2_BLKST_FREE)) goto next_bitmap; -- GitLab From d38d272592737ea88a20bd93bed0f1cf88791c07 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 12 Jul 2018 15:52:02 +0200 Subject: [PATCH 0369/2269] perf tools: Synthesize GROUP_DESC feature in pipe mode [ Upstream commit e8fedff1cc729fd227924305152ccc6f580e8c83 ] Stephan reported, that pipe mode does not carry the group information and thus the piped report won't display the grouped output for following command: # perf record -e '{cycles,instructions,branches}' -a sleep 4 | perf report It has no idea about the group setup, so it will display events separately: # Overhead Command Shared Object ... # ........ ............... ....................... # 6.71% swapper [kernel.kallsyms] 2.28% offlineimap libpython2.7.so.1.0 0.78% perf [kernel.kallsyms] ... Fix GROUP_DESC feature record to be synthesized in pipe mode, so the report output is grouped if there are groups defined in record: # Overhead Command Shared ... # ........................ ............... ....... # 7.57% 0.16% 0.30% swapper [kernel 1.87% 3.15% 2.46% offlineimap libpyth 1.33% 0.00% 0.00% perf [kernel ... Reported-by: Stephane Eranian Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Tested-by: Stephane Eranian Cc: Alexander Shishkin Cc: David Ahern Cc: David Carrillo-Cisneros Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180712135202.14774-1-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/header.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 8a678a3d5a2a0..1ceb332575bd1 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -2209,7 +2209,7 @@ static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = { FEAT_OPR(NUMA_TOPOLOGY, numa_topology, true), FEAT_OPN(BRANCH_STACK, branch_stack, false), FEAT_OPR(PMU_MAPPINGS, pmu_mappings, false), - FEAT_OPN(GROUP_DESC, group_desc, false), + FEAT_OPR(GROUP_DESC, group_desc, false), FEAT_OPN(AUXTRACE, auxtrace, false), FEAT_OPN(STAT, stat, false), FEAT_OPN(CACHE, cache, true), -- GitLab From 3cfa558660f82586adc675a1d05796ce324a0a1d Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 24 Jul 2018 19:11:28 +0200 Subject: [PATCH 0370/2269] fbdev: omapfb: off by one in omapfb_register_client() [ Upstream commit 5ec1ec35b2979b59d0b33381e7c9aac17e159d16 ] The omapfb_register_client[] array has OMAPFB_PLANE_NUM elements so the > should be >= or we are one element beyond the end of the array. Fixes: 8b08cf2b64f5 ("OMAP: add TI OMAP framebuffer driver") Signed-off-by: Dan Carpenter Cc: Imre Deak Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/video/fbdev/omap/omapfb_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/video/fbdev/omap/omapfb_main.c b/drivers/video/fbdev/omap/omapfb_main.c index 3479a47a30820..0eee8a29d28d0 100644 --- a/drivers/video/fbdev/omap/omapfb_main.c +++ b/drivers/video/fbdev/omap/omapfb_main.c @@ -958,7 +958,7 @@ int omapfb_register_client(struct omapfb_notifier_block *omapfb_nb, { int r; - if ((unsigned)omapfb_nb->plane_idx > OMAPFB_PLANE_NUM) + if ((unsigned)omapfb_nb->plane_idx >= OMAPFB_PLANE_NUM) return -EINVAL; if (!notifier_inited) { -- GitLab From b0c7f4ddbf18cbced70ae39c4bc01722de2b637c Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 20 Jul 2018 12:17:40 +0200 Subject: [PATCH 0371/2269] perf tools: Fix struct comm_str removal crash [ Upstream commit 46b3722cc7765582354488da633aafffcb138458 ] We occasionaly hit following assert failure in 'perf top', when processing the /proc info in multiple threads. perf: ...include/linux/refcount.h:109: refcount_inc: Assertion `!(!refcount_inc_not_zero(r))' failed. The gdb backtrace looks like this: [Switching to Thread 0x7ffff11ba700 (LWP 13749)] 0x00007ffff50839fb in raise () from /lib64/libc.so.6 (gdb) #0 0x00007ffff50839fb in raise () from /lib64/libc.so.6 #1 0x00007ffff5085800 in abort () from /lib64/libc.so.6 #2 0x00007ffff507c0da in __assert_fail_base () from /lib64/libc.so.6 #3 0x00007ffff507c152 in __assert_fail () from /lib64/libc.so.6 #4 0x0000000000535373 in refcount_inc (r=0x7fffdc009be0) at ...include/linux/refcount.h:109 #5 0x00000000005354f1 in comm_str__get (cs=0x7fffdc009bc0) at util/comm.c:24 #6 0x00000000005356bd in __comm_str__findnew (str=0x7fffd000b260 ":2", root=0xbed5c0 ) at util/comm.c:72 #7 0x000000000053579e in comm_str__findnew (str=0x7fffd000b260 ":2", root=0xbed5c0 ) at util/comm.c:95 #8 0x000000000053582e in comm__new (str=0x7fffd000b260 ":2", timestamp=0, exec=false) at util/comm.c:111 #9 0x00000000005363bc in thread__new (pid=2, tid=2) at util/thread.c:57 #10 0x0000000000523da0 in ____machine__findnew_thread (machine=0xbfde38, threads=0xbfdf28, pid=2, tid=2, create=true) at util/machine.c:457 #11 0x0000000000523eb4 in __machine__findnew_thread (machine=0xbfde38, ... The failing assertion is this one: REFCOUNT_WARN(!refcount_inc_not_zero(r), ... The problem is that we keep global comm_str_root list, which is accessed by multiple threads during the 'perf top' startup and following 2 paths can race: thread 1: ... thread__new comm__new comm_str__findnew down_write(&comm_str_lock); __comm_str__findnew comm_str__get thread 2: ... comm__override or comm__free comm_str__put refcount_dec_and_test down_write(&comm_str_lock); rb_erase(&cs->rb_node, &comm_str_root); Because thread 2 first decrements the refcnt and only after then it removes the struct comm_str from the list, the thread 1 can find this object on the list with refcnt equls to 0 and hit the assert. This patch fixes the thread 1 __comm_str__findnew path, by ignoring objects that already dropped the refcnt to 0. For the rest of the objects we take the refcnt before comparing its name and release it afterwards with comm_str__put, which can also release the object completely. Signed-off-by: Jiri Olsa Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Kan Liang Cc: Lukasz Odzioba Cc: Peter Zijlstra Cc: Wang Nan Cc: kernel-team@lge.com Link: http://lkml.kernel.org/r/20180720101740.GA27176@krava Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/comm.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/tools/perf/util/comm.c b/tools/perf/util/comm.c index 8808570f8e9c2..327e01cb1aa2f 100644 --- a/tools/perf/util/comm.c +++ b/tools/perf/util/comm.c @@ -18,9 +18,10 @@ static struct rb_root comm_str_root; static struct comm_str *comm_str__get(struct comm_str *cs) { - if (cs) - refcount_inc(&cs->refcnt); - return cs; + if (cs && refcount_inc_not_zero(&cs->refcnt)) + return cs; + + return NULL; } static void comm_str__put(struct comm_str *cs) @@ -62,9 +63,14 @@ static struct comm_str *comm_str__findnew(const char *str, struct rb_root *root) parent = *p; iter = rb_entry(parent, struct comm_str, rb_node); + /* + * If we race with comm_str__put, iter->refcnt is 0 + * and it will be removed within comm_str__put call + * shortly, ignore it in this search. + */ cmp = strcmp(str, iter->str); - if (!cmp) - return comm_str__get(iter); + if (!cmp && comm_str__get(iter)) + return iter; if (cmp < 0) p = &(*p)->rb_left; -- GitLab From 6a736057f1bb0f2401da13b117daa4ecdab78032 Mon Sep 17 00:00:00 2001 From: Anton Vasilyev Date: Tue, 24 Jul 2018 19:11:27 +0200 Subject: [PATCH 0372/2269] video: goldfishfb: fix memory leak on driver remove [ Upstream commit 5958fde72d04e7b8c6de3669d1f794a90997e3eb ] goldfish_fb_probe() allocates memory for fb, but goldfish_fb_remove() does not have deallocation of fb, which leads to memory leak on probe/remove. The patch adds deallocation into goldfish_fb_remove(). Found by Linux Driver Verification project (linuxtesting.org). Signed-off-by: Anton Vasilyev Cc: Aleksandar Markovic Cc: Miodrag Dinic Cc: Goran Ferenc Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/video/fbdev/goldfishfb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/video/fbdev/goldfishfb.c b/drivers/video/fbdev/goldfishfb.c index 7f6c9e6cfc6c9..14a93cb213100 100644 --- a/drivers/video/fbdev/goldfishfb.c +++ b/drivers/video/fbdev/goldfishfb.c @@ -301,6 +301,7 @@ static int goldfish_fb_remove(struct platform_device *pdev) dma_free_coherent(&pdev->dev, framesize, (void *)fb->fb.screen_base, fb->fb.fix.smem_start); iounmap(fb->reg_base); + kfree(fb); return 0; } -- GitLab From 7ff8989cecf370f8aa4ee09fab4af244e0d165f6 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 24 Jul 2018 19:11:27 +0200 Subject: [PATCH 0373/2269] fbdev/via: fix defined but not used warning [ Upstream commit b6566b47a67e07fdca44cf51abb14e2fbe17d3eb ] Fix a build warning in viafbdev.c when CONFIG_PROC_FS is not enabled by marking the unused function as __maybe_unused. ../drivers/video/fbdev/via/viafbdev.c:1471:12: warning: 'viafb_sup_odev_proc_show' defined but not used [-Wunused-function] Signed-off-by: Randy Dunlap Cc: Florian Tobias Schandinat Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/video/fbdev/via/viafbdev.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/video/fbdev/via/viafbdev.c b/drivers/video/fbdev/via/viafbdev.c index badee04ef496c..71b5dca95bdbd 100644 --- a/drivers/video/fbdev/via/viafbdev.c +++ b/drivers/video/fbdev/via/viafbdev.c @@ -19,6 +19,7 @@ * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ +#include #include #include #include @@ -1468,7 +1469,7 @@ static const struct file_operations viafb_vt1636_proc_fops = { #endif /* CONFIG_FB_VIA_DIRECT_PROCFS */ -static int viafb_sup_odev_proc_show(struct seq_file *m, void *v) +static int __maybe_unused viafb_sup_odev_proc_show(struct seq_file *m, void *v) { via_odev_to_seq(m, supported_odev_map[ viaparinfo->shared->chip_info.gfx_chip_name]); -- GitLab From b2b5343639d1f187fc93dccea356bec7a8036554 Mon Sep 17 00:00:00 2001 From: Sandipan Das Date: Tue, 10 Jul 2018 19:28:14 +0530 Subject: [PATCH 0374/2269] perf powerpc: Fix callchain ip filtering when return address is in a register [ Upstream commit 9068533e4f470daf2b0f29c71d865990acd8826e ] For powerpc64, perf will filter out the second entry in the callchain, i.e. the LR value, if the return address of the function corresponding to the probed location has already been saved on its caller's stack. The state of the return address is determined using debug information. At any point within a function, if the return address is already saved somewhere, a DWARF expression can tell us about its location. If the return address in still in LR only, no DWARF expression would exist. Typically, the instructions in a function's prologue first copy the LR value to R0 and then pushes R0 on to the stack. If LR has already been copied to R0 but R0 is yet to be pushed to the stack, we can still get a DWARF expression that says that the return address is in R0. This is indicating that getting a DWARF expression for the return address does not guarantee the fact that it has already been saved on the stack. This can be observed on a powerpc64le system running Fedora 27 as shown below. # objdump -d /usr/lib64/libc-2.26.so | less ... 000000000015af20 : 15af20: 0b 00 4c 3c addis r2,r12,11 15af24: e0 c1 42 38 addi r2,r2,-15904 15af28: a6 02 08 7c mflr r0 15af2c: f0 ff c1 fb std r30,-16(r1) 15af30: f8 ff e1 fb std r31,-8(r1) 15af34: 78 1b 7f 7c mr r31,r3 15af38: 78 23 83 7c mr r3,r4 15af3c: 78 2b be 7c mr r30,r5 15af40: 10 00 01 f8 std r0,16(r1) 15af44: c1 ff 21 f8 stdu r1,-64(r1) 15af48: 28 00 81 f8 std r4,40(r1) ... # readelf --debug-dump=frames-interp /usr/lib64/libc-2.26.so | less ... 00027024 0000000000000024 00027028 FDE cie=00000000 pc=000000000015af20..000000000015af88 LOC CFA r30 r31 ra 000000000015af20 r1+0 u u u 000000000015af34 r1+0 c-16 c-8 r0 000000000015af48 r1+64 c-16 c-8 c+16 000000000015af5c r1+0 c-16 c-8 c+16 000000000015af78 r1+0 u u ... # perf probe -x /usr/lib64/libc-2.26.so -a inet_pton+0x18 # perf record -e probe_libc:inet_pton -g ping -6 -c 1 ::1 # perf script Before: ping 2829 [005] 512917.460174: probe_libc:inet_pton: (7fff7e2baf38) 7fff7e2baf38 __GI___inet_pton+0x18 (/usr/lib64/libc-2.26.so) 7fff7e2705b4 getaddrinfo+0x164 (/usr/lib64/libc-2.26.so) 12f152d70 _init+0xbfc (/usr/bin/ping) 7fff7e1836a0 generic_start_main.isra.0+0x140 (/usr/lib64/libc-2.26.so) 7fff7e183898 __libc_start_main+0xb8 (/usr/lib64/libc-2.26.so) 0 [unknown] ([unknown]) After: ping 2829 [005] 512917.460174: probe_libc:inet_pton: (7fff7e2baf38) 7fff7e2baf38 __GI___inet_pton+0x18 (/usr/lib64/libc-2.26.so) 7fff7e26fa54 gaih_inet.constprop.7+0xf44 (/usr/lib64/libc-2.26.so) 7fff7e2705b4 getaddrinfo+0x164 (/usr/lib64/libc-2.26.so) 12f152d70 _init+0xbfc (/usr/bin/ping) 7fff7e1836a0 generic_start_main.isra.0+0x140 (/usr/lib64/libc-2.26.so) 7fff7e183898 __libc_start_main+0xb8 (/usr/lib64/libc-2.26.so) 0 [unknown] ([unknown]) Reported-by: Ravi Bangoria Signed-off-by: Sandipan Das Cc: Jiri Olsa Cc: Maynard Johnson Cc: Naveen N. Rao Cc: Ravi Bangoria Cc: Sukadev Bhattiprolu Link: http://lkml.kernel.org/r/66e848a7bdf2d43b39210a705ff6d828a0865661.1530724939.git.sandipan@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/perf/arch/powerpc/util/skip-callchain-idx.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tools/perf/arch/powerpc/util/skip-callchain-idx.c b/tools/perf/arch/powerpc/util/skip-callchain-idx.c index bd630c222e658..9d1f6e976a5af 100644 --- a/tools/perf/arch/powerpc/util/skip-callchain-idx.c +++ b/tools/perf/arch/powerpc/util/skip-callchain-idx.c @@ -58,9 +58,13 @@ static int check_return_reg(int ra_regno, Dwarf_Frame *frame) } /* - * Check if return address is on the stack. + * Check if return address is on the stack. If return address + * is in a register (typically R0), it is yet to be saved on + * the stack. */ - if (nops != 0 || ops != NULL) + if ((nops != 0 || ops != NULL) && + !(nops == 1 && ops[0].atom == DW_OP_regx && + ops[0].number2 == 0 && ops[0].offset == 0)) return 0; /* -- GitLab From 0b339773a30ea35b8193552926aaeeeab8be7d83 Mon Sep 17 00:00:00 2001 From: Daniel Mack Date: Tue, 24 Jul 2018 19:11:25 +0200 Subject: [PATCH 0375/2269] video: fbdev: pxafb: clear allocated memory for video modes [ Upstream commit b951d80aaf224b1f774e10def672f5e37488e4ee ] When parsing the video modes from DT properties, make sure to zero out memory before using it. This is important because not all fields in the mode struct are explicitly initialized, even though they are used later on. Fixes: 420a488278e86 ("video: fbdev: pxafb: initial devicetree conversion") Reviewed-by: Robert Jarzmik Signed-off-by: Daniel Mack Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/video/fbdev/pxafb.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/video/fbdev/pxafb.c b/drivers/video/fbdev/pxafb.c index c3d49e13643cd..29f00eccdd015 100644 --- a/drivers/video/fbdev/pxafb.c +++ b/drivers/video/fbdev/pxafb.c @@ -2130,8 +2130,8 @@ static int of_get_pxafb_display(struct device *dev, struct device_node *disp, return -EINVAL; ret = -ENOMEM; - info->modes = kmalloc_array(timings->num_timings, - sizeof(info->modes[0]), GFP_KERNEL); + info->modes = kcalloc(timings->num_timings, sizeof(info->modes[0]), + GFP_KERNEL); if (!info->modes) goto out; info->num_modes = timings->num_timings; -- GitLab From c7c53dc8aad12c0112169a4fd4d2246f5ed5e42e Mon Sep 17 00:00:00 2001 From: Fredrik Noring Date: Tue, 24 Jul 2018 19:11:24 +0200 Subject: [PATCH 0376/2269] fbdev: Distinguish between interlaced and progressive modes [ Upstream commit 1ba0a59cea41ea05fda92daaf2a2958a2246b9cf ] I discovered the problem when developing a frame buffer driver for the PlayStation 2 (not yet merged), using the following video modes for the PlayStation 3 in drivers/video/fbdev/ps3fb.c: }, { /* 1080if */ "1080if", 50, 1920, 1080, 13468, 148, 484, 36, 4, 88, 5, FB_SYNC_BROADCAST, FB_VMODE_INTERLACED }, { /* 1080pf */ "1080pf", 50, 1920, 1080, 6734, 148, 484, 36, 4, 88, 5, FB_SYNC_BROADCAST, FB_VMODE_NONINTERLACED }, In ps3fb_probe, the mode_option module parameter is used with fb_find_mode but it can only select the interlaced variant of 1920x1080 since the loop matching the modes does not take the difference between interlaced and progressive modes into account. In short, without the patch, progressive 1920x1080 cannot be chosen as a mode_option parameter since fb_find_mode (falsely) thinks interlace is a perfect match. Signed-off-by: Fredrik Noring Cc: "Maciej W. Rozycki" [b.zolnierkie: updated patch description] Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/video/fbdev/core/modedb.c | 41 ++++++++++++++++++++++--------- 1 file changed, 30 insertions(+), 11 deletions(-) diff --git a/drivers/video/fbdev/core/modedb.c b/drivers/video/fbdev/core/modedb.c index 2510fa728d771..de119f11b78f9 100644 --- a/drivers/video/fbdev/core/modedb.c +++ b/drivers/video/fbdev/core/modedb.c @@ -644,7 +644,7 @@ static int fb_try_mode(struct fb_var_screeninfo *var, struct fb_info *info, * * Valid mode specifiers for @mode_option: * - * x[M][R][-][@][i][m] or + * x[M][R][-][@][i][p][m] or * [-][@] * * with , , and decimal numbers and @@ -653,10 +653,10 @@ static int fb_try_mode(struct fb_var_screeninfo *var, struct fb_info *info, * If 'M' is present after yres (and before refresh/bpp if present), * the function will compute the timings using VESA(tm) Coordinated * Video Timings (CVT). If 'R' is present after 'M', will compute with - * reduced blanking (for flatpanels). If 'i' is present, compute - * interlaced mode. If 'm' is present, add margins equal to 1.8% - * of xres rounded down to 8 pixels, and 1.8% of yres. The char - * 'i' and 'm' must be after 'M' and 'R'. Example: + * reduced blanking (for flatpanels). If 'i' or 'p' are present, compute + * interlaced or progressive mode. If 'm' is present, add margins equal + * to 1.8% of xres rounded down to 8 pixels, and 1.8% of yres. The chars + * 'i', 'p' and 'm' must be after 'M' and 'R'. Example: * * 1024x768MR-8@60m - Reduced blank with margins at 60Hz. * @@ -697,7 +697,8 @@ int fb_find_mode(struct fb_var_screeninfo *var, unsigned int namelen = strlen(name); int res_specified = 0, bpp_specified = 0, refresh_specified = 0; unsigned int xres = 0, yres = 0, bpp = default_bpp, refresh = 0; - int yres_specified = 0, cvt = 0, rb = 0, interlace = 0; + int yres_specified = 0, cvt = 0, rb = 0; + int interlace_specified = 0, interlace = 0; int margins = 0; u32 best, diff, tdiff; @@ -748,9 +749,17 @@ int fb_find_mode(struct fb_var_screeninfo *var, if (!cvt) margins = 1; break; + case 'p': + if (!cvt) { + interlace = 0; + interlace_specified = 1; + } + break; case 'i': - if (!cvt) + if (!cvt) { interlace = 1; + interlace_specified = 1; + } break; default: goto done; @@ -819,11 +828,21 @@ done: if ((name_matches(db[i], name, namelen) || (res_specified && res_matches(db[i], xres, yres))) && !fb_try_mode(var, info, &db[i], bpp)) { - if (refresh_specified && db[i].refresh == refresh) - return 1; + const int db_interlace = (db[i].vmode & + FB_VMODE_INTERLACED ? 1 : 0); + int score = abs(db[i].refresh - refresh); + + if (interlace_specified) + score += abs(db_interlace - interlace); + + if (!interlace_specified || + db_interlace == interlace) + if (refresh_specified && + db[i].refresh == refresh) + return 1; - if (abs(db[i].refresh - refresh) < diff) { - diff = abs(db[i].refresh - refresh); + if (score < diff) { + diff = score; best = i; } } -- GitLab From 94b37e160cfeb80faccbb4f73709962e4ee169e9 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 24 Jul 2018 18:48:14 +0200 Subject: [PATCH 0377/2269] ARM: exynos: Clear global variable on init error path [ Upstream commit cd4806911cee3901bc2b5eb95603cf1958720b57 ] For most of Exynos SoCs, Power Management Unit (PMU) address space is mapped into global variable 'pmu_base_addr' very early when initializing PMU interrupt controller. A lot of other machine code depends on it so when doing iounmap() on this address, clear the global as well to avoid usage of invalid value (pointing to unmapped memory region). Properly mapped PMU address space is a requirement for all other machine code so this fix is purely theoretical. Boot will fail immediately in many other places after following this error path. Signed-off-by: Krzysztof Kozlowski Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-exynos/suspend.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/mach-exynos/suspend.c b/arch/arm/mach-exynos/suspend.c index b529ba04ed166..eafa26d9f692d 100644 --- a/arch/arm/mach-exynos/suspend.c +++ b/arch/arm/mach-exynos/suspend.c @@ -209,6 +209,7 @@ static int __init exynos_pmu_irq_init(struct device_node *node, NULL); if (!domain) { iounmap(pmu_base_addr); + pmu_base_addr = NULL; return -ENOMEM; } -- GitLab From fdfa7139899c0e1c21fb521d60d728c521bae822 Mon Sep 17 00:00:00 2001 From: Sandipan Das Date: Tue, 10 Jul 2018 19:28:13 +0530 Subject: [PATCH 0378/2269] perf powerpc: Fix callchain ip filtering [ Upstream commit c715fcfda5a08edabaa15508742be926b7ee51db ] For powerpc64, redundant entries in the callchain are filtered out by determining the state of the return address and the stack frame using DWARF debug information. For making these filtering decisions we must analyze the debug information for the location corresponding to the program counter value, i.e. the first entry in the callchain, and not the LR value; otherwise, perf may filter out either the second or the third entry in the callchain incorrectly. This can be observed on a powerpc64le system running Fedora 27 as shown below. Case 1 - Attaching a probe at inet_pton+0x8 (binary offset 0x15af28). Return address is still in LR and a new stack frame is not yet allocated. The LR value, i.e. the second entry, should not be filtered out. # objdump -d /usr/lib64/libc-2.26.so | less ... 000000000010eb10 : ... 10fa48: 78 bb e4 7e mr r4,r23 10fa4c: 0a 00 60 38 li r3,10 10fa50: d9 b4 04 48 bl 15af28 10fa54: 00 00 00 60 nop 10fa58: ac f4 ff 4b b 10ef04 ... 0000000000110450 : ... 1105a8: 54 00 ff 38 addi r7,r31,84 1105ac: 58 00 df 38 addi r6,r31,88 1105b0: 69 e5 ff 4b bl 10eb18 1105b4: 78 1b 71 7c mr r17,r3 1105b8: 50 01 7f e8 ld r3,336(r31) ... 000000000015af20 : 15af20: 0b 00 4c 3c addis r2,r12,11 15af24: e0 c1 42 38 addi r2,r2,-15904 15af28: a6 02 08 7c mflr r0 15af2c: f0 ff c1 fb std r30,-16(r1) 15af30: f8 ff e1 fb std r31,-8(r1) ... # perf probe -x /usr/lib64/libc-2.26.so -a inet_pton+0x8 # perf record -e probe_libc:inet_pton -g ping -6 -c 1 ::1 # perf script Before: ping 4507 [002] 514985.546540: probe_libc:inet_pton: (7fffa7dbaf28) 7fffa7dbaf28 __GI___inet_pton+0x8 (/usr/lib64/libc-2.26.so) 7fffa7d705b4 getaddrinfo+0x164 (/usr/lib64/libc-2.26.so) 13fb52d70 _init+0xbfc (/usr/bin/ping) 7fffa7c836a0 generic_start_main.isra.0+0x140 (/usr/lib64/libc-2.26.so) 7fffa7c83898 __libc_start_main+0xb8 (/usr/lib64/libc-2.26.so) 0 [unknown] ([unknown]) After: ping 4507 [002] 514985.546540: probe_libc:inet_pton: (7fffa7dbaf28) 7fffa7dbaf28 __GI___inet_pton+0x8 (/usr/lib64/libc-2.26.so) 7fffa7d6fa54 gaih_inet.constprop.7+0xf44 (/usr/lib64/libc-2.26.so) 7fffa7d705b4 getaddrinfo+0x164 (/usr/lib64/libc-2.26.so) 13fb52d70 _init+0xbfc (/usr/bin/ping) 7fffa7c836a0 generic_start_main.isra.0+0x140 (/usr/lib64/libc-2.26.so) 7fffa7c83898 __libc_start_main+0xb8 (/usr/lib64/libc-2.26.so) 0 [unknown] ([unknown]) Case 2 - Attaching a probe at _int_malloc+0x180 (binary offset 0x9cf10). Return address in still in LR and a new stack frame has already been allocated but not used. The caller's caller, i.e. the third entry, is invalid and should be filtered out and not the second one. # objdump -d /usr/lib64/libc-2.26.so | less ... 000000000009cd90 <_int_malloc>: 9cd90: 17 00 4c 3c addis r2,r12,23 9cd94: 70 a3 42 38 addi r2,r2,-23696 9cd98: 26 00 80 7d mfcr r12 9cd9c: f8 ff e1 fb std r31,-8(r1) 9cda0: 17 00 e4 3b addi r31,r4,23 9cda4: d8 ff 61 fb std r27,-40(r1) 9cda8: 78 23 9b 7c mr r27,r4 9cdac: 1f 00 bf 2b cmpldi cr7,r31,31 9cdb0: f0 ff c1 fb std r30,-16(r1) 9cdb4: b0 ff c1 fa std r22,-80(r1) 9cdb8: 78 1b 7e 7c mr r30,r3 9cdbc: 08 00 81 91 stw r12,8(r1) 9cdc0: 11 ff 21 f8 stdu r1,-240(r1) 9cdc4: 4c 01 9d 41 bgt cr7,9cf10 <_int_malloc+0x180> 9cdc8: 20 00 a4 2b cmpldi cr7,r4,32 ... 9cf08: 00 00 00 60 nop 9cf0c: 00 00 42 60 ori r2,r2,0 9cf10: e4 06 ff 7b rldicr r31,r31,0,59 9cf14: 40 f8 a4 7f cmpld cr7,r4,r31 9cf18: 68 05 9d 41 bgt cr7,9d480 <_int_malloc+0x6f0> ... 000000000009e3c0 : ... 9e420: 40 02 80 38 li r4,576 9e424: 78 fb e3 7f mr r3,r31 9e428: 71 e9 ff 4b bl 9cd98 <_int_malloc+0x8> 9e42c: 00 00 a3 2f cmpdi cr7,r3,0 9e430: 78 1b 7e 7c mr r30,r3 ... 000000000009f7a0 <__libc_malloc>: ... 9f8f8: 00 00 89 2f cmpwi cr7,r9,0 9f8fc: 1c ff 9e 40 bne cr7,9f818 <__libc_malloc+0x78> 9f900: c9 ea ff 4b bl 9e3c8 9f904: 00 00 00 60 nop 9f908: e8 90 22 e9 ld r9,-28440(r2) ... # perf probe -x /usr/lib64/libc-2.26.so -a _int_malloc+0x180 # perf record -e probe_libc:_int_malloc -g ./test-malloc # perf script Before: test-malloc 6554 [009] 515975.797403: probe_libc:_int_malloc: (7fffa6e6cf10) 7fffa6e6cf10 _int_malloc+0x180 (/usr/lib64/libc-2.26.so) 7fffa6dd0000 [unknown] (/usr/lib64/libc-2.26.so) 7fffa6e6f904 malloc+0x164 (/usr/lib64/libc-2.26.so) 7fffa6e6f9fc malloc+0x25c (/usr/lib64/libc-2.26.so) 100006b4 main+0x38 (/home/testuser/test-malloc) 7fffa6df36a0 generic_start_main.isra.0+0x140 (/usr/lib64/libc-2.26.so) 7fffa6df3898 __libc_start_main+0xb8 (/usr/lib64/libc-2.26.so) 0 [unknown] ([unknown]) After: test-malloc 6554 [009] 515975.797403: probe_libc:_int_malloc: (7fffa6e6cf10) 7fffa6e6cf10 _int_malloc+0x180 (/usr/lib64/libc-2.26.so) 7fffa6e6e42c tcache_init.part.4+0x6c (/usr/lib64/libc-2.26.so) 7fffa6e6f904 malloc+0x164 (/usr/lib64/libc-2.26.so) 7fffa6e6f9fc malloc+0x25c (/usr/lib64/libc-2.26.so) 100006b4 main+0x38 (/home/sandipan/test-malloc) 7fffa6df36a0 generic_start_main.isra.0+0x140 (/usr/lib64/libc-2.26.so) 7fffa6df3898 __libc_start_main+0xb8 (/usr/lib64/libc-2.26.so) 0 [unknown] ([unknown]) Signed-off-by: Sandipan Das Cc: Jiri Olsa Cc: Maynard Johnson Cc: Naveen N. Rao Cc: Ravi Bangoria Cc: Sukadev Bhattiprolu Fixes: a60335ba3298 ("perf tools powerpc: Adjust callchain based on DWARF debug info") Link: http://lkml.kernel.org/r/24bb726d91ed173aebc972ec3f41a2ef2249434e.1530724939.git.sandipan@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/perf/arch/powerpc/util/skip-callchain-idx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/arch/powerpc/util/skip-callchain-idx.c b/tools/perf/arch/powerpc/util/skip-callchain-idx.c index 9d1f6e976a5af..9a53f6e9ef43e 100644 --- a/tools/perf/arch/powerpc/util/skip-callchain-idx.c +++ b/tools/perf/arch/powerpc/util/skip-callchain-idx.c @@ -250,7 +250,7 @@ int arch_skip_callchain_idx(struct thread *thread, struct ip_callchain *chain) if (!chain || chain->nr < 3) return skip_slot; - ip = chain->ips[2]; + ip = chain->ips[1]; thread__find_addr_location(thread, PERF_RECORD_MISC_USER, MAP__FUNCTION, ip, &al); -- GitLab From 3cb3868f98f53468160eb732d30ef29aaf046ae3 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Mon, 9 Jul 2018 12:49:05 +0300 Subject: [PATCH 0379/2269] nvme-rdma: unquiesce queues when deleting the controller [ Upstream commit 90140624e8face94207003ac9a9d2a329b309d68 ] If the controller is going away, we need to unquiesce the IO queues so that all pending request can fail gracefully before moving forward with controller deletion. Do that before we destroy the IO queues so blk_cleanup_queue won't block in freeze. Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/nvme/host/rdma.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 48a831d58e7ae..9fffe41ead500 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1728,6 +1728,8 @@ static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl, bool shutdown) nvme_stop_queues(&ctrl->ctrl); blk_mq_tagset_busy_iter(&ctrl->tag_set, nvme_cancel_request, &ctrl->ctrl); + if (shutdown) + nvme_start_queues(&ctrl->ctrl); nvme_rdma_destroy_io_queues(ctrl, shutdown); } -- GitLab From 737066efec608045559ce62abda4210962526349 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 10 Jul 2018 19:01:23 +0100 Subject: [PATCH 0380/2269] KVM: arm/arm64: vgic: Fix possible spectre-v1 write in vgic_mmio_write_apr() [ Upstream commit 6b8b9a48545e08345b8ff77c9fd51b1aebdbefb3 ] It's possible for userspace to control n. Sanitize n when using it as an array index, to inhibit the potential spectre-v1 write gadget. Note that while it appears that n must be bound to the interval [0,3] due to the way it is extracted from addr, we cannot guarantee that compiler transformations (and/or future refactoring) will ensure this is the case, and given this is a slow path it's better to always perform the masking. Found by smatch. Signed-off-by: Mark Rutland Cc: Christoffer Dall Cc: Marc Zyngier Cc: kvmarm@lists.cs.columbia.edu Signed-off-by: Marc Zyngier Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- virt/kvm/arm/vgic/vgic-mmio-v2.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/virt/kvm/arm/vgic/vgic-mmio-v2.c b/virt/kvm/arm/vgic/vgic-mmio-v2.c index af003268bf3ef..7ea5928244fa3 100644 --- a/virt/kvm/arm/vgic/vgic-mmio-v2.c +++ b/virt/kvm/arm/vgic/vgic-mmio-v2.c @@ -348,6 +348,9 @@ static void vgic_mmio_write_apr(struct kvm_vcpu *vcpu, if (n > vgic_v3_max_apr_idx(vcpu)) return; + + n = array_index_nospec(n, 4); + /* GICv3 only uses ICH_AP1Rn for memory mapped (GICv2) guests */ vgicv3->vgic_ap1r[n] = val; } -- GitLab From c7afa2064c06b9520f7e3931db7456a166989caf Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 1 May 2018 00:55:44 +1000 Subject: [PATCH 0381/2269] powerpc/powernv: opal_put_chars partial write fix [ Upstream commit bd90284cc6c1c9e8e48c8eadd0c79574fcce0b81 ] The intention here is to consume and discard the remaining buffer upon error. This works if there has not been a previous partial write. If there has been, then total_len is no longer total number of bytes to copy. total_len is always "bytes left to copy", so it should be added to written bytes. This code may not be exercised any more if partial writes will not be hit, but this is a small bugfix before a larger change. Reviewed-by: Benjamin Herrenschmidt Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/platforms/powernv/opal.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index 65c79ecf5a4d6..c8a743af6bf50 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -388,7 +388,7 @@ int opal_put_chars(uint32_t vtermno, const char *data, int total_len) /* Closed or other error drop */ if (rc != OPAL_SUCCESS && rc != OPAL_BUSY && rc != OPAL_BUSY_EVENT) { - written = total_len; + written += total_len; break; } if (rc == OPAL_SUCCESS) { -- GitLab From f313b0593d4a95c701edb509fcd0db608b1ab673 Mon Sep 17 00:00:00 2001 From: Nicholas Mc Guire Date: Sat, 21 Jul 2018 15:20:28 +0200 Subject: [PATCH 0382/2269] staging: bcm2835-camera: fix timeout handling in wait_for_completion_timeout [ Upstream commit b7afce51d95726a619743aaad8870db66dfa1479 ] wait_for_completion_timeout returns unsigned long not int so a variable of proper type is introduced. Further the check for <= 0 is ambiguous and should be == 0 here indicating timeout which is the only error case so no additional check needed here. Signed-off-by: Nicholas Mc Guire Fixes: 7b3ad5abf027 ("staging: Import the BCM2835 MMAL-based V4L2 camera driver.") Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- .../staging/vc04_services/bcm2835-camera/mmal-vchiq.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/staging/vc04_services/bcm2835-camera/mmal-vchiq.c b/drivers/staging/vc04_services/bcm2835-camera/mmal-vchiq.c index 4360db6d43927..b3176f42c8207 100644 --- a/drivers/staging/vc04_services/bcm2835-camera/mmal-vchiq.c +++ b/drivers/staging/vc04_services/bcm2835-camera/mmal-vchiq.c @@ -834,6 +834,7 @@ static int send_synchronous_mmal_msg(struct vchiq_mmal_instance *instance, { struct mmal_msg_context *msg_context; int ret; + unsigned long timeout; /* payload size must not cause message to exceed max size */ if (payload_len > @@ -872,11 +873,11 @@ static int send_synchronous_mmal_msg(struct vchiq_mmal_instance *instance, return ret; } - ret = wait_for_completion_timeout(&msg_context->u.sync.cmplt, 3 * HZ); - if (ret <= 0) { - pr_err("error %d waiting for sync completion\n", ret); - if (ret == 0) - ret = -ETIME; + timeout = wait_for_completion_timeout(&msg_context->u.sync.cmplt, + 3 * HZ); + if (timeout == 0) { + pr_err("timed out waiting for sync completion\n"); + ret = -ETIME; /* todo: what happens if the message arrives after aborting */ release_msg_context(msg_context); return ret; -- GitLab From f80c5cf3de84c08803975e9e1846e414f3fedd4f Mon Sep 17 00:00:00 2001 From: Nicholas Mc Guire Date: Sat, 21 Jul 2018 13:31:24 +0200 Subject: [PATCH 0383/2269] staging: bcm2835-camera: handle wait_for_completion_timeout return properly [ Upstream commit 5b70084f6cbcd53f615433f9d216e01bd71de0bb ] wait_for_completion_timeout returns unsigned long not int so a variable of proper type is introduced. Further the check for <= 0 is ambiguous and should be == 0 here indicating timeout. Signed-off-by: Nicholas Mc Guire Fixes: 7b3ad5abf027 ("staging: Import the BCM2835 MMAL-based V4L2 camera driver.") Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- .../staging/vc04_services/bcm2835-camera/bcm2835-camera.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/staging/vc04_services/bcm2835-camera/bcm2835-camera.c b/drivers/staging/vc04_services/bcm2835-camera/bcm2835-camera.c index be936b8fe3175..377da037f31c3 100644 --- a/drivers/staging/vc04_services/bcm2835-camera/bcm2835-camera.c +++ b/drivers/staging/vc04_services/bcm2835-camera/bcm2835-camera.c @@ -580,6 +580,7 @@ static int start_streaming(struct vb2_queue *vq, unsigned int count) static void stop_streaming(struct vb2_queue *vq) { int ret; + unsigned long timeout; struct bm2835_mmal_dev *dev = vb2_get_drv_priv(vq); v4l2_dbg(1, bcm2835_v4l2_debug, &dev->v4l2_dev, "%s: dev:%p\n", @@ -605,10 +606,10 @@ static void stop_streaming(struct vb2_queue *vq) sizeof(dev->capture.frame_count)); /* wait for last frame to complete */ - ret = wait_for_completion_timeout(&dev->capture.frame_cmplt, HZ); - if (ret <= 0) + timeout = wait_for_completion_timeout(&dev->capture.frame_cmplt, HZ); + if (timeout == 0) v4l2_err(&dev->v4l2_dev, - "error %d waiting for frame completion\n", ret); + "timed out waiting for frame completion\n"); v4l2_dbg(1, bcm2835_v4l2_debug, &dev->v4l2_dev, "disabling connection\n"); -- GitLab From c27516e62712ed2b8453fb61f17bf537e7c302ee Mon Sep 17 00:00:00 2001 From: Oder Chiou Date: Tue, 24 Jul 2018 15:49:23 +0800 Subject: [PATCH 0384/2269] ASoC: rt5514: Fix the issue of the delay volume applied [ Upstream commit d96f8bd28cd0bae3e6702ae90df593628ef6906f ] The patch fixes the issue of the delay volume applied. Signed-off-by: Oder Chiou Signed-off-by: Mark Brown Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- sound/soc/codecs/rt5514.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sound/soc/codecs/rt5514.c b/sound/soc/codecs/rt5514.c index e52e68b562382..1bfc8db1826a1 100644 --- a/sound/soc/codecs/rt5514.c +++ b/sound/soc/codecs/rt5514.c @@ -64,8 +64,8 @@ static const struct reg_sequence rt5514_patch[] = { {RT5514_ANA_CTRL_LDO10, 0x00028604}, {RT5514_ANA_CTRL_ADCFED, 0x00000800}, {RT5514_ASRC_IN_CTRL1, 0x00000003}, - {RT5514_DOWNFILTER0_CTRL3, 0x10000362}, - {RT5514_DOWNFILTER1_CTRL3, 0x10000362}, + {RT5514_DOWNFILTER0_CTRL3, 0x10000352}, + {RT5514_DOWNFILTER1_CTRL3, 0x10000352}, }; static const struct reg_default rt5514_reg[] = { @@ -92,10 +92,10 @@ static const struct reg_default rt5514_reg[] = { {RT5514_ASRC_IN_CTRL1, 0x00000003}, {RT5514_DOWNFILTER0_CTRL1, 0x00020c2f}, {RT5514_DOWNFILTER0_CTRL2, 0x00020c2f}, - {RT5514_DOWNFILTER0_CTRL3, 0x10000362}, + {RT5514_DOWNFILTER0_CTRL3, 0x10000352}, {RT5514_DOWNFILTER1_CTRL1, 0x00020c2f}, {RT5514_DOWNFILTER1_CTRL2, 0x00020c2f}, - {RT5514_DOWNFILTER1_CTRL3, 0x10000362}, + {RT5514_DOWNFILTER1_CTRL3, 0x10000352}, {RT5514_ANA_CTRL_LDO10, 0x00028604}, {RT5514_ANA_CTRL_LDO18_16, 0x02000345}, {RT5514_ANA_CTRL_ADC12, 0x0000a2a8}, -- GitLab From 68c087ecddbc2ebab02287de5b9c2f68bbeddc57 Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Sun, 8 Jul 2018 17:07:12 +0200 Subject: [PATCH 0385/2269] MIPS: jz4740: Bump zload address [ Upstream commit c6ea7e9747318e5a6774995f4f8e3e0f7c0fa8ba ] Having the zload address at 0x8060.0000 means the size of the uncompressed kernel cannot be bigger than around 6 MiB, as it is deflated at address 0x8001.0000. This limit is too small; a kernel with some built-in drivers and things like debugfs enabled will already be over 6 MiB in size, and so will fail to extract properly. To fix this, we bump the zload address from 0x8060.0000 to 0x8100.0000. This is fine, as all the boards featuring Ingenic JZ SoCs have at least 32 MiB of RAM, and use u-boot or compatible bootloaders which won't hardcode the load address but read it from the uImage's header. Signed-off-by: Paul Cercueil Signed-off-by: Paul Burton Patchwork: https://patchwork.linux-mips.org/patch/19787/ Cc: Ralf Baechle Cc: James Hogan Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/mips/jz4740/Platform | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/jz4740/Platform b/arch/mips/jz4740/Platform index 28448d358c10d..a2a5a85ea1f93 100644 --- a/arch/mips/jz4740/Platform +++ b/arch/mips/jz4740/Platform @@ -1,4 +1,4 @@ platform-$(CONFIG_MACH_INGENIC) += jz4740/ cflags-$(CONFIG_MACH_INGENIC) += -I$(srctree)/arch/mips/include/asm/mach-jz4740 load-$(CONFIG_MACH_INGENIC) += 0xffffffff80010000 -zload-$(CONFIG_MACH_INGENIC) += 0xffffffff80600000 +zload-$(CONFIG_MACH_INGENIC) += 0xffffffff81000000 -- GitLab From 2b7844ed3c7a6939090260eec43309035ed44a57 Mon Sep 17 00:00:00 2001 From: Manikanta Pubbisetty Date: Tue, 10 Jul 2018 16:48:27 +0530 Subject: [PATCH 0386/2269] mac80211: restrict delayed tailroom needed decrement [ Upstream commit 133bf90dbb8b873286f8ec2e81ba26e863114b8c ] As explained in ieee80211_delayed_tailroom_dec(), during roam, keys of the old AP will be destroyed and new keys will be installed. Deletion of the old key causes crypto_tx_tailroom_needed_cnt to go from 1 to 0 and the new key installation causes a transition from 0 to 1. Whenever crypto_tx_tailroom_needed_cnt transitions from 0 to 1, we invoke synchronize_net(); the reason for doing this is to avoid a race in the TX path as explained in increment_tailroom_need_count(). This synchronize_net() operation can be slow and can affect the station roam time. To avoid this, decrementing the crypto_tx_tailroom_needed_cnt is delayed for a while so that upon installation of new key the transition would be from 1 to 2 instead of 0 to 1 and thereby improving the roam time. This is all correct for a STA iftype, but deferring the tailroom_needed decrement for other iftypes may be unnecessary. For example, let's consider the case of a 4-addr client connecting to an AP for which AP_VLAN interface is also created, let the initial value for tailroom_needed on the AP be 1. * 4-addr client connects to the AP (AP: tailroom_needed = 1) * AP will clear old keys, delay decrement of tailroom_needed count * AP_VLAN is created, it takes the tailroom count from master (AP_VLAN: tailroom_needed = 1, AP: tailroom_needed = 1) * Install new key for the station, assume key is plumbed in the HW, there won't be any change in tailroom_needed count on AP iface * Delayed decrement of tailroom_needed count on AP (AP: tailroom_needed = 0, AP_VLAN: tailroom_needed = 1) Because of the delayed decrement on AP iface, tailroom_needed count goes out of sync between AP(master iface) and AP_VLAN(slave iface) and there would be unnecessary tailroom created for the packets going through AP_VLAN iface. Also, WARN_ONs were observed while trying to bring down the AP_VLAN interface: (warn_slowpath_common) (warn_slowpath_null+0x18/0x20) (warn_slowpath_null) (ieee80211_free_keys+0x114/0x1e4) (ieee80211_free_keys) (ieee80211_del_virtual_monitor+0x51c/0x850) (ieee80211_del_virtual_monitor) (ieee80211_stop+0x30/0x3c) (ieee80211_stop) (__dev_close_many+0x94/0xb8) (__dev_close_many) (dev_close_many+0x5c/0xc8) Restricting delayed decrement to station interface alone fixes the problem and it makes sense to do so because delayed decrement is done to improve roam time which is applicable only for client devices. Signed-off-by: Manikanta Pubbisetty Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/mac80211/cfg.c | 2 +- net/mac80211/key.c | 24 +++++++++++++++--------- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 288640471c2fa..b456b882a6ea5 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -494,7 +494,7 @@ static int ieee80211_del_key(struct wiphy *wiphy, struct net_device *dev, goto out_unlock; } - ieee80211_key_free(key, true); + ieee80211_key_free(key, sdata->vif.type == NL80211_IFTYPE_STATION); ret = 0; out_unlock: diff --git a/net/mac80211/key.c b/net/mac80211/key.c index 938049395f907..fa10c61422440 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -649,11 +649,15 @@ int ieee80211_key_link(struct ieee80211_key *key, { struct ieee80211_local *local = sdata->local; struct ieee80211_key *old_key; - int idx, ret; - bool pairwise; - - pairwise = key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE; - idx = key->conf.keyidx; + int idx = key->conf.keyidx; + bool pairwise = key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE; + /* + * We want to delay tailroom updates only for station - in that + * case it helps roaming speed, but in other cases it hurts and + * can cause warnings to appear. + */ + bool delay_tailroom = sdata->vif.type == NL80211_IFTYPE_STATION; + int ret; mutex_lock(&sdata->local->key_mtx); @@ -681,14 +685,14 @@ int ieee80211_key_link(struct ieee80211_key *key, increment_tailroom_need_count(sdata); ieee80211_key_replace(sdata, sta, pairwise, old_key, key); - ieee80211_key_destroy(old_key, true); + ieee80211_key_destroy(old_key, delay_tailroom); ieee80211_debugfs_key_add(key); if (!local->wowlan) { ret = ieee80211_key_enable_hw_accel(key); if (ret) - ieee80211_key_free(key, true); + ieee80211_key_free(key, delay_tailroom); } else { ret = 0; } @@ -923,7 +927,8 @@ void ieee80211_free_sta_keys(struct ieee80211_local *local, ieee80211_key_replace(key->sdata, key->sta, key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE, key, NULL); - __ieee80211_key_destroy(key, true); + __ieee80211_key_destroy(key, key->sdata->vif.type == + NL80211_IFTYPE_STATION); } for (i = 0; i < NUM_DEFAULT_KEYS; i++) { @@ -933,7 +938,8 @@ void ieee80211_free_sta_keys(struct ieee80211_local *local, ieee80211_key_replace(key->sdata, key->sta, key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE, key, NULL); - __ieee80211_key_destroy(key, true); + __ieee80211_key_destroy(key, key->sdata->vif.type == + NL80211_IFTYPE_STATION); } mutex_unlock(&local->key_mtx); -- GitLab From bff663136d76ca017acc3fec1e2c108751429ffd Mon Sep 17 00:00:00 2001 From: Piotr Sawicki Date: Thu, 19 Jul 2018 11:42:58 +0200 Subject: [PATCH 0387/2269] Smack: Fix handling of IPv4 traffic received by PF_INET6 sockets [ Upstream commit 129a99890936766f4b69b9da7ed88366313a9210 ] A socket which has sk_family set to PF_INET6 is able to receive not only IPv6 but also IPv4 traffic (IPv4-mapped IPv6 addresses). Prior to this patch, the smk_skb_to_addr_ipv6() could have been called for socket buffers containing IPv4 packets, in result such traffic was allowed. Signed-off-by: Piotr Sawicki Signed-off-by: Casey Schaufler Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- security/smack/smack_lsm.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index fdf01bfd1b07b..c8fd5c10b7c67 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -3960,15 +3960,19 @@ static int smack_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb) struct smack_known *skp = NULL; int rc = 0; struct smk_audit_info ad; + u16 family = sk->sk_family; #ifdef CONFIG_AUDIT struct lsm_network_audit net; #endif #if IS_ENABLED(CONFIG_IPV6) struct sockaddr_in6 sadd; int proto; + + if (family == PF_INET6 && skb->protocol == htons(ETH_P_IP)) + family = PF_INET; #endif /* CONFIG_IPV6 */ - switch (sk->sk_family) { + switch (family) { case PF_INET: #ifdef CONFIG_SECURITY_SMACK_NETFILTER /* @@ -3986,7 +3990,7 @@ static int smack_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb) */ netlbl_secattr_init(&secattr); - rc = netlbl_skbuff_getattr(skb, sk->sk_family, &secattr); + rc = netlbl_skbuff_getattr(skb, family, &secattr); if (rc == 0) skp = smack_from_secattr(&secattr, ssp); else @@ -3999,7 +4003,7 @@ access_check: #endif #ifdef CONFIG_AUDIT smk_ad_init_net(&ad, __func__, LSM_AUDIT_DATA_NET, &net); - ad.a.u.net->family = sk->sk_family; + ad.a.u.net->family = family; ad.a.u.net->netif = skb->skb_iif; ipv4_skb_to_auditdata(skb, &ad.a, NULL); #endif @@ -4013,7 +4017,7 @@ access_check: rc = smk_bu_note("IPv4 delivery", skp, ssp->smk_in, MAY_WRITE, rc); if (rc != 0) - netlbl_skbuff_err(skb, sk->sk_family, rc, 0); + netlbl_skbuff_err(skb, family, rc, 0); break; #if IS_ENABLED(CONFIG_IPV6) case PF_INET6: @@ -4029,7 +4033,7 @@ access_check: skp = smack_net_ambient; #ifdef CONFIG_AUDIT smk_ad_init_net(&ad, __func__, LSM_AUDIT_DATA_NET, &net); - ad.a.u.net->family = sk->sk_family; + ad.a.u.net->family = family; ad.a.u.net->netif = skb->skb_iif; ipv6_skb_to_auditdata(skb, &ad.a, NULL); #endif /* CONFIG_AUDIT */ -- GitLab From 991bad26b38fbb1484c8dfe825c238bb182a208f Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Mon, 23 Jul 2018 22:12:33 +0800 Subject: [PATCH 0388/2269] wan/fsl_ucc_hdlc: use IS_ERR_VALUE() to check return value of qe_muram_alloc [ Upstream commit fd800f646402c0f85547166b59ca065175928b7b ] qe_muram_alloc return a unsigned long integer,which should not compared with zero. check it using IS_ERR_VALUE() to fix this. Fixes: c19b6d246a35 ("drivers/net: support hdlc function for QE-UCC") Signed-off-by: YueHaibing Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/wan/fsl_ucc_hdlc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/wan/fsl_ucc_hdlc.c b/drivers/net/wan/fsl_ucc_hdlc.c index 33df76405b869..18b648648adb2 100644 --- a/drivers/net/wan/fsl_ucc_hdlc.c +++ b/drivers/net/wan/fsl_ucc_hdlc.c @@ -192,7 +192,7 @@ static int uhdlc_init(struct ucc_hdlc_private *priv) priv->ucc_pram_offset = qe_muram_alloc(sizeof(struct ucc_hdlc_param), ALIGNMENT_OF_UCC_HDLC_PRAM); - if (priv->ucc_pram_offset < 0) { + if (IS_ERR_VALUE(priv->ucc_pram_offset)) { dev_err(priv->dev, "Can not allocate MURAM for hdlc parameter.\n"); ret = -ENOMEM; goto free_tx_bd; @@ -228,14 +228,14 @@ static int uhdlc_init(struct ucc_hdlc_private *priv) /* Alloc riptr, tiptr */ riptr = qe_muram_alloc(32, 32); - if (riptr < 0) { + if (IS_ERR_VALUE(riptr)) { dev_err(priv->dev, "Cannot allocate MURAM mem for Receive internal temp data pointer\n"); ret = -ENOMEM; goto free_tx_skbuff; } tiptr = qe_muram_alloc(32, 32); - if (tiptr < 0) { + if (IS_ERR_VALUE(tiptr)) { dev_err(priv->dev, "Cannot allocate MURAM mem for Transmit internal temp data pointer\n"); ret = -ENOMEM; goto free_riptr; -- GitLab From c1e2aee9952b8fa32606595d0ed1c05fd3f0086f Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 10 Jul 2018 19:01:22 +0100 Subject: [PATCH 0389/2269] arm64: fix possible spectre-v1 write in ptrace_hbp_set_event() [ Upstream commit 14d6e289a89780377f8bb09de8926d3c62d763cd ] It's possible for userspace to control idx. Sanitize idx when using it as an array index, to inhibit the potential spectre-v1 write gadget. Found by smatch. Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: Will Deacon Signed-off-by: Will Deacon Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/ptrace.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index edaf346d13d5f..34d915b6974be 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -274,19 +274,22 @@ static int ptrace_hbp_set_event(unsigned int note_type, switch (note_type) { case NT_ARM_HW_BREAK: - if (idx < ARM_MAX_BRP) { - tsk->thread.debug.hbp_break[idx] = bp; - err = 0; - } + if (idx >= ARM_MAX_BRP) + goto out; + idx = array_index_nospec(idx, ARM_MAX_BRP); + tsk->thread.debug.hbp_break[idx] = bp; + err = 0; break; case NT_ARM_HW_WATCH: - if (idx < ARM_MAX_WRP) { - tsk->thread.debug.hbp_watch[idx] = bp; - err = 0; - } + if (idx >= ARM_MAX_WRP) + goto out; + idx = array_index_nospec(idx, ARM_MAX_WRP); + tsk->thread.debug.hbp_watch[idx] = bp; + err = 0; break; } +out: return err; } -- GitLab From 27adb89d1fd44f5f4d625a1a6679d835b3cd677e Mon Sep 17 00:00:00 2001 From: Leonard Crestez Date: Fri, 20 Jul 2018 15:47:43 +0300 Subject: [PATCH 0390/2269] reset: imx7: Fix always writing bits as 0 [ Upstream commit 26fce0557fa639fb7bbc33e31a57cff7df25c3a0 ] Right now the only user of reset-imx7 is pci-imx6 and the reset_control_assert and deassert calls on pciephy_reset don't toggle the PCIEPHY_BTN and PCIEPHY_G_RST bits as expected. Fix this by writing 1 or 0 respectively. The reference manual is not very clear regarding SRC_PCIEPHY_RCR but for other registers like MIPIPHY and HSICPHY the bits are explicitly documented as "1 means assert, 0 means deassert". The values are still reversed for IMX7_RESET_PCIE_CTRL_APPS_EN. Signed-off-by: Leonard Crestez Reviewed-by: Lucas Stach Signed-off-by: Philipp Zabel Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/reset/reset-imx7.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/reset/reset-imx7.c b/drivers/reset/reset-imx7.c index 4db177bc89bc4..fdeac19464294 100644 --- a/drivers/reset/reset-imx7.c +++ b/drivers/reset/reset-imx7.c @@ -80,7 +80,7 @@ static int imx7_reset_set(struct reset_controller_dev *rcdev, { struct imx7_src *imx7src = to_imx7_src(rcdev); const struct imx7_src_signal *signal = &imx7_src_signals[id]; - unsigned int value = 0; + unsigned int value = assert ? signal->bit : 0; switch (id) { case IMX7_RESET_PCIEPHY: -- GitLab From d08c50e8530bbcdbc8479d07b3668341d5ce5dfc Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 23 Jul 2018 10:57:30 +0900 Subject: [PATCH 0391/2269] efi/arm: preserve early mapping of UEFI memory map longer for BGRT [ Upstream commit 3ea86495aef2f6de26b7cb1599ba350dd6a0c521 ] The BGRT code validates the contents of the table against the UEFI memory map, and so it expects it to be mapped when the code runs. On ARM, this is currently not the case, since we tear down the early mapping after efi_init() completes, and only create the permanent mapping in arm_enable_runtime_services(), which executes as an early initcall, but still leaves a window where the UEFI memory map is not mapped. So move the call to efi_memmap_unmap() from efi_init() to arm_enable_runtime_services(). Signed-off-by: Ard Biesheuvel [will: fold in EFI_MEMMAP attribute check from Ard] Signed-off-by: Will Deacon Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/efi/arm-init.c | 1 - drivers/firmware/efi/arm-runtime.c | 4 +++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/firmware/efi/arm-init.c b/drivers/firmware/efi/arm-init.c index 80d1a885def5c..a7c522eac640f 100644 --- a/drivers/firmware/efi/arm-init.c +++ b/drivers/firmware/efi/arm-init.c @@ -259,7 +259,6 @@ void __init efi_init(void) reserve_regions(); efi_esrt_init(); - efi_memmap_unmap(); memblock_reserve(params.mmap & PAGE_MASK, PAGE_ALIGN(params.mmap_size + diff --git a/drivers/firmware/efi/arm-runtime.c b/drivers/firmware/efi/arm-runtime.c index 86a1ad17a32e2..8995a48bd0676 100644 --- a/drivers/firmware/efi/arm-runtime.c +++ b/drivers/firmware/efi/arm-runtime.c @@ -122,11 +122,13 @@ static int __init arm_enable_runtime_services(void) { u64 mapsize; - if (!efi_enabled(EFI_BOOT)) { + if (!efi_enabled(EFI_BOOT) || !efi_enabled(EFI_MEMMAP)) { pr_info("EFI services will not be available.\n"); return 0; } + efi_memmap_unmap(); + if (efi_runtime_disabled()) { pr_info("EFI runtime services will be disabled.\n"); return 0; -- GitLab From 69b400c1b15b6962ef52212d05e289b484c0decc Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 20 Jul 2018 21:14:39 -0700 Subject: [PATCH 0392/2269] nfp: avoid buffer leak when FW communication fails [ Upstream commit 07300f774fec9519663a597987a4083225588be4 ] After device is stopped we reset the rings by moving all free buffers to positions [0, cnt - 2], and clear the position cnt - 1 in the ring. We then proceed to clear the read/write pointers. This means that if we try to reset the ring again the code will assume that the next to fill buffer is at position 0 and swap it with cnt - 1. Since we previously cleared position cnt - 1 it will lead to leaking the first buffer and leaving ring in a bad state. This scenario can only happen if FW communication fails, in which case the ring will never be used again, so the fact it's in a bad state will not be noticed. Buffer leak is the only problem. Don't try to move buffers in the ring if the read/write pointers indicate the ring was never used or have already been reset. nfp_net_clear_config_and_disable() is now fully idempotent. Found by code inspection, FW communication failures are very rare, and reconfiguring a live device is not common either, so it's unlikely anyone has ever noticed the leak. Signed-off-by: Jakub Kicinski Reviewed-by: Dirk van der Merwe Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/netronome/nfp/nfp_net_common.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index b482a8fb0e927..56751990bceef 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -1087,7 +1087,7 @@ static bool nfp_net_xdp_complete(struct nfp_net_tx_ring *tx_ring) * @dp: NFP Net data path struct * @tx_ring: TX ring structure * - * Assumes that the device is stopped + * Assumes that the device is stopped, must be idempotent. */ static void nfp_net_tx_ring_reset(struct nfp_net_dp *dp, struct nfp_net_tx_ring *tx_ring) @@ -1289,13 +1289,18 @@ static void nfp_net_rx_give_one(const struct nfp_net_dp *dp, * nfp_net_rx_ring_reset() - Reflect in SW state of freelist after disable * @rx_ring: RX ring structure * - * Warning: Do *not* call if ring buffers were never put on the FW freelist - * (i.e. device was not enabled)! + * Assumes that the device is stopped, must be idempotent. */ static void nfp_net_rx_ring_reset(struct nfp_net_rx_ring *rx_ring) { unsigned int wr_idx, last_idx; + /* wr_p == rd_p means ring was never fed FL bufs. RX rings are always + * kept at cnt - 1 FL bufs. + */ + if (rx_ring->wr_p == 0 && rx_ring->rd_p == 0) + return; + /* Move the empty entry to the end of the list */ wr_idx = D_IDX(rx_ring, rx_ring->wr_p); last_idx = rx_ring->cnt - 1; @@ -2505,6 +2510,8 @@ static void nfp_net_vec_clear_ring_data(struct nfp_net *nn, unsigned int idx) /** * nfp_net_clear_config_and_disable() - Clear control BAR and disable NFP * @nn: NFP Net device to reconfigure + * + * Warning: must be fully idempotent. */ static void nfp_net_clear_config_and_disable(struct nfp_net *nn) { -- GitLab From 45ac2120e7c412a26afb1f2cbda1dd3b8390df21 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Fri, 20 Jul 2018 18:33:59 +0200 Subject: [PATCH 0393/2269] xen-netfront: fix queue name setting [ Upstream commit 2d408c0d4574b01b9ed45e02516888bf925e11a9 ] Commit f599c64fdf7d ("xen-netfront: Fix race between device setup and open") changed the initialization order: xennet_create_queues() now happens before we do register_netdev() so using netdev->name in xennet_init_queue() is incorrect, we end up with the following in /proc/interrupts: 60: 139 0 xen-dyn -event eth%d-q0-tx 61: 265 0 xen-dyn -event eth%d-q0-rx 62: 234 0 xen-dyn -event eth%d-q1-tx 63: 1 0 xen-dyn -event eth%d-q1-rx and this looks ugly. Actually, using early netdev name (even when it's already set) is also not ideal: nowadays we tend to rename eth devices and queue name may end up not corresponding to the netdev name. Use nodename from xenbus device for queue naming: this can't change in VM's lifetime. Now /proc/interrupts looks like 62: 202 0 xen-dyn -event device/vif/0-q0-tx 63: 317 0 xen-dyn -event device/vif/0-q0-rx 64: 262 0 xen-dyn -event device/vif/0-q1-tx 65: 17 0 xen-dyn -event device/vif/0-q1-rx Fixes: f599c64fdf7d ("xen-netfront: Fix race between device setup and open") Signed-off-by: Vitaly Kuznetsov Reviewed-by: Ross Lagerwall Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/xen-netfront.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index d5e790dd589a2..00c214348e691 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -1611,7 +1611,7 @@ static int xennet_init_queue(struct netfront_queue *queue) (unsigned long)queue); snprintf(queue->name, sizeof(queue->name), "%s-q%u", - queue->info->netdev->name, queue->id); + queue->info->xbdev->nodename, queue->id); /* Initialise tx_skbs as a free chain containing every entry. */ queue->tx_skb_freelist = 0; -- GitLab From 2e0a4d3f7c54beb48f7a5ea132c5b7246219296d Mon Sep 17 00:00:00 2001 From: Loic Poulain Date: Wed, 11 Jul 2018 14:18:23 +0200 Subject: [PATCH 0394/2269] arm64: dts: qcom: db410c: Fix Bluetooth LED trigger [ Upstream commit e53db018315b7660bb7000a29e79faff2496c2c2 ] Current LED trigger, 'bt', is not known/used by any existing driver. Fix this by renaming it to 'bluetooth-power' trigger which is controlled by the Bluetooth subsystem. Fixes: 9943230c8860 ("arm64: dts: qcom: Add apq8016-sbc board LED's related device nodes") Signed-off-by: Loic Poulain Signed-off-by: Andy Gross Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/qcom/apq8016-sbc.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/qcom/apq8016-sbc.dtsi b/arch/arm64/boot/dts/qcom/apq8016-sbc.dtsi index 1d63e6b879de7..b6b44fdf7face 100644 --- a/arch/arm64/boot/dts/qcom/apq8016-sbc.dtsi +++ b/arch/arm64/boot/dts/qcom/apq8016-sbc.dtsi @@ -187,7 +187,7 @@ led@6 { label = "apq8016-sbc:blue:bt"; gpios = <&pm8916_mpps 3 GPIO_ACTIVE_HIGH>; - linux,default-trigger = "bt"; + linux,default-trigger = "bluetooth-power"; default-state = "off"; }; }; -- GitLab From a7909d3fabd3d2fe2227c1a6fdc9f99b97481a34 Mon Sep 17 00:00:00 2001 From: Bhushan Shah Date: Mon, 9 Jul 2018 14:46:28 +0530 Subject: [PATCH 0395/2269] ARM: dts: qcom: msm8974-hammerhead: increase load on l20 for sdhci [ Upstream commit 03864e57770a9541e7ff3990bacf2d9a2fffcd5d ] The kernel would not boot on the hammerhead hardware due to the following error: mmc0: Timeout waiting for hardware interrupt. mmc0: sdhci: ============ SDHCI REGISTER DUMP =========== mmc0: sdhci: Sys addr: 0x00000200 | Version: 0x00003802 mmc0: sdhci: Blk size: 0x00000200 | Blk cnt: 0x00000200 mmc0: sdhci: Argument: 0x00000000 | Trn mode: 0x00000023 mmc0: sdhci: Present: 0x03e80000 | Host ctl: 0x00000034 mmc0: sdhci: Power: 0x00000001 | Blk gap: 0x00000000 mmc0: sdhci: Wake-up: 0x00000000 | Clock: 0x00000007 mmc0: sdhci: Timeout: 0x0000000e | Int stat: 0x00000000 mmc0: sdhci: Int enab: 0x02ff900b | Sig enab: 0x02ff100b mmc0: sdhci: AC12 err: 0x00000000 | Slot int: 0x00000000 mmc0: sdhci: Caps: 0x642dc8b2 | Caps_1: 0x00008007 mmc0: sdhci: Cmd: 0x00000c1b | Max curr: 0x00000000 mmc0: sdhci: Resp[0]: 0x00000c00 | Resp[1]: 0x00000000 mmc0: sdhci: Resp[2]: 0x00000000 | Resp[3]: 0x00000000 mmc0: sdhci: Host ctl2: 0x00000008 mmc0: sdhci: ADMA Err: 0x00000000 | ADMA Ptr: 0x70040220 mmc0: sdhci: ============================================ mmc0: Card stuck in wrong state! mmcblk0 card_busy_detect status: 0xe00 mmc0: cache flush error -110 mmc0: Reset 0x1 never completed. This patch increases the load on l20 to 0.2 amps for the sdhci and allows the device to boot normally. Signed-off-by: Bhushan Shah Signed-off-by: Brian Masney Suggested-by: Bjorn Andersson Tested-by: Brian Masney Signed-off-by: Andy Gross Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/qcom-msm8974-lge-nexus5-hammerhead.dts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/boot/dts/qcom-msm8974-lge-nexus5-hammerhead.dts b/arch/arm/boot/dts/qcom-msm8974-lge-nexus5-hammerhead.dts index 4dc0b347b1eed..c2dc9d09484ab 100644 --- a/arch/arm/boot/dts/qcom-msm8974-lge-nexus5-hammerhead.dts +++ b/arch/arm/boot/dts/qcom-msm8974-lge-nexus5-hammerhead.dts @@ -189,6 +189,8 @@ regulator-max-microvolt = <2950000>; regulator-boot-on; + regulator-system-load = <200000>; + regulator-allow-set-load; }; l21 { -- GitLab From 6530985bcf82c68f22c77d79febd736896e7a5c1 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Thu, 19 Jul 2018 12:43:48 +0200 Subject: [PATCH 0396/2269] s390/qeth: fix race in used-buffer accounting [ Upstream commit a702349a4099cd5a7bab0904689d8e0bf8dcd622 ] By updating q->used_buffers only _after_ do_QDIO() has completed, there is a potential race against the buffer's TX completion. In the unlikely case that the TX completion path wins, qeth_qdio_output_handler() would decrement the counter before qeth_flush_buffers() even incremented it. Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/s390/net/qeth_core_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 939b5b5e97efe..0a6afd4b283d4 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -3507,13 +3507,14 @@ static void qeth_flush_buffers(struct qeth_qdio_out_q *queue, int index, qdio_flags = QDIO_FLAG_SYNC_OUTPUT; if (atomic_read(&queue->set_pci_flags_count)) qdio_flags |= QDIO_FLAG_PCI_OUT; + atomic_add(count, &queue->used_buffers); + rc = do_QDIO(CARD_DDEV(queue->card), qdio_flags, queue->queue_no, index, count); if (queue->card->options.performance_stats) queue->card->perf_stats.outbound_do_qdio_time += qeth_get_micros() - queue->card->perf_stats.outbound_do_qdio_start_time; - atomic_add(count, &queue->used_buffers); if (rc) { queue->card->stats.tx_errors += count; /* ignore temporary SIGA errors without busy condition */ -- GitLab From 774367e940fcf1af3ec710d6a1b966bb8d94035f Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Thu, 19 Jul 2018 12:43:49 +0200 Subject: [PATCH 0397/2269] s390/qeth: reset layer2 attribute on layer switch [ Upstream commit 70551dc46ffa3555a0b5f3545b0cd87ab67fd002 ] After the subdriver's remove() routine has completed, the card's layer mode is undetermined again. Reflect this in the layer2 field. If qeth_dev_layer2_store() hits an error after remove() was called, the card _always_ requires a setup(), even if the previous layer mode is requested again. But qeth_dev_layer2_store() bails out early if the requested layer mode still matches the current one. So unless we reset the layer2 field, re-probing the card back to its previous mode is currently not possible. Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/s390/net/qeth_core_sys.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/s390/net/qeth_core_sys.c b/drivers/s390/net/qeth_core_sys.c index d1ee9e30c68be..21e91fbb28605 100644 --- a/drivers/s390/net/qeth_core_sys.c +++ b/drivers/s390/net/qeth_core_sys.c @@ -423,6 +423,7 @@ static ssize_t qeth_dev_layer2_store(struct device *dev, if (card->discipline) { card->discipline->remove(card->gdev); qeth_core_free_discipline(card); + card->options.layer2 = -1; } rc = qeth_core_load_discipline(card, newdis); -- GitLab From 9af3a46ec7def24d2df480cbf323036d4914ab12 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 6 Jul 2018 20:53:09 -0700 Subject: [PATCH 0398/2269] platform/x86: toshiba_acpi: Fix defined but not used build warnings [ Upstream commit c2e2a618eb7104e18fdcf739d4d911563812a81c ] Fix a build warning in toshiba_acpi.c when CONFIG_PROC_FS is not enabled by marking the unused function as __maybe_unused. ../drivers/platform/x86/toshiba_acpi.c:1685:12: warning: 'version_proc_show' defined but not used [-Wunused-function] Signed-off-by: Randy Dunlap Cc: Azael Avalos Cc: platform-driver-x86@vger.kernel.org Cc: Andy Shevchenko Signed-off-by: Darren Hart (VMware) Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/platform/x86/toshiba_acpi.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/platform/x86/toshiba_acpi.c b/drivers/platform/x86/toshiba_acpi.c index bb1dcd7fbdeb8..8221e000c8c23 100644 --- a/drivers/platform/x86/toshiba_acpi.c +++ b/drivers/platform/x86/toshiba_acpi.c @@ -34,6 +34,7 @@ #define TOSHIBA_ACPI_VERSION "0.24" #define PROC_INTERFACE_VERSION 1 +#include #include #include #include @@ -1682,7 +1683,7 @@ static const struct file_operations keys_proc_fops = { .write = keys_proc_write, }; -static int version_proc_show(struct seq_file *m, void *v) +static int __maybe_unused version_proc_show(struct seq_file *m, void *v) { seq_printf(m, "driver: %s\n", TOSHIBA_ACPI_VERSION); seq_printf(m, "proc_interface: %d\n", PROC_INTERFACE_VERSION); -- GitLab From f3662e33251057a00354a48df6e7b0fadba7fb34 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Tue, 3 Jul 2018 22:54:14 +0200 Subject: [PATCH 0399/2269] KVM: arm/arm64: Fix vgic init race [ Upstream commit 1d47191de7e15900f8fbfe7cccd7c6e1c2d7c31a ] The vgic_init function can race with kvm_arch_vcpu_create() which does not hold kvm_lock() and we therefore have no synchronization primitives to ensure we're doing the right thing. As the user is trying to initialize or run the VM while at the same time creating more VCPUs, we just have to refuse to initialize the VGIC in this case rather than silently failing with a broken VCPU. Reviewed-by: Eric Auger Signed-off-by: Christoffer Dall Signed-off-by: Marc Zyngier Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- virt/kvm/arm/vgic/vgic-init.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c index 5801261f3addd..6f7f26ae72b45 100644 --- a/virt/kvm/arm/vgic/vgic-init.c +++ b/virt/kvm/arm/vgic/vgic-init.c @@ -277,6 +277,10 @@ int vgic_init(struct kvm *kvm) if (vgic_initialized(kvm)) return 0; + /* Are we also in the middle of creating a VCPU? */ + if (kvm->created_vcpus != atomic_read(&kvm->online_vcpus)) + return -EBUSY; + /* freeze the number of spis */ if (!dist->nr_spis) dist->nr_spis = VGIC_NR_IRQS_LEGACY - VGIC_NR_PRIVATE_IRQS; -- GitLab From ab605544f62b5151c2cb276f6771a8c423378926 Mon Sep 17 00:00:00 2001 From: Pingfan Liu Date: Thu, 19 Jul 2018 13:14:58 +0800 Subject: [PATCH 0400/2269] drivers/base: stop new probing during shutdown [ Upstream commit 3297c8fc65af5d40501ea7cddff1b195cae57e4e ] There is a race window in device_shutdown(), which may cause -1. parent device shut down before child or -2. no shutdown on a new probing device. For 1st, taking the following scenario: device_shutdown new plugin device list_del_init(parent_dev); spin_unlock(list_lock); device_add(child) probe child shutdown parent_dev --> now child is on the tail of devices_kset For 2nd, taking the following scenario: device_shutdown new plugin device device_add(dev) device_lock(dev); ... device_unlock(dev); probe dev --> now, the new occurred dev has no opportunity to shutdown To fix this race issue, just prevent the new probing request. With this logic, device_shutdown() is more similar to dpm_prepare(). Signed-off-by: Pingfan Liu Reviewed-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/base/core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/base/core.c b/drivers/base/core.c index b054cb2fd2b9a..eb066cc827ef4 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -2783,6 +2783,9 @@ void device_shutdown(void) { struct device *dev, *parent; + wait_for_device_probe(); + device_block_probing(); + spin_lock(&devices_kset->list_lock); /* * Walk the devices list backward, shutting down each in turn. -- GitLab From a8c0b9acf08f39e3b6fcf518fc0732f6fad64698 Mon Sep 17 00:00:00 2001 From: Jae Hyun Yoo Date: Mon, 2 Jul 2018 14:20:28 -0700 Subject: [PATCH 0401/2269] i2c: aspeed: Fix initial values of master and slave state [ Upstream commit 517fde0eb5a8f46c54ba6e2c36e32563b23cb14f ] This patch changes the order of enum aspeed_i2c_master_state and enum aspeed_i2c_slave_state defines to make their initial value to ASPEED_I2C_MASTER_INACTIVE and ASPEED_I2C_SLAVE_STOP respectively. In case of multi-master use, if a slave data comes ahead of the first master xfer, master_state starts from an invalid state so this change fixes the issue. Signed-off-by: Jae Hyun Yoo Reviewed-by: Brendan Higgins Signed-off-by: Wolfram Sang Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/busses/i2c-aspeed.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/i2c/busses/i2c-aspeed.c b/drivers/i2c/busses/i2c-aspeed.c index 2feae9a421e6c..a074735456bc7 100644 --- a/drivers/i2c/busses/i2c-aspeed.c +++ b/drivers/i2c/busses/i2c-aspeed.c @@ -110,22 +110,22 @@ #define ASPEED_I2CD_DEV_ADDR_MASK GENMASK(6, 0) enum aspeed_i2c_master_state { + ASPEED_I2C_MASTER_INACTIVE, ASPEED_I2C_MASTER_START, ASPEED_I2C_MASTER_TX_FIRST, ASPEED_I2C_MASTER_TX, ASPEED_I2C_MASTER_RX_FIRST, ASPEED_I2C_MASTER_RX, ASPEED_I2C_MASTER_STOP, - ASPEED_I2C_MASTER_INACTIVE, }; enum aspeed_i2c_slave_state { + ASPEED_I2C_SLAVE_STOP, ASPEED_I2C_SLAVE_START, ASPEED_I2C_SLAVE_READ_REQUESTED, ASPEED_I2C_SLAVE_READ_PROCESSED, ASPEED_I2C_SLAVE_WRITE_REQUESTED, ASPEED_I2C_SLAVE_WRITE_RECEIVED, - ASPEED_I2C_SLAVE_STOP, }; struct aspeed_i2c_bus { -- GitLab From 82c53969af8cee3d47181985feed6b19e9b193af Mon Sep 17 00:00:00 2001 From: Hanna Hawa Date: Tue, 17 Jul 2018 13:30:00 +0300 Subject: [PATCH 0402/2269] dmaengine: mv_xor_v2: kill the tasklets upon exit [ Upstream commit 8bbafed8dd5cfa81071b50ead5cb60367fdef3a9 ] The mv_xor_v2 driver uses a tasklet, initialized during the probe() routine. However, it forgets to cleanup the tasklet using tasklet_kill() function during the remove() routine, which this patch fixes. This prevents the tasklet from potentially running after the module has been removed. Fixes: 19a340b1a820 ("dmaengine: mv_xor_v2: new driver") Signed-off-by: Hanna Hawa Reviewed-by: Thomas Petazzoni Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/dma/mv_xor_v2.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/dma/mv_xor_v2.c b/drivers/dma/mv_xor_v2.c index 3548caa9e9339..75eef589d0ecb 100644 --- a/drivers/dma/mv_xor_v2.c +++ b/drivers/dma/mv_xor_v2.c @@ -898,6 +898,8 @@ static int mv_xor_v2_remove(struct platform_device *pdev) platform_msi_domain_free_irqs(&pdev->dev); + tasklet_kill(&xor_dev->irq_tasklet); + clk_disable_unprepare(xor_dev->clk); return 0; -- GitLab From ef275ba514d97398a5a78bd23929663396872c58 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20M=C3=BCller?= Date: Sun, 15 Jul 2018 00:27:06 +0200 Subject: [PATCH 0403/2269] crypto: sharah - Unregister correct algorithms for SAHARA 3 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 0e7d4d932ffc23f75efb31a8c2ac2396c1b81c55 ] This patch fixes two typos related to unregistering algorithms supported by SAHARAH 3. In sahara_register_algs the wrong algorithms are unregistered in case of an error. In sahara_unregister_algs the wrong array is used to determine the iteration count. Signed-off-by: Michael Müller Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/sahara.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/sahara.c b/drivers/crypto/sahara.c index 08e7bdcaa6e31..085c229eab1de 100644 --- a/drivers/crypto/sahara.c +++ b/drivers/crypto/sahara.c @@ -1351,7 +1351,7 @@ err_sha_v4_algs: err_sha_v3_algs: for (j = 0; j < k; j++) - crypto_unregister_ahash(&sha_v4_algs[j]); + crypto_unregister_ahash(&sha_v3_algs[j]); err_aes_algs: for (j = 0; j < i; j++) @@ -1367,7 +1367,7 @@ static void sahara_unregister_algs(struct sahara_dev *dev) for (i = 0; i < ARRAY_SIZE(aes_algs); i++) crypto_unregister_alg(&aes_algs[i]); - for (i = 0; i < ARRAY_SIZE(sha_v4_algs); i++) + for (i = 0; i < ARRAY_SIZE(sha_v3_algs); i++) crypto_unregister_ahash(&sha_v3_algs[i]); if (dev->version > SAHARA_VERSION_3) -- GitLab From 36d6a43a16e5ee79917ae58b85d5df8283483c1d Mon Sep 17 00:00:00 2001 From: Jiang Biao Date: Fri, 20 Jul 2018 08:06:31 +0800 Subject: [PATCH 0404/2269] x86/pti: Check the return value of pti_user_pagetable_walk_p4d() [ Upstream commit b2b7d986a89b6c94b1331a909de1217214fb08c1 ] pti_user_pagetable_walk_p4d() can return NULL, so the return value should be checked to prevent a NULL pointer dereference. Add the check and a warning when the P4D allocation fails. Signed-off-by: Jiang Biao Signed-off-by: Thomas Gleixner Cc: dave.hansen@linux.intel.com Cc: luto@kernel.org Cc: hpa@zytor.com Cc: albcamus@gmail.com Cc: zhong.weidong@zte.com.cn Link: https://lkml.kernel.org/r/1532045192-49622-1-git-send-email-jiang.biao2@zte.com.cn Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/x86/mm/pti.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c index d6f11accd37a0..63afd15b32a54 100644 --- a/arch/x86/mm/pti.c +++ b/arch/x86/mm/pti.c @@ -162,7 +162,7 @@ static __init p4d_t *pti_user_pagetable_walk_p4d(unsigned long address) if (pgd_none(*pgd)) { unsigned long new_p4d_page = __get_free_page(gfp); - if (!new_p4d_page) + if (WARN_ON_ONCE(!new_p4d_page)) return NULL; set_pgd(pgd, __pgd(_KERNPG_TABLE | __pa(new_p4d_page))); @@ -181,9 +181,13 @@ static __init p4d_t *pti_user_pagetable_walk_p4d(unsigned long address) static __init pmd_t *pti_user_pagetable_walk_pmd(unsigned long address) { gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO); - p4d_t *p4d = pti_user_pagetable_walk_p4d(address); + p4d_t *p4d; pud_t *pud; + p4d = pti_user_pagetable_walk_p4d(address); + if (!p4d) + return NULL; + BUILD_BUG_ON(p4d_large(*p4d) != 0); if (p4d_none(*p4d)) { unsigned long new_pud_page = __get_free_page(gfp); @@ -319,6 +323,9 @@ static void __init pti_clone_p4d(unsigned long addr) pgd_t *kernel_pgd; user_p4d = pti_user_pagetable_walk_p4d(addr); + if (!user_p4d) + return; + kernel_pgd = pgd_offset_k(addr); kernel_p4d = p4d_offset(kernel_pgd, addr); *user_p4d = *kernel_p4d; -- GitLab From 16939943660cb459ac5abd76f7ed75a53cb48eb1 Mon Sep 17 00:00:00 2001 From: Jiang Biao Date: Fri, 20 Jul 2018 08:06:32 +0800 Subject: [PATCH 0405/2269] x86/pti: Check the return value of pti_user_pagetable_walk_pmd() [ Upstream commit 8c934e01a7ce685d98e970880f5941d79272c654 ] pti_user_pagetable_walk_pmd() can return NULL, so the return value should be checked to prevent a NULL pointer dereference. Add the check and a warning when the PMD allocation fails. Signed-off-by: Jiang Biao Signed-off-by: Thomas Gleixner Cc: dave.hansen@linux.intel.com Cc: luto@kernel.org Cc: hpa@zytor.com Cc: albcamus@gmail.com Cc: zhong.weidong@zte.com.cn Link: https://lkml.kernel.org/r/1532045192-49622-2-git-send-email-jiang.biao2@zte.com.cn Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/x86/mm/pti.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c index 63afd15b32a54..7786ab306225e 100644 --- a/arch/x86/mm/pti.c +++ b/arch/x86/mm/pti.c @@ -191,7 +191,7 @@ static __init pmd_t *pti_user_pagetable_walk_pmd(unsigned long address) BUILD_BUG_ON(p4d_large(*p4d) != 0); if (p4d_none(*p4d)) { unsigned long new_pud_page = __get_free_page(gfp); - if (!new_pud_page) + if (WARN_ON_ONCE(!new_pud_page)) return NULL; set_p4d(p4d, __p4d(_KERNPG_TABLE | __pa(new_pud_page))); @@ -205,7 +205,7 @@ static __init pmd_t *pti_user_pagetable_walk_pmd(unsigned long address) } if (pud_none(*pud)) { unsigned long new_pmd_page = __get_free_page(gfp); - if (!new_pmd_page) + if (WARN_ON_ONCE(!new_pmd_page)) return NULL; set_pud(pud, __pud(_KERNPG_TABLE | __pa(new_pmd_page))); @@ -227,9 +227,13 @@ static __init pmd_t *pti_user_pagetable_walk_pmd(unsigned long address) static __init pte_t *pti_user_pagetable_walk_pte(unsigned long address) { gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO); - pmd_t *pmd = pti_user_pagetable_walk_pmd(address); + pmd_t *pmd; pte_t *pte; + pmd = pti_user_pagetable_walk_pmd(address); + if (!pmd) + return NULL; + /* We can't do anything sensible if we hit a large mapping. */ if (pmd_large(*pmd)) { WARN_ON(1); -- GitLab From ab25ad619343a741da0c8c03ee5cbc4023ca725d Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 18 Jul 2018 11:41:01 +0200 Subject: [PATCH 0406/2269] x86/mm/pti: Add an overflow check to pti_clone_pmds() [ Upstream commit 935232ce28dfabff1171e5a7113b2d865fa9ee63 ] The addr counter will overflow if the last PMD of the address space is cloned, resulting in an endless loop. Check for that and bail out of the loop when it happens. Signed-off-by: Joerg Roedel Signed-off-by: Thomas Gleixner Tested-by: Pavel Machek Cc: "H . Peter Anvin" Cc: linux-mm@kvack.org Cc: Linus Torvalds Cc: Andy Lutomirski Cc: Dave Hansen Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Peter Zijlstra Cc: Borislav Petkov Cc: Jiri Kosina Cc: Boris Ostrovsky Cc: Brian Gerst Cc: David Laight Cc: Denys Vlasenko Cc: Eduardo Valentin Cc: Greg KH Cc: Will Deacon Cc: aliguori@amazon.com Cc: daniel.gruss@iaik.tugraz.at Cc: hughd@google.com Cc: keescook@google.com Cc: Andrea Arcangeli Cc: Waiman Long Cc: "David H . Gutteridge" Cc: joro@8bytes.org Link: https://lkml.kernel.org/r/1531906876-13451-25-git-send-email-joro@8bytes.org Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/x86/mm/pti.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c index 7786ab306225e..b07e3ffc5ac50 100644 --- a/arch/x86/mm/pti.c +++ b/arch/x86/mm/pti.c @@ -291,6 +291,10 @@ pti_clone_pmds(unsigned long start, unsigned long end, pmdval_t clear) p4d_t *p4d; pud_t *pud; + /* Overflow check */ + if (addr < start) + break; + pgd = pgd_offset_k(addr); if (WARN_ON(pgd_none(*pgd))) return; -- GitLab From a5d247607a5684335b8bedef148694e2e15b0181 Mon Sep 17 00:00:00 2001 From: Xiao Liang Date: Tue, 14 Aug 2018 23:21:28 +0800 Subject: [PATCH 0407/2269] xen-netfront: fix warn message as irq device name has '/' [ Upstream commit 21f2706b20100bb3db378461ab9b8e2035309b5b ] There is a call trace generated after commit 2d408c0d4574b01b9ed45e02516888bf925e11a9( xen-netfront: fix queue name setting). There is no 'device/vif/xx-q0-tx' file found under /proc/irq/xx/. This patch only picks up device type and id as its name. With the patch, now /proc/interrupts looks like below and the warning message gone: 70: 21 0 0 0 xen-dyn -event vif0-q0-tx 71: 15 0 0 0 xen-dyn -event vif0-q0-rx 72: 14 0 0 0 xen-dyn -event vif0-q1-tx 73: 33 0 0 0 xen-dyn -event vif0-q1-rx 74: 12 0 0 0 xen-dyn -event vif0-q2-tx 75: 24 0 0 0 xen-dyn -event vif0-q2-rx 76: 19 0 0 0 xen-dyn -event vif0-q3-tx 77: 21 0 0 0 xen-dyn -event vif0-q3-rx Below is call trace information without this patch: name 'device/vif/0-q0-tx' WARNING: CPU: 2 PID: 37 at fs/proc/generic.c:174 __xlate_proc_name+0x85/0xa0 RIP: 0010:__xlate_proc_name+0x85/0xa0 RSP: 0018:ffffb85c40473c18 EFLAGS: 00010286 RAX: 0000000000000000 RBX: 0000000000000006 RCX: 0000000000000006 RDX: 0000000000000007 RSI: 0000000000000096 RDI: ffff984c7f516930 RBP: ffffb85c40473cb8 R08: 000000000000002c R09: 0000000000000229 R10: 0000000000000000 R11: 0000000000000001 R12: ffffb85c40473c98 R13: ffffb85c40473cb8 R14: ffffb85c40473c50 R15: 0000000000000000 FS: 0000000000000000(0000) GS:ffff984c7f500000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f69b6899038 CR3: 000000001c20a006 CR4: 00000000001606e0 Call Trace: __proc_create+0x45/0x230 ? snprintf+0x49/0x60 proc_mkdir_data+0x35/0x90 register_handler_proc+0xef/0x110 ? proc_register+0xfc/0x110 ? proc_create_data+0x70/0xb0 __setup_irq+0x39b/0x660 ? request_threaded_irq+0xad/0x160 request_threaded_irq+0xf5/0x160 ? xennet_tx_buf_gc+0x1d0/0x1d0 [xen_netfront] bind_evtchn_to_irqhandler+0x3d/0x70 ? xenbus_alloc_evtchn+0x41/0xa0 netback_changed+0xa46/0xcda [xen_netfront] ? find_watch+0x40/0x40 xenwatch_thread+0xc5/0x160 ? finish_wait+0x80/0x80 kthread+0x112/0x130 ? kthread_create_worker_on_cpu+0x70/0x70 ret_from_fork+0x35/0x40 Code: 81 5c 00 48 85 c0 75 cc 5b 49 89 2e 31 c0 5d 4d 89 3c 24 41 5c 41 5d 41 5e 41 5f c3 4c 89 ee 48 c7 c7 40 4f 0e b4 e8 65 ea d8 ff <0f> 0b b8 fe ff ff ff 5b 5d 41 5c 41 5d 41 5e 41 5f c3 66 0f 1f ---[ end trace 650e5561b0caab3a ]--- Signed-off-by: Xiao Liang Reviewed-by: Juergen Gross Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/xen-netfront.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 00c214348e691..08f0c15c90a7b 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -1603,6 +1603,7 @@ static int xennet_init_queue(struct netfront_queue *queue) { unsigned short i; int err = 0; + char *devid; spin_lock_init(&queue->tx_lock); spin_lock_init(&queue->rx_lock); @@ -1610,8 +1611,9 @@ static int xennet_init_queue(struct netfront_queue *queue) setup_timer(&queue->rx_refill_timer, rx_refill_timeout, (unsigned long)queue); - snprintf(queue->name, sizeof(queue->name), "%s-q%u", - queue->info->xbdev->nodename, queue->id); + devid = strrchr(queue->info->xbdev->nodename, '/') + 1; + snprintf(queue->name, sizeof(queue->name), "vif%s-q%u", + devid, queue->id); /* Initialise tx_skbs as a free chain containing every entry. */ queue->tx_skb_freelist = 0; -- GitLab From 8c705dea5e59509b55b46b5a833e1ed030485668 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Thu, 30 Aug 2018 08:35:19 +0300 Subject: [PATCH 0408/2269] RDMA/cma: Protect cma dev list with lock commit 954a8e3aea87e896e320cf648c1a5bbe47de443e upstream. When AF_IB addresses are used during rdma_resolve_addr() a lock is not held. A cma device can get removed while list traversal is in progress which may lead to crash. ie CPU0 CPU1 ==== ==== rdma_resolve_addr() cma_resolve_ib_dev() list_for_each() cma_remove_one() cur_dev->device mutex_lock(&lock) list_del(); mutex_unlock(&lock); cma_process_remove(); Therefore, hold a lock while traversing the list which avoids such situation. Cc: # 3.10 Fixes: f17df3b0dede ("RDMA/cma: Add support for AF_IB to rdma_resolve_addr()") Signed-off-by: Parav Pandit Reviewed-by: Daniel Jurgens Signed-off-by: Leon Romanovsky Reviewed-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/core/cma.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 752dbc388c271..7c5eca312aa88 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -730,6 +730,7 @@ static int cma_resolve_ib_dev(struct rdma_id_private *id_priv) dgid = (union ib_gid *) &addr->sib_addr; pkey = ntohs(addr->sib_pkey); + mutex_lock(&lock); list_for_each_entry(cur_dev, &dev_list, list) { for (p = 1; p <= cur_dev->device->phys_port_cnt; ++p) { if (!rdma_cap_af_ib(cur_dev->device, p)) @@ -756,18 +757,19 @@ static int cma_resolve_ib_dev(struct rdma_id_private *id_priv) cma_dev = cur_dev; sgid = gid; id_priv->id.port_num = p; + goto found; } } } } - - if (!cma_dev) - return -ENODEV; + mutex_unlock(&lock); + return -ENODEV; found: cma_attach_to_dev(id_priv, cma_dev); - addr = (struct sockaddr_ib *) cma_src_addr(id_priv); - memcpy(&addr->sib_addr, &sgid, sizeof sgid); + mutex_unlock(&lock); + addr = (struct sockaddr_ib *)cma_src_addr(id_priv); + memcpy(&addr->sib_addr, &sgid, sizeof(sgid)); cma_translate_ib(addr, &id_priv->id.route.addr.dev_addr); return 0; } -- GitLab From 1e5b387747ba285a64348b9a96b246d0b9d52b03 Mon Sep 17 00:00:00 2001 From: Bin Yang Date: Wed, 12 Sep 2018 03:36:34 +0000 Subject: [PATCH 0409/2269] pstore: Fix incorrect persistent ram buffer mapping commit 831b624df1b420c8f9281ed1307a8db23afb72df upstream. persistent_ram_vmap() returns the page start vaddr. persistent_ram_iomap() supports non-page-aligned mapping. persistent_ram_buffer_map() always adds offset-in-page to the vaddr returned from these two functions, which causes incorrect mapping of non-page-aligned persistent ram buffer. By default ftrace_size is 4096 and max_ftrace_cnt is nr_cpu_ids. Without this patch, the zone_sz in ramoops_init_przs() is 4096/nr_cpu_ids which might not be page aligned. If the offset-in-page > 2048, the vaddr will be in next page. If the next page is not mapped, it will cause kernel panic: [ 0.074231] BUG: unable to handle kernel paging request at ffffa19e0081b000 ... [ 0.075000] RIP: 0010:persistent_ram_new+0x1f8/0x39f ... [ 0.075000] Call Trace: [ 0.075000] ramoops_init_przs.part.10.constprop.15+0x105/0x260 [ 0.075000] ramoops_probe+0x232/0x3a0 [ 0.075000] platform_drv_probe+0x3e/0xa0 [ 0.075000] driver_probe_device+0x2cd/0x400 [ 0.075000] __driver_attach+0xe4/0x110 [ 0.075000] ? driver_probe_device+0x400/0x400 [ 0.075000] bus_for_each_dev+0x70/0xa0 [ 0.075000] driver_attach+0x1e/0x20 [ 0.075000] bus_add_driver+0x159/0x230 [ 0.075000] ? do_early_param+0x95/0x95 [ 0.075000] driver_register+0x70/0xc0 [ 0.075000] ? init_pstore_fs+0x4d/0x4d [ 0.075000] __platform_driver_register+0x36/0x40 [ 0.075000] ramoops_init+0x12f/0x131 [ 0.075000] do_one_initcall+0x4d/0x12c [ 0.075000] ? do_early_param+0x95/0x95 [ 0.075000] kernel_init_freeable+0x19b/0x222 [ 0.075000] ? rest_init+0xbb/0xbb [ 0.075000] kernel_init+0xe/0xfc [ 0.075000] ret_from_fork+0x3a/0x50 Signed-off-by: Bin Yang [kees: add comments describing the mapping differences, updated commit log] Fixes: 24c3d2f342ed ("staging: android: persistent_ram: Make it possible to use memory outside of bootmem") Cc: stable@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: Greg Kroah-Hartman --- fs/pstore/ram_core.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c index e11672aa45751..ecdb3baa12833 100644 --- a/fs/pstore/ram_core.c +++ b/fs/pstore/ram_core.c @@ -421,7 +421,12 @@ static void *persistent_ram_vmap(phys_addr_t start, size_t size, vaddr = vmap(pages, page_count, VM_MAP, prot); kfree(pages); - return vaddr; + /* + * Since vmap() uses page granularity, we must add the offset + * into the page here, to get the byte granularity address + * into the mapping to represent the actual "start" location. + */ + return vaddr + offset_in_page(start); } static void *persistent_ram_iomap(phys_addr_t start, size_t size, @@ -440,6 +445,11 @@ static void *persistent_ram_iomap(phys_addr_t start, size_t size, else va = ioremap_wc(start, size); + /* + * Since request_mem_region() and ioremap() are byte-granularity + * there is no need handle anything special like we do when the + * vmap() case in persistent_ram_vmap() above. + */ return va; } @@ -460,7 +470,7 @@ static int persistent_ram_buffer_map(phys_addr_t start, phys_addr_t size, return -ENOMEM; } - prz->buffer = prz->vaddr + offset_in_page(start); + prz->buffer = prz->vaddr; prz->buffer_size = size - sizeof(struct persistent_ram_buffer); return 0; @@ -507,7 +517,8 @@ void persistent_ram_free(struct persistent_ram_zone *prz) if (prz->vaddr) { if (pfn_valid(prz->paddr >> PAGE_SHIFT)) { - vunmap(prz->vaddr); + /* We must vunmap() at page-granularity. */ + vunmap(prz->vaddr - offset_in_page(prz->paddr)); } else { iounmap(prz->vaddr); release_mem_region(prz->paddr, prz->size); -- GitLab From 33e4afbb44fe3d84dd90274b738f6b1c7d0ef68e Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Fri, 7 Sep 2018 14:21:30 +0200 Subject: [PATCH 0410/2269] xen/netfront: fix waiting for xenbus state change commit 8edfe2e992b75aee3da9316e9697c531194c2f53 upstream. Commit 822fb18a82aba ("xen-netfront: wait xenbus state change when load module manually") added a new wait queue to wait on for a state change when the module is loaded manually. Unfortunately there is no wakeup anywhere to stop that waiting. Instead of introducing a new wait queue rename the existing module_unload_q to module_wq and use it for both purposes (loading and unloading). As any state change of the backend might be intended to stop waiting do the wake_up_all() in any case when netback_changed() is called. Fixes: 822fb18a82aba ("xen-netfront: wait xenbus state change when load module manually") Cc: #4.18 Signed-off-by: Juergen Gross Reviewed-by: Boris Ostrovsky Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/xen-netfront.c | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 08f0c15c90a7b..d80343429de5f 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -87,8 +87,7 @@ struct netfront_cb { /* IRQ name is queue name with "-tx" or "-rx" appended */ #define IRQ_NAME_SIZE (QUEUE_NAME_SIZE + 3) -static DECLARE_WAIT_QUEUE_HEAD(module_load_q); -static DECLARE_WAIT_QUEUE_HEAD(module_unload_q); +static DECLARE_WAIT_QUEUE_HEAD(module_wq); struct netfront_stats { u64 packets; @@ -1331,11 +1330,11 @@ static struct net_device *xennet_create_dev(struct xenbus_device *dev) netif_carrier_off(netdev); xenbus_switch_state(dev, XenbusStateInitialising); - wait_event(module_load_q, - xenbus_read_driver_state(dev->otherend) != - XenbusStateClosed && - xenbus_read_driver_state(dev->otherend) != - XenbusStateUnknown); + wait_event(module_wq, + xenbus_read_driver_state(dev->otherend) != + XenbusStateClosed && + xenbus_read_driver_state(dev->otherend) != + XenbusStateUnknown); return netdev; exit: @@ -2009,15 +2008,14 @@ static void netback_changed(struct xenbus_device *dev, dev_dbg(&dev->dev, "%s\n", xenbus_strstate(backend_state)); + wake_up_all(&module_wq); + switch (backend_state) { case XenbusStateInitialising: case XenbusStateInitialised: case XenbusStateReconfiguring: case XenbusStateReconfigured: - break; - case XenbusStateUnknown: - wake_up_all(&module_unload_q); break; case XenbusStateInitWait: @@ -2033,12 +2031,10 @@ static void netback_changed(struct xenbus_device *dev, break; case XenbusStateClosed: - wake_up_all(&module_unload_q); if (dev->state == XenbusStateClosed) break; /* Missed the backend's CLOSING state -- fallthrough */ case XenbusStateClosing: - wake_up_all(&module_unload_q); xenbus_frontend_closed(dev); break; } @@ -2146,14 +2142,14 @@ static int xennet_remove(struct xenbus_device *dev) if (xenbus_read_driver_state(dev->otherend) != XenbusStateClosed) { xenbus_switch_state(dev, XenbusStateClosing); - wait_event(module_unload_q, + wait_event(module_wq, xenbus_read_driver_state(dev->otherend) == XenbusStateClosing || xenbus_read_driver_state(dev->otherend) == XenbusStateUnknown); xenbus_switch_state(dev, XenbusStateClosed); - wait_event(module_unload_q, + wait_event(module_wq, xenbus_read_driver_state(dev->otherend) == XenbusStateClosed || xenbus_read_driver_state(dev->otherend) == -- GitLab From b8b9c7f05b3cd11560c0d98dfbb20e06e68cb70e Mon Sep 17 00:00:00 2001 From: Aaron Knister Date: Fri, 24 Aug 2018 08:42:46 -0400 Subject: [PATCH 0411/2269] IB/ipoib: Avoid a race condition between start_xmit and cm_rep_handler commit 816e846c2eb9129a3e0afa5f920c8bbc71efecaa upstream. Inside of start_xmit() the call to check if the connection is up and the queueing of the packets for later transmission is not atomic which leaves a window where cm_rep_handler can run, set the connection up, dequeue pending packets and leave the subsequently queued packets by start_xmit() sitting on neigh->queue until they're dropped when the connection is torn down. This only applies to connected mode. These dropped packets can really upset TCP, for example, and cause multi-minute delays in transmission for open connections. Here's the code in start_xmit where we check to see if the connection is up: if (ipoib_cm_get(neigh)) { if (ipoib_cm_up(neigh)) { ipoib_cm_send(dev, skb, ipoib_cm_get(neigh)); goto unref; } } The race occurs if cm_rep_handler execution occurs after the above connection check (specifically if it gets to the point where it acquires priv->lock to dequeue pending skb's) but before the below code snippet in start_xmit where packets are queued. if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE) { push_pseudo_header(skb, phdr->hwaddr); spin_lock_irqsave(&priv->lock, flags); __skb_queue_tail(&neigh->queue, skb); spin_unlock_irqrestore(&priv->lock, flags); } else { ++dev->stats.tx_dropped; dev_kfree_skb_any(skb); } The patch acquires the netif tx lock in cm_rep_handler for the section where it sets the connection up and dequeues and retransmits deferred skb's. Fixes: 839fcaba355a ("IPoIB: Connected mode experimental support") Cc: stable@vger.kernel.org Signed-off-by: Aaron Knister Tested-by: Ira Weiny Reviewed-by: Ira Weiny Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/ulp/ipoib/ipoib_cm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 7a5ed5a5391e1..9939f32d01548 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -1018,12 +1018,14 @@ static int ipoib_cm_rep_handler(struct ib_cm_id *cm_id, struct ib_cm_event *even skb_queue_head_init(&skqueue); + netif_tx_lock_bh(p->dev); spin_lock_irq(&priv->lock); set_bit(IPOIB_FLAG_OPER_UP, &p->flags); if (p->neigh) while ((skb = __skb_dequeue(&p->neigh->queue))) __skb_queue_tail(&skqueue, skb); spin_unlock_irq(&priv->lock); + netif_tx_unlock_bh(p->dev); while ((skb = __skb_dequeue(&skqueue))) { skb->dev = p->dev; -- GitLab From 51e8d7d77ca8e6da9fb5162d5090bc06b8957cd8 Mon Sep 17 00:00:00 2001 From: Ingo Franzki Date: Mon, 27 Aug 2018 14:28:47 +0200 Subject: [PATCH 0412/2269] s390/crypto: Fix return code checking in cbc_paes_crypt() commit b81126e01a8c6048249955feea46c8217ebefa91 upstream. The return code of cpacf_kmc() is less than the number of bytes to process in case of an error, not greater. The crypt routines for the other cipher modes already have this correctly. Cc: stable@vger.kernel.org # v4.11+ Fixes: 279378430768 ("s390/crypt: Add protected key AES module") Signed-off-by: Ingo Franzki Acked-by: Harald Freudenberger Signed-off-by: Martin Schwidefsky Signed-off-by: Greg Kroah-Hartman --- arch/s390/crypto/paes_s390.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/crypto/paes_s390.c b/arch/s390/crypto/paes_s390.c index a4e903ed7e21c..b429aceff050a 100644 --- a/arch/s390/crypto/paes_s390.c +++ b/arch/s390/crypto/paes_s390.c @@ -212,7 +212,7 @@ static int cbc_paes_crypt(struct blkcipher_desc *desc, unsigned long modifier, walk->dst.virt.addr, walk->src.virt.addr, n); if (k) ret = blkcipher_walk_done(desc, walk, nbytes - k); - if (n < k) { + if (k < n) { if (__cbc_paes_set_key(ctx) != 0) return blkcipher_walk_done(desc, walk, -EIO); memcpy(param.key, ctx->pk.protkey, MAXPROTKEYSIZE); -- GitLab From f6e23e57d06395aa690afc321160fafa4a093c87 Mon Sep 17 00:00:00 2001 From: Andreas Kemnade Date: Sun, 2 Sep 2018 09:30:58 +0200 Subject: [PATCH 0413/2269] mmc: omap_hsmmc: fix wakeirq handling on removal commit 3c398f3c3bef21961eaaeb93227fa66d440dc83d upstream. after unbinding mmc I get things like this: [ 185.294067] mmc1: card 0001 removed [ 185.305206] omap_hsmmc 480b4000.mmc: wake IRQ with no resume: -13 The wakeirq stays in /proc-interrupts rebinding shows this: [ 289.795959] genirq: Flags mismatch irq 112. 0000200a (480b4000.mmc:wakeup) vs. 0000200a (480b4000.mmc:wakeup) [ 289.808959] omap_hsmmc 480b4000.mmc: Unable to request wake IRQ [ 289.815338] omap_hsmmc 480b4000.mmc: no SDIO IRQ support, falling back to polling That bug seems to be introduced by switching from devm_request_irq() to generic wakeirq handling. So let us cleanup at removal. Signed-off-by: Andreas Kemnade Fixes: 5b83b2234be6 ("mmc: omap_hsmmc: Change wake-up interrupt to use generic wakeirq") Cc: stable@vger.kernel.org # v4.2+ Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/omap_hsmmc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c index 3b5e6d11069bb..9e03fada16dcb 100644 --- a/drivers/mmc/host/omap_hsmmc.c +++ b/drivers/mmc/host/omap_hsmmc.c @@ -2194,6 +2194,7 @@ static int omap_hsmmc_remove(struct platform_device *pdev) dma_release_channel(host->tx_chan); dma_release_channel(host->rx_chan); + dev_pm_clear_wake_irq(host->dev); pm_runtime_dont_use_autosuspend(host->dev); pm_runtime_put_sync(host->dev); pm_runtime_disable(host->dev); -- GitLab From 888e989a753a076d8323fd0c353c0bfcf016f9e8 Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Thu, 30 Aug 2018 13:06:21 -0500 Subject: [PATCH 0414/2269] ipmi: Fix I2C client removal in the SSIF driver commit 0745dde62835be7e2afe62fcdb482fcad79cb743 upstream. The SSIF driver was removing any client that came in through the platform interface, but it should only remove clients that it added. On a failure in the probe function, this could result in the following oops when the driver is removed and the client gets unregistered twice: CPU: 107 PID: 30266 Comm: rmmod Not tainted 4.18.0+ #80 Hardware name: Cavium Inc. Saber/Saber, BIOS Cavium reference firmware version 7.0 08/04/2018 pstate: 60400009 (nZCv daif +PAN -UAO) pc : kernfs_find_ns+0x28/0x120 lr : kernfs_find_and_get_ns+0x40/0x60 sp : ffff00002310fb50 x29: ffff00002310fb50 x28: ffff800a8240f800 x27: 0000000000000000 x26: 0000000000000000 x25: 0000000056000000 x24: ffff000009073000 x23: ffff000008998b38 x22: 0000000000000000 x21: ffff800ed86de820 x20: 0000000000000000 x19: ffff00000913a1d8 x18: 0000000000000000 x17: 0000000000000000 x16: 0000000000000000 x15: 0000000000000000 x14: 5300737265766972 x13: 643d4d4554535953 x12: 0000000000000030 x11: 0000000000000030 x10: 0101010101010101 x9 : ffff800ea06cc3f9 x8 : 0000000000000000 x7 : 0000000000000141 x6 : ffff000009073000 x5 : ffff800adb706b00 x4 : 0000000000000000 x3 : 00000000ffffffff x2 : 0000000000000000 x1 : ffff000008998b38 x0 : ffff000008356760 Process rmmod (pid: 30266, stack limit = 0x00000000e218418d) Call trace: kernfs_find_ns+0x28/0x120 kernfs_find_and_get_ns+0x40/0x60 sysfs_unmerge_group+0x2c/0x6c dpm_sysfs_remove+0x34/0x70 device_del+0x58/0x30c device_unregister+0x30/0x7c i2c_unregister_device+0x84/0x90 [i2c_core] ssif_platform_remove+0x38/0x98 [ipmi_ssif] platform_drv_remove+0x2c/0x6c device_release_driver_internal+0x168/0x1f8 driver_detach+0x50/0xbc bus_remove_driver+0x74/0xe8 driver_unregister+0x34/0x5c platform_driver_unregister+0x20/0x2c cleanup_ipmi_ssif+0x50/0xd82c [ipmi_ssif] __arm64_sys_delete_module+0x1b4/0x220 el0_svc_handler+0x104/0x160 el0_svc+0x8/0xc Code: aa1e03e0 aa0203f6 aa0103f7 d503201f (7940e280) ---[ end trace 09f0e34cce8e2d8c ]--- Kernel panic - not syncing: Fatal exception SMP: stopping secondary CPUs Kernel Offset: disabled CPU features: 0x23800c38 So track the clients that the SSIF driver adds and only remove those. Reported-by: George Cherian Signed-off-by: Corey Minyard Tested-by: George Cherian Cc: # 4.14.x Signed-off-by: Greg Kroah-Hartman --- drivers/char/ipmi/ipmi_ssif.c | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/drivers/char/ipmi/ipmi_ssif.c b/drivers/char/ipmi/ipmi_ssif.c index 6f2eaba1cd6a6..932678617dfa7 100644 --- a/drivers/char/ipmi/ipmi_ssif.c +++ b/drivers/char/ipmi/ipmi_ssif.c @@ -184,6 +184,8 @@ struct ssif_addr_info { struct device *dev; struct i2c_client *client; + struct i2c_client *added_client; + struct mutex clients_mutex; struct list_head clients; @@ -1710,15 +1712,7 @@ static int ssif_probe(struct i2c_client *client, const struct i2c_device_id *id) out: if (rv) { - /* - * Note that if addr_info->client is assigned, we - * leave it. The i2c client hangs around even if we - * return a failure here, and the failure here is not - * propagated back to the i2c code. This seems to be - * design intent, strange as it may be. But if we - * don't leave it, ssif_platform_remove will not remove - * the client like it should. - */ + addr_info->client = NULL; dev_err(&client->dev, "Unable to start IPMI SSIF: %d\n", rv); kfree(ssif_info); } @@ -1737,7 +1731,8 @@ static int ssif_adapter_handler(struct device *adev, void *opaque) if (adev->type != &i2c_adapter_type) return 0; - i2c_new_device(to_i2c_adapter(adev), &addr_info->binfo); + addr_info->added_client = i2c_new_device(to_i2c_adapter(adev), + &addr_info->binfo); if (!addr_info->adapter_name) return 1; /* Only try the first I2C adapter by default. */ @@ -2018,8 +2013,8 @@ static int ssif_platform_remove(struct platform_device *dev) return 0; mutex_lock(&ssif_infos_mutex); - if (addr_info->client) - i2c_unregister_device(addr_info->client); + if (addr_info->added_client) + i2c_unregister_device(addr_info->added_client); list_del(&addr_info->link); kfree(addr_info); -- GitLab From 47358b34baa7deacaecda8b2d6196a72d5d54874 Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Fri, 10 Aug 2018 23:06:07 +0000 Subject: [PATCH 0415/2269] Tools: hv: Fix a bug in the key delete code commit 86503bd35dec0ce363e9fdbf5299927422ed3899 upstream. Fix a bug in the key delete code - the num_records range from 0 to num_records-1. Signed-off-by: K. Y. Srinivasan Reported-by: David Binderman Cc: Reviewed-by: Michael Kelley Signed-off-by: Greg Kroah-Hartman --- tools/hv/hv_kvp_daemon.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/hv/hv_kvp_daemon.c b/tools/hv/hv_kvp_daemon.c index 4c99c57736cef..3965186b375a1 100644 --- a/tools/hv/hv_kvp_daemon.c +++ b/tools/hv/hv_kvp_daemon.c @@ -286,7 +286,7 @@ static int kvp_key_delete(int pool, const __u8 *key, int key_size) * Found a match; just move the remaining * entries up. */ - if (i == num_records) { + if (i == (num_records - 1)) { kvp_file_info[pool].num_records--; kvp_update_file(pool); return 0; -- GitLab From fc320be61ff66604309d96cf1dee4ad3bb040d3d Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Wed, 15 Aug 2018 10:50:41 -0500 Subject: [PATCH 0416/2269] misc: hmc6352: fix potential Spectre v1 commit de916736aaaadddbd6061472969f667b14204aa9 upstream. val is indirectly controlled by user-space, hence leading to a potential exploitation of the Spectre variant 1 vulnerability. This issue was detected with the help of Smatch: drivers/misc/hmc6352.c:54 compass_store() warn: potential spectre issue 'map' [r] Fix this by sanitizing val before using it to index map Notice that given that speculation windows are large, the policy is to kill the speculation on the first load and not worry if it can be completed with a dependent load/store [1]. [1] https://marc.info/?l=linux-kernel&m=152449131114778&w=2 Cc: stable@vger.kernel.org Signed-off-by: Gustavo A. R. Silva Signed-off-by: Greg Kroah-Hartman --- drivers/misc/hmc6352.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/misc/hmc6352.c b/drivers/misc/hmc6352.c index eeb7eef62174c..38f90e1799271 100644 --- a/drivers/misc/hmc6352.c +++ b/drivers/misc/hmc6352.c @@ -27,6 +27,7 @@ #include #include #include +#include static DEFINE_MUTEX(compass_mutex); @@ -50,6 +51,7 @@ static int compass_store(struct device *dev, const char *buf, size_t count, return ret; if (val >= strlen(map)) return -EINVAL; + val = array_index_nospec(val, strlen(map)); mutex_lock(&compass_mutex); ret = compass_command(c, map[val]); mutex_unlock(&compass_mutex); -- GitLab From 58eff5e715b078fbd50bdac890b52bed40e60be2 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Fri, 31 Aug 2018 17:24:43 +0300 Subject: [PATCH 0417/2269] xhci: Fix use after free for URB cancellation on a reallocated endpoint commit 4937213ba7fafa13f30496b3965ffe93970d8b53 upstream. Make sure the cancelled URB is on the current endpoint ring. If the endpoint ring has been reallocated since the URB was enqueued then the URB may contain TD and TRB pointers to a already freed ring. In this the case return the URB without touching any of the freed ring structure data. Don't try to stop the ring. It would be useless. This can occur if endpoint is not flushed before it is dropped and re-added, which is the case in usb_set_interface() as xhci does things in an odd order. Cc: Tested-by: Sudip Mukherjee Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 6b11fd9d8efec..64ddba3f79a95 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -47,6 +47,21 @@ static unsigned int quirks; module_param(quirks, uint, S_IRUGO); MODULE_PARM_DESC(quirks, "Bit flags for quirks to be enabled as default"); +static bool td_on_ring(struct xhci_td *td, struct xhci_ring *ring) +{ + struct xhci_segment *seg = ring->first_seg; + + if (!td || !td->start_seg) + return false; + do { + if (seg == td->start_seg) + return true; + seg = seg->next; + } while (seg && seg != ring->first_seg); + + return false; +} + /* TODO: copied from ehci-hcd.c - can this be refactored? */ /* * xhci_handshake - spin reading hc until handshake completes or fails @@ -1511,6 +1526,21 @@ static int xhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status) goto done; } + /* + * check ring is not re-allocated since URB was enqueued. If it is, then + * make sure none of the ring related pointers in this URB private data + * are touched, such as td_list, otherwise we overwrite freed data + */ + if (!td_on_ring(&urb_priv->td[0], ep_ring)) { + xhci_err(xhci, "Canceled URB td not found on endpoint ring"); + for (i = urb_priv->num_tds_done; i < urb_priv->num_tds; i++) { + td = &urb_priv->td[i]; + if (!list_empty(&td->cancelled_td_list)) + list_del_init(&td->cancelled_td_list); + } + goto err_giveback; + } + if (xhci->xhc_state & XHCI_STATE_HALTED) { xhci_dbg_trace(xhci, trace_xhci_dbg_cancel_urb, "HC halted, freeing TD manually."); -- GitLab From 4e237cfa575b247906375ee51d8f19584ff5c08d Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Tue, 4 Sep 2018 17:35:16 +0300 Subject: [PATCH 0418/2269] usb: Don't die twice if PCI xhci host is not responding in resume commit f3dc41c5d22b2ca14a0802a65d8cdc33a3882d4e upstream. usb_hc_died() should only be called once, and with the primary HCD as parameter. It will mark both primary and secondary hcd's dead. Remove the extra call to usb_cd_died with the shared hcd as parameter. Fixes: ff9d78b36f76 ("USB: Set usb_hcd->state and flags for shared roothubs") Signed-off-by: Mathias Nyman Cc: stable Acked-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hcd-pci.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/usb/core/hcd-pci.c b/drivers/usb/core/hcd-pci.c index ea829ad798c07..5340d433cdf0e 100644 --- a/drivers/usb/core/hcd-pci.c +++ b/drivers/usb/core/hcd-pci.c @@ -528,8 +528,6 @@ static int resume_common(struct device *dev, int event) event == PM_EVENT_RESTORE); if (retval) { dev_err(dev, "PCI post-resume error %d!\n", retval); - if (hcd->shared_hcd) - usb_hc_died(hcd->shared_hcd); usb_hc_died(hcd); } } -- GitLab From b5936d2741c09eaa76daf53d1ebfd9886c357598 Mon Sep 17 00:00:00 2001 From: Alexander Usyskin Date: Mon, 6 Aug 2018 17:47:33 +0300 Subject: [PATCH 0419/2269] mei: ignore not found client in the enumeration commit 8d2d8935d30cc2acc57a3196dc10dfa8d5cbcdab upstream. Some of the ME clients are available only for BIOS operation and are removed during hand off to an OS. However the removal is not instant. A client may be visible on the client list when the mei driver requests for enumeration, while the subsequent request for properties will be answered with client not found error value. The default behavior for an error is to perform client reset while this error is harmless and the link reset should be prevented. This issue started to be visible due to suspend/resume timing changes. Currently reported only on the Haswell based system. Fixes: [33.564957] mei_me 0000:00:16.0: hbm: properties response: wrong status = 1 CLIENT_NOT_FOUND [33.564978] mei_me 0000:00:16.0: mei_irq_read_handler ret = -71. [33.565270] mei_me 0000:00:16.0: unexpected reset: dev_state = INIT_CLIENTS fw status = 1E000255 60002306 00000200 00004401 00000000 00000010 Cc: Reported-by: Heiner Kallweit Signed-off-by: Alexander Usyskin Signed-off-by: Tomas Winkler Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/hbm.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/misc/mei/hbm.c b/drivers/misc/mei/hbm.c index fe6595fe94f1f..995ff1b7e7b5b 100644 --- a/drivers/misc/mei/hbm.c +++ b/drivers/misc/mei/hbm.c @@ -1140,15 +1140,18 @@ int mei_hbm_dispatch(struct mei_device *dev, struct mei_msg_hdr *hdr) props_res = (struct hbm_props_response *)mei_msg; - if (props_res->status) { + if (props_res->status == MEI_HBMS_CLIENT_NOT_FOUND) { + dev_dbg(dev->dev, "hbm: properties response: %d CLIENT_NOT_FOUND\n", + props_res->me_addr); + } else if (props_res->status) { dev_err(dev->dev, "hbm: properties response: wrong status = %d %s\n", props_res->status, mei_hbm_status_str(props_res->status)); return -EPROTO; + } else { + mei_hbm_me_cl_add(dev, props_res); } - mei_hbm_me_cl_add(dev, props_res); - /* request property for the next client */ if (mei_hbm_prop_req(dev, props_res->me_addr + 1)) return -EIO; -- GitLab From 599f1e90f709a994f35fc20a3da8259535df5bad Mon Sep 17 00:00:00 2001 From: Tomas Winkler Date: Mon, 27 Aug 2018 22:40:16 +0300 Subject: [PATCH 0420/2269] mei: bus: need to unlink client before freeing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 34f1166afd67f9f48a08c52f36180048908506a4 upstream. In case a client fails to connect in mei_cldev_enable(), the caller won't call the mei_cldev_disable leaving the client in a linked stated. Upon driver unload the client structure will be freed in mei_cl_bus_dev_release(), leaving a stale pointer on a fail_list. This will eventually end up in crash during power down flow in mei_cl_set_disonnected(). RIP: mei_cl_set_disconnected+0x5/0x260[mei] Call trace: mei_cl_all_disconnect+0x22/0x30 mei_reset+0x194/0x250 __synchronize_hardirq+0x43/0x50 _cond_resched+0x15/0x30 mei_me_intr_clear+0x20/0x100 mei_stop+0x76/0xb0 mei_me_shutdown+0x3f/0x80 pci_device_shutdown+0x34/0x60 kernel_restart+0x0e/0x30 Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=200455 Fixes: 'c110cdb17148 ("mei: bus: make a client pointer always available")' Cc: 4.10+ Tested-by: Georg Müller Signed-off-by: Tomas Winkler Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/bus.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/misc/mei/bus.c b/drivers/misc/mei/bus.c index 1ac10cb64d6ed..37b13bc5c16fe 100644 --- a/drivers/misc/mei/bus.c +++ b/drivers/misc/mei/bus.c @@ -465,17 +465,15 @@ int mei_cldev_enable(struct mei_cl_device *cldev) cl = cldev->cl; + mutex_lock(&bus->device_lock); if (cl->state == MEI_FILE_UNINITIALIZED) { - mutex_lock(&bus->device_lock); ret = mei_cl_link(cl); - mutex_unlock(&bus->device_lock); if (ret) - return ret; + goto out; /* update pointers */ cl->cldev = cldev; } - mutex_lock(&bus->device_lock); if (mei_cl_is_connected(cl)) { ret = 0; goto out; @@ -841,12 +839,13 @@ static void mei_cl_bus_dev_release(struct device *dev) mei_me_cl_put(cldev->me_cl); mei_dev_bus_put(cldev->bus); + mei_cl_unlink(cldev->cl); kfree(cldev->cl); kfree(cldev); } static const struct device_type mei_cl_device_type = { - .release = mei_cl_bus_dev_release, + .release = mei_cl_bus_dev_release, }; /** -- GitLab From 0845f2a4776d2a2bab88de351ce174aa9f64ed39 Mon Sep 17 00:00:00 2001 From: Tim Anderson Date: Thu, 9 Aug 2018 14:55:34 -0700 Subject: [PATCH 0421/2269] USB: Add quirk to support DJI CineSSD commit f45681f9becaa65111ed0a691ccf080a0cd5feb8 upstream. This device does not correctly handle the LPM operations. Also, the device cannot handle ATA pass-through commands and locks up when attempted while running in super speed. This patch adds the equivalent quirk logic as found in uas. Signed-off-by: Tim Anderson Acked-by: Alan Stern Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/quirks.c | 3 +++ drivers/usb/storage/scsiglue.c | 9 +++++++++ drivers/usb/storage/unusual_devs.h | 7 +++++++ 3 files changed, 19 insertions(+) diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index 99f67764765fd..deab9935c1af9 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -259,6 +259,9 @@ static const struct usb_device_id usb_quirk_list[] = { { USB_DEVICE(0x2040, 0x7200), .driver_info = USB_QUIRK_CONFIG_INTF_STRINGS }, + /* DJI CineSSD */ + { USB_DEVICE(0x2ca3, 0x0031), .driver_info = USB_QUIRK_NO_LPM }, + /* INTEL VALUE SSD */ { USB_DEVICE(0x8086, 0xf1a5), .driver_info = USB_QUIRK_RESET_RESUME }, diff --git a/drivers/usb/storage/scsiglue.c b/drivers/usb/storage/scsiglue.c index 8cd2926fb1fe6..344ec8631481e 100644 --- a/drivers/usb/storage/scsiglue.c +++ b/drivers/usb/storage/scsiglue.c @@ -392,6 +392,15 @@ static int queuecommand_lck(struct scsi_cmnd *srb, return 0; } + if ((us->fflags & US_FL_NO_ATA_1X) && + (srb->cmnd[0] == ATA_12 || srb->cmnd[0] == ATA_16)) { + memcpy(srb->sense_buffer, usb_stor_sense_invalidCDB, + sizeof(usb_stor_sense_invalidCDB)); + srb->result = SAM_STAT_CHECK_CONDITION; + done(srb); + return 0; + } + /* enqueue the command and wake up the control thread */ srb->scsi_done = done; us->srb = srb; diff --git a/drivers/usb/storage/unusual_devs.h b/drivers/usb/storage/unusual_devs.h index d100290628bd2..5e9b35a914319 100644 --- a/drivers/usb/storage/unusual_devs.h +++ b/drivers/usb/storage/unusual_devs.h @@ -2307,6 +2307,13 @@ UNUSUAL_DEV( 0x2735, 0x100b, 0x0000, 0x9999, USB_SC_DEVICE, USB_PR_DEVICE, NULL, US_FL_GO_SLOW ), +/* Reported-by: Tim Anderson */ +UNUSUAL_DEV( 0x2ca3, 0x0031, 0x0000, 0x9999, + "DJI", + "CineSSD", + USB_SC_DEVICE, USB_PR_DEVICE, NULL, + US_FL_NO_ATA_1X), + /* * Reported by Frederic Marchal * Mio Moov 330 -- GitLab From 182d130384519b21f1b6dff45375899b099b67dd Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Thu, 9 Aug 2018 16:03:37 +0200 Subject: [PATCH 0422/2269] usb: uas: add support for more quirk flags commit 42d1c6d4a06a77b3ab206a919b9050c3080f3a71 upstream. The hope that UAS devices would be less broken than old style storage devices has turned out to be unfounded. Make UAS support more of the quirk flags of the old driver. Signed-off-by: Oliver Neukum Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/uas.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c index 33a6d624c8438..24de9c00d8e24 100644 --- a/drivers/usb/storage/uas.c +++ b/drivers/usb/storage/uas.c @@ -842,6 +842,27 @@ static int uas_slave_configure(struct scsi_device *sdev) sdev->skip_ms_page_8 = 1; sdev->wce_default_on = 1; } + + /* + * Some disks return the total number of blocks in response + * to READ CAPACITY rather than the highest block number. + * If this device makes that mistake, tell the sd driver. + */ + if (devinfo->flags & US_FL_FIX_CAPACITY) + sdev->fix_capacity = 1; + + /* + * Some devices don't like MODE SENSE with page=0x3f, + * which is the command used for checking if a device + * is write-protected. Now that we tell the sd driver + * to do a 192-byte transfer with this command the + * majority of devices work fine, but a few still can't + * handle it. The sd driver will simply assume those + * devices are write-enabled. + */ + if (devinfo->flags & US_FL_NO_WP_DETECT) + sdev->skip_ms_page_3f = 1; + scsi_change_queue_depth(sdev, devinfo->qdepth - 2); return 0; } -- GitLab From 760c41fceb300b7388215d386c6e72c957753d26 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Mon, 3 Sep 2018 15:44:16 +0300 Subject: [PATCH 0423/2269] usb: Avoid use-after-free by flushing endpoints early in usb_set_interface() commit f9a5b4f58b280c1d26255376713c132f93837621 upstream. The steps taken by usb core to set a new interface is very different from what is done on the xHC host side. xHC hardware will do everything in one go. One command is used to set up new endpoints, free old endpoints, check bandwidth, and run the new endpoints. All this is done by xHC when usb core asks the hcd to check for available bandwidth. At this point usb core has not yet flushed the old endpoints, which will cause use-after-free issues in xhci driver as queued URBs are cancelled on a re-allocated endpoint. To resolve this add a call to usb_disable_interface() which will flush the endpoints before calling usb_hcd_alloc_bandwidth() Additional checks in xhci driver will also be implemented to gracefully handle stale URB cancel on freed and re-allocated endpoints Cc: Reported-by: Sudip Mukherjee Signed-off-by: Mathias Nyman Acked-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/message.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/usb/core/message.c b/drivers/usb/core/message.c index dd29e6ec1c437..833ddd228e3a8 100644 --- a/drivers/usb/core/message.c +++ b/drivers/usb/core/message.c @@ -1280,6 +1280,11 @@ void usb_enable_interface(struct usb_device *dev, * is submitted that needs that bandwidth. Some other operating systems * allocate bandwidth early, when a configuration is chosen. * + * xHCI reserves bandwidth and configures the alternate setting in + * usb_hcd_alloc_bandwidth(). If it fails the original interface altsetting + * may be disabled. Drivers cannot rely on any particular alternate + * setting being in effect after a failure. + * * This call is synchronous, and may not be used in an interrupt context. * Also, drivers must not change altsettings while urbs are scheduled for * endpoints in that interface; all such urbs must first be completed @@ -1315,6 +1320,12 @@ int usb_set_interface(struct usb_device *dev, int interface, int alternate) alternate); return -EINVAL; } + /* + * usb3 hosts configure the interface in usb_hcd_alloc_bandwidth, + * including freeing dropped endpoint ring buffers. + * Make sure the interface endpoints are flushed before that + */ + usb_disable_interface(dev, iface, false); /* Make sure we have enough bandwidth for this alternate interface. * Remove the current alt setting and add the new alt setting. -- GitLab From 1dbc1fd713206b93cdc6ea6f915f11c41c8a9447 Mon Sep 17 00:00:00 2001 From: Jia-Ju Bai Date: Sat, 1 Sep 2018 17:23:47 +0800 Subject: [PATCH 0424/2269] usb: host: u132-hcd: Fix a sleep-in-atomic-context bug in u132_get_frame() commit 6d4f268fa132742fe96dad22307c68d237356d88 upstream. i_usX2Y_subs_startup in usbusx2yaudio.c is a completion handler function for the USB driver. So it should not sleep, but it is can sleep according to the function call paths (from bottom to top) in Linux-4.16. [FUNC] msleep drivers/usb/host/u132-hcd.c, 2558: msleep in u132_get_frame drivers/usb/core/hcd.c, 2231: [FUNC_PTR]u132_get_frame in usb_hcd_get_frame_number drivers/usb/core/usb.c, 822: usb_hcd_get_frame_number in usb_get_current_frame_number sound/usb/usx2y/usbusx2yaudio.c, 303: usb_get_current_frame_number in i_usX2Y_urb_complete sound/usb/usx2y/usbusx2yaudio.c, 366: i_usX2Y_urb_complete in i_usX2Y_subs_startup Note that [FUNC_PTR] means a function pointer call is used. To fix this bug, msleep() is replaced with mdelay(). This bug is found by my static analysis tool DSAC. Signed-off-by: Jia-Ju Bai Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/u132-hcd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/host/u132-hcd.c b/drivers/usb/host/u132-hcd.c index c38855aed62c7..65c0086e25ae6 100644 --- a/drivers/usb/host/u132-hcd.c +++ b/drivers/usb/host/u132-hcd.c @@ -2559,7 +2559,7 @@ static int u132_get_frame(struct usb_hcd *hcd) } else { int frame = 0; dev_err(&u132->platform_dev->dev, "TODO: u132_get_frame\n"); - msleep(100); + mdelay(100); return frame; } } -- GitLab From 6def1c171986c0ec589f29bb9cd061a9eb0e3d6b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maxence=20Dupr=C3=A8s?= Date: Wed, 8 Aug 2018 23:56:33 +0000 Subject: [PATCH 0425/2269] USB: add quirk for WORLDE Controller KS49 or Prodipe MIDI 49C USB controller commit 9b83a1c301ad6d24988a128c69b42cbaaf537d82 upstream. WORLDE Controller KS49 or Prodipe MIDI 49C USB controller cause a -EPROTO error, a communication restart and loop again. This issue has already been fixed for KS25. https://lore.kernel.org/patchwork/patch/753077/ I just add device 201 for KS49 in quirks.c to get it works. Signed-off-by: Laurent Roux Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/quirks.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index deab9935c1af9..37a5e07b3488d 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -37,6 +37,10 @@ static const struct usb_device_id usb_quirk_list[] = { /* CBM - Flash disk */ { USB_DEVICE(0x0204, 0x6025), .driver_info = USB_QUIRK_RESET_RESUME }, + /* WORLDE Controller KS49 or Prodipe MIDI 49C USB controller */ + { USB_DEVICE(0x0218, 0x0201), .driver_info = + USB_QUIRK_CONFIG_INTF_STRINGS }, + /* WORLDE easy key (easykey.25) MIDI controller */ { USB_DEVICE(0x0218, 0x0401), .driver_info = USB_QUIRK_CONFIG_INTF_STRINGS }, -- GitLab From 3afbeb5cac2a32a07830676b1b81d02ff3ebe1b5 Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Fri, 3 Aug 2018 12:12:46 +0900 Subject: [PATCH 0426/2269] usb: gadget: udc: renesas_usb3: fix maxpacket size of ep0 commit dfe1a51d2a36647f74cbad478801efa7cf394376 upstream. This patch fixes an issue that maxpacket size of ep0 is incorrect for SuperSpeed. Otherwise, CDC NCM class with SuperSpeed doesn't work correctly on this driver because its control read data size is more than 64 bytes. Reported-by: Junki Kato Fixes: 746bfe63bba3 ("usb: gadget: renesas_usb3: add support for Renesas USB3.0 peripheral controller") Cc: # v4.5+ Signed-off-by: Yoshihiro Shimoda Tested-by: Junki Kato Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/renesas_usb3.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/usb/gadget/udc/renesas_usb3.c b/drivers/usb/gadget/udc/renesas_usb3.c index c12a1a6554bad..36a706f475d25 100644 --- a/drivers/usb/gadget/udc/renesas_usb3.c +++ b/drivers/usb/gadget/udc/renesas_usb3.c @@ -788,12 +788,15 @@ static void usb3_irq_epc_int_1_speed(struct renesas_usb3 *usb3) switch (speed) { case USB_STA_SPEED_SS: usb3->gadget.speed = USB_SPEED_SUPER; + usb3->gadget.ep0->maxpacket = USB3_EP0_SS_MAX_PACKET_SIZE; break; case USB_STA_SPEED_HS: usb3->gadget.speed = USB_SPEED_HIGH; + usb3->gadget.ep0->maxpacket = USB3_EP0_HSFS_MAX_PACKET_SIZE; break; case USB_STA_SPEED_FS: usb3->gadget.speed = USB_SPEED_FULL; + usb3->gadget.ep0->maxpacket = USB3_EP0_HSFS_MAX_PACKET_SIZE; break; default: usb3->gadget.speed = USB_SPEED_UNKNOWN; @@ -2458,7 +2461,7 @@ static int renesas_usb3_init_ep(struct renesas_usb3 *usb3, struct device *dev, /* for control pipe */ usb3->gadget.ep0 = &usb3_ep->ep; usb_ep_set_maxpacket_limit(&usb3_ep->ep, - USB3_EP0_HSFS_MAX_PACKET_SIZE); + USB3_EP0_SS_MAX_PACKET_SIZE); usb3_ep->ep.caps.type_control = true; usb3_ep->ep.caps.dir_in = true; usb3_ep->ep.caps.dir_out = true; -- GitLab From d078f295a4c9159a52c63efca70da293cab0e4db Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Wed, 8 Aug 2018 11:20:39 -0400 Subject: [PATCH 0427/2269] USB: net2280: Fix erroneous synchronization change commit dec3c23c9aa1815f07d98ae0375b4cbc10971e13 upstream. Commit f16443a034c7 ("USB: gadgetfs, dummy-hcd, net2280: fix locking for callbacks") was based on a serious misunderstanding. It introduced regressions into both the dummy-hcd and net2280 drivers. The problem in dummy-hcd was fixed by commit 7dbd8f4cabd9 ("USB: dummy-hcd: Fix erroneous synchronization change"), but the problem in net2280 remains. Namely: the ->disconnect(), ->suspend(), ->resume(), and ->reset() callbacks must be invoked without the private lock held; otherwise a deadlock will occur when the callback routine tries to interact with the UDC driver. This patch largely is a reversion of the relevant parts of f16443a034c7. It also drops the private lock around the calls to ->suspend() and ->resume() (something the earlier patch forgot to do). This is safe from races with device interrupts because it occurs within the interrupt handler. Finally, the patch changes where the ->disconnect() callback is invoked when net2280_pullup() turns the pullup off. Rather than making the callback from within stop_activity() at a time when dropping the private lock could be unsafe, the callback is moved to a point after the lock has already been dropped. Signed-off-by: Alan Stern Fixes: f16443a034c7 ("USB: gadgetfs, dummy-hcd, net2280: fix locking for callbacks") Reported-by: D. Ziesche Tested-by: D. Ziesche CC: Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/net2280.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/usb/gadget/udc/net2280.c b/drivers/usb/gadget/udc/net2280.c index f608c1f85e611..9cbb061582a77 100644 --- a/drivers/usb/gadget/udc/net2280.c +++ b/drivers/usb/gadget/udc/net2280.c @@ -1549,11 +1549,14 @@ static int net2280_pullup(struct usb_gadget *_gadget, int is_on) writel(tmp | BIT(USB_DETECT_ENABLE), &dev->usb->usbctl); } else { writel(tmp & ~BIT(USB_DETECT_ENABLE), &dev->usb->usbctl); - stop_activity(dev, dev->driver); + stop_activity(dev, NULL); } spin_unlock_irqrestore(&dev->lock, flags); + if (!is_on && dev->driver) + dev->driver->disconnect(&dev->gadget); + return 0; } @@ -2470,8 +2473,11 @@ static void stop_activity(struct net2280 *dev, struct usb_gadget_driver *driver) nuke(&dev->ep[i]); /* report disconnect; the driver is already quiesced */ - if (driver) + if (driver) { + spin_unlock(&dev->lock); driver->disconnect(&dev->gadget); + spin_lock(&dev->lock); + } usb_reinit(dev); } @@ -3345,6 +3351,8 @@ next_endpoints: BIT(PCI_RETRY_ABORT_INTERRUPT)) static void handle_stat1_irqs(struct net2280 *dev, u32 stat) +__releases(dev->lock) +__acquires(dev->lock) { struct net2280_ep *ep; u32 tmp, num, mask, scratch; @@ -3385,12 +3393,14 @@ static void handle_stat1_irqs(struct net2280 *dev, u32 stat) if (disconnect || reset) { stop_activity(dev, dev->driver); ep0_start(dev); + spin_unlock(&dev->lock); if (reset) usb_gadget_udc_reset (&dev->gadget, dev->driver); else (dev->driver->disconnect) (&dev->gadget); + spin_lock(&dev->lock); return; } } @@ -3409,6 +3419,7 @@ static void handle_stat1_irqs(struct net2280 *dev, u32 stat) tmp = BIT(SUSPEND_REQUEST_CHANGE_INTERRUPT); if (stat & tmp) { writel(tmp, &dev->regs->irqstat1); + spin_unlock(&dev->lock); if (stat & BIT(SUSPEND_REQUEST_INTERRUPT)) { if (dev->driver->suspend) dev->driver->suspend(&dev->gadget); @@ -3419,6 +3430,7 @@ static void handle_stat1_irqs(struct net2280 *dev, u32 stat) dev->driver->resume(&dev->gadget); /* at high speed, note erratum 0133 */ } + spin_lock(&dev->lock); stat &= ~tmp; } -- GitLab From a7d9367ccbb69394662b1411c6d30065bc6d30c2 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 21 Aug 2018 11:59:52 +0200 Subject: [PATCH 0428/2269] USB: serial: io_ti: fix array underflow in completion handler commit 691a03cfe8ca483f9c48153b869d354e4ae3abef upstream. As reported by Dan Carpenter, a malicious USB device could set port_number to a negative value and we would underflow the port array in the interrupt completion handler. As these devices only have one or two ports, fix this by making sure we only consider the seventh bit when determining the port number (and ignore bits 0xb0 which are typically set to 0x30). Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Cc: stable Reported-by: Dan Carpenter Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/io_ti.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/serial/io_ti.h b/drivers/usb/serial/io_ti.h index 1bd67b24f9160..bc9ff5ebd67c1 100644 --- a/drivers/usb/serial/io_ti.h +++ b/drivers/usb/serial/io_ti.h @@ -178,7 +178,7 @@ struct ump_interrupt { } __attribute__((packed)); -#define TIUMP_GET_PORT_FROM_CODE(c) (((c) >> 4) - 3) +#define TIUMP_GET_PORT_FROM_CODE(c) (((c) >> 6) & 0x01) #define TIUMP_GET_FUNC_FROM_CODE(c) ((c) & 0x0f) #define TIUMP_INTERRUPT_CODE_LSR 0x03 #define TIUMP_INTERRUPT_CODE_MSR 0x04 -- GitLab From a82200ced75f366e6d0740bf8a6fae3a212ea84c Mon Sep 17 00:00:00 2001 From: Jia-Ju Bai Date: Sat, 1 Sep 2018 16:25:08 +0800 Subject: [PATCH 0429/2269] usb: misc: uss720: Fix two sleep-in-atomic-context bugs commit bc8acc214d3f1cafebcbcd101a695bbac716595d upstream. async_complete() in uss720.c is a completion handler function for the USB driver. So it should not sleep, but it is can sleep according to the function call paths (from bottom to top) in Linux-4.16. [FUNC] set_1284_register(GFP_KERNEL) drivers/usb/misc/uss720.c, 372: set_1284_register in parport_uss720_frob_control drivers/parport/ieee1284.c, 560: [FUNC_PTR]parport_uss720_frob_control in parport_ieee1284_ack_data_avail drivers/parport/ieee1284.c, 577: parport_ieee1284_ack_data_avail in parport_ieee1284_interrupt ./include/linux/parport.h, 474: parport_ieee1284_interrupt in parport_generic_irq drivers/usb/misc/uss720.c, 116: parport_generic_irq in async_complete [FUNC] get_1284_register(GFP_KERNEL) drivers/usb/misc/uss720.c, 382: get_1284_register in parport_uss720_read_status drivers/parport/ieee1284.c, 555: [FUNC_PTR]parport_uss720_read_status in parport_ieee1284_ack_data_avail drivers/parport/ieee1284.c, 577: parport_ieee1284_ack_data_avail in parport_ieee1284_interrupt ./include/linux/parport.h, 474: parport_ieee1284_interrupt in parport_generic_irq drivers/usb/misc/uss720.c, 116: parport_generic_irq in async_complete Note that [FUNC_PTR] means a function pointer call is used. To fix these bugs, GFP_KERNEL is replaced with GFP_ATOMIC. These bugs are found by my static analysis tool DSAC. Signed-off-by: Jia-Ju Bai Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/uss720.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/misc/uss720.c b/drivers/usb/misc/uss720.c index 8a13b2fcf3e13..03152f98108c7 100644 --- a/drivers/usb/misc/uss720.c +++ b/drivers/usb/misc/uss720.c @@ -382,7 +382,7 @@ static unsigned char parport_uss720_frob_control(struct parport *pp, unsigned ch mask &= 0x0f; val &= 0x0f; d = (priv->reg[1] & (~mask)) ^ val; - if (set_1284_register(pp, 2, d, GFP_KERNEL)) + if (set_1284_register(pp, 2, d, GFP_ATOMIC)) return 0; priv->reg[1] = d; return d & 0xf; @@ -392,7 +392,7 @@ static unsigned char parport_uss720_read_status(struct parport *pp) { unsigned char ret; - if (get_1284_register(pp, 1, &ret, GFP_KERNEL)) + if (get_1284_register(pp, 1, &ret, GFP_ATOMIC)) return 0; return ret & 0xf8; } -- GitLab From a98152a6feaa64c33684097eaeb4ff225fab8fca Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 21 Aug 2018 11:59:53 +0200 Subject: [PATCH 0430/2269] USB: serial: ti_usb_3410_5052: fix array underflow in completion handler commit 5dfdd24eb3d39d815bc952ae98128e967c9bba49 upstream. Similarly to a recently reported bug in io_ti, a malicious USB device could set port_number to a negative value and we would underflow the port array in the interrupt completion handler. As these devices only have one or two ports, fix this by making sure we only consider the seventh bit when determining the port number (and ignore bits 0xb0 which are typically set to 0x30). Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Cc: stable Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/ti_usb_3410_5052.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/serial/ti_usb_3410_5052.c b/drivers/usb/serial/ti_usb_3410_5052.c index 8fc3854e5e696..57e9f6617084f 100644 --- a/drivers/usb/serial/ti_usb_3410_5052.c +++ b/drivers/usb/serial/ti_usb_3410_5052.c @@ -1123,7 +1123,7 @@ static void ti_break(struct tty_struct *tty, int break_state) static int ti_get_port_from_code(unsigned char code) { - return (code >> 4) - 3; + return (code >> 6) & 0x01; } static int ti_get_func_from_code(unsigned char code) -- GitLab From a383de0d80fa7fce143f93cebc4bd65ee782d928 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Wed, 15 Aug 2018 21:44:25 +0100 Subject: [PATCH 0431/2269] USB: yurex: Fix buffer over-read in yurex_write() commit 7e10f14ebface44a48275c8d6dc1caae3668d5a9 upstream. If the written data starts with a digit, yurex_write() tries to parse it as an integer using simple_strtoull(). This requires a null- terminator, and currently there's no guarantee that there is one. (The sample program at https://github.com/NeoCat/YUREX-driver-for-Linux/blob/master/sample/yurex_clock.pl writes an integer without a null terminator. It seems like it must have worked by chance!) Always add a null byte after the written data. Enlarge the buffer to allow for this. Cc: stable@vger.kernel.org Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/yurex.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/usb/misc/yurex.c b/drivers/usb/misc/yurex.c index 47763311a42e1..0673f286afbd4 100644 --- a/drivers/usb/misc/yurex.c +++ b/drivers/usb/misc/yurex.c @@ -425,13 +425,13 @@ static ssize_t yurex_write(struct file *file, const char __user *user_buffer, { struct usb_yurex *dev; int i, set = 0, retval = 0; - char buffer[16]; + char buffer[16 + 1]; char *data = buffer; unsigned long long c, c2 = 0; signed long timeout = 0; DEFINE_WAIT(wait); - count = min(sizeof(buffer), count); + count = min(sizeof(buffer) - 1, count); dev = file->private_data; /* verify that we actually have some data to write */ @@ -450,6 +450,7 @@ static ssize_t yurex_write(struct file *file, const char __user *user_buffer, retval = -EFAULT; goto error; } + buffer[count] = 0; memset(dev->cntl_buffer, CMD_PADDING, YUREX_BUF_SIZE); switch (buffer[0]) { -- GitLab From 80f539981a9b950ecc295ea9893e17f44c08ef9c Mon Sep 17 00:00:00 2001 From: Jia-Ju Bai Date: Sat, 1 Sep 2018 16:12:10 +0800 Subject: [PATCH 0432/2269] usb: cdc-wdm: Fix a sleep-in-atomic-context bug in service_outstanding_interrupt() commit 6e22e3af7bb3a7b9dc53cb4687659f6e63fca427 upstream. wdm_in_callback() is a completion handler function for the USB driver. So it should not sleep. But it calls service_outstanding_interrupt(), which calls usb_submit_urb() with GFP_KERNEL. To fix this bug, GFP_KERNEL is replaced with GFP_ATOMIC. This bug is found by my static analysis tool DSAC. Signed-off-by: Jia-Ju Bai Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/cdc-wdm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/class/cdc-wdm.c b/drivers/usb/class/cdc-wdm.c index 3e865dbf878c7..a9509ecccedba 100644 --- a/drivers/usb/class/cdc-wdm.c +++ b/drivers/usb/class/cdc-wdm.c @@ -457,7 +457,7 @@ static int service_outstanding_interrupt(struct wdm_device *desc) set_bit(WDM_RESPONDING, &desc->flags); spin_unlock_irq(&desc->iuspin); - rv = usb_submit_urb(desc->response, GFP_KERNEL); + rv = usb_submit_urb(desc->response, GFP_ATOMIC); spin_lock_irq(&desc->iuspin); if (rv) { dev_err(&desc->intf->dev, -- GitLab From 334902cfd93895076539d645e6615c525af8345f Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Wed, 5 Sep 2018 17:56:46 +0200 Subject: [PATCH 0433/2269] Revert "cdc-acm: implement put_char() and flush_chars()" commit df3aa13c7bbb307e172c37f193f9a7aa058d4739 upstream. This reverts commit a81cf9799ad7299b03a4dff020d9685f9ac5f3e0. The patch causes a regression, which I cannot find the reason for. So let's revert for now, as a revert hurts only performance. Original report: I was trying to resolve the problem with Oliver but we don't get any conclusion for 5 months, so I am now sending this to mail list and cdc_acm authors. I am using simple request-response protocol to obtain the boiller parameters in constant intervals. A simple one transaction is: 1. opening the /dev/ttyACM0 2. sending the following 10-bytes request to the device: unsigned char req[] = {0x02, 0xfe, 0x01, 0x05, 0x08, 0x02, 0x01, 0x69, 0xab, 0x03}; 3. reading response (frame of 74 bytes length). 4. closing the descriptor I am doing this transaction with 5 seconds intervals. Before the bad commit everything was working correctly: I've got a requests and a responses in a timely manner. After the bad commit more time I am using the kernel module, more problems I have. The graph [2] is showing the problem. As you can see after module load all seems fine but after about 30 minutes I've got a plenty of EAGAINs when doing read()'s and trying to read back the data. When I rmmod and insmod the cdc_acm module again, then the situation is starting over again: running ok shortly after load, and more time it is running, more EAGAINs I have when calling read(). As a bonus I can see the problem on the device itself: The device is configured as you can see here on this screen [3]. It has two transmision LEDs: TX and RX. Blink duration is set for 100ms. This is a recording before the bad commit when all is working fine: [4] And this is with the bad commit: [5] As you can see the TX led is blinking wrongly long (indicating transmission?) and I have problems doing read() calls (EAGAIN). Reported-by: Mariusz Bialonczyk Signed-off-by: Oliver Neukum Fixes: a81cf9799ad7 ("cdc-acm: implement put_char() and flush_chars()") Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/cdc-acm.c | 73 ------------------------------------- drivers/usb/class/cdc-acm.h | 1 - 2 files changed, 74 deletions(-) diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index f2f31fc16f290..feaa0d8f830ac 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -792,20 +792,9 @@ static int acm_tty_write(struct tty_struct *tty, } if (acm->susp_count) { - if (acm->putbuffer) { - /* now to preserve order */ - usb_anchor_urb(acm->putbuffer->urb, &acm->delayed); - acm->putbuffer = NULL; - } usb_anchor_urb(wb->urb, &acm->delayed); spin_unlock_irqrestore(&acm->write_lock, flags); return count; - } else { - if (acm->putbuffer) { - /* at this point there is no good way to handle errors */ - acm_start_wb(acm, acm->putbuffer); - acm->putbuffer = NULL; - } } stat = acm_start_wb(acm, wb); @@ -816,66 +805,6 @@ static int acm_tty_write(struct tty_struct *tty, return count; } -static void acm_tty_flush_chars(struct tty_struct *tty) -{ - struct acm *acm = tty->driver_data; - struct acm_wb *cur; - int err; - unsigned long flags; - - spin_lock_irqsave(&acm->write_lock, flags); - - cur = acm->putbuffer; - if (!cur) /* nothing to do */ - goto out; - - acm->putbuffer = NULL; - err = usb_autopm_get_interface_async(acm->control); - if (err < 0) { - cur->use = 0; - acm->putbuffer = cur; - goto out; - } - - if (acm->susp_count) - usb_anchor_urb(cur->urb, &acm->delayed); - else - acm_start_wb(acm, cur); -out: - spin_unlock_irqrestore(&acm->write_lock, flags); - return; -} - -static int acm_tty_put_char(struct tty_struct *tty, unsigned char ch) -{ - struct acm *acm = tty->driver_data; - struct acm_wb *cur; - int wbn; - unsigned long flags; - -overflow: - cur = acm->putbuffer; - if (!cur) { - spin_lock_irqsave(&acm->write_lock, flags); - wbn = acm_wb_alloc(acm); - if (wbn >= 0) { - cur = &acm->wb[wbn]; - acm->putbuffer = cur; - } - spin_unlock_irqrestore(&acm->write_lock, flags); - if (!cur) - return 0; - } - - if (cur->len == acm->writesize) { - acm_tty_flush_chars(tty); - goto overflow; - } - - cur->buf[cur->len++] = ch; - return 1; -} - static int acm_tty_write_room(struct tty_struct *tty) { struct acm *acm = tty->driver_data; @@ -2000,8 +1929,6 @@ static const struct tty_operations acm_ops = { .cleanup = acm_tty_cleanup, .hangup = acm_tty_hangup, .write = acm_tty_write, - .put_char = acm_tty_put_char, - .flush_chars = acm_tty_flush_chars, .write_room = acm_tty_write_room, .ioctl = acm_tty_ioctl, .throttle = acm_tty_throttle, diff --git a/drivers/usb/class/cdc-acm.h b/drivers/usb/class/cdc-acm.h index eacc116e83da2..ca06b20d7af9c 100644 --- a/drivers/usb/class/cdc-acm.h +++ b/drivers/usb/class/cdc-acm.h @@ -96,7 +96,6 @@ struct acm { unsigned long read_urbs_free; struct urb *read_urbs[ACM_NR]; struct acm_rb read_buffers[ACM_NR]; - struct acm_wb *putbuffer; /* for acm_tty_put_char() */ int rx_buflimit; spinlock_t read_lock; u8 *notification_buffer; /* to reassemble fragmented notifications */ -- GitLab From 20c8102b322efc8a6f206714a1e0e5db1817aef6 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 6 Sep 2018 12:47:51 +0300 Subject: [PATCH 0434/2269] cifs: prevent integer overflow in nxt_dir_entry() commit 8ad8aa353524d89fa2e09522f3078166ff78ec42 upstream. The "old_entry + le32_to_cpu(pDirInfo->NextEntryOffset)" can wrap around so I have added a check for integer overflow. Reported-by: Dr Silvio Cesare of InfoSect Reviewed-by: Ronnie Sahlberg Reviewed-by: Aurelien Aptel Signed-off-by: Dan Carpenter Signed-off-by: Steve French CC: Stable Signed-off-by: Greg Kroah-Hartman --- fs/cifs/readdir.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index a27fc8791551c..ef24b4527459f 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -376,8 +376,15 @@ static char *nxt_dir_entry(char *old_entry, char *end_of_smb, int level) new_entry = old_entry + sizeof(FIND_FILE_STANDARD_INFO) + pfData->FileNameLength; - } else - new_entry = old_entry + le32_to_cpu(pDirInfo->NextEntryOffset); + } else { + u32 next_offset = le32_to_cpu(pDirInfo->NextEntryOffset); + + if (old_entry + next_offset < old_entry) { + cifs_dbg(VFS, "invalid offset %u\n", next_offset); + return NULL; + } + new_entry = old_entry + next_offset; + } cifs_dbg(FYI, "new entry %p old entry %p\n", new_entry, old_entry); /* validate that new_entry is not past end of SMB */ if (new_entry >= end_of_smb) { -- GitLab From f3259909c85e815ca474bd745a8465f67cdecd17 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 6 Sep 2018 12:48:22 +0300 Subject: [PATCH 0435/2269] CIFS: fix wrapping bugs in num_entries() commit 56446f218af1133c802dad8e9e116f07f381846c upstream. The problem is that "entryptr + next_offset" and "entryptr + len + size" can wrap. I ended up changing the type of "entryptr" because it makes the math easier when we don't have to do so much casting. Signed-off-by: Dan Carpenter Signed-off-by: Steve French Reviewed-by: Aurelien Aptel Reviewed-by: Pavel Shilovsky CC: Stable Signed-off-by: Greg Kroah-Hartman --- fs/cifs/smb2pdu.c | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 078ec705a5cc6..69309538ffb80 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -2939,33 +2939,38 @@ num_entries(char *bufstart, char *end_of_buf, char **lastentry, size_t size) int len; unsigned int entrycount = 0; unsigned int next_offset = 0; - FILE_DIRECTORY_INFO *entryptr; + char *entryptr; + FILE_DIRECTORY_INFO *dir_info; if (bufstart == NULL) return 0; - entryptr = (FILE_DIRECTORY_INFO *)bufstart; + entryptr = bufstart; while (1) { - entryptr = (FILE_DIRECTORY_INFO *) - ((char *)entryptr + next_offset); - - if ((char *)entryptr + size > end_of_buf) { + if (entryptr + next_offset < entryptr || + entryptr + next_offset > end_of_buf || + entryptr + next_offset + size > end_of_buf) { cifs_dbg(VFS, "malformed search entry would overflow\n"); break; } - len = le32_to_cpu(entryptr->FileNameLength); - if ((char *)entryptr + len + size > end_of_buf) { + entryptr = entryptr + next_offset; + dir_info = (FILE_DIRECTORY_INFO *)entryptr; + + len = le32_to_cpu(dir_info->FileNameLength); + if (entryptr + len < entryptr || + entryptr + len > end_of_buf || + entryptr + len + size > end_of_buf) { cifs_dbg(VFS, "directory entry name would overflow frame end of buf %p\n", end_of_buf); break; } - *lastentry = (char *)entryptr; + *lastentry = entryptr; entrycount++; - next_offset = le32_to_cpu(entryptr->NextEntryOffset); + next_offset = le32_to_cpu(dir_info->NextEntryOffset); if (!next_offset) break; } -- GitLab From 8c08224aee327ee4608c1b9f6acb34c9a1269c76 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Thu, 6 Sep 2018 11:19:20 -0700 Subject: [PATCH 0436/2269] xtensa: ISS: don't allocate memory in platform_setup commit ef439d49e0bfb26cd5f03c88b4cb7cc9073ed30c upstream. Memory allocator is not initialized at that point yet, use static array instead. Cc: stable@vger.kernel.org Signed-off-by: Max Filippov Signed-off-by: Greg Kroah-Hartman --- arch/xtensa/platforms/iss/setup.c | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/arch/xtensa/platforms/iss/setup.c b/arch/xtensa/platforms/iss/setup.c index f4bbb28026f8b..58709e89a8ed1 100644 --- a/arch/xtensa/platforms/iss/setup.c +++ b/arch/xtensa/platforms/iss/setup.c @@ -78,23 +78,28 @@ static struct notifier_block iss_panic_block = { void __init platform_setup(char **p_cmdline) { + static void *argv[COMMAND_LINE_SIZE / sizeof(void *)] __initdata; + static char cmdline[COMMAND_LINE_SIZE] __initdata; int argc = simc_argc(); int argv_size = simc_argv_size(); if (argc > 1) { - void **argv = alloc_bootmem(argv_size); - char *cmdline = alloc_bootmem(argv_size); - int i; + if (argv_size > sizeof(argv)) { + pr_err("%s: command line too long: argv_size = %d\n", + __func__, argv_size); + } else { + int i; - cmdline[0] = 0; - simc_argv((void *)argv); + cmdline[0] = 0; + simc_argv((void *)argv); - for (i = 1; i < argc; ++i) { - if (i > 1) - strcat(cmdline, " "); - strcat(cmdline, argv[i]); + for (i = 1; i < argc; ++i) { + if (i > 1) + strcat(cmdline, " "); + strcat(cmdline, argv[i]); + } + *p_cmdline = cmdline; } - *p_cmdline = cmdline; } atomic_notifier_chain_register(&panic_notifier_list, &iss_panic_block); -- GitLab From d9951521dd801896889294d27c99aa94ce5abe26 Mon Sep 17 00:00:00 2001 From: Yabin Cui Date: Thu, 23 Aug 2018 15:59:35 -0700 Subject: [PATCH 0437/2269] perf/core: Force USER_DS when recording user stack data commit 02e184476eff848273826c1d6617bb37e5bcc7ad upstream. Perf can record user stack data in response to a synchronous request, such as a tracepoint firing. If this happens under set_fs(KERNEL_DS), then we end up reading user stack data using __copy_from_user_inatomic() under set_fs(KERNEL_DS). I think this conflicts with the intention of using set_fs(KERNEL_DS). And it is explicitly forbidden by hardware on ARM64 when both CONFIG_ARM64_UAO and CONFIG_ARM64_PAN are used. So fix this by forcing USER_DS when recording user stack data. Signed-off-by: Yabin Cui Acked-by: Peter Zijlstra (Intel) Cc: Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 88b0193d9418 ("perf/callchain: Force USER_DS when invoking perf_callchain_user()") Link: http://lkml.kernel.org/r/20180823225935.27035-1-yabinc@google.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- kernel/events/core.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernel/events/core.c b/kernel/events/core.c index 7c394ddf1ce6e..812ebf1cbb876 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -5700,6 +5700,7 @@ perf_output_sample_ustack(struct perf_output_handle *handle, u64 dump_size, unsigned long sp; unsigned int rem; u64 dyn_size; + mm_segment_t fs; /* * We dump: @@ -5717,7 +5718,10 @@ perf_output_sample_ustack(struct perf_output_handle *handle, u64 dump_size, /* Data. */ sp = perf_user_stack_pointer(regs); + fs = get_fs(); + set_fs(USER_DS); rem = __output_copy_user(handle, (void *) sp, dump_size); + set_fs(fs); dyn_size = dump_size - rem; perf_output_skip(handle, rem); -- GitLab From 9995545163fcbf2a8b762c15ce4585ae0924f568 Mon Sep 17 00:00:00 2001 From: Boris Ostrovsky Date: Tue, 11 Sep 2018 15:55:38 -0400 Subject: [PATCH 0438/2269] x86/EISA: Don't probe EISA bus for Xen PV guests commit 6a92b11169a65b3f8cc512c75a252cbd0d096ba0 upstream. For unprivileged Xen PV guests this is normal memory and ioremap will not be able to properly map it. While at it, since ioremap may return NULL, add a test for pointer's validity. Reported-by: Andy Smith Signed-off-by: Boris Ostrovsky Signed-off-by: Thomas Gleixner Cc: hpa@zytor.com Cc: xen-devel@lists.xenproject.org Cc: jgross@suse.com Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20180911195538.23289-1-boris.ostrovsky@oracle.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/eisa.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/eisa.c b/arch/x86/kernel/eisa.c index f260e452e4f87..e8c8c5d78dbdd 100644 --- a/arch/x86/kernel/eisa.c +++ b/arch/x86/kernel/eisa.c @@ -7,11 +7,17 @@ #include #include +#include + static __init int eisa_bus_probe(void) { - void __iomem *p = ioremap(0x0FFFD9, 4); + void __iomem *p; + + if (xen_pv_domain() && !xen_initial_domain()) + return 0; - if (readl(p) == 'E' + ('I'<<8) + ('S'<<16) + ('A'<<24)) + p = ioremap(0x0FFFD9, 4); + if (p && readl(p) == 'E' + ('I' << 8) + ('S' << 16) + ('A' << 24)) EISA_bus = 1; iounmap(p); return 0; -- GitLab From 948f1a7f7607b0989a2321b4432ff3104ef6d845 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 5 Sep 2018 14:07:14 -0400 Subject: [PATCH 0439/2269] NFSv4.1 fix infinite loop on I/O. commit 994b15b983a72e1148a173b61e5b279219bb45ae upstream. The previous fix broke recovery of delegated stateids because it assumes that if we did not mark the delegation as suspect, then the delegation has effectively been revoked, and so it removes that delegation irrespectively of whether or not it is valid and still in use. While this is "mostly harmless" for ordinary I/O, we've seen pNFS fail with LAYOUTGET spinning in an infinite loop while complaining that we're using an invalid stateid (in this case the all-zero stateid). What we rather want to do here is ensure that the delegation is always correctly marked as needing testing when that is the case. So we want to close the loophole offered by nfs4_schedule_stateid_recovery(), which marks the state as needing to be reclaimed, but not the delegation that may be backing it. Fixes: 0e3d3e5df07dc ("NFSv4.1 fix infinite loop on IO BAD_STATEID error") Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org # v4.11+ Signed-off-by: Anna Schumaker Signed-off-by: Greg Kroah-Hartman --- fs/nfs/nfs4proc.c | 10 +++++++--- fs/nfs/nfs4state.c | 2 ++ 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 77c7d29fcd3b8..a3b67d3b1dfb7 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2533,14 +2533,18 @@ static void nfs41_check_delegation_stateid(struct nfs4_state *state) } nfs4_stateid_copy(&stateid, &delegation->stateid); - if (test_bit(NFS_DELEGATION_REVOKED, &delegation->flags) || - !test_and_clear_bit(NFS_DELEGATION_TEST_EXPIRED, - &delegation->flags)) { + if (test_bit(NFS_DELEGATION_REVOKED, &delegation->flags)) { rcu_read_unlock(); nfs_finish_clear_delegation_stateid(state, &stateid); return; } + if (!test_and_clear_bit(NFS_DELEGATION_TEST_EXPIRED, + &delegation->flags)) { + rcu_read_unlock(); + return; + } + cred = get_rpccred(delegation->cred); rcu_read_unlock(); status = nfs41_test_and_free_expired_stateid(server, &stateid, cred); diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 45873ed92057b..e1d88bca815e1 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1354,6 +1354,8 @@ int nfs4_schedule_stateid_recovery(const struct nfs_server *server, struct nfs4_ if (!nfs4_state_mark_reclaim_nograce(clp, state)) return -EBADF; + nfs_inode_find_delegation_state_and_recover(state->inode, + &state->stateid); dprintk("%s: scheduling stateid recovery for server %s\n", __func__, clp->cl_hostname); nfs4_schedule_state_manager(clp); -- GitLab From 62e052895d299f32ddeac4e5049c542a50a45d3e Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Tue, 15 May 2018 23:32:45 +0100 Subject: [PATCH 0440/2269] binfmt_elf: Respect error return from `regset->active' [ Upstream commit 2f819db565e82e5f73cd42b39925098986693378 ] The regset API documented in defines -ENODEV as the result of the `->active' handler to be used where the feature requested is not available on the hardware found. However code handling core file note generation in `fill_thread_core_info' interpretes any non-zero result from the `->active' handler as the regset requested being active. Consequently processing continues (and hopefully gracefully fails later on) rather than being abandoned right away for the regset requested. Fix the problem then by making the code proceed only if a positive result is returned from the `->active' handler. Signed-off-by: Maciej W. Rozycki Signed-off-by: Paul Burton Fixes: 4206d3aa1978 ("elf core dump: notes user_regset") Patchwork: https://patchwork.linux-mips.org/patch/19332/ Cc: Alexander Viro Cc: James Hogan Cc: Ralf Baechle Cc: linux-fsdevel@vger.kernel.org Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/binfmt_elf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index c0e3f91e28e91..469666df91da2 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1725,7 +1725,7 @@ static int fill_thread_core_info(struct elf_thread_core_info *t, const struct user_regset *regset = &view->regsets[i]; do_thread_regset_writeback(t->task, regset); if (regset->core_note_type && regset->get && - (!regset->active || regset->active(t->task, regset))) { + (!regset->active || regset->active(t->task, regset) > 0)) { int ret; size_t size = regset->n * regset->size; void *data = kmalloc(size, GFP_KERNEL); -- GitLab From ebb42f77db4230624e1d63fe65d1ac5fb9e0c4e9 Mon Sep 17 00:00:00 2001 From: Noa Osherovich Date: Mon, 16 Jul 2018 18:35:34 -0700 Subject: [PATCH 0441/2269] net/mlx5: Add missing SET_DRIVER_VERSION command translation [ Upstream commit 0f4039104ee61e14ac4771a2181c2a20572f4ec9 ] When translating command opcodes to a string, SET_DRIVER_VERSION command was missing. Fixes: 42ca502e179d0 ('net/mlx5_core: Use a macro in mlx5_command_str()') Signed-off-by: Noa Osherovich Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index cf94fdf25155f..c7654209668bd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -449,6 +449,7 @@ const char *mlx5_command_str(int command) MLX5_COMMAND_STR_CASE(SET_HCA_CAP); MLX5_COMMAND_STR_CASE(QUERY_ISSI); MLX5_COMMAND_STR_CASE(SET_ISSI); + MLX5_COMMAND_STR_CASE(SET_DRIVER_VERSION); MLX5_COMMAND_STR_CASE(CREATE_MKEY); MLX5_COMMAND_STR_CASE(QUERY_MKEY); MLX5_COMMAND_STR_CASE(DESTROY_MKEY); -- GitLab From a41ab6fe169943cfef516ee5230ba8ce55118112 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 25 May 2018 11:10:06 +0530 Subject: [PATCH 0442/2269] arm64: dts: uniphier: Add missing cooling device properties for CPUs [ Upstream commit af0e09d0c6762e486b0eb5cc4737396964c34fad ] The cooling device properties, like "#cooling-cells" and "dynamic-power-coefficient", should either be present for all the CPUs of a cluster or none. If these are present only for a subset of CPUs of a cluster then things will start falling apart as soon as the CPUs are brought online in a different order. For example, this will happen because the operating system looks for such properties in the CPU node it is trying to bring up, so that it can register a cooling device. Add such missing properties. Signed-off-by: Viresh Kumar Signed-off-by: Masahiro Yamada Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/socionext/uniphier-ld20.dtsi | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/boot/dts/socionext/uniphier-ld20.dtsi b/arch/arm64/boot/dts/socionext/uniphier-ld20.dtsi index a29c279b6e8e4..dba6f0ff81063 100644 --- a/arch/arm64/boot/dts/socionext/uniphier-ld20.dtsi +++ b/arch/arm64/boot/dts/socionext/uniphier-ld20.dtsi @@ -55,6 +55,7 @@ clocks = <&sys_clk 32>; enable-method = "psci"; operating-points-v2 = <&cluster0_opp>; + #cooling-cells = <2>; }; cpu2: cpu@100 { @@ -73,6 +74,7 @@ clocks = <&sys_clk 33>; enable-method = "psci"; operating-points-v2 = <&cluster1_opp>; + #cooling-cells = <2>; }; }; -- GitLab From 67e522a76d99a0035150b4640295aab2c573d455 Mon Sep 17 00:00:00 2001 From: Ronny Chevalier Date: Wed, 11 Jul 2018 14:39:37 +0200 Subject: [PATCH 0443/2269] audit: fix use-after-free in audit_add_watch [ Upstream commit baa2a4fdd525c8c4b0f704d20457195b29437839 ] audit_add_watch stores locally krule->watch without taking a reference on watch. Then, it calls audit_add_to_parent, and uses the watch stored locally. Unfortunately, it is possible that audit_add_to_parent updates krule->watch. When it happens, it also drops a reference of watch which could free the watch. How to reproduce (with KASAN enabled): auditctl -w /etc/passwd -F success=0 -k test_passwd auditctl -w /etc/passwd -F success=1 -k test_passwd2 The second call to auditctl triggers the use-after-free, because audit_to_parent updates krule->watch to use a previous existing watch and drops the reference to the newly created watch. To fix the issue, we grab a reference of watch and we release it at the end of the function. Signed-off-by: Ronny Chevalier Reviewed-by: Richard Guy Briggs Signed-off-by: Paul Moore Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- kernel/audit_watch.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c index 9eb8b3511636e..4a98f6e314a9b 100644 --- a/kernel/audit_watch.c +++ b/kernel/audit_watch.c @@ -419,6 +419,13 @@ int audit_add_watch(struct audit_krule *krule, struct list_head **list) struct path parent_path; int h, ret = 0; + /* + * When we will be calling audit_add_to_parent, krule->watch might have + * been updated and watch might have been freed. + * So we need to keep a reference of watch. + */ + audit_get_watch(watch); + mutex_unlock(&audit_filter_mutex); /* Avoid calling path_lookup under audit_filter_mutex. */ @@ -427,8 +434,10 @@ int audit_add_watch(struct audit_krule *krule, struct list_head **list) /* caller expects mutex locked */ mutex_lock(&audit_filter_mutex); - if (ret) + if (ret) { + audit_put_watch(watch); return ret; + } /* either find an old parent or attach a new one */ parent = audit_find_parent(d_backing_inode(parent_path.dentry)); @@ -446,6 +455,7 @@ int audit_add_watch(struct audit_krule *krule, struct list_head **list) *list = &audit_inode_hash[h]; error: path_put(&parent_path); + audit_put_watch(watch); return ret; } -- GitLab From b888dba2e81d2b20d4896dcdebefb7e6d7e9697d Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Sat, 7 Jul 2018 05:37:22 +0200 Subject: [PATCH 0444/2269] mtdchar: fix overflows in adjustment of `count` [ Upstream commit 6c6bc9ea84d0008024606bf5ba10519e20d851bf ] The first checks in mtdchar_read() and mtdchar_write() attempt to limit `count` such that `*ppos + count <= mtd->size`. However, they ignore the possibility of `*ppos > mtd->size`, allowing the calculation of `count` to wrap around. `mtdchar_lseek()` prevents seeking beyond mtd->size, but the pread/pwrite syscalls bypass this. I haven't found any codepath on which this actually causes dangerous behavior, but it seems like a sensible change anyway. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Jann Horn Signed-off-by: Boris Brezillon Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/mtdchar.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c index b25f444c5914b..fa4d12217652b 100644 --- a/drivers/mtd/mtdchar.c +++ b/drivers/mtd/mtdchar.c @@ -160,8 +160,12 @@ static ssize_t mtdchar_read(struct file *file, char __user *buf, size_t count, pr_debug("MTD_read\n"); - if (*ppos + count > mtd->size) - count = mtd->size - *ppos; + if (*ppos + count > mtd->size) { + if (*ppos < mtd->size) + count = mtd->size - *ppos; + else + count = 0; + } if (!count) return 0; @@ -246,7 +250,7 @@ static ssize_t mtdchar_write(struct file *file, const char __user *buf, size_t c pr_debug("MTD_write\n"); - if (*ppos == mtd->size) + if (*ppos >= mtd->size) return -ENOSPC; if (*ppos + count > mtd->size) -- GitLab From 4f4374a9bd25b333971e6f2656b642d29e2efe7b Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 18 Jul 2018 15:44:43 +0200 Subject: [PATCH 0445/2269] vfs: fix freeze protection in mnt_want_write_file() for overlayfs [ Upstream commit a6795a585929d94ca3e931bc8518f8deb8bbe627 ] The underlying real file used by overlayfs still contains the overlay path. This results in mnt_want_write_file() calls by the filesystem getting freeze protection on the wrong inode (the overlayfs one instead of the real one). Fix by using file_inode(file)->i_sb instead of file->f_path.mnt->mnt_sb. Reported-by: Amir Goldstein Signed-off-by: Miklos Szeredi Reviewed-by: Christoph Hellwig Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/namespace.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/namespace.c b/fs/namespace.c index 9dc146e7b5e0a..3ee3ee5819bc1 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -446,10 +446,10 @@ int mnt_want_write_file_path(struct file *file) { int ret; - sb_start_write(file->f_path.mnt->mnt_sb); + sb_start_write(file_inode(file)->i_sb); ret = __mnt_want_write_file(file); if (ret) - sb_end_write(file->f_path.mnt->mnt_sb); + sb_end_write(file_inode(file)->i_sb); return ret; } @@ -540,7 +540,8 @@ void __mnt_drop_write_file(struct file *file) void mnt_drop_write_file_path(struct file *file) { - mnt_drop_write(file->f_path.mnt); + __mnt_drop_write_file(file); + sb_end_write(file_inode(file)->i_sb); } void mnt_drop_write_file(struct file *file) -- GitLab From ef49d0e83711230b4a8e516c50953d3b549d8f36 Mon Sep 17 00:00:00 2001 From: Philipp Puschmann Date: Tue, 17 Jul 2018 13:41:12 +0200 Subject: [PATCH 0446/2269] Bluetooth: Use lock_sock_nested in bt_accept_enqueue [ Upstream commit b71c69c26b4916d11b8d403d8e667bbd191f1b8f ] Fixes this warning that was provoked by a pairing: [60258.016221] WARNING: possible recursive locking detected [60258.021558] 4.15.0-RD1812-BSP #1 Tainted: G O [60258.027146] -------------------------------------------- [60258.032464] kworker/u5:0/70 is trying to acquire lock: [60258.037609] (sk_lock-AF_BLUETOOTH-BTPROTO_L2CAP){+.+.}, at: [<87759073>] bt_accept_enqueue+0x3c/0x74 [60258.046863] [60258.046863] but task is already holding lock: [60258.052704] (sk_lock-AF_BLUETOOTH-BTPROTO_L2CAP){+.+.}, at: [] l2cap_sock_new_connection_cb+0x1c/0x88 [60258.062905] [60258.062905] other info that might help us debug this: [60258.069441] Possible unsafe locking scenario: [60258.069441] [60258.075368] CPU0 [60258.077821] ---- [60258.080272] lock(sk_lock-AF_BLUETOOTH-BTPROTO_L2CAP); [60258.085510] lock(sk_lock-AF_BLUETOOTH-BTPROTO_L2CAP); [60258.090748] [60258.090748] *** DEADLOCK *** [60258.090748] [60258.096676] May be due to missing lock nesting notation [60258.096676] [60258.103472] 5 locks held by kworker/u5:0/70: [60258.107747] #0: ((wq_completion)%shdev->name#2){+.+.}, at: [<9460d092>] process_one_work+0x130/0x4fc [60258.117263] #1: ((work_completion)(&hdev->rx_work)){+.+.}, at: [<9460d092>] process_one_work+0x130/0x4fc [60258.126942] #2: (&conn->chan_lock){+.+.}, at: [<7877c8c3>] l2cap_connect+0x80/0x4f8 [60258.134806] #3: (&chan->lock/2){+.+.}, at: [<2e16c724>] l2cap_connect+0x8c/0x4f8 [60258.142410] #4: (sk_lock-AF_BLUETOOTH-BTPROTO_L2CAP){+.+.}, at: [] l2cap_sock_new_connection_cb+0x1c/0x88 [60258.153043] [60258.153043] stack backtrace: [60258.157413] CPU: 1 PID: 70 Comm: kworker/u5:0 Tainted: G O 4.15.0-RD1812-BSP #1 [60258.165945] Hardware name: Freescale i.MX6 Quad/DualLite (Device Tree) [60258.172485] Workqueue: hci0 hci_rx_work [60258.176331] Backtrace: [60258.178797] [<8010c9fc>] (dump_backtrace) from [<8010ccbc>] (show_stack+0x18/0x1c) [60258.186379] r7:80e55fe4 r6:80e55fe4 r5:20050093 r4:00000000 [60258.192058] [<8010cca4>] (show_stack) from [<809864e8>] (dump_stack+0xb0/0xdc) [60258.199301] [<80986438>] (dump_stack) from [<8016ecc8>] (__lock_acquire+0xffc/0x11d4) [60258.207144] r9:5e2bb019 r8:630f974c r7:ba8a5940 r6:ba8a5ed8 r5:815b5220 r4:80fa081c [60258.214901] [<8016dccc>] (__lock_acquire) from [<8016f620>] (lock_acquire+0x78/0x98) [60258.222655] r10:00000040 r9:00000040 r8:808729f0 r7:00000001 r6:00000000 r5:60050013 [60258.230491] r4:00000000 [60258.233045] [<8016f5a8>] (lock_acquire) from [<806ee974>] (lock_sock_nested+0x64/0x88) [60258.240970] r7:00000000 r6:b796e870 r5:00000001 r4:b796e800 [60258.246643] [<806ee910>] (lock_sock_nested) from [<808729f0>] (bt_accept_enqueue+0x3c/0x74) [60258.255004] r8:00000001 r7:ba7d3c00 r6:ba7d3ea4 r5:ba7d2000 r4:b796e800 [60258.261717] [<808729b4>] (bt_accept_enqueue) from [<808aa39c>] (l2cap_sock_new_connection_cb+0x68/0x88) [60258.271117] r5:b796e800 r4:ba7d2000 [60258.274708] [<808aa334>] (l2cap_sock_new_connection_cb) from [<808a294c>] (l2cap_connect+0x190/0x4f8) [60258.283933] r5:00000001 r4:ba6dce00 [60258.287524] [<808a27bc>] (l2cap_connect) from [<808a4a14>] (l2cap_recv_frame+0x744/0x2cf8) [60258.295800] r10:ba6dcf24 r9:00000004 r8:b78d8014 r7:00000004 r6:bb05d000 r5:00000004 [60258.303635] r4:bb05d008 [60258.306183] [<808a42d0>] (l2cap_recv_frame) from [<808a7808>] (l2cap_recv_acldata+0x210/0x214) [60258.314805] r10:b78e7800 r9:bb05d960 r8:00000001 r7:bb05d000 r6:0000000c r5:b7957a80 [60258.322641] r4:ba6dce00 [60258.325188] [<808a75f8>] (l2cap_recv_acldata) from [<8087630c>] (hci_rx_work+0x35c/0x4e8) [60258.333374] r6:80e5743c r5:bb05d7c8 r4:b7957a80 [60258.338004] [<80875fb0>] (hci_rx_work) from [<8013dc7c>] (process_one_work+0x1a4/0x4fc) [60258.346018] r10:00000001 r9:00000000 r8:baabfef8 r7:ba997500 r6:baaba800 r5:baaa5d00 [60258.353853] r4:bb05d7c8 [60258.356401] [<8013dad8>] (process_one_work) from [<8013e028>] (worker_thread+0x54/0x5cc) [60258.364503] r10:baabe038 r9:baaba834 r8:80e05900 r7:00000088 r6:baaa5d18 r5:baaba800 [60258.372338] r4:baaa5d00 [60258.374888] [<8013dfd4>] (worker_thread) from [<801448f8>] (kthread+0x134/0x160) [60258.382295] r10:ba8310b8 r9:bb07dbfc r8:8013dfd4 r7:baaa5d00 r6:00000000 r5:baaa8ac0 [60258.390130] r4:ba831080 [60258.392682] [<801447c4>] (kthread) from [<801080b4>] (ret_from_fork+0x14/0x20) [60258.399915] r10:00000000 r9:00000000 r8:00000000 r7:00000000 r6:00000000 r5:801447c4 [60258.407751] r4:baaa8ac0 r3:baabe000 Signed-off-by: Philipp Puschmann Signed-off-by: Marcel Holtmann Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/bluetooth/af_bluetooth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 91e3ba2807064..583951e82ceed 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -159,7 +159,7 @@ void bt_accept_enqueue(struct sock *parent, struct sock *sk) BT_DBG("parent %p, sk %p", parent, sk); sock_hold(sk); - lock_sock(sk); + lock_sock_nested(sk, SINGLE_DEPTH_NESTING); list_add_tail(&bt_sk(sk)->accept_q, &bt_sk(parent)->accept_q); bt_sk(sk)->parent = parent; release_sock(sk); -- GitLab From c818695c71068a30580064fc65fea51e074f57bf Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Fri, 8 Jun 2018 14:57:42 -0700 Subject: [PATCH 0447/2269] evm: Don't deadlock if a crypto algorithm is unavailable [ Upstream commit e2861fa71641c6414831d628a1f4f793b6562580 ] When EVM attempts to appraise a file signed with a crypto algorithm the kernel doesn't have support for, it will cause the kernel to trigger a module load. If the EVM policy includes appraisal of kernel modules this will in turn call back into EVM - since EVM is holding a lock until the crypto initialisation is complete, this triggers a deadlock. Add a CRYPTO_NOLOAD flag and skip module loading if it's set, and add that flag in the EVM case in order to fail gracefully with an error message instead of deadlocking. Signed-off-by: Matthew Garrett Acked-by: Herbert Xu Signed-off-by: Mimi Zohar Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- crypto/api.c | 2 +- include/linux/crypto.h | 5 +++++ security/integrity/evm/evm_crypto.c | 3 ++- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/crypto/api.c b/crypto/api.c index 941cd4c6c7ecb..e485aed11ad09 100644 --- a/crypto/api.c +++ b/crypto/api.c @@ -215,7 +215,7 @@ struct crypto_alg *crypto_larval_lookup(const char *name, u32 type, u32 mask) mask &= ~(CRYPTO_ALG_LARVAL | CRYPTO_ALG_DEAD); alg = crypto_alg_lookup(name, type, mask); - if (!alg) { + if (!alg && !(mask & CRYPTO_NOLOAD)) { request_module("crypto-%s", name); if (!((type ^ CRYPTO_ALG_NEED_FALLBACK) & mask & diff --git a/include/linux/crypto.h b/include/linux/crypto.h index cc36484d29e16..de96913306cbb 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -111,6 +111,11 @@ */ #define CRYPTO_ALG_OPTIONAL_KEY 0x00004000 +/* + * Don't trigger module loading + */ +#define CRYPTO_NOLOAD 0x00008000 + /* * Transform masks and values (for crt_flags). */ diff --git a/security/integrity/evm/evm_crypto.c b/security/integrity/evm/evm_crypto.c index 1d32cd20009a3..ee9c3de5065ab 100644 --- a/security/integrity/evm/evm_crypto.c +++ b/security/integrity/evm/evm_crypto.c @@ -94,7 +94,8 @@ static struct shash_desc *init_desc(char type) mutex_lock(&mutex); if (*tfm) goto out; - *tfm = crypto_alloc_shash(algo, 0, CRYPTO_ALG_ASYNC); + *tfm = crypto_alloc_shash(algo, 0, + CRYPTO_ALG_ASYNC | CRYPTO_NOLOAD); if (IS_ERR(*tfm)) { rc = PTR_ERR(*tfm); pr_err("Can not allocate %s (reason: %ld)\n", algo, rc); -- GitLab From bdf948eaa5593f82bb3345e0432a5671091f487d Mon Sep 17 00:00:00 2001 From: Nicholas Mc Guire Date: Sat, 7 Jul 2018 08:53:07 +0200 Subject: [PATCH 0448/2269] KVM: PPC: Book3S HV: Add of_node_put() in success path [ Upstream commit 51eaa08f029c7343df846325d7cf047be8b96e81 ] The call to of_find_compatible_node() is returning a pointer with incremented refcount so it must be explicitly decremented after the last use. As here it is only being used for checking of node presence but the result is not actually used in the success path it can be dropped immediately. Signed-off-by: Nicholas Mc Guire Fixes: commit f725758b899f ("KVM: PPC: Book3S HV: Use OPAL XICS emulation on POWER9") Signed-off-by: Paul Mackerras Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kvm/book3s_hv.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 377d1420bd024..58746328b9bd6 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -4356,6 +4356,8 @@ static int kvmppc_book3s_init_hv(void) pr_err("KVM-HV: Cannot determine method for accessing XICS\n"); return -ENODEV; } + /* presence of intc confirmed - node can be dropped again */ + of_node_put(np); } #endif -- GitLab From 55bdb77aa991c2dfd107c4805e334ad571f73a4f Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 17 Jul 2018 10:36:04 -0700 Subject: [PATCH 0449/2269] security: check for kstrdup() failure in lsm_append() [ Upstream commit 87ea58433208d17295e200d56be5e2a4fe4ce7d6 ] lsm_append() should return -ENOMEM if memory allocation failed. Fixes: d69dece5f5b6 ("LSM: Add /sys/kernel/security/lsm") Signed-off-by: Eric Biggers Signed-off-by: James Morris Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- security/security.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/security/security.c b/security/security.c index 4bf0f571b4ef9..95a1a0f52880c 100644 --- a/security/security.c +++ b/security/security.c @@ -111,6 +111,8 @@ static int lsm_append(char *new, char **result) if (*result == NULL) { *result = kstrdup(new, GFP_KERNEL); + if (*result == NULL) + return -ENOMEM; } else { /* Check if it is the last registered name */ if (match_last_lsm(*result, new)) -- GitLab From c2bd54bc222050856992e011e3a45b03e07cb647 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Mon, 16 Jul 2018 08:26:36 -0700 Subject: [PATCH 0450/2269] MIPS: loongson64: cs5536: Fix PCI_OHCI_INT_REG reads [ Upstream commit cd87668d601f622e0ebcfea4f78d116d5f572f4d ] The PCI_OHCI_INT_REG case in pci_ohci_read_reg() contains the following if statement: if ((lo & 0x00000f00) == CS5536_USB_INTR) CS5536_USB_INTR expands to the constant 11, which gives us the following condition which can never evaluate true: if ((lo & 0xf00) == 11) At least when using GCC 8.1.0 this falls foul of the tautoligcal-compare warning, and since the code is built with the -Werror flag the build fails. Fix this by shifting lo right by 8 bits in order to match the corresponding PCI_OHCI_INT_REG case in pci_ohci_write_reg(). Signed-off-by: Paul Burton Patchwork: https://patchwork.linux-mips.org/patch/19861/ Cc: Huacai Chen Cc: James Hogan Cc: Ralf Baechle Cc: linux-mips@linux-mips.org Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/mips/loongson64/common/cs5536/cs5536_ohci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/loongson64/common/cs5536/cs5536_ohci.c b/arch/mips/loongson64/common/cs5536/cs5536_ohci.c index f7c905e50dc41..92dc6bafc1271 100644 --- a/arch/mips/loongson64/common/cs5536/cs5536_ohci.c +++ b/arch/mips/loongson64/common/cs5536/cs5536_ohci.c @@ -138,7 +138,7 @@ u32 pci_ohci_read_reg(int reg) break; case PCI_OHCI_INT_REG: _rdmsr(DIVIL_MSR_REG(PIC_YSEL_LOW), &hi, &lo); - if ((lo & 0x00000f00) == CS5536_USB_INTR) + if (((lo >> PIC_YSEL_LOW_USB_SHIFT) & 0xf) == CS5536_USB_INTR) conf_data = 1; break; default: -- GitLab From bc87baee19a707b5edfed111dc1a7e6d2ac1e3a5 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Sun, 15 Jul 2018 18:16:17 -0500 Subject: [PATCH 0451/2269] configfs: fix registered group removal [ Upstream commit cc57c07343bd071cdf1915a91a24ab7d40c9b590 ] This patch fixes a bug where configfs_register_group had added a group in a tree, and userspace has done a rmdir on a dir somewhere above that group and we hit a kernel crash. The problem is configfs_rmdir will detach everything under it and unlink groups on the default_groups list. It will not unlink groups added with configfs_register_group so when configfs_unregister_group is called to drop its references to the group/items we crash when we try to access the freed dentrys. The patch just adds a check for if a rmdir has been done above us and if so just does the unlink part of unregistration. Sorry if you are getting this multiple times. I thouhgt I sent this to some of you and lkml, but I do not see it. Signed-off-by: Mike Christie Cc: Christoph Hellwig Cc: Joel Becker Signed-off-by: Christoph Hellwig Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/configfs/dir.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index 56fb26127fef2..d2a1a79fa324b 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -1777,6 +1777,16 @@ void configfs_unregister_group(struct config_group *group) struct dentry *dentry = group->cg_item.ci_dentry; struct dentry *parent = group->cg_item.ci_parent->ci_dentry; + mutex_lock(&subsys->su_mutex); + if (!group->cg_item.ci_parent->ci_group) { + /* + * The parent has already been unlinked and detached + * due to a rmdir. + */ + goto unlink_group; + } + mutex_unlock(&subsys->su_mutex); + inode_lock_nested(d_inode(parent), I_MUTEX_PARENT); spin_lock(&configfs_dirent_lock); configfs_detach_prep(dentry, NULL); @@ -1791,6 +1801,7 @@ void configfs_unregister_group(struct config_group *group) dput(dentry); mutex_lock(&subsys->su_mutex); +unlink_group: unlink_group(group); mutex_unlock(&subsys->su_mutex); } -- GitLab From 93cc60d04bed988b78bf50178ffaae9e89aa8499 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Thu, 5 Jul 2018 02:10:17 -0700 Subject: [PATCH 0452/2269] pinctrl: rza1: Fix selector use for groups and functions [ Upstream commit dc4003d260594aa300028c3c5d040c5719abd19b ] We must use a mutex around the generic_add functions and save the function and group selector in case we need to remove them. Otherwise the selector use will be racy for deferred probe at least. Fixes: 5a49b644b307 ("pinctrl: Renesas RZ/A1 pin and gpio controller") Reported-by: H. Nikolaus Schaller Cc: Christ van Willegen Cc: Haojian Zhuang Cc: Paul Cercueil Cc: Sean Wang Acked-by: Jacopo Mondi Signed-off-by: Tony Lindgren Tested-By: H. Nikolaus Schaller Reviewed-by: Andy Shevchenko Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/pinctrl-rza1.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/drivers/pinctrl/pinctrl-rza1.c b/drivers/pinctrl/pinctrl-rza1.c index 04d058706b808..a865dc56a491b 100644 --- a/drivers/pinctrl/pinctrl-rza1.c +++ b/drivers/pinctrl/pinctrl-rza1.c @@ -878,6 +878,7 @@ static int rza1_dt_node_to_map(struct pinctrl_dev *pctldev, const char *grpname; const char **fngrps; int ret, npins; + int gsel, fsel; npins = rza1_dt_node_pin_count(np); if (npins < 0) { @@ -927,18 +928,19 @@ static int rza1_dt_node_to_map(struct pinctrl_dev *pctldev, fngrps[0] = grpname; mutex_lock(&rza1_pctl->mutex); - ret = pinctrl_generic_add_group(pctldev, grpname, grpins, npins, - NULL); - if (ret) { + gsel = pinctrl_generic_add_group(pctldev, grpname, grpins, npins, + NULL); + if (gsel < 0) { mutex_unlock(&rza1_pctl->mutex); - return ret; + return gsel; } - ret = pinmux_generic_add_function(pctldev, grpname, fngrps, 1, - mux_confs); - if (ret) + fsel = pinmux_generic_add_function(pctldev, grpname, fngrps, 1, + mux_confs); + if (fsel < 0) { + ret = fsel; goto remove_group; - mutex_unlock(&rza1_pctl->mutex); + } dev_info(rza1_pctl->dev, "Parsed function and group %s with %d pins\n", grpname, npins); @@ -955,15 +957,15 @@ static int rza1_dt_node_to_map(struct pinctrl_dev *pctldev, (*map)->data.mux.group = np->name; (*map)->data.mux.function = np->name; *num_maps = 1; + mutex_unlock(&rza1_pctl->mutex); return 0; remove_function: - mutex_lock(&rza1_pctl->mutex); - pinmux_generic_remove_last_function(pctldev); + pinmux_generic_remove_function(pctldev, fsel); remove_group: - pinctrl_generic_remove_last_group(pctldev); + pinctrl_generic_remove_group(pctldev, gsel); mutex_unlock(&rza1_pctl->mutex); dev_info(rza1_pctl->dev, "Unable to parse function and group %s\n", -- GitLab From 9b85641c204b313f0dfe9b2a5969ec12c80d6669 Mon Sep 17 00:00:00 2001 From: Andrea Parri Date: Mon, 16 Jul 2018 11:06:01 -0700 Subject: [PATCH 0453/2269] sched/core: Use smp_mb() in wake_woken_function() [ Upstream commit 76e079fefc8f62bd9b2cd2950814d1ee806e31a5 ] wake_woken_function() synchronizes with wait_woken() as follows: [wait_woken] [wake_woken_function] entry->flags &= ~wq_flag_woken; condition = true; smp_mb(); smp_wmb(); if (condition) wq_entry->flags |= wq_flag_woken; break; This commit replaces the above smp_wmb() with an smp_mb() in order to guarantee that either wait_woken() sees the wait condition being true or the store to wq_entry->flags in woken_wake_function() follows the store in wait_woken() in the coherence order (so that the former can eventually be observed by wait_woken()). The commit also fixes a comment associated to set_current_state() in wait_woken(): the comment pairs the barrier in set_current_state() to the above smp_wmb(), while the actual pairing involves the barrier in set_current_state() and the barrier executed by the try_to_wake_up() in wake_woken_function(). Signed-off-by: Andrea Parri Signed-off-by: Paul E. McKenney Acked-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: akiyks@gmail.com Cc: boqun.feng@gmail.com Cc: dhowells@redhat.com Cc: j.alglave@ucl.ac.uk Cc: linux-arch@vger.kernel.org Cc: luc.maranget@inria.fr Cc: npiggin@gmail.com Cc: parri.andrea@gmail.com Cc: stern@rowland.harvard.edu Cc: will.deacon@arm.com Link: http://lkml.kernel.org/r/20180716180605.16115-10-paulmck@linux.vnet.ibm.com Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- kernel/sched/wait.c | 47 ++++++++++++++++++++------------------------- 1 file changed, 21 insertions(+), 26 deletions(-) diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c index 929ecb7d6b78a..e296084643821 100644 --- a/kernel/sched/wait.c +++ b/kernel/sched/wait.c @@ -395,35 +395,36 @@ static inline bool is_kthread_should_stop(void) * if (condition) * break; * - * p->state = mode; condition = true; - * smp_mb(); // A smp_wmb(); // C - * if (!wq_entry->flags & WQ_FLAG_WOKEN) wq_entry->flags |= WQ_FLAG_WOKEN; - * schedule() try_to_wake_up(); - * p->state = TASK_RUNNING; ~~~~~~~~~~~~~~~~~~ - * wq_entry->flags &= ~WQ_FLAG_WOKEN; condition = true; - * smp_mb() // B smp_wmb(); // C - * wq_entry->flags |= WQ_FLAG_WOKEN; - * } - * remove_wait_queue(&wq_head, &wait); + * // in wait_woken() // in woken_wake_function() * + * p->state = mode; wq_entry->flags |= WQ_FLAG_WOKEN; + * smp_mb(); // A try_to_wake_up(): + * if (!(wq_entry->flags & WQ_FLAG_WOKEN)) + * schedule() if (p->state & mode) + * p->state = TASK_RUNNING; p->state = TASK_RUNNING; + * wq_entry->flags &= ~WQ_FLAG_WOKEN; ~~~~~~~~~~~~~~~~~~ + * smp_mb(); // B condition = true; + * } smp_mb(); // C + * remove_wait_queue(&wq_head, &wait); wq_entry->flags |= WQ_FLAG_WOKEN; */ long wait_woken(struct wait_queue_entry *wq_entry, unsigned mode, long timeout) { - set_current_state(mode); /* A */ /* - * The above implies an smp_mb(), which matches with the smp_wmb() from - * woken_wake_function() such that if we observe WQ_FLAG_WOKEN we must - * also observe all state before the wakeup. + * The below executes an smp_mb(), which matches with the full barrier + * executed by the try_to_wake_up() in woken_wake_function() such that + * either we see the store to wq_entry->flags in woken_wake_function() + * or woken_wake_function() sees our store to current->state. */ + set_current_state(mode); /* A */ if (!(wq_entry->flags & WQ_FLAG_WOKEN) && !is_kthread_should_stop()) timeout = schedule_timeout(timeout); __set_current_state(TASK_RUNNING); /* - * The below implies an smp_mb(), it too pairs with the smp_wmb() from - * woken_wake_function() such that we must either observe the wait - * condition being true _OR_ WQ_FLAG_WOKEN such that we will not miss - * an event. + * The below executes an smp_mb(), which matches with the smp_mb() (C) + * in woken_wake_function() such that either we see the wait condition + * being true or the store to wq_entry->flags in woken_wake_function() + * follows ours in the coherence order. */ smp_store_mb(wq_entry->flags, wq_entry->flags & ~WQ_FLAG_WOKEN); /* B */ @@ -433,14 +434,8 @@ EXPORT_SYMBOL(wait_woken); int woken_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync, void *key) { - /* - * Although this function is called under waitqueue lock, LOCK - * doesn't imply write barrier and the users expects write - * barrier semantics on wakeup functions. The following - * smp_wmb() is equivalent to smp_wmb() in try_to_wake_up() - * and is paired with smp_store_mb() in wait_woken(). - */ - smp_wmb(); /* C */ + /* Pairs with the smp_store_mb() in wait_woken(). */ + smp_mb(); /* C */ wq_entry->flags |= WQ_FLAG_WOKEN; return default_wake_function(wq_entry, mode, sync, key); -- GitLab From 7fd683812e48ef8c52431adf6fc9eb14011a1abb Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 16 Jul 2018 23:25:07 +0800 Subject: [PATCH 0454/2269] efi/esrt: Only call efi_mem_reserve() for boot services memory [ Upstream commit 61f0d55569463a1af897117ff47d202b0ccb2e24 ] The following commit: 7e1550b8f208 ("efi: Drop type and attribute checks in efi_mem_desc_lookup()") refactored the implementation of efi_mem_desc_lookup() so that the type check is moved to the callers, one of which is the x86 version of efi_arch_mem_reserve(), where we added a modified check that only takes EFI_BOOT_SERVICES_DATA regions into account. This is reasonable, since it is the only memory type that requires this, but doing so uncovered some unexpected behavior in the ESRT code, which permits the ESRT table to reside in other types of memory than what the UEFI spec mandates (i.e., EFI_BOOT_SERVICES_DATA), and unconditionally calls efi_mem_reserve() on the region in question. This may result in errors such as esrt: Reserving ESRT space from 0x000000009c810318 to 0x000000009c810350. efi: Failed to lookup EFI memory descriptor for 0x000000009c810318 when the ESRT table is not in EFI_BOOT_SERVICES_DATA memory, but we try to reserve it nonetheless. So make the call to efi_mem_reserve() conditional on the memory type. Signed-off-by: Ard Biesheuvel Cc: Linus Torvalds Cc: Peter Jones Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/efi/esrt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/firmware/efi/esrt.c b/drivers/firmware/efi/esrt.c index c47e0c6ec00f8..f3c28777b8c6f 100644 --- a/drivers/firmware/efi/esrt.c +++ b/drivers/firmware/efi/esrt.c @@ -333,7 +333,8 @@ void __init efi_esrt_init(void) end = esrt_data + size; pr_info("Reserving ESRT space from %pa to %pa.\n", &esrt_data, &end); - efi_mem_reserve(esrt_data, esrt_data_size); + if (md.type == EFI_BOOT_SERVICES_DATA) + efi_mem_reserve(esrt_data, esrt_data_size); pr_debug("esrt-init: loaded.\n"); err_memunmap: -- GitLab From 24dbc773dd962d795739df2c29718e993587ac40 Mon Sep 17 00:00:00 2001 From: Nicholas Mc Guire Date: Thu, 12 Jul 2018 11:28:24 +0200 Subject: [PATCH 0455/2269] ARM: hisi: handle of_iomap and fix missing of_node_put [ Upstream commit d396cb185c0337aae5664b250cdd9a73f6eb1503 ] Relying on an unchecked of_iomap() which can return NULL is problematic here, an explicit check seems mandatory. Also the call to of_find_compatible_node() returns a device node with refcount incremented therefor an explicit of_node_put() is needed here. Signed-off-by: Nicholas Mc Guire Fixes: commit 22bae4290457 ("ARM: hi3xxx: add hotplug support") Signed-off-by: Wei Xu Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-hisi/hotplug.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/arch/arm/mach-hisi/hotplug.c b/arch/arm/mach-hisi/hotplug.c index a129aae726028..3f28c9141b488 100644 --- a/arch/arm/mach-hisi/hotplug.c +++ b/arch/arm/mach-hisi/hotplug.c @@ -148,13 +148,20 @@ static int hi3xxx_hotplug_init(void) struct device_node *node; node = of_find_compatible_node(NULL, NULL, "hisilicon,sysctrl"); - if (node) { - ctrl_base = of_iomap(node, 0); - id = HI3620_CTRL; - return 0; + if (!node) { + id = ERROR_CTRL; + return -ENOENT; } - id = ERROR_CTRL; - return -ENOENT; + + ctrl_base = of_iomap(node, 0); + of_node_put(node); + if (!ctrl_base) { + id = ERROR_CTRL; + return -ENOMEM; + } + + id = HI3620_CTRL; + return 0; } void hi3xxx_set_cpu(int cpu, bool enable) -- GitLab From e1912dc20ca6fde09108256aedcd194c953da342 Mon Sep 17 00:00:00 2001 From: Nicholas Mc Guire Date: Thu, 12 Jul 2018 11:28:22 +0200 Subject: [PATCH 0456/2269] ARM: hisi: fix error handling and missing of_node_put [ Upstream commit 9f30b5ae0585ca5234fe979294b8f897299dec99 ] of_iomap() can return NULL which seems critical here and thus should be explicitly flagged so that the cause of system halting can be understood. As of_find_compatible_node() is returning a device node with refcount incremented it must be explicitly decremented here. Signed-off-by: Nicholas Mc Guire Fixes: commit 7fda91e73155 ("ARM: hisi: enable smp for HiP01") Signed-off-by: Wei Xu Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-hisi/hotplug.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm/mach-hisi/hotplug.c b/arch/arm/mach-hisi/hotplug.c index 3f28c9141b488..32870560b2806 100644 --- a/arch/arm/mach-hisi/hotplug.c +++ b/arch/arm/mach-hisi/hotplug.c @@ -226,10 +226,10 @@ void hip01_set_cpu(int cpu, bool enable) if (!ctrl_base) { np = of_find_compatible_node(NULL, NULL, "hisilicon,hip01-sysctrl"); - if (np) - ctrl_base = of_iomap(np, 0); - else - BUG(); + BUG_ON(!np); + ctrl_base = of_iomap(np, 0); + of_node_put(np); + BUG_ON(!ctrl_base); } if (enable) { -- GitLab From 6c4abbeb250948f916dce05e972861b263531008 Mon Sep 17 00:00:00 2001 From: Nicholas Mc Guire Date: Thu, 12 Jul 2018 11:28:23 +0200 Subject: [PATCH 0457/2269] ARM: hisi: check of_iomap and fix missing of_node_put [ Upstream commit 81646a3d39ef14749301374a3a0b8311384cd412 ] of_find_compatible_node() returns a device node with refcount incremented and thus needs an explicit of_node_put(). Further relying on an unchecked of_iomap() which can return NULL is problematic here, after all ctrl_base is critical enough for hix5hd2_set_cpu() to call BUG() if not available so a check seems mandated here. Signed-off-by: Nicholas Mc Guire 0002 Fixes: commit 06cc5c1d4d73 ("ARM: hisi: enable hix5hd2 SoC") Signed-off-by: Wei Xu Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-hisi/hotplug.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/arch/arm/mach-hisi/hotplug.c b/arch/arm/mach-hisi/hotplug.c index 32870560b2806..909bb24937812 100644 --- a/arch/arm/mach-hisi/hotplug.c +++ b/arch/arm/mach-hisi/hotplug.c @@ -180,11 +180,15 @@ static bool hix5hd2_hotplug_init(void) struct device_node *np; np = of_find_compatible_node(NULL, NULL, "hisilicon,cpuctrl"); - if (np) { - ctrl_base = of_iomap(np, 0); - return true; - } - return false; + if (!np) + return false; + + ctrl_base = of_iomap(np, 0); + of_node_put(np); + if (!ctrl_base) + return false; + + return true; } void hix5hd2_set_cpu(int cpu, bool enable) -- GitLab From 7ec40f0ab2582b91f1e943544e06e80f016871d2 Mon Sep 17 00:00:00 2001 From: Rick Farrington Date: Fri, 13 Jul 2018 12:50:21 -0700 Subject: [PATCH 0458/2269] liquidio: fix hang when re-binding VF host drv after running DPDK VF driver [ Upstream commit ac13d6d8eaded15c67265eafc32f439ea3a0ac4a ] When configuring SLI_PKTn_OUTPUT_CONTROL, VF driver was assuming that IPTR mode was disabled by reset, which was not true. Since DPDK driver had set IPTR mode previously, the VF driver (which uses buf-ptr-only mode) was not properly handling DROQ packets (i.e. it saw zero-length packets). This represented an invalid hardware configuration which the driver could not handle. Signed-off-by: Rick Farrington Signed-off-by: Felix Manlunas Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c | 3 +++ drivers/net/ethernet/cavium/liquidio/cn23xx_vf_device.c | 3 +++ 2 files changed, 6 insertions(+) diff --git a/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c b/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c index e8b290473ee25..2e089b5ff8f32 100644 --- a/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c +++ b/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c @@ -493,6 +493,9 @@ static void cn23xx_pf_setup_global_output_regs(struct octeon_device *oct) for (q_no = srn; q_no < ern; q_no++) { reg_val = octeon_read_csr(oct, CN23XX_SLI_OQ_PKT_CONTROL(q_no)); + /* clear IPTR */ + reg_val &= ~CN23XX_PKT_OUTPUT_CTL_IPTR; + /* set DPTR */ reg_val |= CN23XX_PKT_OUTPUT_CTL_DPTR; diff --git a/drivers/net/ethernet/cavium/liquidio/cn23xx_vf_device.c b/drivers/net/ethernet/cavium/liquidio/cn23xx_vf_device.c index 9338a00083788..1f8b7f6512540 100644 --- a/drivers/net/ethernet/cavium/liquidio/cn23xx_vf_device.c +++ b/drivers/net/ethernet/cavium/liquidio/cn23xx_vf_device.c @@ -165,6 +165,9 @@ static void cn23xx_vf_setup_global_output_regs(struct octeon_device *oct) reg_val = octeon_read_csr(oct, CN23XX_VF_SLI_OQ_PKT_CONTROL(q_no)); + /* clear IPTR */ + reg_val &= ~CN23XX_PKT_OUTPUT_CTL_IPTR; + /* set DPTR */ reg_val |= CN23XX_PKT_OUTPUT_CTL_DPTR; -- GitLab From ed3151e4221452a77305e28ba1728db84bc08429 Mon Sep 17 00:00:00 2001 From: Enrico Scholz Date: Thu, 3 May 2018 18:29:36 +0200 Subject: [PATCH 0459/2269] gpu: ipu-v3: csi: pass back mbus_code_to_bus_cfg error codes [ Upstream commit d36d0e6309dd8137cf438cbb680e72eb63c81425 ] mbus_code_to_bus_cfg() can fail on unknown mbus codes; pass back the error to the caller. Signed-off-by: Enrico Scholz Signed-off-by: Jan Luebbe [p.zabel@pengutronix.de - renamed rc to ret for consistency] Signed-off-by: Philipp Zabel Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/ipu-v3/ipu-csi.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/ipu-v3/ipu-csi.c b/drivers/gpu/ipu-v3/ipu-csi.c index 24e12b87a0cbe..2bc51d4d3f1e6 100644 --- a/drivers/gpu/ipu-v3/ipu-csi.c +++ b/drivers/gpu/ipu-v3/ipu-csi.c @@ -316,13 +316,17 @@ static int mbus_code_to_bus_cfg(struct ipu_csi_bus_config *cfg, u32 mbus_code) /* * Fill a CSI bus config struct from mbus_config and mbus_framefmt. */ -static void fill_csi_bus_cfg(struct ipu_csi_bus_config *csicfg, +static int fill_csi_bus_cfg(struct ipu_csi_bus_config *csicfg, struct v4l2_mbus_config *mbus_cfg, struct v4l2_mbus_framefmt *mbus_fmt) { + int ret; + memset(csicfg, 0, sizeof(*csicfg)); - mbus_code_to_bus_cfg(csicfg, mbus_fmt->code); + ret = mbus_code_to_bus_cfg(csicfg, mbus_fmt->code); + if (ret < 0) + return ret; switch (mbus_cfg->type) { case V4L2_MBUS_PARALLEL: @@ -353,6 +357,8 @@ static void fill_csi_bus_cfg(struct ipu_csi_bus_config *csicfg, /* will never get here, keep compiler quiet */ break; } + + return 0; } int ipu_csi_init_interface(struct ipu_csi *csi, @@ -362,8 +368,11 @@ int ipu_csi_init_interface(struct ipu_csi *csi, struct ipu_csi_bus_config cfg; unsigned long flags; u32 width, height, data = 0; + int ret; - fill_csi_bus_cfg(&cfg, mbus_cfg, mbus_fmt); + ret = fill_csi_bus_cfg(&cfg, mbus_cfg, mbus_fmt); + if (ret < 0) + return ret; /* set default sensor frame width and height */ width = mbus_fmt->width; @@ -584,11 +593,14 @@ int ipu_csi_set_mipi_datatype(struct ipu_csi *csi, u32 vc, struct ipu_csi_bus_config cfg; unsigned long flags; u32 temp; + int ret; if (vc > 3) return -EINVAL; - mbus_code_to_bus_cfg(&cfg, mbus_fmt->code); + ret = mbus_code_to_bus_cfg(&cfg, mbus_fmt->code); + if (ret < 0) + return ret; spin_lock_irqsave(&csi->lock, flags); -- GitLab From 3c11fe1edaff491fd43fc9270282e04981621d99 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Sun, 15 Jul 2018 15:39:34 +0200 Subject: [PATCH 0460/2269] tty: fix termios input-speed encoding when using BOTHER [ Upstream commit 1cee38f0363a88db374e50b232ca17b9a4c12fa0 ] When the termios CIBAUD bits are left unset (i.e. B0), we use the same output and input speed and should leave CIBAUD unchanged. When the user requests a rate using BOTHER and c_ospeed which the driver cannot set exactly, the driver can report back the actual baud rate using tty_termios_encode_baud_rate(). If this rate is close enough to a standard rate however, we could end up setting CIBAUD to a Bfoo value despite the user having left it unset. This in turn could lead to an unexpected input rate being set on subsequent termios updates. Fix this by using a zero tolerance value also for the input rate when CIBAUD is clear so that the matching logic works as expected. Fixes: 78137e3b34e1 ("[PATCH] tty: improve encode_baud_rate logic") Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/tty/tty_baudrate.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/tty/tty_baudrate.c b/drivers/tty/tty_baudrate.c index 5c33fd25676d5..8457bb941e1ad 100644 --- a/drivers/tty/tty_baudrate.c +++ b/drivers/tty/tty_baudrate.c @@ -156,16 +156,20 @@ void tty_termios_encode_baud_rate(struct ktermios *termios, termios->c_ospeed = obaud; #ifdef BOTHER + if ((termios->c_cflag >> IBSHIFT) & CBAUD) + ibinput = 1; /* An input speed was specified */ + /* If the user asked for a precise weird speed give a precise weird answer. If they asked for a Bfoo speed they may have problems digesting non-exact replies so fuzz a bit */ - if ((termios->c_cflag & CBAUD) == BOTHER) + if ((termios->c_cflag & CBAUD) == BOTHER) { oclose = 0; + if (!ibinput) + iclose = 0; + } if (((termios->c_cflag >> IBSHIFT) & CBAUD) == BOTHER) iclose = 0; - if ((termios->c_cflag >> IBSHIFT) & CBAUD) - ibinput = 1; /* An input speed was specified */ #endif termios->c_cflag &= ~CBAUD; -- GitLab From 49c90d012ab47625e0b41a1d8d49b3ce1ca8a5a7 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Sun, 15 Jul 2018 15:39:33 +0200 Subject: [PATCH 0461/2269] tty: fix termios input-speed encoding [ Upstream commit fada18c48d774b9e837928ecdce6a5d5fdd11ee7 ] Make sure to clear the CIBAUD bits before OR-ing the new mask when encoding the termios input baud rate. This could otherwise lead to an incorrect input rate being reported back and incidentally set on subsequent termios updates. Fixes: edc6afc54968 ("[PATCH] tty: switch to ktermios and new framework") Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/tty/tty_baudrate.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/tty/tty_baudrate.c b/drivers/tty/tty_baudrate.c index 8457bb941e1ad..ebc797fc1afd3 100644 --- a/drivers/tty/tty_baudrate.c +++ b/drivers/tty/tty_baudrate.c @@ -172,6 +172,9 @@ void tty_termios_encode_baud_rate(struct ktermios *termios, iclose = 0; #endif termios->c_cflag &= ~CBAUD; +#ifdef IBSHIFT + termios->c_cflag &= ~(CBAUD << IBSHIFT); +#endif /* * Our goal is to find a close match to the standard baud rate -- GitLab From 58d402738d9fd6f9fbb779dd8e44be803f2f6cb1 Mon Sep 17 00:00:00 2001 From: Laurentiu Tudor Date: Wed, 4 Jul 2018 14:34:20 +0300 Subject: [PATCH 0462/2269] mmc: sdhci-of-esdhc: set proper dma mask for ls104x chips [ Upstream commit 5552d7ad596c3fea953f40fef74170ce0760c04d ] SDHCI controller in ls1043a and ls1046a generate 40-bit wide addresses when doing DMA. Make sure that the corresponding dma mask is correctly configured. Context: when enabling smmu on these chips the following problem is encountered: the smmu input address size is 48 bits so the dma mappings for sdhci end up 48-bit wide. However, on these chips sdhci only use 40-bits of that address size when doing dma. So you end up with a 48-bit address translation in smmu but the device generates transactions with clipped 40-bit addresses, thus smmu context faults are triggered. Setting up the correct dma mask fixes this situation. Signed-off-by: Laurentiu Tudor Signed-off-by: Ulf Hansson Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci-of-esdhc.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/mmc/host/sdhci-of-esdhc.c b/drivers/mmc/host/sdhci-of-esdhc.c index 4ffa6b173a21e..8332f56e6c0da 100644 --- a/drivers/mmc/host/sdhci-of-esdhc.c +++ b/drivers/mmc/host/sdhci-of-esdhc.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include "sdhci-pltfm.h" #include "sdhci-esdhc.h" @@ -427,6 +428,11 @@ static void esdhc_of_adma_workaround(struct sdhci_host *host, u32 intmask) static int esdhc_of_enable_dma(struct sdhci_host *host) { u32 value; + struct device *dev = mmc_dev(host->mmc); + + if (of_device_is_compatible(dev->of_node, "fsl,ls1043a-esdhc") || + of_device_is_compatible(dev->of_node, "fsl,ls1046a-esdhc")) + dma_set_mask_and_coherent(dev, DMA_BIT_MASK(40)); value = sdhci_readl(host, ESDHC_DMA_SYSCTL); value |= ESDHC_DMA_SNOOP; -- GitLab From 26bea7e6d2cabf0e3a71f577640dbd78b2715ac7 Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Thu, 12 Jul 2018 09:39:02 +0200 Subject: [PATCH 0463/2269] mmc: tegra: prevent HS200 on Tegra 3 [ Upstream commit 127407e36f4fe3a1d5e8b9998b479956ce83a7dc ] The stack assumes that SDHC controller which support SD3.0 (SDR104) do support HS200. This is not the case for Tegra 3, which does support SD 3.0 but only supports eMMC spec 4.41. Use SDHCI_QUIRK2_BROKEN_HS200 to indicate that the controller does not support HS200. Note that commit 156e14b126ff ("mmc: sdhci: fix caps2 for HS200") added the tie between SD3.0 (SDR104) and HS200. I don't think that this is necessarly true. It is fully legitimate to support SD3.0 and not support HS200. The quirk naming suggests something is broken in the controller, but this is not the case: The controller simply does not support HS200. Fixes: 7ad2ed1dfcbe ("mmc: tegra: enable UHS-I modes") Signed-off-by: Stefan Agner Tested-by: Marcel Ziswiler Signed-off-by: Ulf Hansson Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci-tegra.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/host/sdhci-tegra.c b/drivers/mmc/host/sdhci-tegra.c index 0cd6fa80db662..ce3f344d2b66e 100644 --- a/drivers/mmc/host/sdhci-tegra.c +++ b/drivers/mmc/host/sdhci-tegra.c @@ -334,7 +334,8 @@ static const struct sdhci_pltfm_data sdhci_tegra30_pdata = { SDHCI_QUIRK_NO_HISPD_BIT | SDHCI_QUIRK_BROKEN_ADMA_ZEROLEN_DESC | SDHCI_QUIRK_CAP_CLOCK_BASE_BROKEN, - .quirks2 = SDHCI_QUIRK2_PRESET_VALUE_BROKEN, + .quirks2 = SDHCI_QUIRK2_PRESET_VALUE_BROKEN | + SDHCI_QUIRK2_BROKEN_HS200, .ops = &tegra_sdhci_ops, }; -- GitLab From 636a9a7d52219fcf4364044a71947e3eebc2ac10 Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Thu, 5 Jul 2018 14:18:19 +0200 Subject: [PATCH 0464/2269] mmc: sdhci: do not try to use 3.3V signaling if not supported [ Upstream commit 1b5190c2e74c47ebe4bcecf7a072358ad9f1feaa ] For eMMC devices it is valid to only support 1.8V signaling. When vqmmc is set to a fixed 1.8V regulator the stack tries to set 3.3V initially and prints the following warning: mmc1: Switching to 3.3V signalling voltage failed Clear the MMC_SIGNAL_VOLTAGE_330 flag in case 3.3V is signaling is not available. This prevents the stack from even trying to use 3.3V signaling and avoids the above warning. Signed-off-by: Stefan Agner Signed-off-by: Ulf Hansson Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index d35deb79965d2..f063fe569339b 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -3631,14 +3631,21 @@ int sdhci_setup_host(struct sdhci_host *host) mmc_gpio_get_cd(host->mmc) < 0) mmc->caps |= MMC_CAP_NEEDS_POLL; - /* If vqmmc regulator and no 1.8V signalling, then there's no UHS */ if (!IS_ERR(mmc->supply.vqmmc)) { ret = regulator_enable(mmc->supply.vqmmc); + + /* If vqmmc provides no 1.8V signalling, then there's no UHS */ if (!regulator_is_supported_voltage(mmc->supply.vqmmc, 1700000, 1950000)) host->caps1 &= ~(SDHCI_SUPPORT_SDR104 | SDHCI_SUPPORT_SDR50 | SDHCI_SUPPORT_DDR50); + + /* In eMMC case vqmmc might be a fixed 1.8V regulator */ + if (!regulator_is_supported_voltage(mmc->supply.vqmmc, 2700000, + 3600000)) + host->flags &= ~SDHCI_SIGNALING_330; + if (ret) { pr_warn("%s: Failed to enable vqmmc regulator: %d\n", mmc_hostname(mmc), ret); -- GitLab From 96ff197fab41164f4b8e41710dca73717d6333ac Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Thu, 12 Jul 2018 13:02:52 -0400 Subject: [PATCH 0465/2269] drm/nouveau: Fix runtime PM leak in drm_open() [ Upstream commit 922a8c82fafdec99688bbaea6c5889f562a42cdc ] Noticed this as I was skimming through, if we fail to allocate memory for cli we'll end up returning without dropping the runtime PM ref we got. Additionally, we'll even return the wrong return code! (ret most likely will == 0 here, we want -ENOMEM). Signed-off-by: Lyude Paul Reviewed-by: Lukas Wunner Signed-off-by: Ben Skeggs Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/nouveau/nouveau_drm.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c index 362a34cb435db..d6f13d7254de9 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drm.c +++ b/drivers/gpu/drm/nouveau/nouveau_drm.c @@ -848,8 +848,10 @@ nouveau_drm_open(struct drm_device *dev, struct drm_file *fpriv) get_task_comm(tmpname, current); snprintf(name, sizeof(name), "%s[%d]", tmpname, pid_nr(fpriv->pid)); - if (!(cli = kzalloc(sizeof(*cli), GFP_KERNEL))) - return ret; + if (!(cli = kzalloc(sizeof(*cli), GFP_KERNEL))) { + ret = -ENOMEM; + goto done; + } ret = nouveau_cli_init(drm, name, cli); if (ret) -- GitLab From 7b549a0711626a06d46b944a7a53fad0f8418f74 Mon Sep 17 00:00:00 2001 From: Karol Herbst Date: Sat, 14 Jul 2018 12:52:09 +0200 Subject: [PATCH 0466/2269] drm/nouveau/debugfs: Wake up GPU before doing any reclocking [ Upstream commit eaeb9010bb4bcdc20e58254fa42f3fe730a7f908 ] Fixes various reclocking related issues on prime systems. Signed-off-by: Karol Herbst Signed-off-by: Martin Peres Signed-off-by: Ben Skeggs Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/nouveau/nouveau_debugfs.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/nouveau/nouveau_debugfs.c b/drivers/gpu/drm/nouveau/nouveau_debugfs.c index 963a4dba8213e..9109b69cd0529 100644 --- a/drivers/gpu/drm/nouveau/nouveau_debugfs.c +++ b/drivers/gpu/drm/nouveau/nouveau_debugfs.c @@ -160,7 +160,11 @@ nouveau_debugfs_pstate_set(struct file *file, const char __user *ubuf, args.ustate = value; } + ret = pm_runtime_get_sync(drm->dev); + if (IS_ERR_VALUE(ret) && ret != -EACCES) + return ret; ret = nvif_mthd(ctrl, NVIF_CONTROL_PSTATE_USER, &args, sizeof(args)); + pm_runtime_put_autosuspend(drm->dev); if (ret < 0) return ret; -- GitLab From 62411c32d1672ea51cfce468e6cbfae364cac57e Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Wed, 30 May 2018 16:06:25 +0200 Subject: [PATCH 0467/2269] drm/nouveau: tegra: Detach from ARM DMA/IOMMU mapping [ Upstream commit b59fb482b52269977ee5de205308e5b236a03917 ] Depending on the kernel configuration, early ARM architecture setup code may have attached the GPU to a DMA/IOMMU mapping that transparently uses the IOMMU to back the DMA API. Tegra requires special handling for IOMMU backed buffers (a special bit in the GPU's MMU page tables indicates the memory path to take: via the SMMU or directly to the memory controller). Transparently backing DMA memory with an IOMMU prevents Nouveau from properly handling such memory accesses and causes memory access faults. As a side-note: buffers other than those allocated in instance memory don't need to be physically contiguous from the GPU's perspective since the GPU can map them into contiguous buffers using its own MMU. Mapping these buffers through the IOMMU is unnecessary and will even lead to performance degradation because of the additional translation. One exception to this are compressible buffers which need large pages. In order to enable these large pages, multiple small pages will have to be combined into one large (I/O virtually contiguous) mapping via the IOMMU. However, that is a topic outside the scope of this fix and isn't currently supported. An implementation will want to explicitly create these large pages in the Nouveau driver, so detaching from a DMA/IOMMU mapping would still be required. Signed-off-by: Thierry Reding Acked-by: Christoph Hellwig Reviewed-by: Robin Murphy Tested-by: Nicolas Chauvet Signed-off-by: Ben Skeggs Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c index 189ed80e21ffb..fa1ead337c237 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c @@ -23,6 +23,10 @@ #ifdef CONFIG_NOUVEAU_PLATFORM_DRIVER #include "priv.h" +#if IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU) +#include +#endif + static int nvkm_device_tegra_power_up(struct nvkm_device_tegra *tdev) { @@ -105,6 +109,15 @@ nvkm_device_tegra_probe_iommu(struct nvkm_device_tegra *tdev) unsigned long pgsize_bitmap; int ret; +#if IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU) + if (dev->archdata.mapping) { + struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev); + + arm_iommu_detach_device(dev); + arm_iommu_release_mapping(mapping); + } +#endif + if (!tdev->func->iommu_bit) return; -- GitLab From 8eefff55ccf05d600c5793fa0ff90b49b195c932 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Thu, 12 Jul 2018 22:29:55 +0100 Subject: [PATCH 0468/2269] parport: sunbpp: fix error return code [ Upstream commit faa1a47388b33623e4d504c23569188907b039a0 ] Return an error code on failure. Change leading spaces to tab on the first if. Problem found using Coccinelle. Signed-off-by: Julia Lawall Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/parport/parport_sunbpp.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/parport/parport_sunbpp.c b/drivers/parport/parport_sunbpp.c index 01cf1c1a841a4..8de329546b822 100644 --- a/drivers/parport/parport_sunbpp.c +++ b/drivers/parport/parport_sunbpp.c @@ -286,12 +286,16 @@ static int bpp_probe(struct platform_device *op) ops = kmemdup(&parport_sunbpp_ops, sizeof(struct parport_operations), GFP_KERNEL); - if (!ops) + if (!ops) { + err = -ENOMEM; goto out_unmap; + } dprintk(("register_port\n")); - if (!(p = parport_register_port((unsigned long)base, irq, dma, ops))) + if (!(p = parport_register_port((unsigned long)base, irq, dma, ops))) { + err = -ENOMEM; goto out_free_ops; + } p->size = size; p->dev = &op->dev; -- GitLab From e593232f6110c6c9f41c4ec3526de1bcad26712a Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Tue, 12 Jun 2018 12:22:15 +0100 Subject: [PATCH 0469/2269] sched/fair: Fix util_avg of new tasks for asymmetric systems [ Upstream commit 8fe5c5a937d0f4e84221631833a2718afde52285 ] When a new task wakes-up for the first time, its initial utilization is set to half of the spare capacity of its CPU. The current implementation of post_init_entity_util_avg() uses SCHED_CAPACITY_SCALE directly as a capacity reference. As a result, on a big.LITTLE system, a new task waking up on an idle little CPU will be given ~512 of util_avg, even if the CPU's capacity is significantly less than that. Fix this by computing the spare capacity with arch_scale_cpu_capacity(). Signed-off-by: Quentin Perret Signed-off-by: Peter Zijlstra (Intel) Acked-by: Vincent Guittot Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: dietmar.eggemann@arm.com Cc: morten.rasmussen@arm.com Cc: patrick.bellasi@arm.com Link: http://lkml.kernel.org/r/20180612112215.25448-1-quentin.perret@arm.com Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- kernel/sched/fair.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 0cc7098c6dfd8..a5e20ceb0b5a2 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -757,11 +757,12 @@ static void attach_entity_cfs_rq(struct sched_entity *se); * To solve this problem, we also cap the util_avg of successive tasks to * only 1/2 of the left utilization budget: * - * util_avg_cap = (1024 - cfs_rq->avg.util_avg) / 2^n + * util_avg_cap = (cpu_scale - cfs_rq->avg.util_avg) / 2^n * - * where n denotes the nth task. + * where n denotes the nth task and cpu_scale the CPU capacity. * - * For example, a simplest series from the beginning would be like: + * For example, for a CPU with 1024 of capacity, a simplest series from + * the beginning would be like: * * task util_avg: 512, 256, 128, 64, 32, 16, 8, ... * cfs_rq util_avg: 512, 768, 896, 960, 992, 1008, 1016, ... @@ -773,7 +774,8 @@ void post_init_entity_util_avg(struct sched_entity *se) { struct cfs_rq *cfs_rq = cfs_rq_of(se); struct sched_avg *sa = &se->avg; - long cap = (long)(SCHED_CAPACITY_SCALE - cfs_rq->avg.util_avg) / 2; + long cpu_scale = arch_scale_cpu_capacity(NULL, cpu_of(rq_of(cfs_rq))); + long cap = (long)(cpu_scale - cfs_rq->avg.util_avg) / 2; if (cap > 0) { if (cfs_rq->avg.util_avg != 0) { -- GitLab From 7a46541d1ecca8d42eb3d39cd9bae354c27ace41 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Wed, 11 Jul 2018 13:40:28 -0600 Subject: [PATCH 0470/2269] coresight: Handle errors in finding input/output ports [ Upstream commit fe470f5f7f684ed15bc49b6183a64237547910ff ] If we fail to find the input / output port for a LINK component while enabling a path, we should fail gracefully rather than assuming port "0". Cc: Mathieu Poirier Signed-off-by: Suzuki K Poulose Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/hwtracing/coresight/coresight.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight.c b/drivers/hwtracing/coresight/coresight.c index b8091bef21dc7..e571e4010dff0 100644 --- a/drivers/hwtracing/coresight/coresight.c +++ b/drivers/hwtracing/coresight/coresight.c @@ -115,7 +115,7 @@ static int coresight_find_link_inport(struct coresight_device *csdev, dev_err(&csdev->dev, "couldn't find inport, parent: %s, child: %s\n", dev_name(&parent->dev), dev_name(&csdev->dev)); - return 0; + return -ENODEV; } static int coresight_find_link_outport(struct coresight_device *csdev, @@ -133,7 +133,7 @@ static int coresight_find_link_outport(struct coresight_device *csdev, dev_err(&csdev->dev, "couldn't find outport, parent: %s, child: %s\n", dev_name(&csdev->dev), dev_name(&child->dev)); - return 0; + return -ENODEV; } static int coresight_enable_sink(struct coresight_device *csdev, u32 mode) @@ -186,6 +186,9 @@ static int coresight_enable_link(struct coresight_device *csdev, else refport = 0; + if (refport < 0) + return refport; + if (atomic_inc_return(&csdev->refcnt[refport]) == 1) { if (link_ops(csdev)->enable) { ret = link_ops(csdev)->enable(csdev, inport, outport); -- GitLab From 4334c6e1fcb1daf7be590d22e0569f6b19cd7ffe Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Wed, 11 Jul 2018 13:40:35 -0600 Subject: [PATCH 0471/2269] coresight: tpiu: Fix disabling timeouts [ Upstream commit ccff2dfaceaca4517432f5c149594215fe9098cc ] Probing the TPIU driver under UBSan triggers an out-of-bounds shift warning in coresight_timeout(): ... [ 5.677530] UBSAN: Undefined behaviour in drivers/hwtracing/coresight/coresight.c:929:16 [ 5.685542] shift exponent 64 is too large for 64-bit type 'long unsigned int' ... On closer inspection things are exponentially out of whack because we're passing a bitmask where a bit number should be. Amusingly, it seems that both calls will find their expected values by sheer luck and appear to succeed: 1 << FFCR_FON_MAN ends up at bit 64 which whilst undefined evaluates as zero in practice, while 1 << FFSR_FT_STOPPED finds bit 2 (TCPresent) which apparently is usually tied high. Following the examples of other drivers, define separate FOO and FOO_BIT macros for masks vs. indices, and put things right. CC: Robert Walker CC: Mike Leach CC: Mathieu Poirier Fixes: 11595db8e17f ("coresight: Fix disabling of CoreSight TPIU") Signed-off-by: Robin Murphy Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/hwtracing/coresight/coresight-tpiu.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-tpiu.c b/drivers/hwtracing/coresight/coresight-tpiu.c index 735dca089389a..15dd01f8c1976 100644 --- a/drivers/hwtracing/coresight/coresight-tpiu.c +++ b/drivers/hwtracing/coresight/coresight-tpiu.c @@ -47,8 +47,9 @@ /** register definition **/ /* FFSR - 0x300 */ -#define FFSR_FT_STOPPED BIT(1) +#define FFSR_FT_STOPPED_BIT 1 /* FFCR - 0x304 */ +#define FFCR_FON_MAN_BIT 6 #define FFCR_FON_MAN BIT(6) #define FFCR_STOP_FI BIT(12) @@ -93,9 +94,9 @@ static void tpiu_disable_hw(struct tpiu_drvdata *drvdata) /* Generate manual flush */ writel_relaxed(FFCR_STOP_FI | FFCR_FON_MAN, drvdata->base + TPIU_FFCR); /* Wait for flush to complete */ - coresight_timeout(drvdata->base, TPIU_FFCR, FFCR_FON_MAN, 0); + coresight_timeout(drvdata->base, TPIU_FFCR, FFCR_FON_MAN_BIT, 0); /* Wait for formatter to stop */ - coresight_timeout(drvdata->base, TPIU_FFSR, FFSR_FT_STOPPED, 1); + coresight_timeout(drvdata->base, TPIU_FFSR, FFSR_FT_STOPPED_BIT, 1); CS_LOCK(drvdata->base); } -- GitLab From 8e3015e8736ab82811ce11b9ee8adba2d8a40464 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Wed, 11 Jul 2018 13:40:14 -0600 Subject: [PATCH 0472/2269] coresight: ETM: Add support for Arm Cortex-A73 and Cortex-A35 [ Upstream commit 5cedd22370a0a460b663c06de1fc10b4ba3c5d0b ] Add ETM PIDs of the Arm cortex-A CPUs to the white list of ETMs. While at it add a helper macro to make it easier to add the new entries. Cc: Mathieu Poirier Signed-off-by: Suzuki K Poulose Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/hwtracing/coresight/coresight-etm4x.c | 31 +++++++++---------- 1 file changed, 14 insertions(+), 17 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-etm4x.c b/drivers/hwtracing/coresight/coresight-etm4x.c index cf364a514c121..5a1a14bcae72a 100644 --- a/drivers/hwtracing/coresight/coresight-etm4x.c +++ b/drivers/hwtracing/coresight/coresight-etm4x.c @@ -1034,7 +1034,8 @@ static int etm4_probe(struct amba_device *adev, const struct amba_id *id) } pm_runtime_put(&adev->dev); - dev_info(dev, "%s initialized\n", (char *)id->data); + dev_info(dev, "CPU%d: ETM v%d.%d initialized\n", + drvdata->cpu, drvdata->arch >> 4, drvdata->arch & 0xf); if (boot_enable) { coresight_enable(drvdata->csdev); @@ -1052,23 +1053,19 @@ err_arch_supported: return ret; } +#define ETM4x_AMBA_ID(pid) \ + { \ + .id = pid, \ + .mask = 0x000fffff, \ + } + static const struct amba_id etm4_ids[] = { - { /* ETM 4.0 - Cortex-A53 */ - .id = 0x000bb95d, - .mask = 0x000fffff, - .data = "ETM 4.0", - }, - { /* ETM 4.0 - Cortex-A57 */ - .id = 0x000bb95e, - .mask = 0x000fffff, - .data = "ETM 4.0", - }, - { /* ETM 4.0 - A72, Maia, HiSilicon */ - .id = 0x000bb95a, - .mask = 0x000fffff, - .data = "ETM 4.0", - }, - { 0, 0}, + ETM4x_AMBA_ID(0x000bb95d), /* Cortex-A53 */ + ETM4x_AMBA_ID(0x000bb95e), /* Cortex-A57 */ + ETM4x_AMBA_ID(0x000bb95a), /* Cortex-A72 */ + ETM4x_AMBA_ID(0x000bb959), /* Cortex-A73 */ + ETM4x_AMBA_ID(0x000bb9da), /* Cortex-A35 */ + {}, }; static struct amba_driver etm4x_driver = { -- GitLab From 34240e87d244a1db8ec4cc4997ee44bf4a54d680 Mon Sep 17 00:00:00 2001 From: Tuomas Tynkkynen Date: Fri, 13 Jul 2018 00:54:17 +0300 Subject: [PATCH 0473/2269] staging: bcm2835-audio: Don't leak workqueue if open fails [ Upstream commit 678c5b119307c40f9a17152512f9c949d0ec7292 ] Currently, if bcm2835_audio_open() fails partway, the allocated workqueue is leaked. Avoid that. While at it, propagate the return value of bcm2835_audio_open_connection() on failure instead of returning -1. Signed-off-by: Tuomas Tynkkynen Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- .../vc04_services/bcm2835-audio/bcm2835-vchiq.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/drivers/staging/vc04_services/bcm2835-audio/bcm2835-vchiq.c b/drivers/staging/vc04_services/bcm2835-audio/bcm2835-vchiq.c index 4be864dbd41c9..9eb3b625a1b18 100644 --- a/drivers/staging/vc04_services/bcm2835-audio/bcm2835-vchiq.c +++ b/drivers/staging/vc04_services/bcm2835-audio/bcm2835-vchiq.c @@ -442,16 +442,16 @@ int bcm2835_audio_open(struct bcm2835_alsa_stream *alsa_stream) my_workqueue_init(alsa_stream); ret = bcm2835_audio_open_connection(alsa_stream); - if (ret) { - ret = -1; - goto exit; - } + if (ret) + goto free_wq; + instance = alsa_stream->instance; LOG_DBG(" instance (%p)\n", instance); if (mutex_lock_interruptible(&instance->vchi_mutex)) { LOG_DBG("Interrupted whilst waiting for lock on (%d)\n", instance->num_connections); - return -EINTR; + ret = -EINTR; + goto free_wq; } vchi_service_use(instance->vchi_handle[0]); @@ -474,7 +474,11 @@ int bcm2835_audio_open(struct bcm2835_alsa_stream *alsa_stream) unlock: vchi_service_release(instance->vchi_handle[0]); mutex_unlock(&instance->vchi_mutex); -exit: + +free_wq: + if (ret) + destroy_workqueue(alsa_stream->my_wq); + return ret; } -- GitLab From 492b804d7fa54d528501b88f49a3f58b2a62320a Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 11 Jul 2018 13:19:38 +0000 Subject: [PATCH 0474/2269] gpio: pxa: Fix potential NULL dereference [ Upstream commit 9506755633d0b32ef76f67c345000178e9b0dfc4 ] platform_get_resource() may fail and return NULL, so we should better check it's return value to avoid a NULL pointer dereference a bit later in the code. This is detected by Coccinelle semantic patch. @@ expression pdev, res, n, t, e, e1, e2; @@ res = platform_get_resource(pdev, t, n); + if (!res) + return -EINVAL; ... when != res == NULL e = devm_ioremap(e1, res->start, e2); Signed-off-by: Wei Yongjun Acked-by: Robert Jarzmik Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpio/gpio-pxa.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpio/gpio-pxa.c b/drivers/gpio/gpio-pxa.c index 6029899789f3d..2943dfc4c4707 100644 --- a/drivers/gpio/gpio-pxa.c +++ b/drivers/gpio/gpio-pxa.c @@ -662,6 +662,8 @@ static int pxa_gpio_probe(struct platform_device *pdev) pchip->irq0 = irq0; pchip->irq1 = irq1; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) + return -EINVAL; gpio_reg_base = devm_ioremap(&pdev->dev, res->start, resource_size(res)); if (!gpio_reg_base) -- GitLab From 4101e59a294a401a4fece73bfae7d068b3a992bb Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 9 Jul 2018 21:47:27 +0300 Subject: [PATCH 0475/2269] gpiolib: Mark gpio_suffixes array with __maybe_unused MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit b23ec59926faf05b0c43680d05671c484e810ac4 ] Since we put static variable to a header file it's copied to each module that includes the header. But not all of them are actually used it. Mark gpio_suffixes array with __maybe_unused to hide a compiler warning: In file included from drivers/gpio/gpiolib-legacy.c:6:0: drivers/gpio/gpiolib.h:95:27: warning: ‘gpio_suffixes’ defined but not used [-Wunused-const-variable=] static const char * const gpio_suffixes[] = { "gpios", "gpio" }; ^~~~~~~~~~~~~ In file included from drivers/gpio/gpiolib-devprop.c:17:0: drivers/gpio/gpiolib.h:95:27: warning: ‘gpio_suffixes’ defined but not used [-Wunused-const-variable=] static const char * const gpio_suffixes[] = { "gpios", "gpio" }; ^~~~~~~~~~~~~ Signed-off-by: Andy Shevchenko Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpio/gpiolib.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/gpiolib.h b/drivers/gpio/gpiolib.h index 3d4d0634c9ddd..7a3d9658d74c5 100644 --- a/drivers/gpio/gpiolib.h +++ b/drivers/gpio/gpiolib.h @@ -88,7 +88,7 @@ struct acpi_gpio_info { }; /* gpio suffixes used for ACPI and device tree lookup */ -static const char * const gpio_suffixes[] = { "gpios", "gpio" }; +static __maybe_unused const char * const gpio_suffixes[] = { "gpios", "gpio" }; #ifdef CONFIG_OF_GPIO struct gpio_desc *of_find_gpio(struct device *dev, -- GitLab From 80784cba90f3e976ac44a18d9dce45d6c706a624 Mon Sep 17 00:00:00 2001 From: Peter Rosin Date: Wed, 20 Jun 2018 07:18:02 +0200 Subject: [PATCH 0476/2269] mfd: 88pm860x-i2c: switch to i2c_lock_bus(..., I2C_LOCK_SEGMENT) [ Upstream commit 8c8f74f327a76604a499fad8c54c15e1c0ee8051 ] Locking the root adapter for __i2c_transfer will deadlock if the device sits behind a mux-locked I2C mux. Switch to the finer-grained i2c_lock_bus with the I2C_LOCK_SEGMENT flag. If the device does not sit behind a mux-locked mux, the two locking variants are equivalent. Signed-off-by: Peter Rosin Acked-by: Lee Jones Signed-off-by: Wolfram Sang Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/mfd/88pm860x-i2c.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/mfd/88pm860x-i2c.c b/drivers/mfd/88pm860x-i2c.c index 84e313107233e..7b9052ea74134 100644 --- a/drivers/mfd/88pm860x-i2c.c +++ b/drivers/mfd/88pm860x-i2c.c @@ -146,14 +146,14 @@ int pm860x_page_reg_write(struct i2c_client *i2c, int reg, unsigned char zero; int ret; - i2c_lock_adapter(i2c->adapter); + i2c_lock_bus(i2c->adapter, I2C_LOCK_SEGMENT); read_device(i2c, 0xFA, 0, &zero); read_device(i2c, 0xFB, 0, &zero); read_device(i2c, 0xFF, 0, &zero); ret = write_device(i2c, reg, 1, &data); read_device(i2c, 0xFE, 0, &zero); read_device(i2c, 0xFC, 0, &zero); - i2c_unlock_adapter(i2c->adapter); + i2c_unlock_bus(i2c->adapter, I2C_LOCK_SEGMENT); return ret; } EXPORT_SYMBOL(pm860x_page_reg_write); @@ -164,14 +164,14 @@ int pm860x_page_bulk_read(struct i2c_client *i2c, int reg, unsigned char zero = 0; int ret; - i2c_lock_adapter(i2c->adapter); + i2c_lock_bus(i2c->adapter, I2C_LOCK_SEGMENT); read_device(i2c, 0xfa, 0, &zero); read_device(i2c, 0xfb, 0, &zero); read_device(i2c, 0xff, 0, &zero); ret = read_device(i2c, reg, count, buf); read_device(i2c, 0xFE, 0, &zero); read_device(i2c, 0xFC, 0, &zero); - i2c_unlock_adapter(i2c->adapter); + i2c_unlock_bus(i2c->adapter, I2C_LOCK_SEGMENT); return ret; } EXPORT_SYMBOL(pm860x_page_bulk_read); -- GitLab From bb9dcbf97a9be5a144f65c36e3fdd460307e52f4 Mon Sep 17 00:00:00 2001 From: Peter Rosin Date: Wed, 20 Jun 2018 07:17:56 +0200 Subject: [PATCH 0477/2269] input: rohm_bu21023: switch to i2c_lock_bus(..., I2C_LOCK_SEGMENT) [ Upstream commit 193c2a07cfaacb9249ab0e3d34bce32490879355 ] Locking the root adapter for __i2c_transfer will deadlock if the device sits behind a mux-locked I2C mux. Switch to the finer-grained i2c_lock_bus with the I2C_LOCK_SEGMENT flag. If the device does not sit behind a mux-locked mux, the two locking variants are equivalent. Signed-off-by: Peter Rosin Acked-by: Dmitry Torokhov Signed-off-by: Wolfram Sang Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/input/touchscreen/rohm_bu21023.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/input/touchscreen/rohm_bu21023.c b/drivers/input/touchscreen/rohm_bu21023.c index eeaf6ff035974..fc54e044fecef 100644 --- a/drivers/input/touchscreen/rohm_bu21023.c +++ b/drivers/input/touchscreen/rohm_bu21023.c @@ -304,7 +304,7 @@ static int rohm_i2c_burst_read(struct i2c_client *client, u8 start, void *buf, msg[1].len = len; msg[1].buf = buf; - i2c_lock_adapter(adap); + i2c_lock_bus(adap, I2C_LOCK_SEGMENT); for (i = 0; i < 2; i++) { if (__i2c_transfer(adap, &msg[i], 1) < 0) { @@ -313,7 +313,7 @@ static int rohm_i2c_burst_read(struct i2c_client *client, u8 start, void *buf, } } - i2c_unlock_adapter(adap); + i2c_unlock_bus(adap, I2C_LOCK_SEGMENT); return ret; } -- GitLab From bb2874bba8eec8dbeed8de569f4d918deca70c97 Mon Sep 17 00:00:00 2001 From: Wei Lu Date: Wed, 11 Jul 2018 22:32:47 -0400 Subject: [PATCH 0478/2269] drm/amdkfd: Fix error codes in kfd_get_process MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit e47cb828eb3fca3e8999a0b9aa053dda18552071 ] Return ERR_PTR(-EINVAL) if kfd_get_process fails to find the process. This fixes kernel oopses when a child process calls KFD ioctls with a file descriptor inherited from the parent process. Signed-off-by: Wei Lu Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Acked-by: Christian König Signed-off-by: Oded Gabbay Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdkfd/kfd_process.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index c74cf22a1ed9d..3ae88dcff08d1 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -123,6 +123,8 @@ struct kfd_process *kfd_get_process(const struct task_struct *thread) return ERR_PTR(-EINVAL); process = find_process(thread); + if (!process) + return ERR_PTR(-EINVAL); return process; } -- GitLab From 8be95762229a173c76ded9d2340b2aeffd6a553b Mon Sep 17 00:00:00 2001 From: Zhouyang Jia Date: Tue, 12 Jun 2018 12:40:03 +0800 Subject: [PATCH 0479/2269] rtc: bq4802: add error handling for devm_ioremap [ Upstream commit 7874b919866ba91bac253fa219d3d4c82bb944df ] When devm_ioremap fails, the lack of error-handling code may cause unexpected results. This patch adds error-handling code after calling devm_ioremap. Signed-off-by: Zhouyang Jia Signed-off-by: Alexandre Belloni Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/rtc/rtc-bq4802.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/rtc/rtc-bq4802.c b/drivers/rtc/rtc-bq4802.c index bd170cb3361ce..5747a54cbd423 100644 --- a/drivers/rtc/rtc-bq4802.c +++ b/drivers/rtc/rtc-bq4802.c @@ -164,6 +164,10 @@ static int bq4802_probe(struct platform_device *pdev) } else if (p->r->flags & IORESOURCE_MEM) { p->regs = devm_ioremap(&pdev->dev, p->r->start, resource_size(p->r)); + if (!p->regs){ + err = -ENOMEM; + goto out; + } p->read = bq4802_read_mem; p->write = bq4802_write_mem; } else { -- GitLab From c8831347d7060b48c5e68587ef5b774f9c10c3a0 Mon Sep 17 00:00:00 2001 From: Timo Wischer Date: Tue, 10 Jul 2018 17:28:45 +0200 Subject: [PATCH 0480/2269] ALSA: pcm: Fix snd_interval_refine first/last with open min/max [ Upstream commit ff2d6acdf6f13d9f8fdcd890844c6d7535ac1f10 ] Without this commit the following intervals [x y), (x y) were be replaced to (y-1 y) by snd_interval_refine_last(). This was also done if y-1 is part of the previous interval. With this changes it will be replaced with [y-1 y) in case of y-1 is part of the previous interval. A similar behavior will be used for snd_interval_refine_first(). This commit adapts the changes for alsa-lib of commit 9bb985c ("pcm: snd_interval_refine_first/last: exclude value only if also excluded before") Signed-off-by: Timo Wischer Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- sound/core/pcm_lib.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/sound/core/pcm_lib.c b/sound/core/pcm_lib.c index faa67861cbc17..729a85a6211d6 100644 --- a/sound/core/pcm_lib.c +++ b/sound/core/pcm_lib.c @@ -629,27 +629,33 @@ EXPORT_SYMBOL(snd_interval_refine); static int snd_interval_refine_first(struct snd_interval *i) { + const unsigned int last_max = i->max; + if (snd_BUG_ON(snd_interval_empty(i))) return -EINVAL; if (snd_interval_single(i)) return 0; i->max = i->min; - i->openmax = i->openmin; - if (i->openmax) + if (i->openmin) i->max++; + /* only exclude max value if also excluded before refine */ + i->openmax = (i->openmax && i->max >= last_max); return 1; } static int snd_interval_refine_last(struct snd_interval *i) { + const unsigned int last_min = i->min; + if (snd_BUG_ON(snd_interval_empty(i))) return -EINVAL; if (snd_interval_single(i)) return 0; i->min = i->max; - i->openmin = i->openmax; - if (i->openmin) + if (i->openmax) i->min--; + /* only exclude min value if also excluded before refine */ + i->openmin = (i->openmin && i->min <= last_min); return 1; } -- GitLab From 7a5096cf28864f65f1926e5d63e19ad6fe0431bb Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Wed, 4 Jul 2018 13:59:16 +0200 Subject: [PATCH 0481/2269] scsi: libfc: fixup 'sleeping function called from invalid context' [ Upstream commit fa519f701d27198a2858bb108fc18ea9d8c106a7 ] fc_rport_login() will be calling mutex_lock() while running inside an RCU-protected section, triggering the warning 'sleeping function called from invalid context'. To fix this we can drop the rcu functions here altogether as the disc mutex protecting the list itself is already held, preventing any list manipulation. Fixes: a407c593398c ("scsi: libfc: Fixup disc_mutex handling") Signed-off-by: Hannes Reinecke Acked-by: Johannes Thumshirn Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/libfc/fc_disc.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/libfc/fc_disc.c b/drivers/scsi/libfc/fc_disc.c index 8660f923ace02..bb9c1c0166439 100644 --- a/drivers/scsi/libfc/fc_disc.c +++ b/drivers/scsi/libfc/fc_disc.c @@ -294,9 +294,11 @@ static void fc_disc_done(struct fc_disc *disc, enum fc_disc_event event) * discovery, reverify or log them in. Otherwise, log them out. * Skip ports which were never discovered. These are the dNS port * and ports which were created by PLOGI. + * + * We don't need to use the _rcu variant here as the rport list + * is protected by the disc mutex which is already held on entry. */ - rcu_read_lock(); - list_for_each_entry_rcu(rdata, &disc->rports, peers) { + list_for_each_entry(rdata, &disc->rports, peers) { if (!kref_get_unless_zero(&rdata->kref)) continue; if (rdata->disc_id) { @@ -307,7 +309,6 @@ static void fc_disc_done(struct fc_disc *disc, enum fc_disc_event event) } kref_put(&rdata->kref, fc_rport_destroy); } - rcu_read_unlock(); mutex_unlock(&disc->disc_mutex); disc->disc_callback(lport, event); mutex_lock(&disc->disc_mutex); -- GitLab From 2c7de6d9c0d726bbf0844f6de2164bb96d2e1650 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Tue, 29 May 2018 19:12:18 -0700 Subject: [PATCH 0482/2269] selftest: timers: Tweak raw_skew to SKIP when ADJ_OFFSET/other clock adjustments are in progress [ Upstream commit 1416270f4a1ae83ea84156ceba19a66a8f88be1f ] In the past we've warned when ADJ_OFFSET was in progress, usually caused by ntpd or some other time adjusting daemon running in non steady sate, which can cause the skew calculations to be incorrect. Thus, this patch checks to see if the clock was being adjusted when we fail so that we don't cause false negatives. Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Miroslav Lichvar Cc: Richard Cochran Cc: Prarit Bhargava Cc: Stephen Boyd Cc: Shuah Khan Cc: linux-kselftest@vger.kernel.org Suggested-by: Miroslav Lichvar Signed-off-by: John Stultz Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/timers/raw_skew.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tools/testing/selftests/timers/raw_skew.c b/tools/testing/selftests/timers/raw_skew.c index ca6cd146aafe1..dcf73c5dab6e1 100644 --- a/tools/testing/selftests/timers/raw_skew.c +++ b/tools/testing/selftests/timers/raw_skew.c @@ -134,6 +134,11 @@ int main(int argv, char **argc) printf(" %lld.%i(act)", ppm/1000, abs((int)(ppm%1000))); if (llabs(eppm - ppm) > 1000) { + if (tx1.offset || tx2.offset || + tx1.freq != tx2.freq || tx1.tick != tx2.tick) { + printf(" [SKIP]\n"); + return ksft_exit_skip("The clock was adjusted externally. Shutdown NTPd or other time sync daemons\n"); + } printf(" [FAILED]\n"); return ksft_exit_fail(); } -- GitLab From 2f9cd8588241f725fbe0c8cc56c1bd7e092e7b48 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 4 Jul 2018 12:38:09 +0300 Subject: [PATCH 0483/2269] drm/panel: type promotion bug in s6e8aa0_read_mtp_id() [ Upstream commit cd0e0ca69109d025b1a1b6609f70682db62138b0 ] The ARRAY_SIZE() macro is type size_t. If s6e8aa0_dcs_read() returns a negative error code, then "ret < ARRAY_SIZE(id)" is false because the negative error code is type promoted to a high positive value. Fixes: 02051ca06371 ("drm/panel: add S6E8AA0 driver") Signed-off-by: Dan Carpenter Reviewed-by: Andrzej Hajda Signed-off-by: Thierry Reding Link: https://patchwork.freedesktop.org/patch/msgid/20180704093807.s3lqsb2v6dg2k43d@kili.mountain Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/panel/panel-samsung-s6e8aa0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/panel/panel-samsung-s6e8aa0.c b/drivers/gpu/drm/panel/panel-samsung-s6e8aa0.c index a188a3959f1ad..6ad827b93ae19 100644 --- a/drivers/gpu/drm/panel/panel-samsung-s6e8aa0.c +++ b/drivers/gpu/drm/panel/panel-samsung-s6e8aa0.c @@ -823,7 +823,7 @@ static void s6e8aa0_read_mtp_id(struct s6e8aa0 *ctx) int ret, i; ret = s6e8aa0_dcs_read(ctx, 0xd1, id, ARRAY_SIZE(id)); - if (ret < ARRAY_SIZE(id) || id[0] == 0x00) { + if (ret < 0 || ret < ARRAY_SIZE(id) || id[0] == 0x00) { dev_err(ctx->dev, "read id failed\n"); ctx->error = -EIO; return; -- GitLab From 07a252b478fb1321021931cae559a0573c141ab9 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Mon, 2 Jul 2018 17:35:59 +0800 Subject: [PATCH 0484/2269] blk-mq: only attempt to merge bio if there is rq in sw queue [ Upstream commit b04f50ab8a74129b3041a2836c33c916be3c6667 ] Only attempt to merge bio iff the ctx->rq_list isn't empty, because: 1) for high-performance SSD, most of times dispatch may succeed, then there may be nothing left in ctx->rq_list, so don't try to merge over sw queue if it is empty, then we can save one acquiring of ctx->lock 2) we can't expect good merge performance on per-cpu sw queue, and missing one merge on sw queue won't be a big deal since tasks can be scheduled from one CPU to another. Cc: Laurence Oberman Cc: Omar Sandoval Cc: Bart Van Assche Tested-by: Kashyap Desai Reported-by: Kashyap Desai Reviewed-by: Christoph Hellwig Signed-off-by: Ming Lei Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- block/blk-mq-sched.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index eca011fdfa0ed..ae5d8f10a27c5 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -236,7 +236,8 @@ bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio) return e->type->ops.mq.bio_merge(hctx, bio); } - if (hctx->flags & BLK_MQ_F_SHOULD_MERGE) { + if ((hctx->flags & BLK_MQ_F_SHOULD_MERGE) && + !list_empty_careful(&ctx->rq_list)) { /* default per sw-queue merge */ spin_lock(&ctx->lock); ret = blk_mq_attempt_merge(q, ctx, bio); -- GitLab From b520f00da41f748fcf90b01d5399e2e2b28cb2f4 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Mon, 25 Jun 2018 19:31:49 +0800 Subject: [PATCH 0485/2269] blk-mq: avoid to synchronize rcu inside blk_cleanup_queue() [ Upstream commit 1311326cf4755c7ffefd20f576144ecf46d9906b ] SCSI probing may synchronously create and destroy a lot of request_queues for non-existent devices. Any synchronize_rcu() in queue creation or destroy path may introduce long latency during booting, see detailed description in comment of blk_register_queue(). This patch removes one synchronize_rcu() inside blk_cleanup_queue() for this case, commit c2856ae2f315d75(blk-mq: quiesce queue before freeing queue) needs synchronize_rcu() for implementing blk_mq_quiesce_queue(), but when queue isn't initialized, it isn't necessary to do that since only pass-through requests are involved, no original issue in scsi_execute() at all. Without this patch and previous one, it may take more 20+ seconds for virtio-scsi to complete disk probe. With the two patches, the time becomes less than 100ms. Fixes: c2856ae2f315d75 ("blk-mq: quiesce queue before freeing queue") Reported-by: Andrew Jones Cc: Omar Sandoval Cc: Bart Van Assche Cc: linux-scsi@vger.kernel.org Cc: "Martin K. Petersen" Cc: Christoph Hellwig Tested-by: Andrew Jones Signed-off-by: Ming Lei Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- block/blk-core.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index 1d27e2a152e07..6aa2bc4e96529 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -669,9 +669,13 @@ void blk_cleanup_queue(struct request_queue *q) * make sure all in-progress dispatch are completed because * blk_freeze_queue() can only complete all requests, and * dispatch may still be in-progress since we dispatch requests - * from more than one contexts + * from more than one contexts. + * + * No need to quiesce queue if it isn't initialized yet since + * blk_freeze_queue() should be enough for cases of passthrough + * request. */ - if (q->mq_ops) + if (q->mq_ops && blk_queue_init_done(q)) blk_mq_quiesce_queue(q); /* for synchronous bio-based driver finish in-flight integrity i/o */ -- GitLab From a5d093d1857ef4fd1415929b54f9216c36666cdc Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Mon, 2 Jul 2018 15:59:38 -0700 Subject: [PATCH 0486/2269] pinctrl: msm: Fix msm_config_group_get() to be compliant [ Upstream commit 05e0c828955c1cab58dd71a04539442e5375d917 ] If you do this on an sdm845 board: cat /sys/kernel/debug/pinctrl/3400000.pinctrl/pinconf-groups ...it looks like nonsense. For every pin you see listed: input bias bus hold, input bias disabled, input bias pull down, input bias pull up That's because msm_config_group_get() isn't complying with the rules that pinconf_generic_dump_one() expects. Specifically for boolean parameters (anything with a "struct pin_config_item" where has_arg is false) the function expects that the function should return its value not through the "config" parameter but should return "0" if the value is set and "-EINVAL" if the value isn't set. Let's fix this. >From a quick sample of other pinctrl drivers, it appears to be tradition to also return 1 through the config parameter for these boolean parameters when they exist. I'm not one to knock tradition, so I'll follow tradition and return 1 in these cases. While I'm at it, I'll also continue searching for four leaf clovers, kocking on wood three times, and trying not to break mirrors. Fixes: f365be092572 ("pinctrl: Add Qualcomm TLMM driver") Signed-off-by: Douglas Anderson Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/qcom/pinctrl-msm.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/pinctrl/qcom/pinctrl-msm.c b/drivers/pinctrl/qcom/pinctrl-msm.c index ff491da64dab8..31632c0875047 100644 --- a/drivers/pinctrl/qcom/pinctrl-msm.c +++ b/drivers/pinctrl/qcom/pinctrl-msm.c @@ -238,22 +238,30 @@ static int msm_config_group_get(struct pinctrl_dev *pctldev, /* Convert register value to pinconf value */ switch (param) { case PIN_CONFIG_BIAS_DISABLE: - arg = arg == MSM_NO_PULL; + if (arg != MSM_NO_PULL) + return -EINVAL; + arg = 1; break; case PIN_CONFIG_BIAS_PULL_DOWN: - arg = arg == MSM_PULL_DOWN; + if (arg != MSM_PULL_DOWN) + return -EINVAL; + arg = 1; break; case PIN_CONFIG_BIAS_BUS_HOLD: if (pctrl->soc->pull_no_keeper) return -ENOTSUPP; - arg = arg == MSM_KEEPER; + if (arg != MSM_KEEPER) + return -EINVAL; + arg = 1; break; case PIN_CONFIG_BIAS_PULL_UP: if (pctrl->soc->pull_no_keeper) arg = arg == MSM_PULL_UP_NO_KEEPER; else arg = arg == MSM_PULL_UP; + if (!arg) + return -EINVAL; break; case PIN_CONFIG_DRIVE_STRENGTH: arg = msm_regval_to_drive(arg); -- GitLab From 07101e1c8435c942472114f31b03fa4d775dd4fd Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Mon, 2 Jul 2018 15:59:39 -0700 Subject: [PATCH 0487/2269] pinctrl: qcom: spmi-gpio: Fix pmic_gpio_config_get() to be compliant [ Upstream commit 1cf86bc21257a330e3af51f2a4e885f1a705f6a5 ] If you do this on an sdm845 board: grep "" /sys/kernel/debug/pinctrl/*spmi:pmic*/pinconf-groups ...it looks like nonsense. For every pin you see listed: input bias disabled, input bias high impedance, input bias pull down, input bias pull up, ... That's because pmic_gpio_config_get() isn't complying with the rules that pinconf_generic_dump_one() expects. Specifically for boolean parameters (anything with a "struct pin_config_item" where has_arg is false) the function expects that the function should return its value not through the "config" parameter but should return "0" if the value is set and "-EINVAL" if the value isn't set. Let's fix this. >From a quick sample of other pinctrl drivers, it appears to be tradition to also return 1 through the config parameter for these boolean parameters when they exist. I'm not one to knock tradition, so I'll follow tradition and return 1 in these cases. While I'm at it, I'll also continue searching for four leaf clovers, kocking on wood three times, and trying not to break mirrors. NOTE: This also fixes an apparent typo for reading PIN_CONFIG_BIAS_DISABLE where the old driver was accidentally using "=" instead of "==" and thus was setting some internal state when you tried to query PIN_CONFIG_BIAS_DISABLE. Oops. Fixes: eadff3024472 ("pinctrl: Qualcomm SPMI PMIC GPIO pin controller driver") Signed-off-by: Douglas Anderson Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/qcom/pinctrl-spmi-gpio.c | 32 ++++++++++++++++++------ 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/drivers/pinctrl/qcom/pinctrl-spmi-gpio.c b/drivers/pinctrl/qcom/pinctrl-spmi-gpio.c index c2c0bab04257d..22aaf4375fac0 100644 --- a/drivers/pinctrl/qcom/pinctrl-spmi-gpio.c +++ b/drivers/pinctrl/qcom/pinctrl-spmi-gpio.c @@ -390,31 +390,47 @@ static int pmic_gpio_config_get(struct pinctrl_dev *pctldev, switch (param) { case PIN_CONFIG_DRIVE_PUSH_PULL: - arg = pad->buffer_type == PMIC_GPIO_OUT_BUF_CMOS; + if (pad->buffer_type != PMIC_GPIO_OUT_BUF_CMOS) + return -EINVAL; + arg = 1; break; case PIN_CONFIG_DRIVE_OPEN_DRAIN: - arg = pad->buffer_type == PMIC_GPIO_OUT_BUF_OPEN_DRAIN_NMOS; + if (pad->buffer_type != PMIC_GPIO_OUT_BUF_OPEN_DRAIN_NMOS) + return -EINVAL; + arg = 1; break; case PIN_CONFIG_DRIVE_OPEN_SOURCE: - arg = pad->buffer_type == PMIC_GPIO_OUT_BUF_OPEN_DRAIN_PMOS; + if (pad->buffer_type != PMIC_GPIO_OUT_BUF_OPEN_DRAIN_PMOS) + return -EINVAL; + arg = 1; break; case PIN_CONFIG_BIAS_PULL_DOWN: - arg = pad->pullup == PMIC_GPIO_PULL_DOWN; + if (pad->pullup != PMIC_GPIO_PULL_DOWN) + return -EINVAL; + arg = 1; break; case PIN_CONFIG_BIAS_DISABLE: - arg = pad->pullup = PMIC_GPIO_PULL_DISABLE; + if (pad->pullup != PMIC_GPIO_PULL_DISABLE) + return -EINVAL; + arg = 1; break; case PIN_CONFIG_BIAS_PULL_UP: - arg = pad->pullup == PMIC_GPIO_PULL_UP_30; + if (pad->pullup != PMIC_GPIO_PULL_UP_30) + return -EINVAL; + arg = 1; break; case PIN_CONFIG_BIAS_HIGH_IMPEDANCE: - arg = !pad->is_enabled; + if (pad->is_enabled) + return -EINVAL; + arg = 1; break; case PIN_CONFIG_POWER_SOURCE: arg = pad->power_source; break; case PIN_CONFIG_INPUT_ENABLE: - arg = pad->input_enabled; + if (!pad->input_enabled) + return -EINVAL; + arg = 1; break; case PIN_CONFIG_OUTPUT: arg = pad->out_value; -- GitLab From 0eb82af7cd718652ed9e18c00352bed33c270ace Mon Sep 17 00:00:00 2001 From: Mikko Perttunen Date: Fri, 29 Jun 2018 17:38:14 +0300 Subject: [PATCH 0488/2269] clk: tegra: bpmp: Don't crash when a clock fails to register [ Upstream commit f7b3182232c82bb9769e2d5471d702bae2972d2b ] When registering clocks, we just skip any that fail to register (leaving a NULL hole in the clock table). However, our of_xlate function still tries to dereference each entry while looking for the clock with the requested id, causing a crash if any clocks failed to register. Add a check to of_xlate to skip any NULL clocks. Signed-off-by: Mikko Perttunen Acked-by: Jon Hunter Signed-off-by: Stephen Boyd Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/clk/tegra/clk-bpmp.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/clk/tegra/clk-bpmp.c b/drivers/clk/tegra/clk-bpmp.c index 638ace64033b9..6c933b0e29a3b 100644 --- a/drivers/clk/tegra/clk-bpmp.c +++ b/drivers/clk/tegra/clk-bpmp.c @@ -581,9 +581,15 @@ static struct clk_hw *tegra_bpmp_clk_of_xlate(struct of_phandle_args *clkspec, unsigned int id = clkspec->args[0], i; struct tegra_bpmp *bpmp = data; - for (i = 0; i < bpmp->num_clocks; i++) - if (bpmp->clocks[i]->id == id) - return &bpmp->clocks[i]->hw; + for (i = 0; i < bpmp->num_clocks; i++) { + struct tegra_bpmp_clk *clk = bpmp->clocks[i]; + + if (!clk) + continue; + + if (clk->id == id) + return &clk->hw; + } return NULL; } -- GitLab From 29068d29718f0bdef2cb98e587e717496e81cbcc Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 11 Jul 2018 15:29:31 +0300 Subject: [PATCH 0489/2269] mei: bus: type promotion bug in mei_nfc_if_version() commit b40b3e9358fbafff6a4ba0f4b9658f6617146f9c upstream. We accidentally removed the check for negative returns without considering the issue of type promotion. The "if_version_length" variable is type size_t so if __mei_cl_recv() returns a negative then "bytes_recv" is type promoted to a high positive value and treated as success. Cc: Fixes: 582ab27a063a ("mei: bus: fix received data size check in NFC fixup") Signed-off-by: Dan Carpenter Signed-off-by: Tomas Winkler Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/bus-fixup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/mei/bus-fixup.c b/drivers/misc/mei/bus-fixup.c index 0208c4b027c52..fa0236a5e59a3 100644 --- a/drivers/misc/mei/bus-fixup.c +++ b/drivers/misc/mei/bus-fixup.c @@ -267,7 +267,7 @@ static int mei_nfc_if_version(struct mei_cl *cl, ret = 0; bytes_recv = __mei_cl_recv(cl, (u8 *)reply, if_version_length, 0); - if (bytes_recv < if_version_length) { + if (bytes_recv < 0 || bytes_recv < if_version_length) { dev_err(bus->dev, "Could not read IF version\n"); ret = -EIO; goto err; -- GitLab From a44d267a67e974fd0370ae0e0a691f9e59100462 Mon Sep 17 00:00:00 2001 From: Michal Simek Date: Tue, 10 Apr 2018 15:32:28 +0200 Subject: [PATCH 0490/2269] earlycon: Initialize port->uartclk based on clock-frequency property MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 814453adea7d081ad8917aa0f32d6a14165a3563 upstream. On DT based platforms when current-speed property is present baudrate is setup. Also port->uartclk is initialized to bogus BASE_BAUD * 16 value. Drivers like uartps/ns16550 contain logic when baudrate and uartclk is used for baudrate calculation. The patch is reading optional clock-frequency property to replace bogus BASE_BAUD * 16 calculation to have proper baudrate calculation. [-stable comment: commit 31cb9a8575ca ("earlycon: initialise baud field of earlycon device structure") has changed 8250_early.c behavior which now tries to setup UART speed. Ignoring clock-frequency results in wrong value of calculated divisor & malformed early console output.] Fixes: 31cb9a8575ca ("earlycon: initialise baud field of earlycon device structure") Signed-off-by: Michal Simek [rmilecki: add -stable comment and Fixes tag] Signed-off-by: Rafał Miłecki Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/earlycon.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/tty/serial/earlycon.c b/drivers/tty/serial/earlycon.c index ac667b47f1997..05ba1875bb3e8 100644 --- a/drivers/tty/serial/earlycon.c +++ b/drivers/tty/serial/earlycon.c @@ -289,6 +289,10 @@ int __init of_setup_earlycon(const struct earlycon_id *match, if (val) early_console_dev.baud = be32_to_cpu(*val); + val = of_get_flat_dt_prop(node, "clock-frequency", NULL); + if (val) + port->uartclk = be32_to_cpu(*val); + if (options) { early_console_dev.baud = simple_strtoul(options, NULL, 0); strlcpy(early_console_dev.options, options, -- GitLab From 5e8bf9f23f209170b8a45682726ebf1186edda32 Mon Sep 17 00:00:00 2001 From: Michal Simek Date: Wed, 25 Apr 2018 15:48:42 +0200 Subject: [PATCH 0491/2269] earlycon: Remove hardcoded port->uartclk initialization in of_setup_earlycon MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 182ead3e418a20328b73152b8e81fc8b4cac3b0b upstream. There is no reason to initialize uartclk to BASE_BAUD * 16 for DT based systems. [-stable comment: commit 31cb9a8575ca ("earlycon: initialise baud field of earlycon device structure") has changed 8250_early.c behavior which now tries to setup UART speed. Already-backported upstream commit 0ff3ab701963 ("serial: 8250_early: Only set divisor if valid clk & baud") handles properly uartclk not being set but it still requires backporting fix for wrong uartclk val. This fixes malformed early console output on arch-es with BASE_BAUD.] Fixes: 31cb9a8575ca ("earlycon: initialise baud field of earlycon device structure") Signed-off-by: Michal Simek Tested-by: Matt Redfearn [rmilecki: add -stable comment and Fixes tag] Signed-off-by: Rafał Miłecki Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/earlycon.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/tty/serial/earlycon.c b/drivers/tty/serial/earlycon.c index 05ba1875bb3e8..7b0a3a1688e83 100644 --- a/drivers/tty/serial/earlycon.c +++ b/drivers/tty/serial/earlycon.c @@ -254,7 +254,6 @@ int __init of_setup_earlycon(const struct earlycon_id *match, return -ENXIO; } port->mapbase = addr; - port->uartclk = BASE_BAUD * 16; val = of_get_flat_dt_prop(node, "reg-offset", NULL); if (val) -- GitLab From 00ac982d7aa3a945a8bdc561d6028b6bd5f92a79 Mon Sep 17 00:00:00 2001 From: Christophe Jaillet Date: Sun, 3 Sep 2017 14:29:02 +0200 Subject: [PATCH 0492/2269] ASoC: samsung: i2s: Fix error handling path in i2s_set_sysclk() commit 6431a7e36652517df82855ee79c8240001812a55 upstream. If 'clk_prepare_enable()' fails, we must 'put' the corresponding clock. Othewise, there is a resource leak. Fixes: f5c97c7b0438 ("ASoC: samsung: i2s: Handle return value of clk_prepare_enable.") Signed-off-by: Christophe JAILLET Signed-off-by: Mark Brown Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- sound/soc/samsung/i2s.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sound/soc/samsung/i2s.c b/sound/soc/samsung/i2s.c index f058f2bdd519c..33b4d14af2b3b 100644 --- a/sound/soc/samsung/i2s.c +++ b/sound/soc/samsung/i2s.c @@ -552,8 +552,11 @@ static int i2s_set_sysclk(struct snd_soc_dai *dai, } ret = clk_prepare_enable(i2s->op_clk); - if (ret) + if (ret) { + clk_put(i2s->op_clk); + i2s->op_clk = NULL; goto err; + } i2s->rclk_srcrate = clk_get_rate(i2s->op_clk); /* Over-ride the other's */ -- GitLab From 425026ae6b4cff4ac4456b6d10b92134523009cc Mon Sep 17 00:00:00 2001 From: Jaechul Lee Date: Wed, 6 Sep 2017 10:04:15 +0900 Subject: [PATCH 0493/2269] ASoC: samsung: Fix invalid argument when devm_gpiod_get is called commit 975b6a93088e83a41ba2f0dec2f086678fdb2a7a upstream. devm_gpiod_get is called with GPIOF_OUT_INIT_LOW but the function doesn't allow the parameters. Unluckily, GPIOF_OUT_INIT_LOW is same value as GPIOD_ASIS and gpio direction isn't set properly. Muted stream comes up when I try recording some sounds on TM2. mic-bias gpiod state can't be changed because the gpiod is created with the invalid parameter. The gpio should be set GPIOD_OUT_HIGH. Fixes: 1bfbc260a5b4 ("ASoC: samsung: Add machine driver for Exynos5433 based TM2 board") Signed-off-by: Jaechul Lee Reviewed-by: Krzysztof Kozlowski Signed-off-by: Mark Brown Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- sound/soc/samsung/tm2_wm5110.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sound/soc/samsung/tm2_wm5110.c b/sound/soc/samsung/tm2_wm5110.c index 68698f3d72f96..52e2fbe446d1c 100644 --- a/sound/soc/samsung/tm2_wm5110.c +++ b/sound/soc/samsung/tm2_wm5110.c @@ -436,8 +436,7 @@ static int tm2_probe(struct platform_device *pdev) snd_soc_card_set_drvdata(card, priv); card->dev = dev; - priv->gpio_mic_bias = devm_gpiod_get(dev, "mic-bias", - GPIOF_OUT_INIT_LOW); + priv->gpio_mic_bias = devm_gpiod_get(dev, "mic-bias", GPIOD_OUT_HIGH); if (IS_ERR(priv->gpio_mic_bias)) { dev_err(dev, "Failed to get mic bias gpio\n"); return PTR_ERR(priv->gpio_mic_bias); -- GitLab From 453740de93cd202453c2d992a4992727dda985f6 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 7 Sep 2017 19:45:20 +0100 Subject: [PATCH 0494/2269] drm/i915: Apply the GTT write flush for all !llc machines MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit c5ba5b24657e473b1c64b0a614b168a635a2c935 upstream. We also see the delayed GTT write issue on i915g/i915gm, so let's presume that it is a universal problem for all !llc machines, and that we just haven't yet noticed on g33, gen4 and gen5 machines. v2: Use a register that exists on all platforms Testcase: igt/gem_mmap_gtt/coherency # i915gm References: https://bugs.freedesktop.org/show_bug.cgi?id=102577 Signed-off-by: Chris Wilson Cc: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20170907184520.5032-1-chris@chris-wilson.co.uk Reviewed-by: Ville Syrjälä Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/i915_gem.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 90359c7954c8d..3c0ce3ee07108 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -687,10 +687,10 @@ flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains) switch (obj->base.write_domain) { case I915_GEM_DOMAIN_GTT: - if (INTEL_GEN(dev_priv) >= 6 && !HAS_LLC(dev_priv)) { + if (!HAS_LLC(dev_priv)) { intel_runtime_pm_get(dev_priv); spin_lock_irq(&dev_priv->uncore.lock); - POSTING_READ_FW(RING_ACTHD(dev_priv->engine[RCS]->mmio_base)); + POSTING_READ_FW(RING_HEAD(dev_priv->engine[RCS]->mmio_base)); spin_unlock_irq(&dev_priv->uncore.lock); intel_runtime_pm_put(dev_priv); } -- GitLab From 57d6f87ac3619b980248972b386a8bfd8f045275 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 4 Jun 2018 13:41:42 -0700 Subject: [PATCH 0495/2269] net/ipv6: prevent use after free in ip6_route_mpath_notify commit f7225172f25aaf0dfd9ad65f05be8da5d6108b12 upstream. syzbot reported a use-after-free: BUG: KASAN: use-after-free in ip6_route_mpath_notify+0xe9/0x100 net/ipv6/route.c:4180 Read of size 4 at addr ffff8801bf789cf0 by task syz-executor756/4555 CPU: 1 PID: 4555 Comm: syz-executor756 Not tainted 4.17.0-rc7+ #78 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x1b9/0x294 lib/dump_stack.c:113 print_address_description+0x6c/0x20b mm/kasan/report.c:256 kasan_report_error mm/kasan/report.c:354 [inline] kasan_report.cold.7+0x242/0x2fe mm/kasan/report.c:412 __asan_report_load4_noabort+0x14/0x20 mm/kasan/report.c:432 ip6_route_mpath_notify+0xe9/0x100 net/ipv6/route.c:4180 ip6_route_multipath_add+0x615/0x1910 net/ipv6/route.c:4303 inet6_rtm_newroute+0xe3/0x160 net/ipv6/route.c:4391 ... Allocated by task 4555: save_stack+0x43/0xd0 mm/kasan/kasan.c:448 set_track mm/kasan/kasan.c:460 [inline] kasan_kmalloc+0xc4/0xe0 mm/kasan/kasan.c:553 kasan_slab_alloc+0x12/0x20 mm/kasan/kasan.c:490 kmem_cache_alloc+0x12e/0x760 mm/slab.c:3554 dst_alloc+0xbb/0x1d0 net/core/dst.c:104 __ip6_dst_alloc+0x35/0xa0 net/ipv6/route.c:361 ip6_dst_alloc+0x29/0xb0 net/ipv6/route.c:376 ip6_route_info_create+0x4d4/0x3a30 net/ipv6/route.c:2834 ip6_route_multipath_add+0xc7e/0x1910 net/ipv6/route.c:4240 inet6_rtm_newroute+0xe3/0x160 net/ipv6/route.c:4391 ... Freed by task 4555: save_stack+0x43/0xd0 mm/kasan/kasan.c:448 set_track mm/kasan/kasan.c:460 [inline] __kasan_slab_free+0x11a/0x170 mm/kasan/kasan.c:521 kasan_slab_free+0xe/0x10 mm/kasan/kasan.c:528 __cache_free mm/slab.c:3498 [inline] kmem_cache_free+0x86/0x2d0 mm/slab.c:3756 dst_destroy+0x267/0x3c0 net/core/dst.c:140 dst_release_immediate+0x71/0x9e net/core/dst.c:205 fib6_add+0xa40/0x1650 net/ipv6/ip6_fib.c:1305 __ip6_ins_rt+0x6c/0x90 net/ipv6/route.c:1011 ip6_route_multipath_add+0x513/0x1910 net/ipv6/route.c:4267 inet6_rtm_newroute+0xe3/0x160 net/ipv6/route.c:4391 ... The problem is that rt_last can point to a deleted route if the insert fails. One reproducer is to insert a route and then add a multipath route that has a duplicate nexthop.e.g,: $ ip -6 ro add vrf red 2001:db8:101::/64 nexthop via 2001:db8:1::2 $ ip -6 ro append vrf red 2001:db8:101::/64 nexthop via 2001:db8:1::4 nexthop via 2001:db8:1::2 Fix by not setting rt_last until the it is verified the insert succeeded. Backport Note: - Upstream has replaced rt6_info usage with fib6_info in 8d1c802b281 ("net/ipv6: Flip FIB entries to fib6_info") - fib6_info_release was introduced upstream in 93531c674315 ("net/ipv6: separate handling of FIB entries from dst based routes"), but is not present in stable kernels; 4.14.y relies on dst_release/ ip6_rt_put/dst_release_immediate. Fixes: 3b1137fe7482 ("net: ipv6: Change notifications for multipath add to RTA_MULTIPATH") Cc: Eric Dumazet Reported-by: syzbot Signed-off-by: David Ahern Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Zubin Mithra Signed-off-by: Greg Kroah-Hartman --- net/ipv6/route.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 60efd326014bc..30204bc2fc480 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -3239,11 +3239,16 @@ static int ip6_route_multipath_add(struct fib6_config *cfg, err_nh = NULL; list_for_each_entry(nh, &rt6_nh_list, next) { - rt_last = nh->rt6_info; err = __ip6_ins_rt(nh->rt6_info, info, &nh->mxc, extack); - /* save reference to first route for notification */ - if (!rt_notif && !err) - rt_notif = nh->rt6_info; + + if (!err) { + /* save reference to last route successfully inserted */ + rt_last = nh->rt6_info; + + /* save reference to first route for notification */ + if (!rt_notif) + rt_notif = nh->rt6_info; + } /* nh->rt6_info is used or freed at this point, reset to NULL*/ nh->rt6_info = NULL; -- GitLab From 0d99649c5eeac23cb9bd5f473170e922f0bf693f Mon Sep 17 00:00:00 2001 From: Benjamin Poirier Date: Wed, 31 Jan 2018 16:26:27 +0900 Subject: [PATCH 0496/2269] e1000e: Remove Other from EIAC commit 745d0bd3af99ccc8c5f5822f808cd133eadad6ac upstream. It was reported that emulated e1000e devices in vmware esxi 6.5 Build 7526125 do not link up after commit 4aea7a5c5e94 ("e1000e: Avoid receiver overrun interrupt bursts", v4.15-rc1). Some tracing shows that after e1000e_trigger_lsc() is called, ICR reads out as 0x0 in e1000_msix_other() on emulated e1000e devices. In comparison, on real e1000e 82574 hardware, icr=0x80000004 (_INT_ASSERTED | _LSC) in the same situation. Some experimentation showed that this flaw in vmware e1000e emulation can be worked around by not setting Other in EIAC. This is how it was before 16ecba59bc33 ("e1000e: Do not read ICR in Other interrupt", v4.5-rc1). Fixes: 4aea7a5c5e94 ("e1000e: Avoid receiver overrun interrupt bursts") Signed-off-by: Benjamin Poirier Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher Cc: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/intel/e1000e/netdev.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index 6265ce8915b66..22bcaf502a3e9 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -1914,6 +1914,8 @@ static irqreturn_t e1000_msix_other(int __always_unused irq, void *data) bool enable = true; icr = er32(ICR); + ew32(ICR, E1000_ICR_OTHER); + if (icr & E1000_ICR_RXO) { ew32(ICR, E1000_ICR_RXO); enable = false; @@ -2036,7 +2038,6 @@ static void e1000_configure_msix(struct e1000_adapter *adapter) hw->hw_addr + E1000_EITR_82574(vector)); else writel(1, hw->hw_addr + E1000_EITR_82574(vector)); - adapter->eiac_mask |= E1000_IMS_OTHER; /* Cause Tx interrupts on every write back */ ivar |= BIT(31); @@ -2261,7 +2262,7 @@ static void e1000_irq_enable(struct e1000_adapter *adapter) if (adapter->msix_entries) { ew32(EIAC_82574, adapter->eiac_mask & E1000_EIAC_MASK_82574); - ew32(IMS, adapter->eiac_mask | E1000_IMS_LSC); + ew32(IMS, adapter->eiac_mask | E1000_IMS_OTHER | E1000_IMS_LSC); } else if (hw->mac.type >= e1000_pch_lpt) { ew32(IMS, IMS_ENABLE_MASK | E1000_IMS_ECCER); } else { -- GitLab From 5239be076587d82e722cd7025b3dbe5bc614ecaf Mon Sep 17 00:00:00 2001 From: Benjamin Poirier Date: Thu, 8 Feb 2018 15:47:12 +0900 Subject: [PATCH 0497/2269] Partial revert "e1000e: Avoid receiver overrun interrupt bursts" commit 1f0ea19722ef9dfa229a9540f70b8d1c34a98a6a upstream. This partially reverts commit 4aea7a5c5e940c1723add439f4088844cd26196d. We keep the fix for the first part of the problem (1) described in the log of that commit, that is to read ICR in the other interrupt handler. We remove the fix for the second part of the problem (2), Other interrupt throttling. Bursts of "Other" interrupts may once again occur during rxo (receive overflow) traffic conditions. This is deemed acceptable in the interest of avoiding unforeseen fallout from changes that are not strictly necessary. As discussed, the e1000e driver should be in "maintenance mode". Link: https://www.spinics.net/lists/netdev/msg480675.html Signed-off-by: Benjamin Poirier Acked-by: Alexander Duyck Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher Cc: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/intel/e1000e/netdev.c | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index 22bcaf502a3e9..6220b1331c2e6 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -1911,21 +1911,10 @@ static irqreturn_t e1000_msix_other(int __always_unused irq, void *data) struct e1000_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; u32 icr; - bool enable = true; icr = er32(ICR); ew32(ICR, E1000_ICR_OTHER); - if (icr & E1000_ICR_RXO) { - ew32(ICR, E1000_ICR_RXO); - enable = false; - /* napi poll will re-enable Other, make sure it runs */ - if (napi_schedule_prep(&adapter->napi)) { - adapter->total_rx_bytes = 0; - adapter->total_rx_packets = 0; - __napi_schedule(&adapter->napi); - } - } if (icr & E1000_ICR_LSC) { ew32(ICR, E1000_ICR_LSC); hw->mac.get_link_status = true; @@ -1934,7 +1923,7 @@ static irqreturn_t e1000_msix_other(int __always_unused irq, void *data) mod_timer(&adapter->watchdog_timer, jiffies + 1); } - if (enable && !test_bit(__E1000_DOWN, &adapter->state)) + if (!test_bit(__E1000_DOWN, &adapter->state)) ew32(IMS, E1000_IMS_OTHER); return IRQ_HANDLED; @@ -2704,8 +2693,7 @@ static int e1000e_poll(struct napi_struct *napi, int weight) napi_complete_done(napi, work_done); if (!test_bit(__E1000_DOWN, &adapter->state)) { if (adapter->msix_entries) - ew32(IMS, adapter->rx_ring->ims_val | - E1000_IMS_OTHER); + ew32(IMS, adapter->rx_ring->ims_val); else e1000_irq_enable(adapter); } -- GitLab From dd5456adab14a89c660508c01f7c736f6c6bebd4 Mon Sep 17 00:00:00 2001 From: Benjamin Poirier Date: Thu, 8 Feb 2018 15:47:13 +0900 Subject: [PATCH 0498/2269] e1000e: Fix queue interrupt re-raising in Other interrupt commit 361a954e6a7215de11a6179ad9bdc07d7e394b04 upstream. Restores the ICS write for Rx/Tx queue interrupts which was present before commit 16ecba59bc33 ("e1000e: Do not read ICR in Other interrupt", v4.5-rc1) but was not restored in commit 4aea7a5c5e94 ("e1000e: Avoid receiver overrun interrupt bursts", v4.15-rc1). This re-raises the queue interrupts in case the txq or rxq bits were set in ICR and the Other interrupt handler read and cleared ICR before the queue interrupt was raised. Fixes: 4aea7a5c5e94 ("e1000e: Avoid receiver overrun interrupt bursts") Signed-off-by: Benjamin Poirier Acked-by: Alexander Duyck Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher Cc: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/intel/e1000e/netdev.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index 6220b1331c2e6..a2fbbba8dc89e 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -1915,6 +1915,9 @@ static irqreturn_t e1000_msix_other(int __always_unused irq, void *data) icr = er32(ICR); ew32(ICR, E1000_ICR_OTHER); + if (icr & adapter->eiac_mask) + ew32(ICS, (icr & adapter->eiac_mask)); + if (icr & E1000_ICR_LSC) { ew32(ICR, E1000_ICR_LSC); hw->mac.get_link_status = true; -- GitLab From 7a5b2bf66bf8c72f2b0163a249404970bf83d284 Mon Sep 17 00:00:00 2001 From: Benjamin Poirier Date: Thu, 8 Feb 2018 15:47:14 +0900 Subject: [PATCH 0499/2269] e1000e: Avoid missed interrupts following ICR read commit 116f4a640b3197401bc93b8adc6c35040308ceff upstream. The 82574 specification update errata 12 states that interrupts may be missed if ICR is read while INT_ASSERTED is not set. Avoid that problem by setting all bits related to events that can trigger the Other interrupt in IMS. The Other interrupt is raised for such events regardless of whether or not they are set in IMS. However, only when they are set is the INT_ASSERTED bit also set in ICR. By doing this, we ensure that INT_ASSERTED is always set when we read ICR in e1000_msix_other() and steer clear of the errata. This also ensures that ICR will automatically be cleared on read, therefore we no longer need to clear bits explicitly. Signed-off-by: Benjamin Poirier Acked-by: Alexander Duyck Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher Cc: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/intel/e1000e/defines.h | 21 ++++++++++++++++++++- drivers/net/ethernet/intel/e1000e/netdev.c | 11 ++++------- 2 files changed, 24 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000e/defines.h b/drivers/net/ethernet/intel/e1000e/defines.h index afb7ebe20b243..824fd44e25f0d 100644 --- a/drivers/net/ethernet/intel/e1000e/defines.h +++ b/drivers/net/ethernet/intel/e1000e/defines.h @@ -400,6 +400,10 @@ #define E1000_ICR_RXDMT0 0x00000010 /* Rx desc min. threshold (0) */ #define E1000_ICR_RXO 0x00000040 /* Receiver Overrun */ #define E1000_ICR_RXT0 0x00000080 /* Rx timer intr (ring 0) */ +#define E1000_ICR_MDAC 0x00000200 /* MDIO Access Complete */ +#define E1000_ICR_SRPD 0x00010000 /* Small Receive Packet Detected */ +#define E1000_ICR_ACK 0x00020000 /* Receive ACK Frame Detected */ +#define E1000_ICR_MNG 0x00040000 /* Manageability Event Detected */ #define E1000_ICR_ECCER 0x00400000 /* Uncorrectable ECC Error */ /* If this bit asserted, the driver should claim the interrupt */ #define E1000_ICR_INT_ASSERTED 0x80000000 @@ -407,7 +411,7 @@ #define E1000_ICR_RXQ1 0x00200000 /* Rx Queue 1 Interrupt */ #define E1000_ICR_TXQ0 0x00400000 /* Tx Queue 0 Interrupt */ #define E1000_ICR_TXQ1 0x00800000 /* Tx Queue 1 Interrupt */ -#define E1000_ICR_OTHER 0x01000000 /* Other Interrupts */ +#define E1000_ICR_OTHER 0x01000000 /* Other Interrupt */ /* PBA ECC Register */ #define E1000_PBA_ECC_COUNTER_MASK 0xFFF00000 /* ECC counter mask */ @@ -431,12 +435,27 @@ E1000_IMS_RXSEQ | \ E1000_IMS_LSC) +/* These are all of the events related to the OTHER interrupt. + */ +#define IMS_OTHER_MASK ( \ + E1000_IMS_LSC | \ + E1000_IMS_RXO | \ + E1000_IMS_MDAC | \ + E1000_IMS_SRPD | \ + E1000_IMS_ACK | \ + E1000_IMS_MNG) + /* Interrupt Mask Set */ #define E1000_IMS_TXDW E1000_ICR_TXDW /* Transmit desc written back */ #define E1000_IMS_LSC E1000_ICR_LSC /* Link Status Change */ #define E1000_IMS_RXSEQ E1000_ICR_RXSEQ /* Rx sequence error */ #define E1000_IMS_RXDMT0 E1000_ICR_RXDMT0 /* Rx desc min. threshold */ +#define E1000_IMS_RXO E1000_ICR_RXO /* Receiver Overrun */ #define E1000_IMS_RXT0 E1000_ICR_RXT0 /* Rx timer intr */ +#define E1000_IMS_MDAC E1000_ICR_MDAC /* MDIO Access Complete */ +#define E1000_IMS_SRPD E1000_ICR_SRPD /* Small Receive Packet */ +#define E1000_IMS_ACK E1000_ICR_ACK /* Receive ACK Frame Detected */ +#define E1000_IMS_MNG E1000_ICR_MNG /* Manageability Event */ #define E1000_IMS_ECCER E1000_ICR_ECCER /* Uncorrectable ECC Error */ #define E1000_IMS_RXQ0 E1000_ICR_RXQ0 /* Rx Queue 0 Interrupt */ #define E1000_IMS_RXQ1 E1000_ICR_RXQ1 /* Rx Queue 1 Interrupt */ diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index a2fbbba8dc89e..d601560b300cf 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -1910,16 +1910,12 @@ static irqreturn_t e1000_msix_other(int __always_unused irq, void *data) struct net_device *netdev = data; struct e1000_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; - u32 icr; - - icr = er32(ICR); - ew32(ICR, E1000_ICR_OTHER); + u32 icr = er32(ICR); if (icr & adapter->eiac_mask) ew32(ICS, (icr & adapter->eiac_mask)); if (icr & E1000_ICR_LSC) { - ew32(ICR, E1000_ICR_LSC); hw->mac.get_link_status = true; /* guard against interrupt when we're going down */ if (!test_bit(__E1000_DOWN, &adapter->state)) @@ -1927,7 +1923,7 @@ static irqreturn_t e1000_msix_other(int __always_unused irq, void *data) } if (!test_bit(__E1000_DOWN, &adapter->state)) - ew32(IMS, E1000_IMS_OTHER); + ew32(IMS, E1000_IMS_OTHER | IMS_OTHER_MASK); return IRQ_HANDLED; } @@ -2254,7 +2250,8 @@ static void e1000_irq_enable(struct e1000_adapter *adapter) if (adapter->msix_entries) { ew32(EIAC_82574, adapter->eiac_mask & E1000_EIAC_MASK_82574); - ew32(IMS, adapter->eiac_mask | E1000_IMS_OTHER | E1000_IMS_LSC); + ew32(IMS, adapter->eiac_mask | E1000_IMS_OTHER | + IMS_OTHER_MASK); } else if (hw->mac.type >= e1000_pch_lpt) { ew32(IMS, IMS_ENABLE_MASK | E1000_IMS_ECCER); } else { -- GitLab From dc2aa33a101d4fae88272819029a14c603d1168a Mon Sep 17 00:00:00 2001 From: Benjamin Poirier Date: Tue, 6 Mar 2018 10:55:52 +0900 Subject: [PATCH 0500/2269] Revert "e1000e: Separate signaling for link check/link up" commit 3016e0a0c91246e55418825ba9aae271be267522 upstream. This reverts commit 19110cfbb34d4af0cdfe14cd243f3b09dc95b013. This reverts commit 4110e02eb45ea447ec6f5459c9934de0a273fb91. This reverts commit d3604515c9eda464a92e8e67aae82dfe07fe3c98. Commit 19110cfbb34d ("e1000e: Separate signaling for link check/link up") changed what happens to the link status when there is an error which happens after "get_link_status = false" in the copper check_for_link callbacks. Previously, such an error would be ignored and the link considered up. After that commit, any error implies that the link is down. Revert commit 19110cfbb34d ("e1000e: Separate signaling for link check/link up") and its followups. After reverting, the race condition described in the log of commit 19110cfbb34d is reintroduced. It may still be triggered by LSC events but this should keep the link down in case the link is electrically unstable, as discussed. The race may no longer be triggered by RXO events because commit 4aea7a5c5e94 ("e1000e: Avoid receiver overrun interrupt bursts") restored reading icr in the Other handler. Link: https://lkml.org/lkml/2018/3/1/789 Signed-off-by: Benjamin Poirier Acked-by: Alexander Duyck Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher Cc: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/intel/e1000e/ich8lan.c | 13 ++++--------- drivers/net/ethernet/intel/e1000e/mac.c | 13 ++++--------- drivers/net/ethernet/intel/e1000e/netdev.c | 2 +- 3 files changed, 9 insertions(+), 19 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c index ff308b05d68cc..d6d4ed7acf031 100644 --- a/drivers/net/ethernet/intel/e1000e/ich8lan.c +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c @@ -1367,9 +1367,6 @@ out: * Checks to see of the link status of the hardware has changed. If a * change in link status has been detected, then we read the PHY registers * to get the current speed/duplex if link exists. - * - * Returns a negative error code (-E1000_ERR_*) or 0 (link down) or 1 (link - * up). **/ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) { @@ -1385,7 +1382,7 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) * Change or Rx Sequence Error interrupt. */ if (!mac->get_link_status) - return 1; + return 0; /* First we want to see if the MII Status Register reports * link. If so, then we want to get the current speed/duplex @@ -1602,7 +1599,7 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) * we have already determined whether we have link or not. */ if (!mac->autoneg) - return 1; + return -E1000_ERR_CONFIG; /* Auto-Neg is enabled. Auto Speed Detection takes care * of MAC speed/duplex configuration. So we only need to @@ -1616,12 +1613,10 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) * different link partner. */ ret_val = e1000e_config_fc_after_link_up(hw); - if (ret_val) { + if (ret_val) e_dbg("Error configuring flow control\n"); - return ret_val; - } - return 1; + return ret_val; } static s32 e1000_get_variants_ich8lan(struct e1000_adapter *adapter) diff --git a/drivers/net/ethernet/intel/e1000e/mac.c b/drivers/net/ethernet/intel/e1000e/mac.c index db735644b3121..b322011ec2828 100644 --- a/drivers/net/ethernet/intel/e1000e/mac.c +++ b/drivers/net/ethernet/intel/e1000e/mac.c @@ -410,9 +410,6 @@ void e1000e_clear_hw_cntrs_base(struct e1000_hw *hw) * Checks to see of the link status of the hardware has changed. If a * change in link status has been detected, then we read the PHY registers * to get the current speed/duplex if link exists. - * - * Returns a negative error code (-E1000_ERR_*) or 0 (link down) or 1 (link - * up). **/ s32 e1000e_check_for_copper_link(struct e1000_hw *hw) { @@ -426,7 +423,7 @@ s32 e1000e_check_for_copper_link(struct e1000_hw *hw) * Change or Rx Sequence Error interrupt. */ if (!mac->get_link_status) - return 1; + return 0; /* First we want to see if the MII Status Register reports * link. If so, then we want to get the current speed/duplex @@ -450,7 +447,7 @@ s32 e1000e_check_for_copper_link(struct e1000_hw *hw) * we have already determined whether we have link or not. */ if (!mac->autoneg) - return 1; + return -E1000_ERR_CONFIG; /* Auto-Neg is enabled. Auto Speed Detection takes care * of MAC speed/duplex configuration. So we only need to @@ -464,12 +461,10 @@ s32 e1000e_check_for_copper_link(struct e1000_hw *hw) * different link partner. */ ret_val = e1000e_config_fc_after_link_up(hw); - if (ret_val) { + if (ret_val) e_dbg("Error configuring flow control\n"); - return ret_val; - } - return 1; + return ret_val; } /** diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index d601560b300cf..a25dc581a9030 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -5089,7 +5089,7 @@ static bool e1000e_has_link(struct e1000_adapter *adapter) case e1000_media_type_copper: if (hw->mac.get_link_status) { ret_val = hw->mac.ops.check_for_link(hw); - link_active = ret_val > 0; + link_active = !hw->mac.get_link_status; } else { link_active = true; } -- GitLab From da2b4ae736e8800b639103edb8a7a769f0806f87 Mon Sep 17 00:00:00 2001 From: Benjamin Poirier Date: Tue, 6 Mar 2018 10:55:53 +0900 Subject: [PATCH 0501/2269] e1000e: Fix link check race condition commit e2710dbf0dc1e37d85368e2404049dadda848d5a upstream. Alex reported the following race condition: /* link goes up... interrupt... schedule watchdog */ \ e1000_watchdog_task \ e1000e_has_link \ hw->mac.ops.check_for_link() === e1000e_check_for_copper_link \ e1000e_phy_has_link_generic(..., &link) link = true /* link goes down... interrupt */ \ e1000_msix_other hw->mac.get_link_status = true /* link is up */ mac->get_link_status = false link_active = true /* link_active is true, wrongly, and stays so because * get_link_status is false */ Avoid this problem by making sure that we don't set get_link_status = false after having checked the link. It seems this problem has been present since the introduction of e1000e. Link: https://lkml.org/lkml/2018/1/29/338 Reported-by: Alexander Duyck Signed-off-by: Benjamin Poirier Acked-by: Alexander Duyck Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher Cc: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/intel/e1000e/ich8lan.c | 31 +++++++++++---------- drivers/net/ethernet/intel/e1000e/mac.c | 14 +++++----- 2 files changed, 24 insertions(+), 21 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c index d6d4ed7acf031..1dddfb7b2de6c 100644 --- a/drivers/net/ethernet/intel/e1000e/ich8lan.c +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c @@ -1383,6 +1383,7 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) */ if (!mac->get_link_status) return 0; + mac->get_link_status = false; /* First we want to see if the MII Status Register reports * link. If so, then we want to get the current speed/duplex @@ -1390,12 +1391,12 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) */ ret_val = e1000e_phy_has_link_generic(hw, 1, 0, &link); if (ret_val) - return ret_val; + goto out; if (hw->mac.type == e1000_pchlan) { ret_val = e1000_k1_gig_workaround_hv(hw, link); if (ret_val) - return ret_val; + goto out; } /* When connected at 10Mbps half-duplex, some parts are excessively @@ -1428,7 +1429,7 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) ret_val = hw->phy.ops.acquire(hw); if (ret_val) - return ret_val; + goto out; if (hw->mac.type == e1000_pch2lan) emi_addr = I82579_RX_CONFIG; @@ -1450,7 +1451,7 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) hw->phy.ops.release(hw); if (ret_val) - return ret_val; + goto out; if (hw->mac.type >= e1000_pch_spt) { u16 data; @@ -1459,14 +1460,14 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) if (speed == SPEED_1000) { ret_val = hw->phy.ops.acquire(hw); if (ret_val) - return ret_val; + goto out; ret_val = e1e_rphy_locked(hw, PHY_REG(776, 20), &data); if (ret_val) { hw->phy.ops.release(hw); - return ret_val; + goto out; } ptr_gap = (data & (0x3FF << 2)) >> 2; @@ -1480,18 +1481,18 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) } hw->phy.ops.release(hw); if (ret_val) - return ret_val; + goto out; } else { ret_val = hw->phy.ops.acquire(hw); if (ret_val) - return ret_val; + goto out; ret_val = e1e_wphy_locked(hw, PHY_REG(776, 20), 0xC023); hw->phy.ops.release(hw); if (ret_val) - return ret_val; + goto out; } } @@ -1518,7 +1519,7 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) (hw->adapter->pdev->device == E1000_DEV_ID_PCH_I218_V3)) { ret_val = e1000_k1_workaround_lpt_lp(hw, link); if (ret_val) - return ret_val; + goto out; } if (hw->mac.type >= e1000_pch_lpt) { /* Set platform power management values for @@ -1526,7 +1527,7 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) */ ret_val = e1000_platform_pm_pch_lpt(hw, link); if (ret_val) - return ret_val; + goto out; } /* Clear link partner's EEE ability */ @@ -1549,9 +1550,7 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) } if (!link) - return 0; /* No link detected */ - - mac->get_link_status = false; + goto out; switch (hw->mac.type) { case e1000_pch2lan: @@ -1617,6 +1616,10 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) e_dbg("Error configuring flow control\n"); return ret_val; + +out: + mac->get_link_status = true; + return ret_val; } static s32 e1000_get_variants_ich8lan(struct e1000_adapter *adapter) diff --git a/drivers/net/ethernet/intel/e1000e/mac.c b/drivers/net/ethernet/intel/e1000e/mac.c index b322011ec2828..5bdc3a2d4fd70 100644 --- a/drivers/net/ethernet/intel/e1000e/mac.c +++ b/drivers/net/ethernet/intel/e1000e/mac.c @@ -424,19 +424,15 @@ s32 e1000e_check_for_copper_link(struct e1000_hw *hw) */ if (!mac->get_link_status) return 0; + mac->get_link_status = false; /* First we want to see if the MII Status Register reports * link. If so, then we want to get the current speed/duplex * of the PHY. */ ret_val = e1000e_phy_has_link_generic(hw, 1, 0, &link); - if (ret_val) - return ret_val; - - if (!link) - return 0; /* No link detected */ - - mac->get_link_status = false; + if (ret_val || !link) + goto out; /* Check if there was DownShift, must be checked * immediately after link-up @@ -465,6 +461,10 @@ s32 e1000e_check_for_copper_link(struct e1000_hw *hw) e_dbg("Error configuring flow control\n"); return ret_val; + +out: + mac->get_link_status = true; + return ret_val; } /** -- GitLab From 9884746ef5c21a9e7aeff6774825359796ef5cf3 Mon Sep 17 00:00:00 2001 From: Benjamin Poirier Date: Tue, 20 Feb 2018 15:12:00 +0900 Subject: [PATCH 0502/2269] e1000e: Fix check_for_link return value with autoneg off commit 4e7dc08e57c95673d2edaba8983c3de4dd1f65f5 upstream. When autoneg is off, the .check_for_link callback functions clear the get_link_status flag and systematically return a "pseudo-error". This means that the link is not detected as up until the next execution of the e1000_watchdog_task() 2 seconds later. Fixes: 19110cfbb34d ("e1000e: Separate signaling for link check/link up") Signed-off-by: Benjamin Poirier Acked-by: Sasha Neftin Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher Cc: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/intel/e1000e/ich8lan.c | 2 +- drivers/net/ethernet/intel/e1000e/mac.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c index 1dddfb7b2de6c..00eedf202e62d 100644 --- a/drivers/net/ethernet/intel/e1000e/ich8lan.c +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c @@ -1598,7 +1598,7 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) * we have already determined whether we have link or not. */ if (!mac->autoneg) - return -E1000_ERR_CONFIG; + return 1; /* Auto-Neg is enabled. Auto Speed Detection takes care * of MAC speed/duplex configuration. So we only need to diff --git a/drivers/net/ethernet/intel/e1000e/mac.c b/drivers/net/ethernet/intel/e1000e/mac.c index 5bdc3a2d4fd70..48cc945fc8b0f 100644 --- a/drivers/net/ethernet/intel/e1000e/mac.c +++ b/drivers/net/ethernet/intel/e1000e/mac.c @@ -443,7 +443,7 @@ s32 e1000e_check_for_copper_link(struct e1000_hw *hw) * we have already determined whether we have link or not. */ if (!mac->autoneg) - return -E1000_ERR_CONFIG; + return 1; /* Auto-Neg is enabled. Auto Speed Detection takes care * of MAC speed/duplex configuration. So we only need to -- GitLab From 2cc4d365363b1fb681b8231adcf4a8f80082506c Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 26 Sep 2018 08:38:16 +0200 Subject: [PATCH 0503/2269] Linux 4.14.72 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index dd4eaeeb2050d..734722bda1736 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 71 +SUBLEVEL = 72 EXTRAVERSION = NAME = Petit Gorille -- GitLab From e709f59a8cd2405f59451073c58b3e335c643b27 Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Wed, 26 Sep 2018 13:48:19 -0700 Subject: [PATCH 0504/2269] ANDROID: restrict store of prefer_idle as boolean It works as boolean so stores like a boolean too. Bug: 116734731 Test: Set stune Change-Id: I0daa3cc1723d009ed5bc2a71fa1c2e3d4ece6a7f Signed-off-by: Wei Wang --- kernel/sched/tune.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sched/tune.c b/kernel/sched/tune.c index 765dcd46d468e..74a45606dc8c1 100644 --- a/kernel/sched/tune.c +++ b/kernel/sched/tune.c @@ -455,7 +455,7 @@ prefer_idle_write(struct cgroup_subsys_state *css, struct cftype *cft, u64 prefer_idle) { struct schedtune *st = css_st(css); - st->prefer_idle = prefer_idle; + st->prefer_idle = !!prefer_idle; return 0; } -- GitLab From 13a47054f0b2f00c0f916387a1d4cf5402848add Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Thu, 13 Sep 2018 16:43:07 +0200 Subject: [PATCH 0505/2269] gso_segment: Reset skb->mac_len after modifying network header MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit c56cae23c6b167acc68043c683c4573b80cbcc2c ] When splitting a GSO segment that consists of encapsulated packets, the skb->mac_len of the segments can end up being set wrong, causing packet drops in particular when using act_mirred and ifb interfaces in combination with a qdisc that splits GSO packets. This happens because at the time skb_segment() is called, network_header will point to the inner header, throwing off the calculation in skb_reset_mac_len(). The network_header is subsequently adjust by the outer IP gso_segment handlers, but they don't set the mac_len. Fix this by adding skb_reset_mac_len() calls to both the IPv4 and IPv6 gso_segment handlers, after they modify the network_header. Many thanks to Eric Dumazet for his help in identifying the cause of the bug. Acked-by: Dave Taht Reviewed-by: Eric Dumazet Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/af_inet.c | 1 + net/ipv6/ip6_offload.c | 1 + 2 files changed, 2 insertions(+) diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index b9d9a2b8792c7..f31c09873d0f8 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1307,6 +1307,7 @@ struct sk_buff *inet_gso_segment(struct sk_buff *skb, if (encap) skb_reset_inner_headers(skb); skb->network_header = (u8 *)iph - skb->head; + skb_reset_mac_len(skb); } while ((skb = skb->next)); out: diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index 4a87f9428ca51..e3698b6d82313 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -113,6 +113,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, payload_len = skb->len - nhoff - sizeof(*ipv6h); ipv6h->payload_len = htons(payload_len); skb->network_header = (u8 *)ipv6h - skb->head; + skb_reset_mac_len(skb); if (udpfrag) { int err = ip6_find_1stfragopt(skb, &prevhdr); -- GitLab From bba90d3686fd0e2de7b058fbbf2d314527a20d49 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 14 Sep 2018 12:02:31 -0700 Subject: [PATCH 0506/2269] ipv6: fix possible use-after-free in ip6_xmit() [ Upstream commit bbd6528d28c1b8e80832b3b018ec402b6f5c3215 ] In the unlikely case ip6_xmit() has to call skb_realloc_headroom(), we need to call skb_set_owner_w() before consuming original skb, otherwise we risk a use-after-free. Bring IPv6 in line with what we do in IPv4 to fix this. Fixes: 1da177e4c3f41 ("Linux-2.6.12-rc2") Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ip6_output.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 1da021527fcd1..9ab1e0fcbc13c 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -219,12 +219,10 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, kfree_skb(skb); return -ENOBUFS; } + if (skb->sk) + skb_set_owner_w(skb2, skb->sk); consume_skb(skb); skb = skb2; - /* skb_set_owner_w() changes sk->sk_wmem_alloc atomically, - * it is safe to call in our context (socket lock not held) - */ - skb_set_owner_w(skb, (struct sock *)sk); } if (opt->opt_flen) ipv6_push_frag_opts(skb, opt, &proto); -- GitLab From 9951e17efd05b8ad49bbc0db2e6b483f95d94d86 Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Wed, 12 Sep 2018 07:36:35 +0200 Subject: [PATCH 0507/2269] net/appletalk: fix minor pointer leak to userspace in SIOCFINDIPDDPRT [ Upstream commit 9824dfae5741275473a23a7ed5756c7b6efacc9d ] Fields ->dev and ->next of struct ipddp_route may be copied to userspace on the SIOCFINDIPDDPRT ioctl. This is only accessible to CAP_NET_ADMIN though. Let's manually copy the relevant fields instead of using memcpy(). BugLink: http://blog.infosectcbr.com.au/2018/09/linux-kernel-infoleaks.html Cc: Jann Horn Signed-off-by: Willy Tarreau Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/appletalk/ipddp.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/appletalk/ipddp.c b/drivers/net/appletalk/ipddp.c index 9375cef224205..3d27616d9c855 100644 --- a/drivers/net/appletalk/ipddp.c +++ b/drivers/net/appletalk/ipddp.c @@ -283,8 +283,12 @@ static int ipddp_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) case SIOCFINDIPDDPRT: spin_lock_bh(&ipddp_route_lock); rp = __ipddp_find_route(&rcp); - if (rp) - memcpy(&rcp2, rp, sizeof(rcp2)); + if (rp) { + memset(&rcp2, 0, sizeof(rcp2)); + rcp2.ip = rp->ip; + rcp2.at = rp->at; + rcp2.flags = rp->flags; + } spin_unlock_bh(&ipddp_route_lock); if (rp) { -- GitLab From c0f2c063abc8a9ce8e116ad4236686234c709e9f Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 14 Sep 2018 17:39:53 +0100 Subject: [PATCH 0508/2269] net: hp100: fix always-true check for link up state [ Upstream commit a7f38002fb69b44f8fc622ecb838665d0b8666af ] The operation ~(p100_inb(VG_LAN_CFG_1) & HP100_LINK_UP) returns a value that is always non-zero and hence the wait for the link to drop always terminates prematurely. Fix this by using a logical not operator instead of a bitwise complement. This issue has been in the driver since pre-2.6.12-rc2. Detected by CoverityScan, CID#114157 ("Logical vs. bitwise operator") Signed-off-by: Colin Ian King Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/hp/hp100.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hp/hp100.c b/drivers/net/ethernet/hp/hp100.c index c8c7ad2eff77e..9b5a68b654328 100644 --- a/drivers/net/ethernet/hp/hp100.c +++ b/drivers/net/ethernet/hp/hp100.c @@ -2634,7 +2634,7 @@ static int hp100_login_to_vg_hub(struct net_device *dev, u_short force_relogin) /* Wait for link to drop */ time = jiffies + (HZ / 10); do { - if (~(hp100_inb(VG_LAN_CFG_1) & HP100_LINK_UP_ST)) + if (!(hp100_inb(VG_LAN_CFG_1) & HP100_LINK_UP_ST)) break; if (!in_interrupt()) schedule_timeout_interruptible(1); -- GitLab From f3aa1f3a1113fc85ccb8e625055154f026c47396 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Fri, 14 Sep 2018 16:28:05 +0200 Subject: [PATCH 0509/2269] pppoe: fix reception of frames with no mac header [ Upstream commit 8540827ebac6b654ab2f69c8fbce9e4fbd6304a0 ] pppoe_rcv() needs to look back at the Ethernet header in order to lookup the PPPoE session. Therefore we need to ensure that the mac header is big enough to contain an Ethernet header. Otherwise eth_hdr(skb)->h_source might access invalid data. ================================================================== BUG: KMSAN: uninit-value in __get_item drivers/net/ppp/pppoe.c:172 [inline] BUG: KMSAN: uninit-value in get_item drivers/net/ppp/pppoe.c:236 [inline] BUG: KMSAN: uninit-value in pppoe_rcv+0xcef/0x10e0 drivers/net/ppp/pppoe.c:450 CPU: 0 PID: 4543 Comm: syz-executor355 Not tainted 4.16.0+ #87 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:17 [inline] dump_stack+0x185/0x1d0 lib/dump_stack.c:53 kmsan_report+0x142/0x240 mm/kmsan/kmsan.c:1067 __msan_warning_32+0x6c/0xb0 mm/kmsan/kmsan_instr.c:683 __get_item drivers/net/ppp/pppoe.c:172 [inline] get_item drivers/net/ppp/pppoe.c:236 [inline] pppoe_rcv+0xcef/0x10e0 drivers/net/ppp/pppoe.c:450 __netif_receive_skb_core+0x47df/0x4a90 net/core/dev.c:4562 __netif_receive_skb net/core/dev.c:4627 [inline] netif_receive_skb_internal+0x49d/0x630 net/core/dev.c:4701 netif_receive_skb+0x230/0x240 net/core/dev.c:4725 tun_rx_batched drivers/net/tun.c:1555 [inline] tun_get_user+0x740f/0x7c60 drivers/net/tun.c:1962 tun_chr_write_iter+0x1d4/0x330 drivers/net/tun.c:1990 call_write_iter include/linux/fs.h:1782 [inline] new_sync_write fs/read_write.c:469 [inline] __vfs_write+0x7fb/0x9f0 fs/read_write.c:482 vfs_write+0x463/0x8d0 fs/read_write.c:544 SYSC_write+0x172/0x360 fs/read_write.c:589 SyS_write+0x55/0x80 fs/read_write.c:581 do_syscall_64+0x309/0x430 arch/x86/entry/common.c:287 entry_SYSCALL_64_after_hwframe+0x3d/0xa2 RIP: 0033:0x4447c9 RSP: 002b:00007fff64c8fc28 EFLAGS: 00000297 ORIG_RAX: 0000000000000001 RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00000000004447c9 RDX: 000000000000fd87 RSI: 0000000020000600 RDI: 0000000000000004 RBP: 00000000006cf018 R08: 00007fff64c8fda8 R09: 00007fff00006bda R10: 0000000000005fe7 R11: 0000000000000297 R12: 00000000004020d0 R13: 0000000000402160 R14: 0000000000000000 R15: 0000000000000000 Uninit was created at: kmsan_save_stack_with_flags mm/kmsan/kmsan.c:278 [inline] kmsan_internal_poison_shadow+0xb8/0x1b0 mm/kmsan/kmsan.c:188 kmsan_kmalloc+0x94/0x100 mm/kmsan/kmsan.c:314 kmsan_slab_alloc+0x11/0x20 mm/kmsan/kmsan.c:321 slab_post_alloc_hook mm/slab.h:445 [inline] slab_alloc_node mm/slub.c:2737 [inline] __kmalloc_node_track_caller+0xaed/0x11c0 mm/slub.c:4369 __kmalloc_reserve net/core/skbuff.c:138 [inline] __alloc_skb+0x2cf/0x9f0 net/core/skbuff.c:206 alloc_skb include/linux/skbuff.h:984 [inline] alloc_skb_with_frags+0x1d4/0xb20 net/core/skbuff.c:5234 sock_alloc_send_pskb+0xb56/0x1190 net/core/sock.c:2085 tun_alloc_skb drivers/net/tun.c:1532 [inline] tun_get_user+0x2242/0x7c60 drivers/net/tun.c:1829 tun_chr_write_iter+0x1d4/0x330 drivers/net/tun.c:1990 call_write_iter include/linux/fs.h:1782 [inline] new_sync_write fs/read_write.c:469 [inline] __vfs_write+0x7fb/0x9f0 fs/read_write.c:482 vfs_write+0x463/0x8d0 fs/read_write.c:544 SYSC_write+0x172/0x360 fs/read_write.c:589 SyS_write+0x55/0x80 fs/read_write.c:581 do_syscall_64+0x309/0x430 arch/x86/entry/common.c:287 entry_SYSCALL_64_after_hwframe+0x3d/0xa2 ================================================================== Fixes: 224cf5ad14c0 ("ppp: Move the PPP drivers") Reported-by: syzbot+f5f6080811c849739212@syzkaller.appspotmail.com Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ppp/pppoe.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c index 71e2aef6b7a1b..951892da3352e 100644 --- a/drivers/net/ppp/pppoe.c +++ b/drivers/net/ppp/pppoe.c @@ -429,6 +429,9 @@ static int pppoe_rcv(struct sk_buff *skb, struct net_device *dev, if (!skb) goto out; + if (skb_mac_header_len(skb) < ETH_HLEN) + goto drop; + if (!pskb_may_pull(skb, sizeof(struct pppoe_hdr))) goto drop; -- GitLab From 6f5ec16ee02b4f453796d40ec086e6523877c907 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Mon, 17 Sep 2018 22:00:24 +0200 Subject: [PATCH 0510/2269] qmi_wwan: set DTR for modems in forced USB2 mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 922005c7f50e7f4b2a6dbc182e9c575b4f92396b ] Recent firmware revisions have added the ability to force these modems to USB2 mode, hiding their SuperSpeed capabilities from the host. The driver has been using the SuperSpeed capability, as shown by the bcdUSB field of the device descriptor, to detect the need to enable the DTR quirk. This method fails when the modems are forced to USB2 mode by the modem firmware. Fix by unconditionally enabling the DTR quirk for the affected device IDs. Reported-by: Fred Veldini Reported-by: Deshu Wen Signed-off-by: Bjørn Mork Reported-by: Fred Veldini Reported-by: Deshu Wen Signed-off-by: Bjørn Mork Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/qmi_wwan.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 31684f3382f61..c5d4b35bb72ae 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1205,13 +1205,13 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x1199, 0x9061, 8)}, /* Sierra Wireless Modem */ {QMI_FIXED_INTF(0x1199, 0x9063, 8)}, /* Sierra Wireless EM7305 */ {QMI_FIXED_INTF(0x1199, 0x9063, 10)}, /* Sierra Wireless EM7305 */ - {QMI_FIXED_INTF(0x1199, 0x9071, 8)}, /* Sierra Wireless MC74xx */ - {QMI_FIXED_INTF(0x1199, 0x9071, 10)}, /* Sierra Wireless MC74xx */ - {QMI_FIXED_INTF(0x1199, 0x9079, 8)}, /* Sierra Wireless EM74xx */ - {QMI_FIXED_INTF(0x1199, 0x9079, 10)}, /* Sierra Wireless EM74xx */ - {QMI_FIXED_INTF(0x1199, 0x907b, 8)}, /* Sierra Wireless EM74xx */ - {QMI_FIXED_INTF(0x1199, 0x907b, 10)}, /* Sierra Wireless EM74xx */ - {QMI_FIXED_INTF(0x1199, 0x9091, 8)}, /* Sierra Wireless EM7565 */ + {QMI_QUIRK_SET_DTR(0x1199, 0x9071, 8)}, /* Sierra Wireless MC74xx */ + {QMI_QUIRK_SET_DTR(0x1199, 0x9071, 10)},/* Sierra Wireless MC74xx */ + {QMI_QUIRK_SET_DTR(0x1199, 0x9079, 8)}, /* Sierra Wireless EM74xx */ + {QMI_QUIRK_SET_DTR(0x1199, 0x9079, 10)},/* Sierra Wireless EM74xx */ + {QMI_QUIRK_SET_DTR(0x1199, 0x907b, 8)}, /* Sierra Wireless EM74xx */ + {QMI_QUIRK_SET_DTR(0x1199, 0x907b, 10)},/* Sierra Wireless EM74xx */ + {QMI_QUIRK_SET_DTR(0x1199, 0x9091, 8)}, /* Sierra Wireless EM7565 */ {QMI_FIXED_INTF(0x1bbb, 0x011e, 4)}, /* Telekom Speedstick LTE II (Alcatel One Touch L100V LTE) */ {QMI_FIXED_INTF(0x1bbb, 0x0203, 2)}, /* Alcatel L800MA */ {QMI_FIXED_INTF(0x2357, 0x0201, 4)}, /* TP-LINK HSUPA Modem MA180 */ -- GitLab From 0f6f77f3b8f46b40da840a409a76bdb5c271e0a5 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 13 Sep 2018 16:27:20 +0200 Subject: [PATCH 0511/2269] udp4: fix IP_CMSG_CHECKSUM for connected sockets [ Upstream commit 2b5a921740a55c00223a797d075b9c77c42cb171 ] commit 2abb7cdc0dc8 ("udp: Add support for doing checksum unnecessary conversion") left out the early demux path for connected sockets. As a result IP_CMSG_CHECKSUM gives wrong values for such socket when GRO is not enabled/available. This change addresses the issue by moving the csum conversion to a common helper and using such helper in both the default and the early demux rx path. Fixes: 2abb7cdc0dc8 ("udp: Add support for doing checksum unnecessary conversion") Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/udp.c | 49 ++++++++++++++++++++++++++----------------------- 1 file changed, 26 insertions(+), 23 deletions(-) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 5752bf7593dc4..3de413867991e 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2049,6 +2049,28 @@ static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh, inet_compute_pseudo); } +/* wrapper for udp_queue_rcv_skb tacking care of csum conversion and + * return code conversion for ip layer consumption + */ +static int udp_unicast_rcv_skb(struct sock *sk, struct sk_buff *skb, + struct udphdr *uh) +{ + int ret; + + if (inet_get_convert_csum(sk) && uh->check && !IS_UDPLITE(sk)) + skb_checksum_try_convert(skb, IPPROTO_UDP, uh->check, + inet_compute_pseudo); + + ret = udp_queue_rcv_skb(sk, skb); + + /* a return value > 0 means to resubmit the input, but + * it wants the return to be -protocol, or 0 + */ + if (ret > 0) + return -ret; + return 0; +} + /* * All we need to do is get the socket, and then do a checksum. */ @@ -2095,14 +2117,9 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, if (unlikely(sk->sk_rx_dst != dst)) udp_sk_rx_dst_set(sk, dst); - ret = udp_queue_rcv_skb(sk, skb); + ret = udp_unicast_rcv_skb(sk, skb, uh); sock_put(sk); - /* a return value > 0 means to resubmit the input, but - * it wants the return to be -protocol, or 0 - */ - if (ret > 0) - return -ret; - return 0; + return ret; } if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST)) @@ -2110,22 +2127,8 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, saddr, daddr, udptable, proto); sk = __udp4_lib_lookup_skb(skb, uh->source, uh->dest, udptable); - if (sk) { - int ret; - - if (inet_get_convert_csum(sk) && uh->check && !IS_UDPLITE(sk)) - skb_checksum_try_convert(skb, IPPROTO_UDP, uh->check, - inet_compute_pseudo); - - ret = udp_queue_rcv_skb(sk, skb); - - /* a return value > 0 means to resubmit the input, but - * it wants the return to be -protocol, or 0 - */ - if (ret > 0) - return -ret; - return 0; - } + if (sk) + return udp_unicast_rcv_skb(sk, skb, uh); if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) goto drop; -- GitLab From ff64a1a2ca3dd1785386250222e6dcabbadd975d Mon Sep 17 00:00:00 2001 From: Vasily Khoruzhick Date: Thu, 13 Sep 2018 11:12:03 -0700 Subject: [PATCH 0512/2269] neighbour: confirm neigh entries when ARP packet is received [ Upstream commit f0e0d04413fcce9bc76388839099aee93cd0d33b ] Update 'confirmed' timestamp when ARP packet is received. It shouldn't affect locktime logic and anyway entry can be confirmed by any higher-layer protocol. Thus it makes sense to confirm it when ARP packet is received. Fixes: 77d7123342dc ("neighbour: update neigh timestamps iff update is effective") Signed-off-by: Vasily Khoruzhick Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/neighbour.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 514d697d46913..dcb89cbc27302 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1174,6 +1174,12 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, lladdr = neigh->ha; } + /* Update confirmed timestamp for neighbour entry after we + * received ARP packet even if it doesn't change IP to MAC binding. + */ + if (new & NUD_CONNECTED) + neigh->confirmed = jiffies; + /* If entry was valid and address is not changed, do not change entry state, if new one is STALE. */ @@ -1195,15 +1201,12 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, } } - /* Update timestamps only once we know we will make a change to the + /* Update timestamp only once we know we will make a change to the * neighbour entry. Otherwise we risk to move the locktime window with * noop updates and ignore relevant ARP updates. */ - if (new != old || lladdr != neigh->ha) { - if (new & NUD_CONNECTED) - neigh->confirmed = jiffies; + if (new != old || lladdr != neigh->ha) neigh->updated = jiffies; - } if (new != old) { neigh_del_timer(neigh); -- GitLab From b13f721a3409594ec91ec0d06eeef228175864a6 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 13 Sep 2018 16:27:21 +0200 Subject: [PATCH 0513/2269] udp6: add missing checks on edumux packet processing [ Upstream commit eb63f2964dbe36f26deac77d3016791675821ded ] Currently the UDPv6 early demux rx code path lacks some mandatory checks, already implemented into the normal RX code path - namely the checksum conversion and no_check6_rx check. Similar to the previous commit, we move the common processing to an UDPv6 specific helper and call it from both edemux code path and normal code path. In respect to the UDPv4, we need to add an explicit check for non zero csum according to no_check6_rx value. Reported-by: Jianlin Shi Suggested-by: Xin Long Fixes: c9f2c1ae123a ("udp6: fix socket leak on early demux") Fixes: 2abb7cdc0dc8 ("udp: Add support for doing checksum unnecessary conversion") Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/udp.c | 65 ++++++++++++++++++++++++++++---------------------- 1 file changed, 37 insertions(+), 28 deletions(-) diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 330d5ea8451b5..5cee941ab0a9c 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -780,6 +780,28 @@ static void udp6_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst) } } +/* wrapper for udp_queue_rcv_skb tacking care of csum conversion and + * return code conversion for ip layer consumption + */ +static int udp6_unicast_rcv_skb(struct sock *sk, struct sk_buff *skb, + struct udphdr *uh) +{ + int ret; + + if (inet_get_convert_csum(sk) && uh->check && !IS_UDPLITE(sk)) + skb_checksum_try_convert(skb, IPPROTO_UDP, uh->check, + ip6_compute_pseudo); + + ret = udpv6_queue_rcv_skb(sk, skb); + + /* a return value > 0 means to resubmit the input, but + * it wants the return to be -protocol, or 0 + */ + if (ret > 0) + return -ret; + return 0; +} + int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, int proto) { @@ -831,13 +853,14 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, if (unlikely(sk->sk_rx_dst != dst)) udp6_sk_rx_dst_set(sk, dst); - ret = udpv6_queue_rcv_skb(sk, skb); - sock_put(sk); + if (!uh->check && !udp_sk(sk)->no_check6_rx) { + sock_put(sk); + goto report_csum_error; + } - /* a return value > 0 means to resubmit the input */ - if (ret > 0) - return ret; - return 0; + ret = udp6_unicast_rcv_skb(sk, skb, uh); + sock_put(sk); + return ret; } /* @@ -850,30 +873,13 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, /* Unicast */ sk = __udp6_lib_lookup_skb(skb, uh->source, uh->dest, udptable); if (sk) { - int ret; - - if (!uh->check && !udp_sk(sk)->no_check6_rx) { - udp6_csum_zero_error(skb); - goto csum_error; - } - - if (inet_get_convert_csum(sk) && uh->check && !IS_UDPLITE(sk)) - skb_checksum_try_convert(skb, IPPROTO_UDP, uh->check, - ip6_compute_pseudo); - - ret = udpv6_queue_rcv_skb(sk, skb); - - /* a return value > 0 means to resubmit the input */ - if (ret > 0) - return ret; - - return 0; + if (!uh->check && !udp_sk(sk)->no_check6_rx) + goto report_csum_error; + return udp6_unicast_rcv_skb(sk, skb, uh); } - if (!uh->check) { - udp6_csum_zero_error(skb); - goto csum_error; - } + if (!uh->check) + goto report_csum_error; if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) goto discard; @@ -894,6 +900,9 @@ short_packet: ulen, skb->len, daddr, ntohs(uh->dest)); goto discard; + +report_csum_error: + udp6_csum_zero_error(skb); csum_error: __UDP6_INC_STATS(net, UDP_MIB_CSUMERRORS, proto == IPPROTO_UDPLITE); discard: -- GitLab From ee547ed7dee4c888c63bd136e73dfa7c96466064 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Fri, 14 Sep 2018 12:03:18 +0200 Subject: [PATCH 0514/2269] net/sched: act_sample: fix NULL dereference in the data path [ Upstream commit 34043d250f51368f214aed7f54c2dc29c819a8c7 ] Matteo reported the following splat, testing the datapath of TC 'sample': BUG: KASAN: null-ptr-deref in tcf_sample_act+0xc4/0x310 Read of size 8 at addr 0000000000000000 by task nc/433 CPU: 0 PID: 433 Comm: nc Not tainted 4.19.0-rc3-kvm #17 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS ?-20180531_142017-buildhw-08.phx2.fedoraproject.org-1.fc28 04/01/2014 Call Trace: kasan_report.cold.6+0x6c/0x2fa tcf_sample_act+0xc4/0x310 ? dev_hard_start_xmit+0x117/0x180 tcf_action_exec+0xa3/0x160 tcf_classify+0xdd/0x1d0 htb_enqueue+0x18e/0x6b0 ? deref_stack_reg+0x7a/0xb0 ? htb_delete+0x4b0/0x4b0 ? unwind_next_frame+0x819/0x8f0 ? entry_SYSCALL_64_after_hwframe+0x44/0xa9 __dev_queue_xmit+0x722/0xca0 ? unwind_get_return_address_ptr+0x50/0x50 ? netdev_pick_tx+0xe0/0xe0 ? save_stack+0x8c/0xb0 ? kasan_kmalloc+0xbe/0xd0 ? __kmalloc_track_caller+0xe4/0x1c0 ? __kmalloc_reserve.isra.45+0x24/0x70 ? __alloc_skb+0xdd/0x2e0 ? sk_stream_alloc_skb+0x91/0x3b0 ? tcp_sendmsg_locked+0x71b/0x15a0 ? tcp_sendmsg+0x22/0x40 ? __sys_sendto+0x1b0/0x250 ? __x64_sys_sendto+0x6f/0x80 ? do_syscall_64+0x5d/0x150 ? entry_SYSCALL_64_after_hwframe+0x44/0xa9 ? __sys_sendto+0x1b0/0x250 ? __x64_sys_sendto+0x6f/0x80 ? do_syscall_64+0x5d/0x150 ? entry_SYSCALL_64_after_hwframe+0x44/0xa9 ip_finish_output2+0x495/0x590 ? ip_copy_metadata+0x2e0/0x2e0 ? skb_gso_validate_network_len+0x6f/0x110 ? ip_finish_output+0x174/0x280 __tcp_transmit_skb+0xb17/0x12b0 ? __tcp_select_window+0x380/0x380 tcp_write_xmit+0x913/0x1de0 ? __sk_mem_schedule+0x50/0x80 tcp_sendmsg_locked+0x49d/0x15a0 ? tcp_rcv_established+0x8da/0xa30 ? tcp_set_state+0x220/0x220 ? clear_user+0x1f/0x50 ? iov_iter_zero+0x1ae/0x590 ? __fget_light+0xa0/0xe0 tcp_sendmsg+0x22/0x40 __sys_sendto+0x1b0/0x250 ? __ia32_sys_getpeername+0x40/0x40 ? _copy_to_user+0x58/0x70 ? poll_select_copy_remaining+0x176/0x200 ? __pollwait+0x1c0/0x1c0 ? ktime_get_ts64+0x11f/0x140 ? kern_select+0x108/0x150 ? core_sys_select+0x360/0x360 ? vfs_read+0x127/0x150 ? kernel_write+0x90/0x90 __x64_sys_sendto+0x6f/0x80 do_syscall_64+0x5d/0x150 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7fefef2b129d Code: ff ff ff ff eb b6 0f 1f 80 00 00 00 00 48 8d 05 51 37 0c 00 41 89 ca 8b 00 85 c0 75 20 45 31 c9 45 31 c0 b8 2c 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 6b f3 c3 66 0f 1f 84 00 00 00 00 00 41 56 41 RSP: 002b:00007fff2f5350c8 EFLAGS: 00000246 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 000056118d60c120 RCX: 00007fefef2b129d RDX: 0000000000002000 RSI: 000056118d629320 RDI: 0000000000000003 RBP: 000056118d530370 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000002000 R13: 000056118d5c2a10 R14: 000056118d5c2a10 R15: 000056118d5303b8 tcf_sample_act() tried to update its per-cpu stats, but tcf_sample_init() forgot to allocate them, because tcf_idr_create() was called with a wrong value of 'cpustats'. Setting it to true proved to fix the reported crash. Reported-by: Matteo Croce Fixes: 65a206c01e8e ("net/sched: Change act_api and act_xxx modules to use IDR") Fixes: 5c5670fae430 ("net/sched: Introduce sample tc action") Tested-by: Matteo Croce Signed-off-by: Davide Caratti Acked-by: Jiri Pirko Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/act_sample.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c index 53752b9327d02..a859b55d78991 100644 --- a/net/sched/act_sample.c +++ b/net/sched/act_sample.c @@ -64,7 +64,7 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla, if (!exists) { ret = tcf_idr_create(tn, parm->index, est, a, - &act_sample_ops, bind, false); + &act_sample_ops, bind, true); if (ret) return ret; ret = ACT_P_CREATED; -- GitLab From 10cacaf13189711ce5c81222aaae3cd61a5ca848 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Wed, 12 Sep 2018 17:44:41 +0200 Subject: [PATCH 0515/2269] tls: don't copy the key out of tls12_crypto_info_aes_gcm_128 [ Upstream commit 7cba09c6d5bc73ebbd25a353742d9ddb7a713b95 ] There's no need to copy the key to an on-stack buffer before calling crypto_aead_setkey(). Fixes: 3c4d7559159b ("tls: kernel TLS support") Signed-off-by: Sabrina Dubroca Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/tls/tls_sw.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index b81aa6d7dc45e..7fd5e04755ea9 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -661,7 +661,6 @@ static void tls_sw_free_resources(struct sock *sk) int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx) { - char keyval[TLS_CIPHER_AES_GCM_128_KEY_SIZE]; struct tls_crypto_info *crypto_info; struct tls12_crypto_info_aes_gcm_128 *gcm_128_info; struct tls_sw_context *sw_ctx; @@ -753,9 +752,7 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx) ctx->push_pending_record = tls_sw_push_pending_record; - memcpy(keyval, gcm_128_info->key, TLS_CIPHER_AES_GCM_128_KEY_SIZE); - - rc = crypto_aead_setkey(sw_ctx->aead_send, keyval, + rc = crypto_aead_setkey(sw_ctx->aead_send, gcm_128_info->key, TLS_CIPHER_AES_GCM_128_KEY_SIZE); if (rc) goto free_aead; -- GitLab From 0c0334299a7e085849c83bffe754626ba517d5ee Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Wed, 12 Sep 2018 17:44:42 +0200 Subject: [PATCH 0516/2269] tls: zero the crypto information from tls_context before freeing [ Upstream commit 86029d10af18381814881d6cce2dd6872163b59f ] This contains key material in crypto_send_aes_gcm_128 and crypto_recv_aes_gcm_128. Introduce union tls_crypto_context, and replace the two identical unions directly embedded in struct tls_context with it. We can then use this union to clean up the memory in the new tls_ctx_free() function. Fixes: 3c4d7559159b ("tls: kernel TLS support") Signed-off-by: Sabrina Dubroca Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/tls.h | 14 ++++++++------ net/tls/tls_main.c | 15 ++++++++++++--- net/tls/tls_sw.c | 2 +- 3 files changed, 21 insertions(+), 10 deletions(-) diff --git a/include/net/tls.h b/include/net/tls.h index 48940a883d9a3..86ed3dd80fe70 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -79,11 +79,13 @@ enum { TLS_PENDING_CLOSED_RECORD }; +union tls_crypto_context { + struct tls_crypto_info info; + struct tls12_crypto_info_aes_gcm_128 aes_gcm_128; +}; + struct tls_context { - union { - struct tls_crypto_info crypto_send; - struct tls12_crypto_info_aes_gcm_128 crypto_send_aes_gcm_128; - }; + union tls_crypto_context crypto_send; void *priv_ctx; @@ -208,8 +210,8 @@ static inline void tls_fill_prepend(struct tls_context *ctx, * size KTLS_DTLS_HEADER_SIZE + KTLS_DTLS_NONCE_EXPLICIT_SIZE */ buf[0] = record_type; - buf[1] = TLS_VERSION_MINOR(ctx->crypto_send.version); - buf[2] = TLS_VERSION_MAJOR(ctx->crypto_send.version); + buf[1] = TLS_VERSION_MINOR(ctx->crypto_send.info.version); + buf[2] = TLS_VERSION_MAJOR(ctx->crypto_send.info.version); /* we can use IV for nonce explicit according to spec */ buf[3] = pkt_len >> 8; buf[4] = pkt_len & 0xFF; diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 055b9992d8c78..a66c6a30669c7 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -218,6 +218,15 @@ static void tls_write_space(struct sock *sk) ctx->sk_write_space(sk); } +static void tls_ctx_free(struct tls_context *ctx) +{ + if (!ctx) + return; + + memzero_explicit(&ctx->crypto_send, sizeof(ctx->crypto_send)); + kfree(ctx); +} + static void tls_sk_proto_close(struct sock *sk, long timeout) { struct tls_context *ctx = tls_get_ctx(sk); @@ -246,7 +255,7 @@ static void tls_sk_proto_close(struct sock *sk, long timeout) kfree(ctx->iv); sk_proto_close = ctx->sk_proto_close; - kfree(ctx); + tls_ctx_free(ctx); release_sock(sk); sk_proto_close(sk, timeout); @@ -274,7 +283,7 @@ static int do_tls_getsockopt_tx(struct sock *sk, char __user *optval, } /* get user crypto info */ - crypto_info = &ctx->crypto_send; + crypto_info = &ctx->crypto_send.info; if (!TLS_CRYPTO_INFO_READY(crypto_info)) { rc = -EBUSY; @@ -371,7 +380,7 @@ static int do_tls_setsockopt_tx(struct sock *sk, char __user *optval, } /* get user crypto info */ - crypto_info = &ctx->crypto_send; + crypto_info = &ctx->crypto_send.info; /* Currently we don't support set crypto info more than one time */ if (TLS_CRYPTO_INFO_READY(crypto_info)) { diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 7fd5e04755ea9..6ae9ca567d6ca 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -687,7 +687,7 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx) ctx->priv_ctx = (struct tls_offload_context *)sw_ctx; ctx->free_resources = tls_sw_free_resources; - crypto_info = &ctx->crypto_send; + crypto_info = &ctx->crypto_send.info; switch (crypto_info->cipher_type) { case TLS_CIPHER_AES_GCM_128: { nonce_size = TLS_CIPHER_AES_GCM_128_IV_SIZE; -- GitLab From 18fef87e05d3569979bfce16ad7284259f60b52c Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Wed, 12 Sep 2018 17:44:43 +0200 Subject: [PATCH 0517/2269] tls: clear key material from kernel memory when do_tls_setsockopt_conf fails [ Upstream commit c844eb46b7d43c2cf760169df5ae1d5b033af338 ] Fixes: 3c4d7559159b ("tls: kernel TLS support") Signed-off-by: Sabrina Dubroca Signed-off-by: Sabrina Dubroca Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/tls/tls_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index a66c6a30669c7..efa2cdba99d30 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -425,7 +425,7 @@ static int do_tls_setsockopt_tx(struct sock *sk, char __user *optval, goto out; err_crypto_info: - memset(crypto_info, 0, sizeof(*crypto_info)); + memzero_explicit(crypto_info, sizeof(union tls_crypto_context)); out: return rc; } -- GitLab From 4a16b3cd084df928ae2b19cf9cf61c406de8a80d Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Mon, 17 Sep 2018 15:51:40 +0200 Subject: [PATCH 0518/2269] NFC: Fix possible memory corruption when handling SHDLC I-Frame commands commit 674d9de02aa7d521ebdf66c3958758bdd9c64e11 upstream. When handling SHDLC I-Frame commands "pipe" field used for indexing into an array should be checked before usage. If left unchecked it might access memory outside of the array of size NFC_HCI_MAX_PIPES(127). Malformed NFC HCI frames could be injected by a malicious NFC device communicating with the device being attacked (remote attack vector), or even by an attacker with physical access to the I2C bus such that they could influence the data transfers on that bus (local attack vector). skb->data is controlled by the attacker and has only been sanitized in the most trivial ways (CRC check), therefore we can consider the create_info struct and all of its members to tainted. 'create_info->pipe' with max value of 255 (uint8) is used to take an offset of the hdev->pipes array of 127 elements which can lead to OOB write. Cc: Samuel Ortiz Cc: Allen Pais Cc: "David S. Miller" Suggested-by: Kevin Deus Signed-off-by: Suren Baghdasaryan Acked-by: Kees Cook Cc: stable Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/nfc/hci/core.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c index b740fef0acc5e..6bf14f4f4b428 100644 --- a/net/nfc/hci/core.c +++ b/net/nfc/hci/core.c @@ -209,6 +209,11 @@ void nfc_hci_cmd_received(struct nfc_hci_dev *hdev, u8 pipe, u8 cmd, } create_info = (struct hci_create_pipe_resp *)skb->data; + if (create_info->pipe >= NFC_HCI_MAX_PIPES) { + status = NFC_HCI_ANY_E_NOK; + goto exit; + } + /* Save the new created pipe and bind with local gate, * the description for skb->data[3] is destination gate id * but since we received this cmd from host controller, we @@ -232,6 +237,11 @@ void nfc_hci_cmd_received(struct nfc_hci_dev *hdev, u8 pipe, u8 cmd, } delete_info = (struct hci_delete_pipe_noti *)skb->data; + if (delete_info->pipe >= NFC_HCI_MAX_PIPES) { + status = NFC_HCI_ANY_E_NOK; + goto exit; + } + hdev->pipes[delete_info->pipe].gate = NFC_HCI_INVALID_GATE; hdev->pipes[delete_info->pipe].dest_host = NFC_HCI_INVALID_HOST; break; -- GitLab From 6ead7a8a4ec1c69c39951d96ba397010c7d14e26 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Mon, 17 Sep 2018 15:51:41 +0200 Subject: [PATCH 0519/2269] NFC: Fix the number of pipes commit e285d5bfb7e9785d289663baef252dd315e171f8 upstream. According to ETSI TS 102 622 specification chapter 4.4 pipe identifier is 7 bits long which allows for 128 unique pipe IDs. Because NFC_HCI_MAX_PIPES is used as the number of pipes supported and not as the max pipe ID, its value should be 128 instead of 127. nfc_hci_recv_from_llc extracts pipe ID from packet header using NFC_HCI_FRAGMENT(0x7F) mask which allows for pipe ID value of 127. Same happens when NCI_HCP_MSG_GET_PIPE() is being used. With pipes array having only 127 elements and pipe ID of 127 the OOB memory access will result. Cc: Samuel Ortiz Cc: Allen Pais Cc: "David S. Miller" Suggested-by: Dan Carpenter Signed-off-by: Suren Baghdasaryan Reviewed-by: Kees Cook Cc: stable Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/nfc/hci.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/nfc/hci.h b/include/net/nfc/hci.h index 316694dafa5ba..008f466d1da7e 100644 --- a/include/net/nfc/hci.h +++ b/include/net/nfc/hci.h @@ -87,7 +87,7 @@ struct nfc_hci_pipe { * According to specification 102 622 chapter 4.4 Pipes, * the pipe identifier is 7 bits long. */ -#define NFC_HCI_MAX_PIPES 127 +#define NFC_HCI_MAX_PIPES 128 struct nfc_hci_init_data { u8 gate_count; struct nfc_hci_gate gates[NFC_HCI_MAX_CUSTOM_GATES]; -- GitLab From a388e6d7a822e1a6c87e25d90bb744b34c44ca34 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Szymanski?= Date: Thu, 6 Sep 2018 11:16:00 +0200 Subject: [PATCH 0520/2269] ASoC: cs4265: fix MMTLR Data switch control MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 90a3b7f8aba3011badacd6d8121e03aa24ac79d1 upstream. The MMTLR bit is in the CS4265_SPDIF_CTL2 register at address 0x12 bit 0 and not at address 0x0 bit 1. Fix this. Signed-off-by: Sébastien Szymanski Signed-off-by: Mark Brown Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- sound/soc/codecs/cs4265.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/cs4265.c b/sound/soc/codecs/cs4265.c index fd966bb851cbc..6e8eb1f5a0415 100644 --- a/sound/soc/codecs/cs4265.c +++ b/sound/soc/codecs/cs4265.c @@ -157,8 +157,8 @@ static const struct snd_kcontrol_new cs4265_snd_controls[] = { SOC_SINGLE("Validity Bit Control Switch", CS4265_SPDIF_CTL2, 3, 1, 0), SOC_ENUM("SPDIF Mono/Stereo", spdif_mono_stereo_enum), - SOC_SINGLE("MMTLR Data Switch", 0, - 1, 1, 0), + SOC_SINGLE("MMTLR Data Switch", CS4265_SPDIF_CTL2, + 0, 1, 0), SOC_ENUM("Mono Channel Select", spdif_mono_select_enum), SND_SOC_BYTES("C Data Buffer", CS4265_C_DATA_BUFF, 24), }; -- GitLab From c7cf0304d41f68b47b3eaf6ef2f8ca9c3debfefc Mon Sep 17 00:00:00 2001 From: Jiada Wang Date: Mon, 3 Sep 2018 07:08:58 +0000 Subject: [PATCH 0521/2269] ASoC: rsnd: fixup not to call clk_get/set under non-atomic commit 4d230d12710646788af581ba0155d83ab48b955c upstream. Clocking operations clk_get/set_rate, are non-atomic, they shouldn't be called in soc_pcm_trigger() which is atomic. Following issue was found due to execution of clk_get_rate() causes sleep in soc_pcm_trigger(), which shouldn't be blocked. We can reproduce this issue by following > enable CONFIG_DEBUG_ATOMIC_SLEEP=y > compile, and boot > mount -t debugfs none /sys/kernel/debug > while true; do cat /sys/kernel/debug/clk/clk_summary > /dev/null; done & > while true; do aplay xxx; done This patch adds support to .prepare callback, and moves non-atomic clocking operations to it. As .prepare is non-atomic, it is always called before trigger_start/trigger_stop. BUG: sleeping function called from invalid context at kernel/locking/mutex.c:620 in_atomic(): 1, irqs_disabled(): 128, pid: 2242, name: aplay INFO: lockdep is turned off. irq event stamp: 5964 hardirqs last enabled at (5963): [] mutex_lock_nested+0x6e8/0x6f0 hardirqs last disabled at (5964): [] _raw_spin_lock_irqsave+0x24/0x68 softirqs last enabled at (5502): [] __do_softirq+0x560/0x10c0 softirqs last disabled at (5495): [] irq_exit+0x160/0x25c Preemption disabled at:[ 62.904063] [] snd_pcm_stream_lock+0xb4/0xc0 CPU: 2 PID: 2242 Comm: aplay Tainted: G B C 4.9.54+ #186 Hardware name: Renesas Salvator-X board based on r8a7795 (DT) Call trace: [] dump_backtrace+0x0/0x37c [] show_stack+0x14/0x1c [] dump_stack+0xfc/0x154 [] ___might_sleep+0x57c/0x58c [] __might_sleep+0x208/0x21c [] mutex_lock_nested+0xb4/0x6f0 [] clk_prepare_lock+0xb0/0x184 [] clk_core_get_rate+0x14/0x54 [] clk_get_rate+0x20/0x34 [] rsnd_adg_ssi_clk_try_start+0x158/0x4f8 [snd_soc_rcar] [] rsnd_ssi_init+0x668/0x7a0 [snd_soc_rcar] [] rsnd_soc_dai_trigger+0x4bc/0xcf8 [snd_soc_rcar] [] soc_pcm_trigger+0x2a4/0x2d4 Fixes: e7d850dd10f4 ("ASoC: rsnd: use mod base common method on SSI-parent") Signed-off-by: Jiada Wang Signed-off-by: Timo Wischer [Kuninori: tidyup for upstream] Signed-off-by: Kuninori Morimoto Tested-by: Hiroyuki Yokoyama Signed-off-by: Mark Brown Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- sound/soc/sh/rcar/core.c | 11 +++++++++++ sound/soc/sh/rcar/rsnd.h | 7 +++++++ sound/soc/sh/rcar/ssi.c | 16 ++++++++++------ 3 files changed, 28 insertions(+), 6 deletions(-) diff --git a/sound/soc/sh/rcar/core.c b/sound/soc/sh/rcar/core.c index 107133297e8dd..9896e736fa5cc 100644 --- a/sound/soc/sh/rcar/core.c +++ b/sound/soc/sh/rcar/core.c @@ -925,12 +925,23 @@ static void rsnd_soc_dai_shutdown(struct snd_pcm_substream *substream, rsnd_dai_call(nolock_stop, io, priv); } +static int rsnd_soc_dai_prepare(struct snd_pcm_substream *substream, + struct snd_soc_dai *dai) +{ + struct rsnd_priv *priv = rsnd_dai_to_priv(dai); + struct rsnd_dai *rdai = rsnd_dai_to_rdai(dai); + struct rsnd_dai_stream *io = rsnd_rdai_to_io(rdai, substream); + + return rsnd_dai_call(prepare, io, priv); +} + static const struct snd_soc_dai_ops rsnd_soc_dai_ops = { .startup = rsnd_soc_dai_startup, .shutdown = rsnd_soc_dai_shutdown, .trigger = rsnd_soc_dai_trigger, .set_fmt = rsnd_soc_dai_set_fmt, .set_tdm_slot = rsnd_soc_set_dai_tdm_slot, + .prepare = rsnd_soc_dai_prepare, }; void rsnd_parse_connect_common(struct rsnd_dai *rdai, diff --git a/sound/soc/sh/rcar/rsnd.h b/sound/soc/sh/rcar/rsnd.h index c5de71f2dc8c4..1768a0ae469d0 100644 --- a/sound/soc/sh/rcar/rsnd.h +++ b/sound/soc/sh/rcar/rsnd.h @@ -283,6 +283,9 @@ struct rsnd_mod_ops { int (*nolock_stop)(struct rsnd_mod *mod, struct rsnd_dai_stream *io, struct rsnd_priv *priv); + int (*prepare)(struct rsnd_mod *mod, + struct rsnd_dai_stream *io, + struct rsnd_priv *priv); }; struct rsnd_dai_stream; @@ -312,6 +315,7 @@ struct rsnd_mod { * H 0: fallback * H 0: hw_params * H 0: pointer + * H 0: prepare */ #define __rsnd_mod_shift_nolock_start 0 #define __rsnd_mod_shift_nolock_stop 0 @@ -326,6 +330,7 @@ struct rsnd_mod { #define __rsnd_mod_shift_fallback 28 /* always called */ #define __rsnd_mod_shift_hw_params 28 /* always called */ #define __rsnd_mod_shift_pointer 28 /* always called */ +#define __rsnd_mod_shift_prepare 28 /* always called */ #define __rsnd_mod_add_probe 0 #define __rsnd_mod_add_remove 0 @@ -340,6 +345,7 @@ struct rsnd_mod { #define __rsnd_mod_add_fallback 0 #define __rsnd_mod_add_hw_params 0 #define __rsnd_mod_add_pointer 0 +#define __rsnd_mod_add_prepare 0 #define __rsnd_mod_call_probe 0 #define __rsnd_mod_call_remove 0 @@ -354,6 +360,7 @@ struct rsnd_mod { #define __rsnd_mod_call_pointer 0 #define __rsnd_mod_call_nolock_start 0 #define __rsnd_mod_call_nolock_stop 1 +#define __rsnd_mod_call_prepare 0 #define rsnd_mod_to_priv(mod) ((mod)->priv) #define rsnd_mod_id(mod) ((mod) ? (mod)->id : -1) diff --git a/sound/soc/sh/rcar/ssi.c b/sound/soc/sh/rcar/ssi.c index f0fb85fda42d8..34223c8c28a87 100644 --- a/sound/soc/sh/rcar/ssi.c +++ b/sound/soc/sh/rcar/ssi.c @@ -280,7 +280,7 @@ static int rsnd_ssi_master_clk_start(struct rsnd_mod *mod, if (rsnd_ssi_is_multi_slave(mod, io)) return 0; - if (ssi->usrcnt > 1) { + if (ssi->rate) { if (ssi->rate != rate) { dev_err(dev, "SSI parent/child should use same rate\n"); return -EINVAL; @@ -482,7 +482,6 @@ static int rsnd_ssi_init(struct rsnd_mod *mod, struct rsnd_priv *priv) { struct rsnd_ssi *ssi = rsnd_mod_to_ssi(mod); - int ret; if (!rsnd_ssi_is_run_mods(mod, io)) return 0; @@ -493,10 +492,6 @@ static int rsnd_ssi_init(struct rsnd_mod *mod, rsnd_mod_power_on(mod); - ret = rsnd_ssi_master_clk_start(mod, io); - if (ret < 0) - return ret; - rsnd_ssi_config_init(mod, io); rsnd_ssi_register_setup(mod); @@ -847,6 +842,13 @@ static int rsnd_ssi_pointer(struct rsnd_mod *mod, return 0; } +static int rsnd_ssi_prepare(struct rsnd_mod *mod, + struct rsnd_dai_stream *io, + struct rsnd_priv *priv) +{ + return rsnd_ssi_master_clk_start(mod, io); +} + static struct rsnd_mod_ops rsnd_ssi_pio_ops = { .name = SSI_NAME, .probe = rsnd_ssi_common_probe, @@ -859,6 +861,7 @@ static struct rsnd_mod_ops rsnd_ssi_pio_ops = { .pointer= rsnd_ssi_pointer, .pcm_new = rsnd_ssi_pcm_new, .hw_params = rsnd_ssi_hw_params, + .prepare = rsnd_ssi_prepare, }; static int rsnd_ssi_dma_probe(struct rsnd_mod *mod, @@ -935,6 +938,7 @@ static struct rsnd_mod_ops rsnd_ssi_dma_ops = { .pcm_new = rsnd_ssi_pcm_new, .fallback = rsnd_ssi_fallback, .hw_params = rsnd_ssi_hw_params, + .prepare = rsnd_ssi_prepare, }; int rsnd_ssi_is_dma_mode(struct rsnd_mod *mod) -- GitLab From 16b8c03864b7cc6b8413b08b8fb5fd980b39b4b5 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Mon, 17 Sep 2018 17:25:24 +0900 Subject: [PATCH 0522/2269] ALSA: bebob: fix memory leak for M-Audio FW1814 and ProjectMix I/O at error path commit b1fbebd4164b3d170ad916dcd692cf843c9c065d upstream. After allocating model-dependent data for M-Audio FW1814 and ProjectMix I/O, ALSA bebob driver has memory leak at error path. This commit releases the allocated data at the error path. Fixes: 04a2c73c97eb('ALSA: bebob: delayed registration of sound card') Cc: # v4.7+ Signed-off-by: Takashi Sakamoto Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/firewire/bebob/bebob.c | 2 ++ sound/firewire/bebob/bebob_maudio.c | 4 ---- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/sound/firewire/bebob/bebob.c b/sound/firewire/bebob/bebob.c index 730ea91d9be88..93676354f87f4 100644 --- a/sound/firewire/bebob/bebob.c +++ b/sound/firewire/bebob/bebob.c @@ -263,6 +263,8 @@ do_registration(struct work_struct *work) error: mutex_unlock(&devices_mutex); snd_bebob_stream_destroy_duplex(bebob); + kfree(bebob->maudio_special_quirk); + bebob->maudio_special_quirk = NULL; snd_card_free(bebob->card); dev_info(&bebob->unit->device, "Sound card registration failed: %d\n", err); diff --git a/sound/firewire/bebob/bebob_maudio.c b/sound/firewire/bebob/bebob_maudio.c index bd55620c6a479..164f2126a6769 100644 --- a/sound/firewire/bebob/bebob_maudio.c +++ b/sound/firewire/bebob/bebob_maudio.c @@ -290,10 +290,6 @@ snd_bebob_maudio_special_discover(struct snd_bebob *bebob, bool is1814) bebob->midi_output_ports = 2; } end: - if (err < 0) { - kfree(params); - bebob->maudio_special_quirk = NULL; - } mutex_unlock(&bebob->mutex); return err; } -- GitLab From 7c4881d64ed750dd8fac1d824fd1f99b2b9d124b Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Sun, 9 Sep 2018 22:25:12 +0900 Subject: [PATCH 0523/2269] ALSA: bebob: use address returned by kmalloc() instead of kernel stack for streaming DMA mapping commit 493626f2d87a74e6dbea1686499ed6e7e600484e upstream. When executing 'fw_run_transaction()' with 'TCODE_WRITE_BLOCK_REQUEST', an address of 'payload' argument is used for streaming DMA mapping by 'firewire_ohci' module if 'size' argument is larger than 8 byte. Although in this case the address should not be on kernel stack, current implementation of ALSA bebob driver uses data in kernel stack for a cue to boot M-Audio devices. This often brings unexpected result, especially for a case of CONFIG_VMAP_STACK=y. This commit fixes the bug. Reference: https://bugzilla.kernel.org/show_bug.cgi?id=201021 Reference: https://forum.manjaro.org/t/firewire-m-audio-410-driver-wont-load-firmware/51165 Fixes: a2b2a7798fb6('ALSA: bebob: Send a cue to load firmware for M-Audio Firewire series') Cc: # v3.16+ Signed-off-by: Takashi Sakamoto Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/firewire/bebob/bebob_maudio.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/sound/firewire/bebob/bebob_maudio.c b/sound/firewire/bebob/bebob_maudio.c index 164f2126a6769..c266997ad299d 100644 --- a/sound/firewire/bebob/bebob_maudio.c +++ b/sound/firewire/bebob/bebob_maudio.c @@ -96,17 +96,13 @@ int snd_bebob_maudio_load_firmware(struct fw_unit *unit) struct fw_device *device = fw_parent_device(unit); int err, rcode; u64 date; - __le32 cues[3] = { - cpu_to_le32(MAUDIO_BOOTLOADER_CUE1), - cpu_to_le32(MAUDIO_BOOTLOADER_CUE2), - cpu_to_le32(MAUDIO_BOOTLOADER_CUE3) - }; + __le32 *cues; /* check date of software used to build */ err = snd_bebob_read_block(unit, INFO_OFFSET_SW_DATE, &date, sizeof(u64)); if (err < 0) - goto end; + return err; /* * firmware version 5058 or later has date later than "20070401", but * 'date' is not null-terminated. @@ -114,20 +110,28 @@ int snd_bebob_maudio_load_firmware(struct fw_unit *unit) if (date < 0x3230303730343031LL) { dev_err(&unit->device, "Use firmware version 5058 or later\n"); - err = -ENOSYS; - goto end; + return -ENXIO; } + cues = kmalloc_array(3, sizeof(*cues), GFP_KERNEL); + if (!cues) + return -ENOMEM; + + cues[0] = cpu_to_le32(MAUDIO_BOOTLOADER_CUE1); + cues[1] = cpu_to_le32(MAUDIO_BOOTLOADER_CUE2); + cues[2] = cpu_to_le32(MAUDIO_BOOTLOADER_CUE3); + rcode = fw_run_transaction(device->card, TCODE_WRITE_BLOCK_REQUEST, device->node_id, device->generation, device->max_speed, BEBOB_ADDR_REG_REQ, - cues, sizeof(cues)); + cues, 3 * sizeof(*cues)); + kfree(cues); if (rcode != RCODE_COMPLETE) { dev_err(&unit->device, "Failed to send a cue to load firmware\n"); err = -EIO; } -end: + return err; } -- GitLab From 352701c288c1aef805d1c904c9de3b3a211ac71b Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Sat, 8 Sep 2018 08:12:21 +0200 Subject: [PATCH 0524/2269] ALSA: emu10k1: fix possible info leak to userspace on SNDRV_EMU10K1_IOCTL_INFO commit 49434c6c575d2008c0abbc93e615019f39e01252 upstream. snd_emu10k1_fx8010_ioctl(SNDRV_EMU10K1_IOCTL_INFO) allocates memory using kmalloc() and partially fills it by calling snd_emu10k1_fx8010_info() before returning the resulting structure to userspace, leaving uninitialized holes. Let's just use kzalloc() here. BugLink: http://blog.infosectcbr.com.au/2018/09/linux-kernel-infoleaks.html Signed-off-by: Willy Tarreau Cc: Jann Horn Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/emu10k1/emufx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pci/emu10k1/emufx.c b/sound/pci/emu10k1/emufx.c index a2b56b188be4d..d68bb40d36760 100644 --- a/sound/pci/emu10k1/emufx.c +++ b/sound/pci/emu10k1/emufx.c @@ -2547,7 +2547,7 @@ static int snd_emu10k1_fx8010_ioctl(struct snd_hwdep * hw, struct file *file, un emu->support_tlv = 1; return put_user(SNDRV_EMU10K1_VERSION, (int __user *)argp); case SNDRV_EMU10K1_IOCTL_INFO: - info = kmalloc(sizeof(*info), GFP_KERNEL); + info = kzalloc(sizeof(*info), GFP_KERNEL); if (!info) return -ENOMEM; snd_emu10k1_fx8010_info(emu, info); -- GitLab From 70165a445b00c7bb0f68c955da3c100817ac0c87 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Sun, 9 Sep 2018 22:25:52 +0900 Subject: [PATCH 0525/2269] ALSA: fireface: fix memory leak in ff400_switch_fetching_mode() commit 36f3a6e02c143a7e9e4e143e416371f67bc1fae6 upstream. An allocated memory forgets to be released. Fixes: 76fdb3a9e13 ('ALSA: fireface: add support for Fireface 400') Cc: # 4.12+ Signed-off-by: Takashi Sakamoto Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/firewire/fireface/ff-protocol-ff400.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/sound/firewire/fireface/ff-protocol-ff400.c b/sound/firewire/fireface/ff-protocol-ff400.c index 12aa15df435d1..9f5036442ab96 100644 --- a/sound/firewire/fireface/ff-protocol-ff400.c +++ b/sound/firewire/fireface/ff-protocol-ff400.c @@ -146,6 +146,7 @@ static int ff400_switch_fetching_mode(struct snd_ff *ff, bool enable) { __le32 *reg; int i; + int err; reg = kzalloc(sizeof(__le32) * 18, GFP_KERNEL); if (reg == NULL) @@ -163,9 +164,11 @@ static int ff400_switch_fetching_mode(struct snd_ff *ff, bool enable) reg[i] = cpu_to_le32(0x00000001); } - return snd_fw_transaction(ff->unit, TCODE_WRITE_BLOCK_REQUEST, - FF400_FETCH_PCM_FRAMES, reg, - sizeof(__le32) * 18, 0); + err = snd_fw_transaction(ff->unit, TCODE_WRITE_BLOCK_REQUEST, + FF400_FETCH_PCM_FRAMES, reg, + sizeof(__le32) * 18, 0); + kfree(reg); + return err; } static void ff400_dump_sync_status(struct snd_ff *ff, -- GitLab From 933f20a61e26467cae26935aeda43979035fb286 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Thu, 13 Sep 2018 21:30:34 +0900 Subject: [PATCH 0526/2269] ALSA: firewire-digi00x: fix memory leak of private data commit a49a83ab05e34edd6c71a4fbd062c9a7ba6d18aa upstream. Although private data of sound card instance is usually allocated in the tail of the instance, drivers in ALSA firewire stack allocate the private data before allocating the instance. In this case, the private data should be released explicitly at .private_free callback of the instance. This commit fixes memory leak following to the above design. Fixes: 86c8dd7f4da3 ('ALSA: firewire-digi00x: delayed registration of sound card') Cc: # v4.7+ Signed-off-by: Takashi Sakamoto Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/firewire/digi00x/digi00x.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/firewire/digi00x/digi00x.c b/sound/firewire/digi00x/digi00x.c index 1f5e1d23f31a7..ef689997d6a5b 100644 --- a/sound/firewire/digi00x/digi00x.c +++ b/sound/firewire/digi00x/digi00x.c @@ -49,6 +49,7 @@ static void dg00x_free(struct snd_dg00x *dg00x) fw_unit_put(dg00x->unit); mutex_destroy(&dg00x->mutex); + kfree(dg00x); } static void dg00x_card_free(struct snd_card *card) -- GitLab From 40e2596f06b0b89ae03c14e6fb48bdc2600f4003 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Thu, 13 Sep 2018 21:31:05 +0900 Subject: [PATCH 0527/2269] ALSA: firewire-tascam: fix memory leak of private data commit 8d28277c065a974873c6781d44b7bcdcd8fb4e8a upstream. Although private data of sound card instance is usually allocated in the tail of the instance, drivers in ALSA firewire stack allocate the private data before allocating the instance. In this case, the private data should be released explicitly at .private_free callback of the instance. This commit fixes memory leak following to the above design. Fixes: b610386c8afb ('ALSA: firewire-tascam: deleyed registration of sound card') Cc: # v4.7+ Signed-off-by: Takashi Sakamoto Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/firewire/tascam/tascam.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/firewire/tascam/tascam.c b/sound/firewire/tascam/tascam.c index 44ad41fb73740..d3fdc463a884e 100644 --- a/sound/firewire/tascam/tascam.c +++ b/sound/firewire/tascam/tascam.c @@ -93,6 +93,7 @@ static void tscm_free(struct snd_tscm *tscm) fw_unit_put(tscm->unit); mutex_destroy(&tscm->mutex); + kfree(tscm); } static void tscm_card_free(struct snd_card *card) -- GitLab From d9929097176dc69d993715b80983bdbca06307e7 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Mon, 17 Sep 2018 17:26:41 +0900 Subject: [PATCH 0528/2269] ALSA: fireworks: fix memory leak of response buffer at error path commit c3b55e2ec9c76e7a0de2a0b1dc851fdc9440385b upstream. After allocating memory object for response buffer, ALSA fireworks driver has leak of the memory object at error path. This commit releases the object at the error path. Fixes: 7d3c1d5901aa('ALSA: fireworks: delayed registration of sound card') Cc: # v4.7+ Signed-off-by: Takashi Sakamoto Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/firewire/fireworks/fireworks.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/firewire/fireworks/fireworks.c b/sound/firewire/fireworks/fireworks.c index 71a0613d3da04..f2d073365cf63 100644 --- a/sound/firewire/fireworks/fireworks.c +++ b/sound/firewire/fireworks/fireworks.c @@ -301,6 +301,8 @@ error: snd_efw_transaction_remove_instance(efw); snd_efw_stream_destroy_duplex(efw); snd_card_free(efw->card); + kfree(efw->resp_buf); + efw->resp_buf = NULL; dev_info(&efw->unit->device, "Sound card registration failed: %d\n", err); } -- GitLab From 996899a9cbd85b05804b80574ff4290d27913693 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Mon, 17 Sep 2018 17:26:08 +0900 Subject: [PATCH 0529/2269] ALSA: oxfw: fix memory leak for model-dependent data at error path commit ce925f088b979537f22f9e05eb923ef9822ca139 upstream. After allocating model-dependent data, ALSA OXFW driver has memory leak of the data at error path. This commit releases the data at the error path. Fixes: 6c29230e2a5f ('ALSA: oxfw: delayed registration of sound card') Cc: # v4.7+ Signed-off-by: Takashi Sakamoto Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/firewire/oxfw/oxfw.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/firewire/oxfw/oxfw.c b/sound/firewire/oxfw/oxfw.c index 413ab6313bb66..d696d548c23af 100644 --- a/sound/firewire/oxfw/oxfw.c +++ b/sound/firewire/oxfw/oxfw.c @@ -276,6 +276,8 @@ error: if (oxfw->has_output) snd_oxfw_stream_destroy_simplex(oxfw, &oxfw->tx_stream); snd_card_free(oxfw->card); + kfree(oxfw->spec); + oxfw->spec = NULL; dev_info(&oxfw->unit->device, "Sound card registration failed: %d\n", err); } -- GitLab From 08f4f8b938092a9b4b99eeff3ee6828e5cc20373 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Mon, 17 Sep 2018 17:26:20 +0900 Subject: [PATCH 0530/2269] ALSA: oxfw: fix memory leak of discovered stream formats at error path commit 1064bc685d359f549f91c2d5f111965a9284f328 upstream. After finishing discover of stream formats, ALSA OXFW driver has memory leak of allocated memory object at error path. This commit releases the memory object at the error path. Fixes: 6c29230e2a5f ('ALSA: oxfw: delayed registration of sound card') Cc: # v4.7+ Signed-off-by: Takashi Sakamoto Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/firewire/oxfw/oxfw.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sound/firewire/oxfw/oxfw.c b/sound/firewire/oxfw/oxfw.c index d696d548c23af..79b2942632595 100644 --- a/sound/firewire/oxfw/oxfw.c +++ b/sound/firewire/oxfw/oxfw.c @@ -213,6 +213,7 @@ static int detect_quirks(struct snd_oxfw *oxfw) static void do_registration(struct work_struct *work) { struct snd_oxfw *oxfw = container_of(work, struct snd_oxfw, dwork.work); + int i; int err; if (oxfw->registered) @@ -275,6 +276,12 @@ error: snd_oxfw_stream_destroy_simplex(oxfw, &oxfw->rx_stream); if (oxfw->has_output) snd_oxfw_stream_destroy_simplex(oxfw, &oxfw->tx_stream); + for (i = 0; i < SND_OXFW_STREAM_FORMAT_ENTRIES; ++i) { + kfree(oxfw->tx_stream_formats[i]); + oxfw->tx_stream_formats[i] = NULL; + kfree(oxfw->rx_stream_formats[i]); + oxfw->rx_stream_formats[i] = NULL; + } snd_card_free(oxfw->card); kfree(oxfw->spec); oxfw->spec = NULL; -- GitLab From ff6805037d838dd003b6fb88b9b0232b3d05bc07 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Thu, 13 Sep 2018 21:31:18 +0900 Subject: [PATCH 0531/2269] ALSA: oxfw: fix memory leak of private data commit 498fe23aad8e3b5a9554f55719c537603b4476ea upstream. Although private data of sound card instance is usually allocated in the tail of the instance, drivers in ALSA firewire stack allocate the private data before allocating the instance. In this case, the private data should be released explicitly at .private_free callback of the instance. This commit fixes memory leak following to the above design. Fixes: 6c29230e2a5f ('ALSA: oxfw: delayed registration of sound card') Cc: # v4.7+ Signed-off-by: Takashi Sakamoto Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/firewire/oxfw/oxfw.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/firewire/oxfw/oxfw.c b/sound/firewire/oxfw/oxfw.c index 79b2942632595..a315d5b6b86b5 100644 --- a/sound/firewire/oxfw/oxfw.c +++ b/sound/firewire/oxfw/oxfw.c @@ -136,6 +136,7 @@ static void oxfw_free(struct snd_oxfw *oxfw) kfree(oxfw->spec); mutex_destroy(&oxfw->mutex); + kfree(oxfw); } /* -- GitLab From e2d5285b98ef43f55546873d1f58b41fe583f72d Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 10 Sep 2018 13:01:53 -0500 Subject: [PATCH 0532/2269] platform/x86: alienware-wmi: Correct a memory leak commit ff0e9f26288d2daee4950f42b37a3d3d30d36ec1 upstream. An ACPI buffer that was allocated was not being freed after use. Signed-off-by: Mario Limonciello Cc: stable@vger.kernel.org Signed-off-by: Darren Hart (VMware) Signed-off-by: Greg Kroah-Hartman --- drivers/platform/x86/alienware-wmi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/platform/x86/alienware-wmi.c b/drivers/platform/x86/alienware-wmi.c index 4eb8e1a472b23..e335b18da20fc 100644 --- a/drivers/platform/x86/alienware-wmi.c +++ b/drivers/platform/x86/alienware-wmi.c @@ -519,6 +519,7 @@ static acpi_status alienware_wmax_command(struct wmax_basic_args *in_args, if (obj && obj->type == ACPI_TYPE_INTEGER) *out_data = (u32) obj->integer.value; } + kfree(output.pointer); return status; } -- GitLab From 7eced4478f4ec6ff0a603ddfd6e24a5d3831fca7 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Tue, 11 Sep 2018 09:04:48 +0200 Subject: [PATCH 0533/2269] xen/netfront: don't bug in case of too many frags commit ad4f15dc2c70b1de5e0a64d27335962fbc9cf71c upstream. Commit 57f230ab04d291 ("xen/netfront: raise max number of slots in xennet_get_responses()") raised the max number of allowed slots by one. This seems to be problematic in some configurations with netback using a larger MAX_SKB_FRAGS value (e.g. old Linux kernel with MAX_SKB_FRAGS defined as 18 instead of nowadays 17). Instead of BUG_ON() in this case just fall back to retransmission. Fixes: 57f230ab04d291 ("xen/netfront: raise max number of slots in xennet_get_responses()") Cc: stable@vger.kernel.org Signed-off-by: Juergen Gross Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/xen-netfront.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index d80343429de5f..1a40fc3517a8f 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -907,7 +907,11 @@ static RING_IDX xennet_fill_frags(struct netfront_queue *queue, BUG_ON(pull_to <= skb_headlen(skb)); __pskb_pull_tail(skb, pull_to - skb_headlen(skb)); } - BUG_ON(skb_shinfo(skb)->nr_frags >= MAX_SKB_FRAGS); + if (unlikely(skb_shinfo(skb)->nr_frags >= MAX_SKB_FRAGS)) { + queue->rx.rsp_cons = ++cons; + kfree_skb(nskb); + return ~0U; + } skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, skb_frag_page(nfrag), @@ -1044,6 +1048,8 @@ err: skb->len += rx->status; i = xennet_fill_frags(queue, skb, &tmpq); + if (unlikely(i == ~0U)) + goto err; if (rx->flags & XEN_NETRXF_csum_blank) skb->ip_summed = CHECKSUM_PARTIAL; -- GitLab From 5ca87a38202e6265214afbe49353a10be81a6ecc Mon Sep 17 00:00:00 2001 From: Boris Ostrovsky Date: Thu, 12 Jul 2018 13:27:00 -0400 Subject: [PATCH 0534/2269] xen/x86/vpmu: Zero struct pt_regs before calling into sample handling code commit 70513d58751d7c6c1a0133557b13089b9f2e3e66 upstream. Otherwise we may leak kernel stack for events that sample user registers. Reported-by: Mark Rutland Reviewed-by: Juergen Gross Signed-off-by: Boris Ostrovsky Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/xen/pmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/xen/pmu.c b/arch/x86/xen/pmu.c index 7d00d4ad44d44..95997e6c06960 100644 --- a/arch/x86/xen/pmu.c +++ b/arch/x86/xen/pmu.c @@ -478,7 +478,7 @@ static void xen_convert_regs(const struct xen_pmu_regs *xen_regs, irqreturn_t xen_pmu_irq_handler(int irq, void *dev_id) { int err, ret = IRQ_NONE; - struct pt_regs regs; + struct pt_regs regs = {0}; const struct xen_pmu_data *xenpmu_data = get_xenpmu_data(); uint8_t xenpmu_flags = get_xenpmu_flags(); -- GitLab From f3765abb60c707243f3cbdd4be07bdac689576b5 Mon Sep 17 00:00:00 2001 From: Kirill Kapranov Date: Mon, 13 Aug 2018 19:48:10 +0300 Subject: [PATCH 0535/2269] spi: fix IDR collision on systems with both fixed and dynamic SPI bus numbers commit 1a4327fbf4554d5b78d75b19a13d40d6de220159 upstream. On systems where some controllers get a dynamic ID assigned and some have a fixed number (e.g. from ACPI tables), the current implementation might run into an IDR collision: in case of a fixed bus number is gotten by a driver (but not marked busy in IDR tree) and a driver with dynamic bus number gets the same ID and predictably fails. Fix this by means of checking-in fixed IDsin IDR as far as dynamic ones at the moment of the controller registration. Fixes: 9b61e302210e (spi: Pick spi bus number from Linux idr or spi alias) Signed-off-by: Kirill Kapranov Signed-off-by: Mark Brown Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index f85d30dc91878..780a32175ec09 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -2135,6 +2135,15 @@ int spi_register_controller(struct spi_controller *ctlr) if (WARN(id < 0, "couldn't get idr")) return id; ctlr->bus_num = id; + } else { + /* devices with a fixed bus num must check-in with the num */ + mutex_lock(&board_lock); + id = idr_alloc(&spi_master_idr, ctlr, ctlr->bus_num, + ctlr->bus_num + 1, GFP_KERNEL); + mutex_unlock(&board_lock); + if (WARN(id < 0, "couldn't get idr")) + return id == -ENOSPC ? -EBUSY : id; + ctlr->bus_num = id; } INIT_LIST_HEAD(&ctlr->queue); spin_lock_init(&ctlr->queue_lock); -- GitLab From 0e5cdbac030358fea3b757d033c8fda29138b866 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Wed, 5 Sep 2018 14:09:54 +0300 Subject: [PATCH 0536/2269] Revert "PCI: Add ACS quirk for Intel 300 series" commit 50ca031b51106b1b46162d4e9ecccb7edc95682f upstream. This reverts f154a718e6cc ("PCI: Add ACS quirk for Intel 300 series"). It turns out that erratum "PCH PCIe* Controller Root Port (ACSCTLR) Appear As Read Only" has been fixed in 300 series chipsets, even though the datasheet [1] claims otherwise. To make ACS work properly on 300 series root ports, revert the faulty commit. [1] https://www.intel.com/content/dam/www/public/us/en/documents/specification-updates/300-series-c240-series-chipset-pch-spec-update.pdf Fixes: f154a718e6cc ("PCI: Add ACS quirk for Intel 300 series") Signed-off-by: Mika Westerberg Signed-off-by: Bjorn Helgaas Cc: stable@vger.kernel.org # v4.18+ Signed-off-by: Greg Kroah-Hartman --- drivers/pci/quirks.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index ec2911c4ee425..35c9b2f4b293a 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -4388,11 +4388,6 @@ static int pci_quirk_qcom_rp_acs(struct pci_dev *dev, u16 acs_flags) * * 0x9d10-0x9d1b PCI Express Root port #{1-12} * - * The 300 series chipset suffers from the same bug so include those root - * ports here as well. - * - * 0xa32c-0xa343 PCI Express Root port #{0-24} - * * [1] http://www.intel.com/content/www/us/en/chipsets/100-series-chipset-datasheet-vol-2.html * [2] http://www.intel.com/content/www/us/en/chipsets/100-series-chipset-datasheet-vol-1.html * [3] http://www.intel.com/content/www/us/en/chipsets/100-series-chipset-spec-update.html @@ -4410,7 +4405,6 @@ static bool pci_quirk_intel_spt_pch_acs_match(struct pci_dev *dev) case 0xa110 ... 0xa11f: case 0xa167 ... 0xa16a: /* Sunrise Point */ case 0xa290 ... 0xa29f: case 0xa2e7 ... 0xa2ee: /* Union Point */ case 0x9d10 ... 0x9d1b: /* 7th & 8th Gen Mobile */ - case 0xa32c ... 0xa343: /* 300 series */ return true; } -- GitLab From 7eba38a3f65d3cec79dbe1f10166e9423d4074f4 Mon Sep 17 00:00:00 2001 From: Vaibhav Nagarnaik Date: Fri, 7 Sep 2018 15:31:29 -0700 Subject: [PATCH 0537/2269] ring-buffer: Allow for rescheduling when removing pages commit 83f365554e47997ec68dc4eca3f5dce525cd15c3 upstream. When reducing ring buffer size, pages are removed by scheduling a work item on each CPU for the corresponding CPU ring buffer. After the pages are removed from ring buffer linked list, the pages are free()d in a tight loop. The loop does not give up CPU until all pages are removed. In a worst case behavior, when lot of pages are to be freed, it can cause system stall. After the pages are removed from the list, the free() can happen while the work is rescheduled. Call cond_resched() in the loop to prevent the system hangup. Link: http://lkml.kernel.org/r/20180907223129.71994-1-vnagarnaik@google.com Cc: stable@vger.kernel.org Fixes: 83f40318dab00 ("ring-buffer: Make removal of ring buffer pages atomic") Reported-by: Jason Behmer Signed-off-by: Vaibhav Nagarnaik Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/ring_buffer.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index fd78090042973..a1d5e0949dcf2 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -1479,6 +1479,8 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned long nr_pages) tmp_iter_page = first_page; do { + cond_resched(); + to_remove_page = tmp_iter_page; rb_inc_page(cpu_buffer, &tmp_iter_page); -- GitLab From 6447b34fc2708725e97707ab8256a9a88b31682d Mon Sep 17 00:00:00 2001 From: "Joel Fernandes (Google)" Date: Thu, 20 Sep 2018 12:22:39 -0700 Subject: [PATCH 0538/2269] mm: shmem.c: Correctly annotate new inodes for lockdep commit b45d71fb89ab8adfe727b9d0ee188ed58582a647 upstream. Directories and inodes don't necessarily need to be in the same lockdep class. For ex, hugetlbfs splits them out too to prevent false positives in lockdep. Annotate correctly after new inode creation. If its a directory inode, it will be put into a different class. This should fix a lockdep splat reported by syzbot: > ====================================================== > WARNING: possible circular locking dependency detected > 4.18.0-rc8-next-20180810+ #36 Not tainted > ------------------------------------------------------ > syz-executor900/4483 is trying to acquire lock: > 00000000d2bfc8fe (&sb->s_type->i_mutex_key#9){++++}, at: inode_lock > include/linux/fs.h:765 [inline] > 00000000d2bfc8fe (&sb->s_type->i_mutex_key#9){++++}, at: > shmem_fallocate+0x18b/0x12e0 mm/shmem.c:2602 > > but task is already holding lock: > 0000000025208078 (ashmem_mutex){+.+.}, at: ashmem_shrink_scan+0xb4/0x630 > drivers/staging/android/ashmem.c:448 > > which lock already depends on the new lock. > > -> #2 (ashmem_mutex){+.+.}: > __mutex_lock_common kernel/locking/mutex.c:925 [inline] > __mutex_lock+0x171/0x1700 kernel/locking/mutex.c:1073 > mutex_lock_nested+0x16/0x20 kernel/locking/mutex.c:1088 > ashmem_mmap+0x55/0x520 drivers/staging/android/ashmem.c:361 > call_mmap include/linux/fs.h:1844 [inline] > mmap_region+0xf27/0x1c50 mm/mmap.c:1762 > do_mmap+0xa10/0x1220 mm/mmap.c:1535 > do_mmap_pgoff include/linux/mm.h:2298 [inline] > vm_mmap_pgoff+0x213/0x2c0 mm/util.c:357 > ksys_mmap_pgoff+0x4da/0x660 mm/mmap.c:1585 > __do_sys_mmap arch/x86/kernel/sys_x86_64.c:100 [inline] > __se_sys_mmap arch/x86/kernel/sys_x86_64.c:91 [inline] > __x64_sys_mmap+0xe9/0x1b0 arch/x86/kernel/sys_x86_64.c:91 > do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290 > entry_SYSCALL_64_after_hwframe+0x49/0xbe > > -> #1 (&mm->mmap_sem){++++}: > __might_fault+0x155/0x1e0 mm/memory.c:4568 > _copy_to_user+0x30/0x110 lib/usercopy.c:25 > copy_to_user include/linux/uaccess.h:155 [inline] > filldir+0x1ea/0x3a0 fs/readdir.c:196 > dir_emit_dot include/linux/fs.h:3464 [inline] > dir_emit_dots include/linux/fs.h:3475 [inline] > dcache_readdir+0x13a/0x620 fs/libfs.c:193 > iterate_dir+0x48b/0x5d0 fs/readdir.c:51 > __do_sys_getdents fs/readdir.c:231 [inline] > __se_sys_getdents fs/readdir.c:212 [inline] > __x64_sys_getdents+0x29f/0x510 fs/readdir.c:212 > do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290 > entry_SYSCALL_64_after_hwframe+0x49/0xbe > > -> #0 (&sb->s_type->i_mutex_key#9){++++}: > lock_acquire+0x1e4/0x540 kernel/locking/lockdep.c:3924 > down_write+0x8f/0x130 kernel/locking/rwsem.c:70 > inode_lock include/linux/fs.h:765 [inline] > shmem_fallocate+0x18b/0x12e0 mm/shmem.c:2602 > ashmem_shrink_scan+0x236/0x630 drivers/staging/android/ashmem.c:455 > ashmem_ioctl+0x3ae/0x13a0 drivers/staging/android/ashmem.c:797 > vfs_ioctl fs/ioctl.c:46 [inline] > file_ioctl fs/ioctl.c:501 [inline] > do_vfs_ioctl+0x1de/0x1720 fs/ioctl.c:685 > ksys_ioctl+0xa9/0xd0 fs/ioctl.c:702 > __do_sys_ioctl fs/ioctl.c:709 [inline] > __se_sys_ioctl fs/ioctl.c:707 [inline] > __x64_sys_ioctl+0x73/0xb0 fs/ioctl.c:707 > do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290 > entry_SYSCALL_64_after_hwframe+0x49/0xbe > > other info that might help us debug this: > > Chain exists of: > &sb->s_type->i_mutex_key#9 --> &mm->mmap_sem --> ashmem_mutex > > Possible unsafe locking scenario: > > CPU0 CPU1 > ---- ---- > lock(ashmem_mutex); > lock(&mm->mmap_sem); > lock(ashmem_mutex); > lock(&sb->s_type->i_mutex_key#9); > > *** DEADLOCK *** > > 1 lock held by syz-executor900/4483: > #0: 0000000025208078 (ashmem_mutex){+.+.}, at: > ashmem_shrink_scan+0xb4/0x630 drivers/staging/android/ashmem.c:448 Link: http://lkml.kernel.org/r/20180821231835.166639-1-joel@joelfernandes.org Signed-off-by: Joel Fernandes (Google) Reported-by: syzbot Reviewed-by: NeilBrown Suggested-by: NeilBrown Cc: Matthew Wilcox Cc: Peter Zijlstra Cc: Hugh Dickins Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- mm/shmem.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mm/shmem.c b/mm/shmem.c index f383298b7280a..ea786a504e1b9 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2207,6 +2207,8 @@ static struct inode *shmem_get_inode(struct super_block *sb, const struct inode mpol_shared_policy_init(&info->policy, NULL); break; } + + lockdep_annotate_inode_mutex_key(inode); } else shmem_free_inode(sb); return inode; -- GitLab From 13d216167d3d89d882b264857fe1d14c95da1490 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 26 Sep 2018 13:53:39 +0200 Subject: [PATCH 0539/2269] Revert "rpmsg: core: add support to power domains for devices" This reverts commit 1ed3a93072307265d6385031b72929a904b50f87 which is commit fe782affd0f440a4e60e2cc81b8f2eccb2923113 upstream Rafael reports that this patch causes problems: > -rc2 looks good. There is a problem on dragonboard during boot that was > introduced in v4.14.71 that I didn't notice last week. We'll bisect it > and report back later this week. dragonboard on the other branches (4.9, > 4.18, mainline) looks fine. As Dan pointed out, during validation, we have bisected this issue on a dragonboard 410c (can't find root device) to the following commit for v4.14: [1ed3a9307230] rpmsg: core: add support to power domains for devices There is an on-going discussion on "[PATCH] rpmsg: core: add support to power domains for devices" about this patch having other dependencies and breaking something else on v4.14 as well. so drop it. Reported-by: Rafael Tinoco Cc: Srinivas Kandagatla Cc: Bjorn Andersson Cc: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/rpmsg/rpmsg_core.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/rpmsg/rpmsg_core.c b/drivers/rpmsg/rpmsg_core.c index cec4c3223044c..dffa3aab71782 100644 --- a/drivers/rpmsg/rpmsg_core.c +++ b/drivers/rpmsg/rpmsg_core.c @@ -23,7 +23,6 @@ #include #include #include -#include #include #include "rpmsg_internal.h" @@ -419,10 +418,6 @@ static int rpmsg_dev_probe(struct device *dev) struct rpmsg_endpoint *ept = NULL; int err; - err = dev_pm_domain_attach(dev, true); - if (err) - goto out; - if (rpdrv->callback) { strncpy(chinfo.name, rpdev->id.name, RPMSG_NAME_SIZE); chinfo.src = rpdev->src; @@ -464,8 +459,6 @@ static int rpmsg_dev_remove(struct device *dev) rpdrv->remove(rpdev); - dev_pm_domain_detach(dev, true); - if (rpdev->ept) rpmsg_destroy_ept(rpdev->ept); -- GitLab From 50ec69edf3f097693e22b56c7b2140acf98f28eb Mon Sep 17 00:00:00 2001 From: Lubomir Rintel Date: Mon, 24 Sep 2018 13:18:34 +0100 Subject: [PATCH 0540/2269] Revert "uapi/linux/keyctl.h: don't use C++ reserved keyword as a struct member name" commit 8c0f9f5b309d627182d5da72a69246f58bde1026 upstream. This changes UAPI, breaking iwd and libell: ell/key.c: In function 'kernel_dh_compute': ell/key.c:205:38: error: 'struct keyctl_dh_params' has no member named 'private'; did you mean 'dh_private'? struct keyctl_dh_params params = { .private = private, ^~~~~~~ dh_private This reverts commit 8a2336e549d385bb0b46880435b411df8d8200e8. Fixes: 8a2336e549d3 ("uapi/linux/keyctl.h: don't use C++ reserved keyword as a struct member name") Signed-off-by: Lubomir Rintel Signed-off-by: David Howells cc: Randy Dunlap cc: Mat Martineau cc: Stephan Mueller cc: James Morris cc: "Serge E. Hallyn" cc: Mat Martineau cc: Andrew Morton cc: Linus Torvalds cc: Signed-off-by: James Morris Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/keyctl.h | 2 +- security/keys/dh.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/keyctl.h b/include/uapi/linux/keyctl.h index 910cc4334b215..7b8c9e19bad1c 100644 --- a/include/uapi/linux/keyctl.h +++ b/include/uapi/linux/keyctl.h @@ -65,7 +65,7 @@ /* keyctl structures */ struct keyctl_dh_params { - __s32 dh_private; + __s32 private; __s32 prime; __s32 base; }; diff --git a/security/keys/dh.c b/security/keys/dh.c index 35543f04e7594..d1ea9f325f947 100644 --- a/security/keys/dh.c +++ b/security/keys/dh.c @@ -307,7 +307,7 @@ long __keyctl_dh_compute(struct keyctl_dh_params __user *params, } dh_inputs.g_size = dlen; - dlen = dh_data_from_key(pcopy.dh_private, &dh_inputs.key); + dlen = dh_data_from_key(pcopy.private, &dh_inputs.key); if (dlen < 0) { ret = dlen; goto out2; -- GitLab From 755e45f3155cc51e37dc1cce9ccde10b84df7d93 Mon Sep 17 00:00:00 2001 From: Vincent Pelletier Date: Sun, 9 Sep 2018 04:09:26 +0000 Subject: [PATCH 0541/2269] scsi: target: iscsi: Use hex2bin instead of a re-implementation commit 1816494330a83f2a064499d8ed2797045641f92c upstream. This change has the following effects, in order of descreasing importance: 1) Prevent a stack buffer overflow 2) Do not append an unnecessary NULL to an anyway binary buffer, which is writing one byte past client_digest when caller is: chap_string_to_hex(client_digest, chap_r, strlen(chap_r)); The latter was found by KASAN (see below) when input value hes expected size (32 hex chars), and further analysis revealed a stack buffer overflow can happen when network-received value is longer, allowing an unauthenticated remote attacker to smash up to 17 bytes after destination buffer (16 bytes attacker-controlled and one null). As switching to hex2bin requires specifying destination buffer length, and does not internally append any null, it solves both issues. This addresses CVE-2018-14633. Beyond this: - Validate received value length and check hex2bin accepted the input, to log this rejection reason instead of just failing authentication. - Only log received CHAP_R and CHAP_C values once they passed sanity checks. ================================================================== BUG: KASAN: stack-out-of-bounds in chap_string_to_hex+0x32/0x60 [iscsi_target_mod] Write of size 1 at addr ffff8801090ef7c8 by task kworker/0:0/1021 CPU: 0 PID: 1021 Comm: kworker/0:0 Tainted: G O 4.17.8kasan.sess.connops+ #2 Hardware name: To be filled by O.E.M. To be filled by O.E.M./Aptio CRB, BIOS 5.6.5 05/19/2014 Workqueue: events iscsi_target_do_login_rx [iscsi_target_mod] Call Trace: dump_stack+0x71/0xac print_address_description+0x65/0x22e ? chap_string_to_hex+0x32/0x60 [iscsi_target_mod] kasan_report.cold.6+0x241/0x2fd chap_string_to_hex+0x32/0x60 [iscsi_target_mod] chap_server_compute_md5.isra.2+0x2cb/0x860 [iscsi_target_mod] ? chap_binaryhex_to_asciihex.constprop.5+0x50/0x50 [iscsi_target_mod] ? ftrace_caller_op_ptr+0xe/0xe ? __orc_find+0x6f/0xc0 ? unwind_next_frame+0x231/0x850 ? kthread+0x1a0/0x1c0 ? ret_from_fork+0x35/0x40 ? ret_from_fork+0x35/0x40 ? iscsi_target_do_login_rx+0x3bc/0x4c0 [iscsi_target_mod] ? deref_stack_reg+0xd0/0xd0 ? iscsi_target_do_login_rx+0x3bc/0x4c0 [iscsi_target_mod] ? is_module_text_address+0xa/0x11 ? kernel_text_address+0x4c/0x110 ? __save_stack_trace+0x82/0x100 ? ret_from_fork+0x35/0x40 ? save_stack+0x8c/0xb0 ? 0xffffffffc1660000 ? iscsi_target_do_login+0x155/0x8d0 [iscsi_target_mod] ? iscsi_target_do_login_rx+0x3bc/0x4c0 [iscsi_target_mod] ? process_one_work+0x35c/0x640 ? worker_thread+0x66/0x5d0 ? kthread+0x1a0/0x1c0 ? ret_from_fork+0x35/0x40 ? iscsi_update_param_value+0x80/0x80 [iscsi_target_mod] ? iscsit_release_cmd+0x170/0x170 [iscsi_target_mod] chap_main_loop+0x172/0x570 [iscsi_target_mod] ? chap_server_compute_md5.isra.2+0x860/0x860 [iscsi_target_mod] ? rx_data+0xd6/0x120 [iscsi_target_mod] ? iscsit_print_session_params+0xd0/0xd0 [iscsi_target_mod] ? cyc2ns_read_begin.part.2+0x90/0x90 ? _raw_spin_lock_irqsave+0x25/0x50 ? memcmp+0x45/0x70 iscsi_target_do_login+0x875/0x8d0 [iscsi_target_mod] ? iscsi_target_check_first_request.isra.5+0x1a0/0x1a0 [iscsi_target_mod] ? del_timer+0xe0/0xe0 ? memset+0x1f/0x40 ? flush_sigqueue+0x29/0xd0 iscsi_target_do_login_rx+0x3bc/0x4c0 [iscsi_target_mod] ? iscsi_target_nego_release+0x80/0x80 [iscsi_target_mod] ? iscsi_target_restore_sock_callbacks+0x130/0x130 [iscsi_target_mod] process_one_work+0x35c/0x640 worker_thread+0x66/0x5d0 ? flush_rcu_work+0x40/0x40 kthread+0x1a0/0x1c0 ? kthread_bind+0x30/0x30 ret_from_fork+0x35/0x40 The buggy address belongs to the page: page:ffffea0004243bc0 count:0 mapcount:0 mapping:0000000000000000 index:0x0 flags: 0x17fffc000000000() raw: 017fffc000000000 0000000000000000 0000000000000000 00000000ffffffff raw: ffffea0004243c20 ffffea0004243ba0 0000000000000000 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff8801090ef680: f2 f2 f2 f2 f2 f2 f2 01 f2 f2 f2 f2 f2 f2 f2 00 ffff8801090ef700: f2 f2 f2 f2 f2 f2 f2 00 02 f2 f2 f2 f2 f2 f2 00 >ffff8801090ef780: 00 f2 f2 f2 f2 f2 f2 00 00 f2 f2 f2 f2 f2 f2 00 ^ ffff8801090ef800: 00 f2 f2 f2 f2 f2 f2 00 00 00 00 02 f2 f2 f2 f2 ffff8801090ef880: f2 f2 f2 00 00 00 00 00 00 00 00 f2 f2 f2 f2 00 ================================================================== Signed-off-by: Vincent Pelletier Reviewed-by: Mike Christie Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/target/iscsi/iscsi_target_auth.c | 30 +++++++++++------------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/drivers/target/iscsi/iscsi_target_auth.c b/drivers/target/iscsi/iscsi_target_auth.c index 9518ffd8b8bac..6c3b4c0228940 100644 --- a/drivers/target/iscsi/iscsi_target_auth.c +++ b/drivers/target/iscsi/iscsi_target_auth.c @@ -26,18 +26,6 @@ #include "iscsi_target_nego.h" #include "iscsi_target_auth.h" -static int chap_string_to_hex(unsigned char *dst, unsigned char *src, int len) -{ - int j = DIV_ROUND_UP(len, 2), rc; - - rc = hex2bin(dst, src, j); - if (rc < 0) - pr_debug("CHAP string contains non hex digit symbols\n"); - - dst[j] = '\0'; - return j; -} - static void chap_binaryhex_to_asciihex(char *dst, char *src, int src_len) { int i; @@ -248,9 +236,16 @@ static int chap_server_compute_md5( pr_err("Could not find CHAP_R.\n"); goto out; } + if (strlen(chap_r) != MD5_SIGNATURE_SIZE * 2) { + pr_err("Malformed CHAP_R\n"); + goto out; + } + if (hex2bin(client_digest, chap_r, MD5_SIGNATURE_SIZE) < 0) { + pr_err("Malformed CHAP_R\n"); + goto out; + } pr_debug("[server] Got CHAP_R=%s\n", chap_r); - chap_string_to_hex(client_digest, chap_r, strlen(chap_r)); tfm = crypto_alloc_shash("md5", 0, 0); if (IS_ERR(tfm)) { @@ -349,9 +344,7 @@ static int chap_server_compute_md5( pr_err("Could not find CHAP_C.\n"); goto out; } - pr_debug("[server] Got CHAP_C=%s\n", challenge); - challenge_len = chap_string_to_hex(challenge_binhex, challenge, - strlen(challenge)); + challenge_len = DIV_ROUND_UP(strlen(challenge), 2); if (!challenge_len) { pr_err("Unable to convert incoming challenge\n"); goto out; @@ -360,6 +353,11 @@ static int chap_server_compute_md5( pr_err("CHAP_C exceeds maximum binary size of 1024 bytes\n"); goto out; } + if (hex2bin(challenge_binhex, challenge, challenge_len) < 0) { + pr_err("Malformed CHAP_C\n"); + goto out; + } + pr_debug("[server] Got CHAP_C=%s\n", challenge); /* * During mutual authentication, the CHAP_C generated by the * initiator must not match the original CHAP_C generated by -- GitLab From 44383139a39c46d08c8e169121c16aeb0427d147 Mon Sep 17 00:00:00 2001 From: Vincent Pelletier Date: Sun, 9 Sep 2018 04:09:27 +0000 Subject: [PATCH 0542/2269] scsi: target: iscsi: Use bin2hex instead of a re-implementation commit 8c39e2699f8acb2e29782a834e56306da24937fe upstream. Signed-off-by: Vincent Pelletier Reviewed-by: Mike Christie Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/target/iscsi/iscsi_target_auth.c | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/drivers/target/iscsi/iscsi_target_auth.c b/drivers/target/iscsi/iscsi_target_auth.c index 6c3b4c0228940..4e680d753941f 100644 --- a/drivers/target/iscsi/iscsi_target_auth.c +++ b/drivers/target/iscsi/iscsi_target_auth.c @@ -26,15 +26,6 @@ #include "iscsi_target_nego.h" #include "iscsi_target_auth.h" -static void chap_binaryhex_to_asciihex(char *dst, char *src, int src_len) -{ - int i; - - for (i = 0; i < src_len; i++) { - sprintf(&dst[i*2], "%02x", (int) src[i] & 0xff); - } -} - static int chap_gen_challenge( struct iscsi_conn *conn, int caller, @@ -50,7 +41,7 @@ static int chap_gen_challenge( ret = get_random_bytes_wait(chap->challenge, CHAP_CHALLENGE_LENGTH); if (unlikely(ret)) return ret; - chap_binaryhex_to_asciihex(challenge_asciihex, chap->challenge, + bin2hex(challenge_asciihex, chap->challenge, CHAP_CHALLENGE_LENGTH); /* * Set CHAP_C, and copy the generated challenge into c_str. @@ -289,7 +280,7 @@ static int chap_server_compute_md5( goto out; } - chap_binaryhex_to_asciihex(response, server_digest, MD5_SIGNATURE_SIZE); + bin2hex(response, server_digest, MD5_SIGNATURE_SIZE); pr_debug("[server] MD5 Server Digest: %s\n", response); if (memcmp(server_digest, client_digest, MD5_SIGNATURE_SIZE) != 0) { @@ -411,7 +402,7 @@ static int chap_server_compute_md5( /* * Convert response from binary hex to ascii hext. */ - chap_binaryhex_to_asciihex(response, digest, MD5_SIGNATURE_SIZE); + bin2hex(response, digest, MD5_SIGNATURE_SIZE); *nr_out_len += sprintf(nr_out_ptr + *nr_out_len, "CHAP_R=0x%s", response); *nr_out_len += 1; -- GitLab From 1d7e23f9068fc4c6047457eaace8453ec2d220f5 Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Sun, 16 Sep 2018 23:57:35 +0200 Subject: [PATCH 0543/2269] Revert "ubifs: xattr: Don't operate on deleted inodes" commit f061c1cc404a618858a77aea233fde0aeaad2f2d upstream. This reverts commit 11a6fc3dc743e22fb50f2196ec55bee5140d3c52. UBIFS wants to assert that xattr operations are only issued on files with positive link count. The said patch made this operations return -ENOENT for unlinked files such that the asserts will no longer trigger. This was wrong since xattr operations are perfectly fine on unlinked files. Instead the assertions need to be fixed/removed. Cc: Fixes: 11a6fc3dc743 ("ubifs: xattr: Don't operate on deleted inodes") Reported-by: Koen Vandeputte Tested-by: Joel Stanley Signed-off-by: Richard Weinberger Signed-off-by: Greg Kroah-Hartman --- fs/ubifs/xattr.c | 24 ------------------------ 1 file changed, 24 deletions(-) diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c index d47f16c0d5829..c13eae819cbc8 100644 --- a/fs/ubifs/xattr.c +++ b/fs/ubifs/xattr.c @@ -152,12 +152,6 @@ static int create_xattr(struct ubifs_info *c, struct inode *host, ui->data_len = size; mutex_lock(&host_ui->ui_mutex); - - if (!host->i_nlink) { - err = -ENOENT; - goto out_noent; - } - host->i_ctime = current_time(host); host_ui->xattr_cnt += 1; host_ui->xattr_size += CALC_DENT_SIZE(fname_len(nm)); @@ -189,7 +183,6 @@ out_cancel: host_ui->xattr_size -= CALC_XATTR_BYTES(size); host_ui->xattr_names -= fname_len(nm); host_ui->flags &= ~UBIFS_CRYPT_FL; -out_noent: mutex_unlock(&host_ui->ui_mutex); out_free: make_bad_inode(inode); @@ -241,12 +234,6 @@ static int change_xattr(struct ubifs_info *c, struct inode *host, mutex_unlock(&ui->ui_mutex); mutex_lock(&host_ui->ui_mutex); - - if (!host->i_nlink) { - err = -ENOENT; - goto out_noent; - } - host->i_ctime = current_time(host); host_ui->xattr_size -= CALC_XATTR_BYTES(old_size); host_ui->xattr_size += CALC_XATTR_BYTES(size); @@ -268,7 +255,6 @@ static int change_xattr(struct ubifs_info *c, struct inode *host, out_cancel: host_ui->xattr_size -= CALC_XATTR_BYTES(size); host_ui->xattr_size += CALC_XATTR_BYTES(old_size); -out_noent: mutex_unlock(&host_ui->ui_mutex); make_bad_inode(inode); out_free: @@ -497,12 +483,6 @@ static int remove_xattr(struct ubifs_info *c, struct inode *host, return err; mutex_lock(&host_ui->ui_mutex); - - if (!host->i_nlink) { - err = -ENOENT; - goto out_noent; - } - host->i_ctime = current_time(host); host_ui->xattr_cnt -= 1; host_ui->xattr_size -= CALC_DENT_SIZE(fname_len(nm)); @@ -522,7 +502,6 @@ out_cancel: host_ui->xattr_size += CALC_DENT_SIZE(fname_len(nm)); host_ui->xattr_size += CALC_XATTR_BYTES(ui->data_len); host_ui->xattr_names += fname_len(nm); -out_noent: mutex_unlock(&host_ui->ui_mutex); ubifs_release_budget(c, &req); make_bad_inode(inode); @@ -562,9 +541,6 @@ static int ubifs_xattr_remove(struct inode *host, const char *name) ubifs_assert(inode_is_locked(host)); - if (!host->i_nlink) - return -ENOENT; - if (fname_len(&nm) > UBIFS_MAX_NLEN) return -ENAMETOOLONG; -- GitLab From 7c1ca8fb8633e833eec8047a5b0cefa4e2a2119c Mon Sep 17 00:00:00 2001 From: Junxiao Bi Date: Thu, 20 Sep 2018 12:22:51 -0700 Subject: [PATCH 0544/2269] ocfs2: fix ocfs2 read block panic commit 234b69e3e089d850a98e7b3145bd00e9b52b1111 upstream. While reading block, it is possible that io error return due to underlying storage issue, in this case, BH_NeedsValidate was left in the buffer head. Then when reading the very block next time, if it was already linked into journal, that will trigger the following panic. [203748.702517] kernel BUG at fs/ocfs2/buffer_head_io.c:342! [203748.702533] invalid opcode: 0000 [#1] SMP [203748.702561] Modules linked in: ocfs2 ocfs2_dlmfs ocfs2_stack_o2cb ocfs2_dlm ocfs2_nodemanager ocfs2_stackglue configfs sunrpc dm_switch dm_queue_length dm_multipath bonding be2iscsi iscsi_boot_sysfs bnx2i cnic uio cxgb4i iw_cxgb4 cxgb4 cxgb3i libcxgbi iw_cxgb3 cxgb3 mdio ib_iser rdma_cm ib_cm iw_cm ib_sa ib_mad ib_core ib_addr ipv6 iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi ipmi_devintf iTCO_wdt iTCO_vendor_support dcdbas ipmi_ssif i2c_core ipmi_si ipmi_msghandler acpi_pad pcspkr sb_edac edac_core lpc_ich mfd_core shpchp sg tg3 ptp pps_core ext4 jbd2 mbcache2 sr_mod cdrom sd_mod ahci libahci megaraid_sas wmi dm_mirror dm_region_hash dm_log dm_mod [203748.703024] CPU: 7 PID: 38369 Comm: touch Not tainted 4.1.12-124.18.6.el6uek.x86_64 #2 [203748.703045] Hardware name: Dell Inc. PowerEdge R620/0PXXHP, BIOS 2.5.2 01/28/2015 [203748.703067] task: ffff880768139c00 ti: ffff88006ff48000 task.ti: ffff88006ff48000 [203748.703088] RIP: 0010:[] [] ocfs2_read_blocks+0x669/0x7f0 [ocfs2] [203748.703130] RSP: 0018:ffff88006ff4b818 EFLAGS: 00010206 [203748.703389] RAX: 0000000008620029 RBX: ffff88006ff4b910 RCX: 0000000000000000 [203748.703885] RDX: 0000000000000001 RSI: 0000000000000000 RDI: 00000000023079fe [203748.704382] RBP: ffff88006ff4b8d8 R08: 0000000000000000 R09: ffff8807578c25b0 [203748.704877] R10: 000000000f637376 R11: 000000003030322e R12: 0000000000000000 [203748.705373] R13: ffff88006ff4b910 R14: ffff880732fe38f0 R15: 0000000000000000 [203748.705871] FS: 00007f401992c700(0000) GS:ffff880bfebc0000(0000) knlGS:0000000000000000 [203748.706370] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [203748.706627] CR2: 00007f4019252440 CR3: 00000000a621e000 CR4: 0000000000060670 [203748.707124] Stack: [203748.707371] ffff88006ff4b828 ffffffffa0609f52 ffff88006ff4b838 0000000000000001 [203748.707885] 0000000000000000 0000000000000000 ffff880bf67c3800 ffffffffa05eca00 [203748.708399] 00000000023079ff ffffffff81c58b80 0000000000000000 0000000000000000 [203748.708915] Call Trace: [203748.709175] [] ? ocfs2_inode_cache_io_unlock+0x12/0x20 [ocfs2] [203748.709680] [] ? ocfs2_empty_dir_filldir+0x80/0x80 [ocfs2] [203748.710185] [] ocfs2_read_dir_block_direct+0x3b/0x200 [ocfs2] [203748.710691] [] ocfs2_prepare_dx_dir_for_insert.isra.57+0x19f/0xf60 [ocfs2] [203748.711204] [] ? ocfs2_metadata_cache_io_unlock+0x1f/0x30 [ocfs2] [203748.711716] [] ocfs2_prepare_dir_for_insert+0x13a/0x890 [ocfs2] [203748.712227] [] ? ocfs2_check_dir_for_entry+0x8e/0x140 [ocfs2] [203748.712737] [] ocfs2_mknod+0x4b2/0x1370 [ocfs2] [203748.713003] [] ocfs2_create+0x65/0x170 [ocfs2] [203748.713263] [] vfs_create+0xdb/0x150 [203748.713518] [] do_last+0x815/0x1210 [203748.713772] [] ? path_init+0xb9/0x450 [203748.714123] [] path_openat+0x80/0x600 [203748.714378] [] ? handle_pte_fault+0xd15/0x1620 [203748.714634] [] do_filp_open+0x3a/0xb0 [203748.714888] [] ? __alloc_fd+0xa7/0x130 [203748.715143] [] do_sys_open+0x12c/0x220 [203748.715403] [] ? syscall_trace_enter_phase1+0x11b/0x180 [203748.715668] [] ? system_call_after_swapgs+0xe9/0x190 [203748.715928] [] SyS_open+0x1e/0x20 [203748.716184] [] system_call_fastpath+0x18/0xd7 [203748.716440] Code: 00 00 48 8b 7b 08 48 83 c3 10 45 89 f8 44 89 e1 44 89 f2 4c 89 ee e8 07 06 11 e1 48 8b 03 48 85 c0 75 df 8b 5d c8 e9 4d fa ff ff <0f> 0b 48 8b 7d a0 e8 dc c6 06 00 48 b8 00 00 00 00 00 00 00 10 [203748.717505] RIP [] ocfs2_read_blocks+0x669/0x7f0 [ocfs2] [203748.717775] RSP Joesph ever reported a similar panic. Link: https://oss.oracle.com/pipermail/ocfs2-devel/2013-May/008931.html Link: http://lkml.kernel.org/r/20180912063207.29484-1-junxiao.bi@oracle.com Signed-off-by: Junxiao Bi Cc: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Changwei Ge Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- fs/ocfs2/buffer_head_io.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c index d9ebe11c89909..1d098c3c00e02 100644 --- a/fs/ocfs2/buffer_head_io.c +++ b/fs/ocfs2/buffer_head_io.c @@ -342,6 +342,7 @@ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr, * for this bh as it's not marked locally * uptodate. */ status = -EIO; + clear_buffer_needs_validate(bh); put_bh(bh); bhs[i] = NULL; continue; -- GitLab From 42387d8e4aefffe928e1a4ec99c45e6e2c4d2167 Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Wed, 15 Aug 2018 15:00:15 -0400 Subject: [PATCH 0545/2269] drm/nouveau: Fix deadlocks in nouveau_connector_detect() commit 3e1a12754d4df5804bfca5dedf09d2ba291bdc2a upstream. When we disable hotplugging on the GPU, we need to be able to synchronize with each connector's hotplug interrupt handler before the interrupt is finally disabled. This can be a problem however, since nouveau_connector_detect() currently grabs a runtime power reference when handling connector probing. This will deadlock the runtime suspend handler like so: [ 861.480896] INFO: task kworker/0:2:61 blocked for more than 120 seconds. [ 861.483290] Tainted: G O 4.18.0-rc6Lyude-Test+ #1 [ 861.485158] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [ 861.486332] kworker/0:2 D 0 61 2 0x80000000 [ 861.487044] Workqueue: events nouveau_display_hpd_work [nouveau] [ 861.487737] Call Trace: [ 861.488394] __schedule+0x322/0xaf0 [ 861.489070] schedule+0x33/0x90 [ 861.489744] rpm_resume+0x19c/0x850 [ 861.490392] ? finish_wait+0x90/0x90 [ 861.491068] __pm_runtime_resume+0x4e/0x90 [ 861.491753] nouveau_display_hpd_work+0x22/0x60 [nouveau] [ 861.492416] process_one_work+0x231/0x620 [ 861.493068] worker_thread+0x44/0x3a0 [ 861.493722] kthread+0x12b/0x150 [ 861.494342] ? wq_pool_ids_show+0x140/0x140 [ 861.494991] ? kthread_create_worker_on_cpu+0x70/0x70 [ 861.495648] ret_from_fork+0x3a/0x50 [ 861.496304] INFO: task kworker/6:2:320 blocked for more than 120 seconds. [ 861.496968] Tainted: G O 4.18.0-rc6Lyude-Test+ #1 [ 861.497654] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [ 861.498341] kworker/6:2 D 0 320 2 0x80000080 [ 861.499045] Workqueue: pm pm_runtime_work [ 861.499739] Call Trace: [ 861.500428] __schedule+0x322/0xaf0 [ 861.501134] ? wait_for_completion+0x104/0x190 [ 861.501851] schedule+0x33/0x90 [ 861.502564] schedule_timeout+0x3a5/0x590 [ 861.503284] ? mark_held_locks+0x58/0x80 [ 861.503988] ? _raw_spin_unlock_irq+0x2c/0x40 [ 861.504710] ? wait_for_completion+0x104/0x190 [ 861.505417] ? trace_hardirqs_on_caller+0xf4/0x190 [ 861.506136] ? wait_for_completion+0x104/0x190 [ 861.506845] wait_for_completion+0x12c/0x190 [ 861.507555] ? wake_up_q+0x80/0x80 [ 861.508268] flush_work+0x1c9/0x280 [ 861.508990] ? flush_workqueue_prep_pwqs+0x1b0/0x1b0 [ 861.509735] nvif_notify_put+0xb1/0xc0 [nouveau] [ 861.510482] nouveau_display_fini+0xbd/0x170 [nouveau] [ 861.511241] nouveau_display_suspend+0x67/0x120 [nouveau] [ 861.511969] nouveau_do_suspend+0x5e/0x2d0 [nouveau] [ 861.512715] nouveau_pmops_runtime_suspend+0x47/0xb0 [nouveau] [ 861.513435] pci_pm_runtime_suspend+0x6b/0x180 [ 861.514165] ? pci_has_legacy_pm_support+0x70/0x70 [ 861.514897] __rpm_callback+0x7a/0x1d0 [ 861.515618] ? pci_has_legacy_pm_support+0x70/0x70 [ 861.516313] rpm_callback+0x24/0x80 [ 861.517027] ? pci_has_legacy_pm_support+0x70/0x70 [ 861.517741] rpm_suspend+0x142/0x6b0 [ 861.518449] pm_runtime_work+0x97/0xc0 [ 861.519144] process_one_work+0x231/0x620 [ 861.519831] worker_thread+0x44/0x3a0 [ 861.520522] kthread+0x12b/0x150 [ 861.521220] ? wq_pool_ids_show+0x140/0x140 [ 861.521925] ? kthread_create_worker_on_cpu+0x70/0x70 [ 861.522622] ret_from_fork+0x3a/0x50 [ 861.523299] INFO: task kworker/6:0:1329 blocked for more than 120 seconds. [ 861.523977] Tainted: G O 4.18.0-rc6Lyude-Test+ #1 [ 861.524644] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [ 861.525349] kworker/6:0 D 0 1329 2 0x80000000 [ 861.526073] Workqueue: events nvif_notify_work [nouveau] [ 861.526751] Call Trace: [ 861.527411] __schedule+0x322/0xaf0 [ 861.528089] schedule+0x33/0x90 [ 861.528758] rpm_resume+0x19c/0x850 [ 861.529399] ? finish_wait+0x90/0x90 [ 861.530073] __pm_runtime_resume+0x4e/0x90 [ 861.530798] nouveau_connector_detect+0x7e/0x510 [nouveau] [ 861.531459] ? ww_mutex_lock+0x47/0x80 [ 861.532097] ? ww_mutex_lock+0x47/0x80 [ 861.532819] ? drm_modeset_lock+0x88/0x130 [drm] [ 861.533481] drm_helper_probe_detect_ctx+0xa0/0x100 [drm_kms_helper] [ 861.534127] drm_helper_hpd_irq_event+0xa4/0x120 [drm_kms_helper] [ 861.534940] nouveau_connector_hotplug+0x98/0x120 [nouveau] [ 861.535556] nvif_notify_work+0x2d/0xb0 [nouveau] [ 861.536221] process_one_work+0x231/0x620 [ 861.536994] worker_thread+0x44/0x3a0 [ 861.537757] kthread+0x12b/0x150 [ 861.538463] ? wq_pool_ids_show+0x140/0x140 [ 861.539102] ? kthread_create_worker_on_cpu+0x70/0x70 [ 861.539815] ret_from_fork+0x3a/0x50 [ 861.540521] Showing all locks held in the system: [ 861.541696] 2 locks held by kworker/0:2/61: [ 861.542406] #0: 000000002dbf8af5 ((wq_completion)"events"){+.+.}, at: process_one_work+0x1b3/0x620 [ 861.543071] #1: 0000000076868126 ((work_completion)(&drm->hpd_work)){+.+.}, at: process_one_work+0x1b3/0x620 [ 861.543814] 1 lock held by khungtaskd/64: [ 861.544535] #0: 0000000059db4b53 (rcu_read_lock){....}, at: debug_show_all_locks+0x23/0x185 [ 861.545160] 3 locks held by kworker/6:2/320: [ 861.545896] #0: 00000000d9e1bc59 ((wq_completion)"pm"){+.+.}, at: process_one_work+0x1b3/0x620 [ 861.546702] #1: 00000000c9f92d84 ((work_completion)(&dev->power.work)){+.+.}, at: process_one_work+0x1b3/0x620 [ 861.547443] #2: 000000004afc5de1 (drm_connector_list_iter){.+.+}, at: nouveau_display_fini+0x96/0x170 [nouveau] [ 861.548146] 1 lock held by dmesg/983: [ 861.548889] 2 locks held by zsh/1250: [ 861.549605] #0: 00000000348e3cf6 (&tty->ldisc_sem){++++}, at: ldsem_down_read+0x37/0x40 [ 861.550393] #1: 000000007009a7a8 (&ldata->atomic_read_lock){+.+.}, at: n_tty_read+0xc1/0x870 [ 861.551122] 6 locks held by kworker/6:0/1329: [ 861.551957] #0: 000000002dbf8af5 ((wq_completion)"events"){+.+.}, at: process_one_work+0x1b3/0x620 [ 861.552765] #1: 00000000ddb499ad ((work_completion)(¬ify->work)#2){+.+.}, at: process_one_work+0x1b3/0x620 [ 861.553582] #2: 000000006e013cbe (&dev->mode_config.mutex){+.+.}, at: drm_helper_hpd_irq_event+0x6c/0x120 [drm_kms_helper] [ 861.554357] #3: 000000004afc5de1 (drm_connector_list_iter){.+.+}, at: drm_helper_hpd_irq_event+0x78/0x120 [drm_kms_helper] [ 861.555227] #4: 0000000044f294d9 (crtc_ww_class_acquire){+.+.}, at: drm_helper_probe_detect_ctx+0x3d/0x100 [drm_kms_helper] [ 861.556133] #5: 00000000db193642 (crtc_ww_class_mutex){+.+.}, at: drm_modeset_lock+0x4b/0x130 [drm] [ 861.557864] ============================================= [ 861.559507] NMI backtrace for cpu 2 [ 861.560363] CPU: 2 PID: 64 Comm: khungtaskd Tainted: G O 4.18.0-rc6Lyude-Test+ #1 [ 861.561197] Hardware name: LENOVO 20EQS64N0B/20EQS64N0B, BIOS N1EET78W (1.51 ) 05/18/2018 [ 861.561948] Call Trace: [ 861.562757] dump_stack+0x8e/0xd3 [ 861.563516] nmi_cpu_backtrace.cold.3+0x14/0x5a [ 861.564269] ? lapic_can_unplug_cpu.cold.27+0x42/0x42 [ 861.565029] nmi_trigger_cpumask_backtrace+0xa1/0xae [ 861.565789] arch_trigger_cpumask_backtrace+0x19/0x20 [ 861.566558] watchdog+0x316/0x580 [ 861.567355] kthread+0x12b/0x150 [ 861.568114] ? reset_hung_task_detector+0x20/0x20 [ 861.568863] ? kthread_create_worker_on_cpu+0x70/0x70 [ 861.569598] ret_from_fork+0x3a/0x50 [ 861.570370] Sending NMI from CPU 2 to CPUs 0-1,3-7: [ 861.571426] NMI backtrace for cpu 6 skipped: idling at intel_idle+0x7f/0x120 [ 861.571429] NMI backtrace for cpu 7 skipped: idling at intel_idle+0x7f/0x120 [ 861.571432] NMI backtrace for cpu 3 skipped: idling at intel_idle+0x7f/0x120 [ 861.571464] NMI backtrace for cpu 5 skipped: idling at intel_idle+0x7f/0x120 [ 861.571467] NMI backtrace for cpu 0 skipped: idling at intel_idle+0x7f/0x120 [ 861.571469] NMI backtrace for cpu 4 skipped: idling at intel_idle+0x7f/0x120 [ 861.571472] NMI backtrace for cpu 1 skipped: idling at intel_idle+0x7f/0x120 [ 861.572428] Kernel panic - not syncing: hung_task: blocked tasks So: fix this by making it so that normal hotplug handling /only/ happens so long as the GPU is currently awake without any pending runtime PM requests. In the event that a hotplug occurs while the device is suspending or resuming, we can simply defer our response until the GPU is fully runtime resumed again. Changes since v4: - Use a new trick I came up with using pm_runtime_get() instead of the hackish junk we had before Signed-off-by: Lyude Paul Reviewed-by: Karol Herbst Acked-by: Daniel Vetter Cc: stable@vger.kernel.org Cc: Lukas Wunner Signed-off-by: Ben Skeggs Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/nouveau/nouveau_connector.c | 22 +++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.c b/drivers/gpu/drm/nouveau/nouveau_connector.c index 430830d63a33d..6246c40b9e20f 100644 --- a/drivers/gpu/drm/nouveau/nouveau_connector.c +++ b/drivers/gpu/drm/nouveau/nouveau_connector.c @@ -1120,6 +1120,26 @@ nouveau_connector_hotplug(struct nvif_notify *notify) const struct nvif_notify_conn_rep_v0 *rep = notify->data; const char *name = connector->name; struct nouveau_encoder *nv_encoder; + int ret; + + ret = pm_runtime_get(drm->dev->dev); + if (ret == 0) { + /* We can't block here if there's a pending PM request + * running, as we'll deadlock nouveau_display_fini() when it + * calls nvif_put() on our nvif_notify struct. So, simply + * defer the hotplug event until the device finishes resuming + */ + NV_DEBUG(drm, "Deferring HPD on %s until runtime resume\n", + name); + schedule_work(&drm->hpd_work); + + pm_runtime_put_noidle(drm->dev->dev); + return NVIF_NOTIFY_KEEP; + } else if (ret != 1 && ret != -EACCES) { + NV_WARN(drm, "HPD on %s dropped due to RPM failure: %d\n", + name, ret); + return NVIF_NOTIFY_DROP; + } if (rep->mask & NVIF_NOTIFY_CONN_V0_IRQ) { NV_DEBUG(drm, "service %s\n", name); @@ -1137,6 +1157,8 @@ nouveau_connector_hotplug(struct nvif_notify *notify) drm_helper_hpd_irq_event(connector->dev); } + pm_runtime_mark_last_busy(drm->dev->dev); + pm_runtime_put_autosuspend(drm->dev->dev); return NVIF_NOTIFY_KEEP; } -- GitLab From 9ac837e079a0e09771175c33afea8ef291b5d161 Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Tue, 7 Aug 2018 17:32:48 -0400 Subject: [PATCH 0546/2269] drm/nouveau/drm/nouveau: Don't forget to cancel hpd_work on suspend/unload commit 2f7ca781fd382cf8dde73ed36dfdd93fd05b3332 upstream. Currently, there's nothing in nouveau that actually cancels this work struct. So, cancel it on suspend/unload. Otherwise, if we're unlucky enough hpd_work might try to keep running up until the system is suspended. Signed-off-by: Lyude Paul Cc: stable@vger.kernel.org Signed-off-by: Ben Skeggs Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/nouveau/nouveau_display.c | 9 ++++++--- drivers/gpu/drm/nouveau/nouveau_display.h | 2 +- drivers/gpu/drm/nouveau/nouveau_drm.c | 2 +- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_display.c b/drivers/gpu/drm/nouveau/nouveau_display.c index caf53503c0f7a..41f389cd5ca07 100644 --- a/drivers/gpu/drm/nouveau/nouveau_display.c +++ b/drivers/gpu/drm/nouveau/nouveau_display.c @@ -426,7 +426,7 @@ nouveau_display_init(struct drm_device *dev) } void -nouveau_display_fini(struct drm_device *dev, bool suspend) +nouveau_display_fini(struct drm_device *dev, bool suspend, bool runtime) { struct nouveau_display *disp = nouveau_display(dev); struct nouveau_drm *drm = nouveau_drm(dev); @@ -451,6 +451,9 @@ nouveau_display_fini(struct drm_device *dev, bool suspend) } drm_connector_list_iter_end(&conn_iter); + if (!runtime) + cancel_work_sync(&drm->hpd_work); + drm_kms_helper_poll_disable(dev); disp->fini(dev); } @@ -640,11 +643,11 @@ nouveau_display_suspend(struct drm_device *dev, bool runtime) } } - nouveau_display_fini(dev, true); + nouveau_display_fini(dev, true, runtime); return 0; } - nouveau_display_fini(dev, true); + nouveau_display_fini(dev, true, runtime); list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { struct nouveau_framebuffer *nouveau_fb; diff --git a/drivers/gpu/drm/nouveau/nouveau_display.h b/drivers/gpu/drm/nouveau/nouveau_display.h index 34cd144681b9b..5e7dae79c2ee3 100644 --- a/drivers/gpu/drm/nouveau/nouveau_display.h +++ b/drivers/gpu/drm/nouveau/nouveau_display.h @@ -64,7 +64,7 @@ nouveau_display(struct drm_device *dev) int nouveau_display_create(struct drm_device *dev); void nouveau_display_destroy(struct drm_device *dev); int nouveau_display_init(struct drm_device *dev); -void nouveau_display_fini(struct drm_device *dev, bool suspend); +void nouveau_display_fini(struct drm_device *dev, bool suspend, bool runtime); int nouveau_display_suspend(struct drm_device *dev, bool runtime); void nouveau_display_resume(struct drm_device *dev, bool runtime); int nouveau_display_vblank_enable(struct drm_device *, unsigned int); diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c index d6f13d7254de9..7d3c7bb0ebfa9 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drm.c +++ b/drivers/gpu/drm/nouveau/nouveau_drm.c @@ -546,7 +546,7 @@ nouveau_drm_unload(struct drm_device *dev) nouveau_debugfs_fini(drm); if (dev->mode_config.num_crtc) - nouveau_display_fini(dev, false); + nouveau_display_fini(dev, false, false); nouveau_display_destroy(dev); nouveau_bios_takedown(dev); -- GitLab From 409af02c200e232b884451b4099124a95a5b084c Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Wed, 15 Aug 2018 15:00:11 -0400 Subject: [PATCH 0547/2269] drm/nouveau/drm/nouveau: Fix bogus drm_kms_helper_poll_enable() placement commit d77ef138ff572409ab93d492e5e6c826ee6fb21d upstream. Turns out this part is my fault for not noticing when reviewing 9a2eba337cace ("drm/nouveau: Fix drm poll_helper handling"). Currently we call drm_kms_helper_poll_enable() from nouveau_display_hpd_work(). This makes basically no sense however, because that means we're calling drm_kms_helper_poll_enable() every time we schedule the hotplug detection work. This is also against the advice mentioned in drm_kms_helper_poll_enable()'s documentation: Note that calls to enable and disable polling must be strictly ordered, which is automatically the case when they're only call from suspend/resume callbacks. Of course, hotplugs can't really be ordered. They could even happen immediately after we called drm_kms_helper_poll_disable() in nouveau_display_fini(), which can lead to all sorts of issues. Additionally; enabling polling /after/ we call drm_helper_hpd_irq_event() could also mean that we'd miss a hotplug event anyway, since drm_helper_hpd_irq_event() wouldn't bother trying to probe connectors so long as polling is disabled. So; simply move this back into nouveau_display_init() again. The race condition that both of these patches attempted to work around has already been fixed properly in d61a5c106351 ("drm/nouveau: Fix deadlock on runtime suspend") Fixes: 9a2eba337cace ("drm/nouveau: Fix drm poll_helper handling") Signed-off-by: Lyude Paul Acked-by: Karol Herbst Acked-by: Daniel Vetter Cc: Lukas Wunner Cc: Peter Ujfalusi Cc: stable@vger.kernel.org Signed-off-by: Ben Skeggs Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/nouveau/nouveau_display.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_display.c b/drivers/gpu/drm/nouveau/nouveau_display.c index 41f389cd5ca07..9167c8866e95f 100644 --- a/drivers/gpu/drm/nouveau/nouveau_display.c +++ b/drivers/gpu/drm/nouveau/nouveau_display.c @@ -356,8 +356,6 @@ nouveau_display_hpd_work(struct work_struct *work) pm_runtime_get_sync(drm->dev->dev); drm_helper_hpd_irq_event(drm->dev); - /* enable polling for external displays */ - drm_kms_helper_poll_enable(drm->dev); pm_runtime_mark_last_busy(drm->dev->dev); pm_runtime_put_sync(drm->dev->dev); @@ -412,6 +410,11 @@ nouveau_display_init(struct drm_device *dev) if (ret) return ret; + /* enable connector detection and polling for connectors without HPD + * support + */ + drm_kms_helper_poll_enable(dev); + /* enable hotplug interrupts */ drm_connector_list_iter_begin(dev, &conn_iter); nouveau_for_each_non_mst_connector_iter(connector, &conn_iter) { -- GitLab From 0f966da783a38d94ac46bbdd6e75fed1b9b08c93 Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Wed, 15 Aug 2018 15:00:14 -0400 Subject: [PATCH 0548/2269] drm/nouveau/drm/nouveau: Use pm_runtime_get_noresume() in connector_detect() commit 6833fb1ec120bf078e1a527c573a09d4de286224 upstream. It's true we can't resume the device from poll workers in nouveau_connector_detect(). We can however, prevent the autosuspend timer from elapsing immediately if it hasn't already without risking any sort of deadlock with the runtime suspend/resume operations. So do that instead of entirely avoiding grabbing a power reference. Signed-off-by: Lyude Paul Reviewed-by: Karol Herbst Acked-by: Daniel Vetter Cc: stable@vger.kernel.org Cc: Lukas Wunner Signed-off-by: Ben Skeggs Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/nouveau/nouveau_connector.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.c b/drivers/gpu/drm/nouveau/nouveau_connector.c index 6246c40b9e20f..2c6d196836886 100644 --- a/drivers/gpu/drm/nouveau/nouveau_connector.c +++ b/drivers/gpu/drm/nouveau/nouveau_connector.c @@ -570,12 +570,16 @@ nouveau_connector_detect(struct drm_connector *connector, bool force) nv_connector->edid = NULL; } - /* Outputs are only polled while runtime active, so acquiring a - * runtime PM ref here is unnecessary (and would deadlock upon - * runtime suspend because it waits for polling to finish). + /* Outputs are only polled while runtime active, so resuming the + * device here is unnecessary (and would deadlock upon runtime suspend + * because it waits for polling to finish). We do however, want to + * prevent the autosuspend timer from elapsing during this operation + * if possible. */ - if (!drm_kms_helper_is_poll_worker()) { - ret = pm_runtime_get_sync(connector->dev->dev); + if (drm_kms_helper_is_poll_worker()) { + pm_runtime_get_noresume(dev->dev); + } else { + ret = pm_runtime_get_sync(dev->dev); if (ret < 0 && ret != -EACCES) return conn_status; } @@ -653,10 +657,8 @@ detect_analog: out: - if (!drm_kms_helper_is_poll_worker()) { - pm_runtime_mark_last_busy(connector->dev->dev); - pm_runtime_put_autosuspend(connector->dev->dev); - } + pm_runtime_mark_last_busy(dev->dev); + pm_runtime_put_autosuspend(dev->dev); return conn_status; } -- GitLab From 35e48a086071290f5f0617931f40b5824a311f3d Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Thu, 16 Aug 2018 16:13:13 -0400 Subject: [PATCH 0549/2269] drm/nouveau/drm/nouveau: Prevent handling ACPI HPD events too early commit 79e765ad665da4b8aa7e9c878bd2fef837f6fea5 upstream. On most systems with ACPI hotplugging support, it seems that we always receive a hotplug event once we re-enable EC interrupts even if the GPU hasn't even been resumed yet. This can cause problems since even though we schedule hpd_work to handle connector reprobing for us, hpd_work synchronizes on pm_runtime_get_sync() to wait until the device is ready to perform reprobing. Since runtime suspend/resume callbacks are disabled before the PM core calls ->suspend(), any calls to pm_runtime_get_sync() during this period will grab a runtime PM ref and return immediately with -EACCES. Because we schedule hpd_work from our ACPI HPD handler, and hpd_work synchronizes on pm_runtime_get_sync(), this causes us to launch a connector reprobe immediately even if the GPU isn't actually resumed just yet. This causes various warnings in dmesg and occasionally, also prevents some displays connected to the dedicated GPU from coming back up after suspend. Example: usb 1-4: USB disconnect, device number 14 usb 1-4.1: USB disconnect, device number 15 WARNING: CPU: 0 PID: 838 at drivers/gpu/drm/nouveau/include/nvkm/subdev/i2c.h:170 nouveau_dp_detect+0x17e/0x370 [nouveau] CPU: 0 PID: 838 Comm: kworker/0:6 Not tainted 4.17.14-201.Lyude.bz1477182.V3.fc28.x86_64 #1 Hardware name: LENOVO 20EQS64N00/20EQS64N00, BIOS N1EET77W (1.50 ) 03/28/2018 Workqueue: events nouveau_display_hpd_work [nouveau] RIP: 0010:nouveau_dp_detect+0x17e/0x370 [nouveau] RSP: 0018:ffffa15143933cf0 EFLAGS: 00010293 RAX: 0000000000000000 RBX: ffff8cb4f656c400 RCX: 0000000000000000 RDX: ffffa1514500e4e4 RSI: ffffa1514500e4e4 RDI: 0000000001009002 RBP: ffff8cb4f4a8a800 R08: ffffa15143933cfd R09: ffffa15143933cfc R10: 0000000000000000 R11: 0000000000000000 R12: ffff8cb4fb57a000 R13: ffff8cb4fb57a000 R14: ffff8cb4f4a8f800 R15: ffff8cb4f656c418 FS: 0000000000000000(0000) GS:ffff8cb51f400000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f78ec938000 CR3: 000000073720a003 CR4: 00000000003606f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: ? _cond_resched+0x15/0x30 nouveau_connector_detect+0x2ce/0x520 [nouveau] ? _cond_resched+0x15/0x30 ? ww_mutex_lock+0x12/0x40 drm_helper_probe_detect_ctx+0x8b/0xe0 [drm_kms_helper] drm_helper_hpd_irq_event+0xa8/0x120 [drm_kms_helper] nouveau_display_hpd_work+0x2a/0x60 [nouveau] process_one_work+0x187/0x340 worker_thread+0x2e/0x380 ? pwq_unbound_release_workfn+0xd0/0xd0 kthread+0x112/0x130 ? kthread_create_worker_on_cpu+0x70/0x70 ret_from_fork+0x35/0x40 Code: 4c 8d 44 24 0d b9 00 05 00 00 48 89 ef ba 09 00 00 00 be 01 00 00 00 e8 e1 09 f8 ff 85 c0 0f 85 b2 01 00 00 80 7c 24 0c 03 74 02 <0f> 0b 48 89 ef e8 b8 07 f8 ff f6 05 51 1b c8 ff 02 0f 84 72 ff ---[ end trace 55d811b38fc8e71a ]--- So, to fix this we attempt to grab a runtime PM reference in the ACPI handler itself asynchronously. If the GPU is already awake (it will have normal hotplugging at this point) or runtime PM callbacks are currently disabled on the device, we drop our reference without updating the autosuspend delay. We only schedule connector reprobes when we successfully managed to queue up a resume request with our asynchronous PM ref. This also has the added benefit of preventing redundant connector reprobes from ACPI while the GPU is runtime resumed! Signed-off-by: Lyude Paul Cc: stable@vger.kernel.org Cc: Karol Herbst Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1477182#c41 Signed-off-by: Lyude Paul Signed-off-by: Ben Skeggs Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/nouveau/nouveau_display.c | 26 +++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_display.c b/drivers/gpu/drm/nouveau/nouveau_display.c index 9167c8866e95f..2612702c3a3ff 100644 --- a/drivers/gpu/drm/nouveau/nouveau_display.c +++ b/drivers/gpu/drm/nouveau/nouveau_display.c @@ -378,15 +378,29 @@ nouveau_display_acpi_ntfy(struct notifier_block *nb, unsigned long val, { struct nouveau_drm *drm = container_of(nb, typeof(*drm), acpi_nb); struct acpi_bus_event *info = data; + int ret; if (!strcmp(info->device_class, ACPI_VIDEO_CLASS)) { if (info->type == ACPI_VIDEO_NOTIFY_PROBE) { - /* - * This may be the only indication we receive of a - * connector hotplug on a runtime suspended GPU, - * schedule hpd_work to check. - */ - schedule_work(&drm->hpd_work); + ret = pm_runtime_get(drm->dev->dev); + if (ret == 1 || ret == -EACCES) { + /* If the GPU is already awake, or in a state + * where we can't wake it up, it can handle + * it's own hotplug events. + */ + pm_runtime_put_autosuspend(drm->dev->dev); + } else if (ret == 0) { + /* This may be the only indication we receive + * of a connector hotplug on a runtime + * suspended GPU, schedule hpd_work to check. + */ + NV_DEBUG(drm, "ACPI requested connector reprobe\n"); + schedule_work(&drm->hpd_work); + pm_runtime_put_noidle(drm->dev->dev); + } else { + NV_WARN(drm, "Dropped ACPI reprobe event due to RPM error: %d\n", + ret); + } /* acpi-video should not generate keypresses for this */ return NOTIFY_BAD; -- GitLab From c70d8a488a410f067141a765af26ef2b69a1dcd8 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Wed, 25 Jul 2018 14:29:07 +0200 Subject: [PATCH 0550/2269] drm/vc4: Fix the "no scaling" case on multi-planar YUV formats commit 658d8cbd07dae22ccecf49399e18c609c4e85c53 upstream. When there's no scaling requested ->is_unity should be true no matter the format. Also, when no scaling is requested and we have a multi-planar YUV format, we should leave ->y_scaling[0] to VC4_SCALING_NONE and only set ->x_scaling[0] to VC4_SCALING_PPF. Doing this fixes an hardly visible artifact (seen when using modetest and a rather big overlay plane in YUV420). Fixes: fc04023fafec ("drm/vc4: Add support for YUV planes.") Cc: Signed-off-by: Boris Brezillon Reviewed-by: Eric Anholt Link: https://patchwork.freedesktop.org/patch/msgid/20180725122907.13702-1-boris.brezillon@bootlin.com Signed-off-by: Sean Paul Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/vc4/vc4_plane.c | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c index 17590cb2b80d9..502c7eb708c25 100644 --- a/drivers/gpu/drm/vc4/vc4_plane.c +++ b/drivers/gpu/drm/vc4/vc4_plane.c @@ -329,6 +329,9 @@ static int vc4_plane_setup_clipping_and_scaling(struct drm_plane_state *state) vc4_state->y_scaling[0] = vc4_get_scaling_mode(vc4_state->src_h[0], vc4_state->crtc_h); + vc4_state->is_unity = (vc4_state->x_scaling[0] == VC4_SCALING_NONE && + vc4_state->y_scaling[0] == VC4_SCALING_NONE); + if (num_planes > 1) { vc4_state->is_yuv = true; @@ -344,24 +347,17 @@ static int vc4_plane_setup_clipping_and_scaling(struct drm_plane_state *state) vc4_get_scaling_mode(vc4_state->src_h[1], vc4_state->crtc_h); - /* YUV conversion requires that scaling be enabled, - * even on a plane that's otherwise 1:1. Choose TPZ - * for simplicity. + /* YUV conversion requires that horizontal scaling be enabled, + * even on a plane that's otherwise 1:1. Looks like only PPF + * works in that case, so let's pick that one. */ - if (vc4_state->x_scaling[0] == VC4_SCALING_NONE) - vc4_state->x_scaling[0] = VC4_SCALING_TPZ; - if (vc4_state->y_scaling[0] == VC4_SCALING_NONE) - vc4_state->y_scaling[0] = VC4_SCALING_TPZ; + if (vc4_state->is_unity) + vc4_state->x_scaling[0] = VC4_SCALING_PPF; } else { vc4_state->x_scaling[1] = VC4_SCALING_NONE; vc4_state->y_scaling[1] = VC4_SCALING_NONE; } - vc4_state->is_unity = (vc4_state->x_scaling[0] == VC4_SCALING_NONE && - vc4_state->y_scaling[0] == VC4_SCALING_NONE && - vc4_state->x_scaling[1] == VC4_SCALING_NONE && - vc4_state->y_scaling[1] == VC4_SCALING_NONE); - /* No configuring scaling on the cursor plane, since it gets non-vblank-synced updates, and scaling requires requires LBM changes which have to be vblank-synced. @@ -639,7 +635,10 @@ static int vc4_plane_mode_set(struct drm_plane *plane, vc4_dlist_write(vc4_state, SCALER_CSC2_ITR_R_601_5); } - if (!vc4_state->is_unity) { + if (vc4_state->x_scaling[0] != VC4_SCALING_NONE || + vc4_state->x_scaling[1] != VC4_SCALING_NONE || + vc4_state->y_scaling[0] != VC4_SCALING_NONE || + vc4_state->y_scaling[1] != VC4_SCALING_NONE) { /* LBM Base Address. */ if (vc4_state->y_scaling[0] != VC4_SCALING_NONE || vc4_state->y_scaling[1] != VC4_SCALING_NONE) { -- GitLab From 5575041b09cd00e5dca79f8c6006edfb3012ab25 Mon Sep 17 00:00:00 2001 From: Emil Lundmark Date: Mon, 28 May 2018 16:27:11 +0200 Subject: [PATCH 0551/2269] drm: udl: Destroy framebuffer only if it was initialized MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit fcb74da1eb8edd3a4ef9b9724f88ed709d684227 upstream. This fixes a NULL pointer dereference that can happen if the UDL driver is unloaded before the framebuffer is initialized. This can happen e.g. if the USB device is unplugged right after it was plugged in. As explained by Stéphane Marchesin: It happens when fbdev is disabled (which is the case for Chrome OS). Even though intialization of the fbdev part is optional (it's done in udlfb_create which is the callback for fb_probe()), the teardown isn't optional (udl_driver_unload -> udl_fbdev_cleanup -> udl_fbdev_destroy). Note that udl_fbdev_cleanup *tries* to be conditional (you can see it does if (!udl->fbdev)) but that doesn't work, because udl->fbdev is always set during udl_fbdev_init. Cc: stable@vger.kernel.org Suggested-by: Sean Paul Reviewed-by: Sean Paul Acked-by: Daniel Vetter Signed-off-by: Emil Lundmark Signed-off-by: Sean Paul Link: https://patchwork.freedesktop.org/patch/msgid/20180528142711.142466-1-lndmrk@chromium.org Signed-off-by: Sean Paul Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/udl/udl_fb.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/udl/udl_fb.c b/drivers/gpu/drm/udl/udl_fb.c index 8746eeeec44d9..491f1892b50e7 100644 --- a/drivers/gpu/drm/udl/udl_fb.c +++ b/drivers/gpu/drm/udl/udl_fb.c @@ -432,9 +432,11 @@ static void udl_fbdev_destroy(struct drm_device *dev, { drm_fb_helper_unregister_fbi(&ufbdev->helper); drm_fb_helper_fini(&ufbdev->helper); - drm_framebuffer_unregister_private(&ufbdev->ufb.base); - drm_framebuffer_cleanup(&ufbdev->ufb.base); - drm_gem_object_put_unlocked(&ufbdev->ufb.obj->base); + if (ufbdev->ufb.obj) { + drm_framebuffer_unregister_private(&ufbdev->ufb.base); + drm_framebuffer_cleanup(&ufbdev->ufb.base); + drm_gem_object_put_unlocked(&ufbdev->ufb.obj->base); + } } int udl_fbdev_init(struct drm_device *dev) -- GitLab From 57c806be0160a1f8f8642e92c83043fc36196c0d Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 18 Sep 2018 15:28:24 -0500 Subject: [PATCH 0552/2269] drm/amdgpu: add new polaris pci id commit 30f3984ede683b98a4e8096e200df78bf0609b4f upstream. Add new pci id. Reviewed-by: Rex Zhu Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c | 14 ++++++++------ drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 1 + 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c index fd435a96481c2..e8d9479615c95 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c @@ -715,12 +715,14 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device, break; case CHIP_POLARIS10: if (type == CGS_UCODE_ID_SMU) { - if ((adev->pdev->device == 0x67df) && - ((adev->pdev->revision == 0xe0) || - (adev->pdev->revision == 0xe3) || - (adev->pdev->revision == 0xe4) || - (adev->pdev->revision == 0xe5) || - (adev->pdev->revision == 0xe7) || + if (((adev->pdev->device == 0x67df) && + ((adev->pdev->revision == 0xe0) || + (adev->pdev->revision == 0xe3) || + (adev->pdev->revision == 0xe4) || + (adev->pdev->revision == 0xe5) || + (adev->pdev->revision == 0xe7) || + (adev->pdev->revision == 0xef))) || + ((adev->pdev->device == 0x6fdf) && (adev->pdev->revision == 0xef))) { info->is_kicker = true; strcpy(fw_name, "amdgpu/polaris10_k_smc.bin"); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 0f16986ec5bc4..4dd68d8213538 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -500,6 +500,7 @@ static const struct pci_device_id pciidlist[] = { {0x1002, 0x67CA, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS10}, {0x1002, 0x67CC, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS10}, {0x1002, 0x67CF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS10}, + {0x1002, 0x6FDF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS10}, /* Polaris12 */ {0x1002, 0x6980, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12}, {0x1002, 0x6981, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12}, -- GitLab From 4334a6ae867aa12f01c1755368fd0de4c926ac75 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 16 Aug 2018 15:30:38 -0500 Subject: [PATCH 0553/2269] tty: vt_ioctl: fix potential Spectre v1 commit e97267cb4d1ee01ca0929638ec0fcbb0904f903d upstream. vsa.console is indirectly controlled by user-space, hence leading to a potential exploitation of the Spectre variant 1 vulnerability. This issue was detected with the help of Smatch: drivers/tty/vt/vt_ioctl.c:711 vt_ioctl() warn: potential spectre issue 'vc_cons' [r] Fix this by sanitizing vsa.console before using it to index vc_cons Notice that given that speculation windows are large, the policy is to kill the speculation on the first load and not worry if it can be completed with a dependent load/store [1]. [1] https://marc.info/?l=linux-kernel&m=152449131114778&w=2 Cc: stable@vger.kernel.org Signed-off-by: Gustavo A. R. Silva Reviewed-by: Alan Cox Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/vt_ioctl.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/tty/vt/vt_ioctl.c b/drivers/tty/vt/vt_ioctl.c index 2d2b420598b23..7b34b0ddbf0e0 100644 --- a/drivers/tty/vt/vt_ioctl.c +++ b/drivers/tty/vt/vt_ioctl.c @@ -32,6 +32,8 @@ #include #include +#include + #include #include #include @@ -700,6 +702,8 @@ int vt_ioctl(struct tty_struct *tty, if (vsa.console == 0 || vsa.console > MAX_NR_CONSOLES) ret = -ENXIO; else { + vsa.console = array_index_nospec(vsa.console, + MAX_NR_CONSOLES + 1); vsa.console--; console_lock(); ret = vc_allocate(vsa.console); -- GitLab From 31343d27f18f19ad1bb1c5b4475b86483598cd04 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 27 Aug 2018 01:47:09 -0400 Subject: [PATCH 0554/2269] ext4: check to make sure the rename(2)'s destination is not freed commit b50282f3241acee880514212d88b6049fb5039c8 upstream. If the destination of the rename(2) system call exists, the inode's link count (i_nlinks) must be non-zero. If it is, the inode can end up on the orphan list prematurely, leading to all sorts of hilarity, including a use-after-free. https://bugzilla.kernel.org/show_bug.cgi?id=200931 Signed-off-by: Theodore Ts'o Reported-by: Wen Xu Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/namei.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 1db39e12e02b2..eb0d8ee39827a 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -3514,6 +3514,12 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, int credits; u8 old_file_type; + if (new.inode && new.inode->i_nlink == 0) { + EXT4_ERROR_INODE(new.inode, + "target of rename is already freed"); + return -EFSCORRUPTED; + } + if ((ext4_test_inode_flag(new_dir, EXT4_INODE_PROJINHERIT)) && (!projid_eq(EXT4_I(new_dir)->i_projid, EXT4_I(old_dentry->d_inode)->i_projid))) -- GitLab From 3f9eafe8772f9a86cab2e431a44bce775f6b7ba5 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 27 Aug 2018 09:22:45 -0400 Subject: [PATCH 0555/2269] ext4: avoid divide by zero fault when deleting corrupted inline directories commit 4d982e25d0bdc83d8c64e66fdeca0b89240b3b85 upstream. A specially crafted file system can trick empty_inline_dir() into reading past the last valid entry in a inline directory, and then run into the end of xattr marker. This will trigger a divide by zero fault. Fix this by using the size of the inline directory instead of dir->i_size. Also clean up error reporting in __ext4_check_dir_entry so that the message is clearer and more understandable --- and avoids the division by zero trap if the size passed in is zero. (I'm not sure why we coded it that way in the first place; printing offset % size is actually more confusing and less useful.) https://bugzilla.kernel.org/show_bug.cgi?id=200933 Signed-off-by: Theodore Ts'o Reported-by: Wen Xu Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/dir.c | 20 +++++++++----------- fs/ext4/inline.c | 4 +++- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index d5babc9f222bf..dc676714454aa 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c @@ -75,7 +75,7 @@ int __ext4_check_dir_entry(const char *function, unsigned int line, else if (unlikely(rlen < EXT4_DIR_REC_LEN(de->name_len))) error_msg = "rec_len is too small for name_len"; else if (unlikely(((char *) de - buf) + rlen > size)) - error_msg = "directory entry across range"; + error_msg = "directory entry overrun"; else if (unlikely(le32_to_cpu(de->inode) > le32_to_cpu(EXT4_SB(dir->i_sb)->s_es->s_inodes_count))) error_msg = "inode out of bounds"; @@ -84,18 +84,16 @@ int __ext4_check_dir_entry(const char *function, unsigned int line, if (filp) ext4_error_file(filp, function, line, bh->b_blocknr, - "bad entry in directory: %s - offset=%u(%u), " - "inode=%u, rec_len=%d, name_len=%d", - error_msg, (unsigned) (offset % size), - offset, le32_to_cpu(de->inode), - rlen, de->name_len); + "bad entry in directory: %s - offset=%u, " + "inode=%u, rec_len=%d, name_len=%d, size=%d", + error_msg, offset, le32_to_cpu(de->inode), + rlen, de->name_len, size); else ext4_error_inode(dir, function, line, bh->b_blocknr, - "bad entry in directory: %s - offset=%u(%u), " - "inode=%u, rec_len=%d, name_len=%d", - error_msg, (unsigned) (offset % size), - offset, le32_to_cpu(de->inode), - rlen, de->name_len); + "bad entry in directory: %s - offset=%u, " + "inode=%u, rec_len=%d, name_len=%d, size=%d", + error_msg, offset, le32_to_cpu(de->inode), + rlen, de->name_len, size); return 1; } diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index b549cfd2d7d3f..4e1d62ba0703f 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -1759,6 +1759,7 @@ bool empty_inline_dir(struct inode *dir, int *has_inline_data) { int err, inline_size; struct ext4_iloc iloc; + size_t inline_len; void *inline_pos; unsigned int offset; struct ext4_dir_entry_2 *de; @@ -1786,8 +1787,9 @@ bool empty_inline_dir(struct inode *dir, int *has_inline_data) goto out; } + inline_len = ext4_get_inline_size(dir); offset = EXT4_INLINE_DOTDOT_SIZE; - while (offset < dir->i_size) { + while (offset < inline_len) { de = ext4_get_inline_entry(dir, &iloc, offset, &inline_pos, &inline_size); if (ext4_check_dir_entry(dir, NULL, de, -- GitLab From 779af00b3fa38d040a4798afdb2c57cc64ba346e Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sat, 1 Sep 2018 12:45:04 -0400 Subject: [PATCH 0556/2269] ext4: avoid arithemetic overflow that can trigger a BUG commit bcd8e91f98c156f4b1ebcfacae675f9cfd962441 upstream. A maliciously crafted file system can cause an overflow when the results of a 64-bit calculation is stored into a 32-bit length parameter. https://bugzilla.kernel.org/show_bug.cgi?id=200623 Signed-off-by: Theodore Ts'o Reported-by: Wen Xu Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/ext4.h | 3 +++ fs/ext4/inode.c | 9 +++++++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 0abb30d19fa18..c96778c39885b 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -714,6 +714,9 @@ struct fsxattr { /* Max physical block we can address w/o extents */ #define EXT4_MAX_BLOCK_FILE_PHYS 0xFFFFFFFF +/* Max logical block we can support */ +#define EXT4_MAX_LOGICAL_BLOCK 0xFFFFFFFF + /* * Structure of an inode on the disk */ diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index f9baa59de0e20..9808df52ceca6 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -3407,11 +3407,16 @@ static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length, { struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); unsigned int blkbits = inode->i_blkbits; - unsigned long first_block = offset >> blkbits; - unsigned long last_block = (offset + length - 1) >> blkbits; + unsigned long first_block, last_block; struct ext4_map_blocks map; int ret; + if ((offset >> blkbits) > EXT4_MAX_LOGICAL_BLOCK) + return -EINVAL; + first_block = offset >> blkbits; + last_block = min_t(loff_t, (offset + length - 1) >> blkbits, + EXT4_MAX_LOGICAL_BLOCK); + if (WARN_ON_ONCE(ext4_has_inline_data(inode))) return -ERANGE; -- GitLab From 22654a3b4a301d77365007f2fe913676afe6207e Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sat, 1 Sep 2018 14:42:14 -0400 Subject: [PATCH 0557/2269] ext4: recalucate superblock checksum after updating free blocks/inodes commit 4274f516d4bc50648a4d97e4f67ecbd7b65cde4a upstream. When mounting the superblock, ext4_fill_super() calculates the free blocks and free inodes and stores them in the superblock. It's not strictly necessary, since we don't use them any more, but it's nice to keep them roughly aligned to reality. Since it's not critical for file system correctness, the code doesn't call ext4_commit_super(). The problem is that it's in ext4_commit_super() that we recalculate the superblock checksum. So if we're not going to call ext4_commit_super(), we need to call ext4_superblock_csum_set() to make sure the superblock checksum is consistent. Most of the time, this doesn't matter, since we end up calling ext4_commit_super() very soon thereafter, and definitely by the time the file system is unmounted. However, it doesn't work in this sequence: mke2fs -Fq -t ext4 /dev/vdc 128M mount /dev/vdc /vdc cp xfstests/git-versions /vdc godown /vdc umount /vdc mount /dev/vdc tune2fs -l /dev/vdc With this commit, the "tune2fs -l" no longer fails. Reported-by: Chengguang Xu Signed-off-by: Theodore Ts'o Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/super.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index b4fb085261fd9..b1f78e211abd3 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -4311,11 +4311,13 @@ no_journal: block = ext4_count_free_clusters(sb); ext4_free_blocks_count_set(sbi->s_es, EXT4_C2B(sbi, block)); + ext4_superblock_csum_set(sb); err = percpu_counter_init(&sbi->s_freeclusters_counter, block, GFP_KERNEL); if (!err) { unsigned long freei = ext4_count_free_inodes(sb); sbi->s_es->s_free_inodes_count = cpu_to_le32(freei); + ext4_superblock_csum_set(sb); err = percpu_counter_init(&sbi->s_freeinodes_counter, freei, GFP_KERNEL); } -- GitLab From 6a4d7b584d3831bd4d01850d5afe97fa64b363e8 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 3 Sep 2018 22:19:43 -0400 Subject: [PATCH 0558/2269] ext4: fix online resize's handling of a too-small final block group commit f0a459dec5495a3580f8d784555e6f8f3bf7f263 upstream. Avoid growing the file system to an extent so that the last block group is too small to hold all of the metadata that must be stored in the block group. This problem can be triggered with the following reproducer: umount /mnt mke2fs -F -m0 -b 4096 -t ext4 -O resize_inode,^has_journal \ -E resize=1073741824 /tmp/foo.img 128M mount /tmp/foo.img /mnt truncate --size 1708M /tmp/foo.img resize2fs /dev/loop0 295400 umount /mnt e2fsck -fy /tmp/foo.img Reported-by: Torsten Hilbrich Signed-off-by: Theodore Ts'o Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/resize.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 823c0b82dfeb0..41fb03718e879 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -1956,6 +1956,26 @@ retry: } } + /* + * Make sure the last group has enough space so that it's + * guaranteed to have enough space for all metadata blocks + * that it might need to hold. (We might not need to store + * the inode table blocks in the last block group, but there + * will be cases where this might be needed.) + */ + if ((ext4_group_first_block_no(sb, n_group) + + ext4_group_overhead_blocks(sb, n_group) + 2 + + sbi->s_itb_per_group + sbi->s_cluster_ratio) >= n_blocks_count) { + n_blocks_count = ext4_group_first_block_no(sb, n_group); + n_group--; + n_blocks_count_retry = 0; + if (resize_inode) { + iput(resize_inode); + resize_inode = NULL; + } + goto retry; + } + /* extend the last group */ if (n_group == o_group) add = n_blocks_count - o_blocks_count; -- GitLab From ba48e66e3f53b30dd6f5785eaa658b9919d3e07c Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 3 Sep 2018 22:25:01 -0400 Subject: [PATCH 0559/2269] ext4: fix online resizing for bigalloc file systems with a 1k block size commit 5f8c10936fab2b69a487400f2872902e597dd320 upstream. An online resize of a file system with the bigalloc feature enabled and a 1k block size would be refused since ext4_resize_begin() did not understand s_first_data_block is 0 for all bigalloc file systems, even when the block size is 1k. Signed-off-by: Theodore Ts'o Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/resize.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 41fb03718e879..e344e606c054c 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -19,6 +19,7 @@ int ext4_resize_begin(struct super_block *sb) { + struct ext4_sb_info *sbi = EXT4_SB(sb); int ret = 0; if (!capable(CAP_SYS_RESOURCE)) @@ -29,7 +30,7 @@ int ext4_resize_begin(struct super_block *sb) * because the user tools have no way of handling this. Probably a * bad time to do it anyways. */ - if (EXT4_SB(sb)->s_sbh->b_blocknr != + if (EXT4_B2C(sbi, sbi->s_sbh->b_blocknr) != le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block)) { ext4_warning(sb, "won't resize using backup superblock at %llu", (unsigned long long)EXT4_SB(sb)->s_sbh->b_blocknr); -- GitLab From 3dc006d212e38f9efc821352d261379a38ae9eb3 Mon Sep 17 00:00:00 2001 From: Li Dongyang Date: Sat, 15 Sep 2018 17:11:25 -0400 Subject: [PATCH 0560/2269] ext4: don't mark mmp buffer head dirty commit fe18d649891d813964d3aaeebad873f281627fbc upstream. Marking mmp bh dirty before writing it will make writeback pick up mmp block later and submit a write, we don't want the duplicate write as kmmpd thread should have full control of reading and writing the mmp block. Another reason is we will also have random I/O error on the writeback request when blk integrity is enabled, because kmmpd could modify the content of the mmp block(e.g. setting new seq and time) while the mmp block is under I/O requested by writeback. Signed-off-by: Li Dongyang Signed-off-by: Theodore Ts'o Reviewed-by: Andreas Dilger Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/mmp.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c index 638ad47434771..38e6a846aac16 100644 --- a/fs/ext4/mmp.c +++ b/fs/ext4/mmp.c @@ -49,7 +49,6 @@ static int write_mmp_block(struct super_block *sb, struct buffer_head *bh) */ sb_start_write(sb); ext4_mmp_csum_set(sb, mmp); - mark_buffer_dirty(bh); lock_buffer(bh); bh->b_end_io = end_buffer_write_sync; get_bh(bh); -- GitLab From 0d09307bc242e1692d1b8fb1cbfde688de6f8cd0 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 15 Sep 2018 14:28:26 -0400 Subject: [PATCH 0561/2269] ext4: show test_dummy_encryption mount option in /proc/mounts commit 338affb548c243d2af25b1ca628e67819350de6b upstream. When in effect, add "test_dummy_encryption" to _ext4_show_options() so that it is shown in /proc/mounts and other relevant procfs files. Signed-off-by: Eric Biggers Signed-off-by: Theodore Ts'o Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/super.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index b1f78e211abd3..9dbd27f7b7782 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -2085,6 +2085,8 @@ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb, SEQ_OPTS_PRINT("max_dir_size_kb=%u", sbi->s_max_dir_size_kb); if (test_opt(sb, DATA_ERR_ABORT)) SEQ_OPTS_PUTS("data_err=abort"); + if (DUMMY_ENCRYPTION_ENABLED(sbi)) + SEQ_OPTS_PUTS("test_dummy_encryption"); ext4_show_quota_options(seq, sb); return 0; -- GitLab From fe87d18b14717d25e3e81a7f36e605f5f1f92c47 Mon Sep 17 00:00:00 2001 From: Steve Muckle Date: Fri, 31 Aug 2018 15:42:17 -0700 Subject: [PATCH 0562/2269] sched/fair: Fix vruntime_normalized() for remote non-migration wakeup commit d0cdb3ce8834332d918fc9c8ff74f8a169ec9abe upstream. When a task which previously ran on a given CPU is remotely queued to wake up on that same CPU, there is a period where the task's state is TASK_WAKING and its vruntime is not normalized. This is not accounted for in vruntime_normalized() which will cause an error in the task's vruntime if it is switched from the fair class during this time. For example if it is boosted to RT priority via rt_mutex_setprio(), rq->min_vruntime will not be subtracted from the task's vruntime but it will be added again when the task returns to the fair class. The task's vruntime will have been erroneously doubled and the effective priority of the task will be reduced. Note this will also lead to inflation of all vruntimes since the doubled vruntime value will become the rq's min_vruntime when other tasks leave the rq. This leads to repeated doubling of the vruntime and priority penalty. Fix this by recognizing a WAKING task's vruntime as normalized only if sched_remote_wakeup is true. This indicates a migration, in which case the vruntime would have been normalized in migrate_task_rq_fair(). Based on a similar patch from John Dias . Suggested-by: Peter Zijlstra Tested-by: Dietmar Eggemann Signed-off-by: Steve Muckle Signed-off-by: Peter Zijlstra (Intel) Cc: Chris Redpath Cc: John Dias Cc: Linus Torvalds Cc: Miguel de Dios Cc: Morten Rasmussen Cc: Patrick Bellasi Cc: Paul Turner Cc: Quentin Perret Cc: Thomas Gleixner Cc: Todd Kjos Cc: kernel-team@android.com Fixes: b5179ac70de8 ("sched/fair: Prepare to fix fairness problems on migration") Link: http://lkml.kernel.org/r/20180831224217.169476-1-smuckle@google.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- kernel/sched/fair.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index a5e20ceb0b5a2..b2d699f283046 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -9137,7 +9137,8 @@ static inline bool vruntime_normalized(struct task_struct *p) * - A task which has been woken up by try_to_wake_up() and * waiting for actually being woken up by sched_ttwu_pending(). */ - if (!se->sum_exec_runtime || p->state == TASK_WAKING) + if (!se->sum_exec_runtime || + (p->state == TASK_WAKING && p->sched_remote_wakeup)) return true; return false; -- GitLab From 23ac2a32b2f8cb78e086d3902f1e7bf488590b4a Mon Sep 17 00:00:00 2001 From: Zachary Zhang Date: Fri, 29 Jun 2018 11:16:19 +0200 Subject: [PATCH 0563/2269] PCI: aardvark: Size bridges before resources allocation commit 91a2968e245d6ba616db37001fa1a043078b1a65 upstream. The PCIE I/O and MEM resource allocation mechanism is that root bus goes through the following steps: 1. Check PCI bridges' range and computes I/O and Mem base/limits. 2. Sort all subordinate devices I/O and MEM resource requirements and allocate the resources and writes/updates subordinate devices' requirements to PCI bridges I/O and Mem MEM/limits registers. Currently, PCI Aardvark driver only handles the second step and lacks the first step, so there is an I/O and MEM resource allocation failure when using a PCI switch. This commit fixes that by sizing bridges before doing the resource allocation. Fixes: 8c39d710363c1 ("PCI: aardvark: Add Aardvark PCI host controller driver") Signed-off-by: Zachary Zhang [Thomas: edit commit log.] Signed-off-by: Thomas Petazzoni Signed-off-by: Lorenzo Pieralisi Cc: Signed-off-by: Greg Kroah-Hartman --- drivers/pci/host/pci-aardvark.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/pci/host/pci-aardvark.c b/drivers/pci/host/pci-aardvark.c index 9bfc22b5da4b7..5f3048e75becb 100644 --- a/drivers/pci/host/pci-aardvark.c +++ b/drivers/pci/host/pci-aardvark.c @@ -954,6 +954,7 @@ static int advk_pcie_probe(struct platform_device *pdev) bus = bridge->bus; + pci_bus_size_bridges(bus); pci_bus_assign_resources(bus); list_for_each_entry(child, &bus->children, node) -- GitLab From 956fa50745b3d01dd37f48bd601f16222742648b Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Thu, 13 Sep 2018 13:18:52 -0700 Subject: [PATCH 0564/2269] vmw_balloon: include asm/io.h commit a3b92ee6fc171d7c9d9b6b829b7fef169210440c upstream. Fix a build error due to missing virt_to_phys() Reported-by: kbuild test robot Fixes: f0a1bf29d821b ("vmw_balloon: fix inflation with batching") Cc: stable@vger.kernel.org Cc: Xavier Deguillard Signed-off-by: Nadav Amit Signed-off-by: Greg Kroah-Hartman --- drivers/misc/vmw_balloon.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/misc/vmw_balloon.c b/drivers/misc/vmw_balloon.c index 5f8b583c6e414..f74166aa9a0dc 100644 --- a/drivers/misc/vmw_balloon.c +++ b/drivers/misc/vmw_balloon.c @@ -45,6 +45,7 @@ #include #include #include +#include #include MODULE_AUTHOR("VMware, Inc."); -- GitLab From 3a411a04be4e57de1774f680d3d6d0d48cc16c10 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Fri, 31 Aug 2018 07:15:56 -0700 Subject: [PATCH 0565/2269] iw_cxgb4: only allow 1 flush on user qps commit 308aa2b8f7b7db3332a7d41099fd37851fb793b2 upstream. Once the qp has been flushed, it cannot be flushed again. The user qp flush logic wasn't enforcing it however. The bug can cause touch-after-free crashes like: Unable to handle kernel paging request for data at address 0x000001ec Faulting instruction address: 0xc008000016069100 Oops: Kernel access of bad area, sig: 11 [#1] ... NIP [c008000016069100] flush_qp+0x80/0x480 [iw_cxgb4] LR [c00800001606cd6c] c4iw_modify_qp+0x71c/0x11d0 [iw_cxgb4] Call Trace: [c00800001606cd6c] c4iw_modify_qp+0x71c/0x11d0 [iw_cxgb4] [c00800001606e868] c4iw_ib_modify_qp+0x118/0x200 [iw_cxgb4] [c0080000119eae80] ib_security_modify_qp+0xd0/0x3d0 [ib_core] [c0080000119c4e24] ib_modify_qp+0xc4/0x2c0 [ib_core] [c008000011df0284] iwcm_modify_qp_err+0x44/0x70 [iw_cm] [c008000011df0fec] destroy_cm_id+0xcc/0x370 [iw_cm] [c008000011ed4358] rdma_destroy_id+0x3c8/0x520 [rdma_cm] [c0080000134b0540] ucma_close+0x90/0x1b0 [rdma_ucm] [c000000000444da4] __fput+0xe4/0x2f0 So fix flush_qp() to only flush the wq once. Cc: stable@vger.kernel.org Signed-off-by: Steve Wise Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/cxgb4/qp.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index a8a8f65a1e51b..24952af51a546 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -1395,6 +1395,12 @@ static void flush_qp(struct c4iw_qp *qhp) schp = to_c4iw_cq(qhp->ibqp.send_cq); if (qhp->ibqp.uobject) { + + /* for user qps, qhp->wq.flushed is protected by qhp->mutex */ + if (qhp->wq.flushed) + return; + + qhp->wq.flushed = 1; t4_set_wq_in_error(&qhp->wq); t4_set_cq_in_error(&rchp->cq); spin_lock_irqsave(&rchp->comp_handler_lock, flag); -- GitLab From ed5e9462f6617a85945353abe5c4b94a81ee01a1 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 30 Aug 2018 17:05:19 +0200 Subject: [PATCH 0566/2269] tick/nohz: Prevent bogus softirq pending warning Commit 0a0e0829f990 ("nohz: Fix missing tick reprogram when interrupting an inline softirq") got backported to stable trees and now causes the NOHZ softirq pending warning to trigger. It's not an upstream issue as the NOHZ update logic has been changed there. The problem is when a softirq disabled section gets interrupted and on return from interrupt the tick/nohz state is evaluated, which then can observe pending soft interrupts. These soft interrupts are legitimately pending because they cannot be processed as long as soft interrupts are disabled and the interrupted code will correctly process them when soft interrupts are reenabled. Add a check for softirqs disabled to the pending check to prevent the warning. Reported-by: Grygorii Strashko Reported-by: John Crispin Signed-off-by: Thomas Gleixner Tested-by: Grygorii Strashko Tested-by: John Crispin Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Anna-Maria Gleixner Cc: stable@vger.kernel.org Fixes: 2d898915ccf4838c ("nohz: Fix missing tick reprogram when interrupting an inline softirq") Acked-by: Frederic Weisbecker Tested-by: Geert Uytterhoeven --- kernel/time/tick-sched.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index ea3c062e7e1c1..a8fa0a896b785 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -908,7 +908,7 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts) if (unlikely(local_softirq_pending() && cpu_online(cpu))) { static int ratelimit; - if (ratelimit < 10 && + if (ratelimit < 10 && !in_softirq() && (local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK)) { pr_warn("NOHZ: local_softirq_pending %02x\n", (unsigned int) local_softirq_pending()); -- GitLab From 97513162cd6d7ed118aa155b4196e88b3e43ff69 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 21 Aug 2018 11:53:03 +0200 Subject: [PATCH 0567/2269] spi: Fix double IDR allocation with DT aliases commit 04b2d03a75652bda989de1595048f0501dc0c0a0 upstream. If the SPI bus number is provided by a DT alias, idr_alloc() is called twice, leading to: WARNING: CPU: 1 PID: 1 at drivers/spi/spi.c:2179 spi_register_controller+0x11c/0x5d8 couldn't get idr Fix this by moving the handling of fixed SPI bus numbers up, before the DT handling code fills in ctlr->bus_num. Fixes: 1a4327fbf4554d5b ("spi: fix IDR collision on systems with both fixed and dynamic SPI bus numbers") Signed-off-by: Geert Uytterhoeven Tested-by: Fabio Estevam Signed-off-by: Mark Brown Cc: Sudip Mukherjee Cc: Kirill Kapranov Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 780a32175ec09..670dbb7a8500a 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -2108,8 +2108,17 @@ int spi_register_controller(struct spi_controller *ctlr) */ if (ctlr->num_chipselect == 0) return -EINVAL; - /* allocate dynamic bus number using Linux idr */ - if ((ctlr->bus_num < 0) && ctlr->dev.of_node) { + if (ctlr->bus_num >= 0) { + /* devices with a fixed bus num must check-in with the num */ + mutex_lock(&board_lock); + id = idr_alloc(&spi_master_idr, ctlr, ctlr->bus_num, + ctlr->bus_num + 1, GFP_KERNEL); + mutex_unlock(&board_lock); + if (WARN(id < 0, "couldn't get idr")) + return id == -ENOSPC ? -EBUSY : id; + ctlr->bus_num = id; + } else if (ctlr->dev.of_node) { + /* allocate dynamic bus number using Linux idr */ id = of_alias_get_id(ctlr->dev.of_node, "spi"); if (id >= 0) { ctlr->bus_num = id; @@ -2135,15 +2144,6 @@ int spi_register_controller(struct spi_controller *ctlr) if (WARN(id < 0, "couldn't get idr")) return id; ctlr->bus_num = id; - } else { - /* devices with a fixed bus num must check-in with the num */ - mutex_lock(&board_lock); - id = idr_alloc(&spi_master_idr, ctlr, ctlr->bus_num, - ctlr->bus_num + 1, GFP_KERNEL); - mutex_unlock(&board_lock); - if (WARN(id < 0, "couldn't get idr")) - return id == -ENOSPC ? -EBUSY : id; - ctlr->bus_num = id; } INIT_LIST_HEAD(&ctlr->queue); spin_lock_init(&ctlr->queue_lock); -- GitLab From 3b65f403d7d0e98484e8f2c9471808f2785a8235 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 29 Sep 2018 03:06:07 -0700 Subject: [PATCH 0568/2269] Linux 4.14.73 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 734722bda1736..89f30ca964b67 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 72 +SUBLEVEL = 73 EXTRAVERSION = NAME = Petit Gorille -- GitLab From dcfd4c956bb5570f99037df864f73bc118e94b54 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 1 Oct 2018 10:46:28 -0700 Subject: [PATCH 0569/2269] Revert "f2fs: use timespec64 for inode timestamps" This reverts commit 98c7ff4c05a06cd81e432ee6b80d3c129cdfa19d. This is to fix build errors for mips. Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 12 ++++++------ fs/f2fs/namei.c | 4 ++-- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 1f7bfe4787aaf..08f2389c52bee 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -688,8 +688,8 @@ struct f2fs_inode_info { int i_extra_isize; /* size of extra space located in i_addr */ kprojid_t i_projid; /* id for project quota */ int i_inline_xattr_size; /* inline xattr size */ - struct timespec64 i_crtime; /* inode creation time */ - struct timespec64 i_disk_time[4];/* inode disk times */ + struct timespec i_crtime; /* inode creation time */ + struct timespec i_disk_time[4]; /* inode disk times */ }; static inline void get_extent_info(struct extent_info *ext, @@ -2572,13 +2572,13 @@ static inline bool f2fs_skip_inode_update(struct inode *inode, int dsync) i_size_read(inode) & ~PAGE_MASK) return false; - if (!timespec64_equal(F2FS_I(inode)->i_disk_time, &inode->i_atime)) + if (!timespec_equal(F2FS_I(inode)->i_disk_time, &inode->i_atime)) return false; - if (!timespec64_equal(F2FS_I(inode)->i_disk_time + 1, &inode->i_ctime)) + if (!timespec_equal(F2FS_I(inode)->i_disk_time + 1, &inode->i_ctime)) return false; - if (!timespec64_equal(F2FS_I(inode)->i_disk_time + 2, &inode->i_mtime)) + if (!timespec_equal(F2FS_I(inode)->i_disk_time + 2, &inode->i_mtime)) return false; - if (!timespec64_equal(F2FS_I(inode)->i_disk_time + 3, + if (!timespec_equal(F2FS_I(inode)->i_disk_time + 3, &F2FS_I(inode)->i_crtime)) return false; diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index ce2e6af3054be..6ba092f635e3f 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -50,8 +50,8 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) inode->i_ino = ino; inode->i_blocks = 0; - inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode); - F2FS_I(inode)->i_crtime = inode->i_mtime; + inode->i_mtime = inode->i_atime = inode->i_ctime = + F2FS_I(inode)->i_crtime = current_time(inode); inode->i_generation = sbi->s_next_generation++; if (S_ISDIR(inode->i_mode)) -- GitLab From c4e7788b7adf28f1562964cca2dc5198089e670e Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Fri, 14 Sep 2018 09:17:55 -0700 Subject: [PATCH 0570/2269] ANDROID: kbuild: disable LTO_CLANG with KASAN Using LTO with KASAN currently results in "inlinable function call in a function with debug info must have a !dbg location" errors for memset and several of the __asan_report_* functions. As combining these options doesn't provide significant benefits, this change disables LTO_CLANG when KASAN is selected. Bug: 113246877 Change-Id: I06cd27d1e9ab74627de4771548453abe3593fcb5 Signed-off-by: Sami Tolvanen --- arch/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/Kconfig b/arch/Kconfig index 6f1ea52b69a8a..3c3a1a0d5cc2a 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -635,6 +635,7 @@ config LTO_CLANG bool "Use clang Link Time Optimization (LTO) (EXPERIMENTAL)" depends on ARCH_SUPPORTS_LTO_CLANG depends on !FTRACE_MCOUNT_RECORD || HAVE_C_RECORDMCOUNT + depends on !KASAN select LTO select THIN_ARCHIVES select LD_DEAD_CODE_DATA_ELIMINATION -- GitLab From ff9973a5da5ec5e65bc3f7912df0bdccddf0480d Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Mon, 1 Oct 2018 16:21:22 -0700 Subject: [PATCH 0571/2269] ANDROID: arm64: kbuild: only specify code model with LTO for modules This fixes CONFIG_LTO_CLANG for LLVM >= r334571 where LLVMgold actually started respecting the code model flag. Bug: 116819139 Test: builds with LLVM r334571, boots on a device, all modules load Change-Id: I1af9d24ed1b789a40ac1e85bd00e176ff8a68aa5 Signed-off-by: Sami Tolvanen --- arch/arm64/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index e64f5f0c5fd7c..d86e4fd9fa150 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -104,7 +104,7 @@ ifeq ($(CONFIG_ARM64_MODULE_CMODEL_LARGE), y) KBUILD_CFLAGS_MODULE += -mcmodel=large ifeq ($(CONFIG_LTO_CLANG), y) # Code model is not stored in LLVM IR, so we need to pass it also to LLVMgold -LDFLAGS += -plugin-opt=-code-model=large +KBUILD_LDFLAGS_MODULE += -plugin-opt=-code-model=large endif endif -- GitLab From 29d3d43ae355d277f720782afa1f6743b7739a99 Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Thu, 19 Jul 2018 18:08:35 -0700 Subject: [PATCH 0572/2269] ANDROID: sdcardfs: Don't use OVERRIDE_CRED macro The macro hides some control flow, making it easier to run into bugs. bug: 111642636 Change-Id: I37ec207c277d97c4e7f1e8381bc9ae743ad78435 Reported-by: Jann Horn Signed-off-by: Daniel Rosenberg --- fs/sdcardfs/file.c | 24 +++-- fs/sdcardfs/inode.c | 198 +++++++++-------------------------------- fs/sdcardfs/lookup.c | 9 +- fs/sdcardfs/sdcardfs.h | 25 ------ 4 files changed, 66 insertions(+), 190 deletions(-) diff --git a/fs/sdcardfs/file.c b/fs/sdcardfs/file.c index 1461254f301dc..271c4c4cb760f 100644 --- a/fs/sdcardfs/file.c +++ b/fs/sdcardfs/file.c @@ -118,7 +118,11 @@ static long sdcardfs_unlocked_ioctl(struct file *file, unsigned int cmd, goto out; /* save current_cred and override it */ - OVERRIDE_CRED(sbi, saved_cred, SDCARDFS_I(file_inode(file))); + saved_cred = override_fsids(sbi, SDCARDFS_I(file_inode(file))->data); + if (!saved_cred) { + err = -ENOMEM; + goto out; + } if (lower_file->f_op->unlocked_ioctl) err = lower_file->f_op->unlocked_ioctl(lower_file, cmd, arg); @@ -127,7 +131,7 @@ static long sdcardfs_unlocked_ioctl(struct file *file, unsigned int cmd, if (!err) sdcardfs_copy_and_fix_attrs(file_inode(file), file_inode(lower_file)); - REVERT_CRED(saved_cred); + revert_fsids(saved_cred); out: return err; } @@ -149,12 +153,16 @@ static long sdcardfs_compat_ioctl(struct file *file, unsigned int cmd, goto out; /* save current_cred and override it */ - OVERRIDE_CRED(sbi, saved_cred, SDCARDFS_I(file_inode(file))); + saved_cred = override_fsids(sbi, SDCARDFS_I(file_inode(file))->data); + if (!saved_cred) { + err = -ENOMEM; + goto out; + } if (lower_file->f_op->compat_ioctl) err = lower_file->f_op->compat_ioctl(lower_file, cmd, arg); - REVERT_CRED(saved_cred); + revert_fsids(saved_cred); out: return err; } @@ -241,7 +249,11 @@ static int sdcardfs_open(struct inode *inode, struct file *file) } /* save current_cred and override it */ - OVERRIDE_CRED(sbi, saved_cred, SDCARDFS_I(inode)); + saved_cred = override_fsids(sbi, SDCARDFS_I(inode)->data); + if (!saved_cred) { + err = -ENOMEM; + goto out_err; + } file->private_data = kzalloc(sizeof(struct sdcardfs_file_info), GFP_KERNEL); @@ -271,7 +283,7 @@ static int sdcardfs_open(struct inode *inode, struct file *file) sdcardfs_copy_and_fix_attrs(inode, sdcardfs_lower_inode(inode)); out_revert_cred: - REVERT_CRED(saved_cred); + revert_fsids(saved_cred); out_err: dput(parent); return err; diff --git a/fs/sdcardfs/inode.c b/fs/sdcardfs/inode.c index 2de5a4dffa22c..7fc2f083aaad5 100644 --- a/fs/sdcardfs/inode.c +++ b/fs/sdcardfs/inode.c @@ -22,7 +22,6 @@ #include #include -/* Do not directly use this function. Use OVERRIDE_CRED() instead. */ const struct cred *override_fsids(struct sdcardfs_sb_info *sbi, struct sdcardfs_inode_data *data) { @@ -50,7 +49,6 @@ const struct cred *override_fsids(struct sdcardfs_sb_info *sbi, return old_cred; } -/* Do not directly use this function, use REVERT_CRED() instead. */ void revert_fsids(const struct cred *old_cred) { const struct cred *cur_cred; @@ -78,7 +76,10 @@ static int sdcardfs_create(struct inode *dir, struct dentry *dentry, } /* save current_cred and override it */ - OVERRIDE_CRED(SDCARDFS_SB(dir->i_sb), saved_cred, SDCARDFS_I(dir)); + saved_cred = override_fsids(SDCARDFS_SB(dir->i_sb), + SDCARDFS_I(dir)->data); + if (!saved_cred) + return -ENOMEM; sdcardfs_get_lower_path(dentry, &lower_path); lower_dentry = lower_path.dentry; @@ -115,53 +116,11 @@ out: out_unlock: unlock_dir(lower_parent_dentry); sdcardfs_put_lower_path(dentry, &lower_path); - REVERT_CRED(saved_cred); + revert_fsids(saved_cred); out_eacces: return err; } -#if 0 -static int sdcardfs_link(struct dentry *old_dentry, struct inode *dir, - struct dentry *new_dentry) -{ - struct dentry *lower_old_dentry; - struct dentry *lower_new_dentry; - struct dentry *lower_dir_dentry; - u64 file_size_save; - int err; - struct path lower_old_path, lower_new_path; - - OVERRIDE_CRED(SDCARDFS_SB(dir->i_sb)); - - file_size_save = i_size_read(d_inode(old_dentry)); - sdcardfs_get_lower_path(old_dentry, &lower_old_path); - sdcardfs_get_lower_path(new_dentry, &lower_new_path); - lower_old_dentry = lower_old_path.dentry; - lower_new_dentry = lower_new_path.dentry; - lower_dir_dentry = lock_parent(lower_new_dentry); - - err = vfs_link(lower_old_dentry, d_inode(lower_dir_dentry), - lower_new_dentry, NULL); - if (err || !d_inode(lower_new_dentry)) - goto out; - - err = sdcardfs_interpose(new_dentry, dir->i_sb, &lower_new_path); - if (err) - goto out; - fsstack_copy_attr_times(dir, d_inode(lower_new_dentry)); - fsstack_copy_inode_size(dir, d_inode(lower_new_dentry)); - set_nlink(d_inode(old_dentry), - sdcardfs_lower_inode(d_inode(old_dentry))->i_nlink); - i_size_write(d_inode(new_dentry), file_size_save); -out: - unlock_dir(lower_dir_dentry); - sdcardfs_put_lower_path(old_dentry, &lower_old_path); - sdcardfs_put_lower_path(new_dentry, &lower_new_path); - REVERT_CRED(); - return err; -} -#endif - static int sdcardfs_unlink(struct inode *dir, struct dentry *dentry) { int err; @@ -178,7 +137,10 @@ static int sdcardfs_unlink(struct inode *dir, struct dentry *dentry) } /* save current_cred and override it */ - OVERRIDE_CRED(SDCARDFS_SB(dir->i_sb), saved_cred, SDCARDFS_I(dir)); + saved_cred = override_fsids(SDCARDFS_SB(dir->i_sb), + SDCARDFS_I(dir)->data); + if (!saved_cred) + return -ENOMEM; sdcardfs_get_lower_path(dentry, &lower_path); lower_dentry = lower_path.dentry; @@ -209,43 +171,11 @@ out: unlock_dir(lower_dir_dentry); dput(lower_dentry); sdcardfs_put_lower_path(dentry, &lower_path); - REVERT_CRED(saved_cred); + revert_fsids(saved_cred); out_eacces: return err; } -#if 0 -static int sdcardfs_symlink(struct inode *dir, struct dentry *dentry, - const char *symname) -{ - int err; - struct dentry *lower_dentry; - struct dentry *lower_parent_dentry = NULL; - struct path lower_path; - - OVERRIDE_CRED(SDCARDFS_SB(dir->i_sb)); - - sdcardfs_get_lower_path(dentry, &lower_path); - lower_dentry = lower_path.dentry; - lower_parent_dentry = lock_parent(lower_dentry); - - err = vfs_symlink(d_inode(lower_parent_dentry), lower_dentry, symname); - if (err) - goto out; - err = sdcardfs_interpose(dentry, dir->i_sb, &lower_path); - if (err) - goto out; - fsstack_copy_attr_times(dir, sdcardfs_lower_inode(dir)); - fsstack_copy_inode_size(dir, d_inode(lower_parent_dentry)); - -out: - unlock_dir(lower_parent_dentry); - sdcardfs_put_lower_path(dentry, &lower_path); - REVERT_CRED(); - return err; -} -#endif - static int touch(char *abs_path, mode_t mode) { struct file *filp = filp_open(abs_path, O_RDWR|O_CREAT|O_EXCL|O_NOFOLLOW, mode); @@ -287,7 +217,10 @@ static int sdcardfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode } /* save current_cred and override it */ - OVERRIDE_CRED(SDCARDFS_SB(dir->i_sb), saved_cred, SDCARDFS_I(dir)); + saved_cred = override_fsids(SDCARDFS_SB(dir->i_sb), + SDCARDFS_I(dir)->data); + if (!saved_cred) + return -ENOMEM; /* check disk space */ parent_dentry = dget_parent(dentry); @@ -366,13 +299,21 @@ static int sdcardfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode if (make_nomedia_in_obb || ((pd->perm == PERM_ANDROID) && (qstr_case_eq(&dentry->d_name, &q_data)))) { - REVERT_CRED(saved_cred); - OVERRIDE_CRED(SDCARDFS_SB(dir->i_sb), saved_cred, SDCARDFS_I(d_inode(dentry))); + revert_fsids(saved_cred); + saved_cred = override_fsids(sbi, + SDCARDFS_I(d_inode(dentry))->data); + if (!saved_cred) { + pr_err("sdcardfs: failed to set up .nomedia in %s: %d\n", + lower_path.dentry->d_name.name, + -ENOMEM); + goto out; + } set_fs_pwd(current->fs, &lower_path); touch_err = touch(".nomedia", 0664); if (touch_err) { pr_err("sdcardfs: failed to create .nomedia in %s: %d\n", - lower_path.dentry->d_name.name, touch_err); + lower_path.dentry->d_name.name, + touch_err); goto out; } } @@ -382,7 +323,7 @@ out: out_unlock: sdcardfs_put_lower_path(dentry, &lower_path); out_revert: - REVERT_CRED(saved_cred); + revert_fsids(saved_cred); out_eacces: return err; } @@ -402,7 +343,10 @@ static int sdcardfs_rmdir(struct inode *dir, struct dentry *dentry) } /* save current_cred and override it */ - OVERRIDE_CRED(SDCARDFS_SB(dir->i_sb), saved_cred, SDCARDFS_I(dir)); + saved_cred = override_fsids(SDCARDFS_SB(dir->i_sb), + SDCARDFS_I(dir)->data); + if (!saved_cred) + return -ENOMEM; /* sdcardfs_get_real_lower(): in case of remove an user's obb dentry * the dentry on the original path should be deleted. @@ -427,44 +371,11 @@ static int sdcardfs_rmdir(struct inode *dir, struct dentry *dentry) out: unlock_dir(lower_dir_dentry); sdcardfs_put_real_lower(dentry, &lower_path); - REVERT_CRED(saved_cred); + revert_fsids(saved_cred); out_eacces: return err; } -#if 0 -static int sdcardfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, - dev_t dev) -{ - int err; - struct dentry *lower_dentry; - struct dentry *lower_parent_dentry = NULL; - struct path lower_path; - - OVERRIDE_CRED(SDCARDFS_SB(dir->i_sb)); - - sdcardfs_get_lower_path(dentry, &lower_path); - lower_dentry = lower_path.dentry; - lower_parent_dentry = lock_parent(lower_dentry); - - err = vfs_mknod(d_inode(lower_parent_dentry), lower_dentry, mode, dev); - if (err) - goto out; - - err = sdcardfs_interpose(dentry, dir->i_sb, &lower_path); - if (err) - goto out; - fsstack_copy_attr_times(dir, sdcardfs_lower_inode(dir)); - fsstack_copy_inode_size(dir, d_inode(lower_parent_dentry)); - -out: - unlock_dir(lower_parent_dentry); - sdcardfs_put_lower_path(dentry, &lower_path); - REVERT_CRED(); - return err; -} -#endif - /* * The locking rules in sdcardfs_rename are complex. We could use a simpler * superblock-level name-space lock for renames and copy-ups. @@ -493,7 +404,10 @@ static int sdcardfs_rename(struct inode *old_dir, struct dentry *old_dentry, } /* save current_cred and override it */ - OVERRIDE_CRED(SDCARDFS_SB(old_dir->i_sb), saved_cred, SDCARDFS_I(new_dir)); + saved_cred = override_fsids(SDCARDFS_SB(old_dir->i_sb), + SDCARDFS_I(new_dir)->data); + if (!saved_cred) + return -ENOMEM; sdcardfs_get_real_lower(old_dentry, &lower_old_path); sdcardfs_get_lower_path(new_dentry, &lower_new_path); @@ -540,7 +454,7 @@ out: dput(lower_new_dir_dentry); sdcardfs_put_real_lower(old_dentry, &lower_old_path); sdcardfs_put_lower_path(new_dentry, &lower_new_path); - REVERT_CRED(saved_cred); + revert_fsids(saved_cred); out_eacces: return err; } @@ -660,33 +574,7 @@ static int sdcardfs_permission(struct vfsmount *mnt, struct inode *inode, int ma if (IS_POSIXACL(inode)) pr_warn("%s: This may be undefined behavior...\n", __func__); err = generic_permission(&tmp, mask); - /* XXX - * Original sdcardfs code calls inode_permission(lower_inode,.. ) - * for checking inode permission. But doing such things here seems - * duplicated work, because the functions called after this func, - * such as vfs_create, vfs_unlink, vfs_rename, and etc, - * does exactly same thing, i.e., they calls inode_permission(). - * So we just let they do the things. - * If there are any security hole, just uncomment following if block. - */ -#if 0 - if (!err) { - /* - * Permission check on lower_inode(=EXT4). - * we check it with AID_MEDIA_RW permission - */ - struct inode *lower_inode; - - OVERRIDE_CRED(SDCARDFS_SB(inode->sb)); - - lower_inode = sdcardfs_lower_inode(inode); - err = inode_permission(lower_inode, mask); - - REVERT_CRED(); - } -#endif return err; - } static int sdcardfs_setattr_wrn(struct dentry *dentry, struct iattr *ia) @@ -764,7 +652,10 @@ static int sdcardfs_setattr(struct vfsmount *mnt, struct dentry *dentry, struct goto out_err; /* save current_cred and override it */ - OVERRIDE_CRED(SDCARDFS_SB(dentry->d_sb), saved_cred, SDCARDFS_I(inode)); + saved_cred = override_fsids(SDCARDFS_SB(dentry->d_sb), + SDCARDFS_I(inode)->data); + if (!saved_cred) + return -ENOMEM; sdcardfs_get_lower_path(dentry, &lower_path); lower_dentry = lower_path.dentry; @@ -823,7 +714,7 @@ static int sdcardfs_setattr(struct vfsmount *mnt, struct dentry *dentry, struct out: sdcardfs_put_lower_path(dentry, &lower_path); - REVERT_CRED(saved_cred); + revert_fsids(saved_cred); out_err: return err; } @@ -907,13 +798,6 @@ const struct inode_operations sdcardfs_dir_iops = { .setattr = sdcardfs_setattr_wrn, .setattr2 = sdcardfs_setattr, .getattr = sdcardfs_getattr, - /* XXX Following operations are implemented, - * but FUSE(sdcard) or FAT does not support them - * These methods are *NOT* perfectly tested. - .symlink = sdcardfs_symlink, - .link = sdcardfs_link, - .mknod = sdcardfs_mknod, - */ }; const struct inode_operations sdcardfs_main_iops = { diff --git a/fs/sdcardfs/lookup.c b/fs/sdcardfs/lookup.c index 2ff3051618820..73179ce2591fd 100644 --- a/fs/sdcardfs/lookup.c +++ b/fs/sdcardfs/lookup.c @@ -427,7 +427,12 @@ struct dentry *sdcardfs_lookup(struct inode *dir, struct dentry *dentry, } /* save current_cred and override it */ - OVERRIDE_CRED_PTR(SDCARDFS_SB(dir->i_sb), saved_cred, SDCARDFS_I(dir)); + saved_cred = override_fsids(SDCARDFS_SB(dir->i_sb), + SDCARDFS_I(dir)->data); + if (!saved_cred) { + ret = ERR_PTR(-ENOMEM); + goto out_err; + } sdcardfs_get_lower_path(parent, &lower_parent_path); @@ -458,7 +463,7 @@ struct dentry *sdcardfs_lookup(struct inode *dir, struct dentry *dentry, out: sdcardfs_put_lower_path(parent, &lower_parent_path); - REVERT_CRED(saved_cred); + revert_fsids(saved_cred); out_err: dput(parent); return ret; diff --git a/fs/sdcardfs/sdcardfs.h b/fs/sdcardfs/sdcardfs.h index 826afb5c7e883..ec2290a16d3cb 100644 --- a/fs/sdcardfs/sdcardfs.h +++ b/fs/sdcardfs/sdcardfs.h @@ -88,31 +88,6 @@ (x)->i_mode = ((x)->i_mode & S_IFMT) | 0775;\ } while (0) -/* OVERRIDE_CRED() and REVERT_CRED() - * OVERRIDE_CRED() - * backup original task->cred - * and modifies task->cred->fsuid/fsgid to specified value. - * REVERT_CRED() - * restore original task->cred->fsuid/fsgid. - * These two macro should be used in pair, and OVERRIDE_CRED() should be - * placed at the beginning of a function, right after variable declaration. - */ -#define OVERRIDE_CRED(sdcardfs_sbi, saved_cred, info) \ - do { \ - saved_cred = override_fsids(sdcardfs_sbi, info->data); \ - if (!saved_cred) \ - return -ENOMEM; \ - } while (0) - -#define OVERRIDE_CRED_PTR(sdcardfs_sbi, saved_cred, info) \ - do { \ - saved_cred = override_fsids(sdcardfs_sbi, info->data); \ - if (!saved_cred) \ - return ERR_PTR(-ENOMEM); \ - } while (0) - -#define REVERT_CRED(saved_cred) revert_fsids(saved_cred) - /* Android 5.0 support */ /* Permission mode for a specific node. Controls how file permissions -- GitLab From 1e6b0ff01fc5cb2701734de69a93fb1c42d894b5 Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Fri, 20 Jul 2018 16:11:40 -0700 Subject: [PATCH 0573/2269] ANDROID: sdcardfs: Change current->fs under lock bug: 111641492 Change-Id: I79e9894f94880048edaf0f7cfa2d180f65cbcf3b Reported-by: Jann Horn Signed-off-by: Daniel Rosenberg --- fs/sdcardfs/inode.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/fs/sdcardfs/inode.c b/fs/sdcardfs/inode.c index 7fc2f083aaad5..4e80d62f9eda6 100644 --- a/fs/sdcardfs/inode.c +++ b/fs/sdcardfs/inode.c @@ -96,8 +96,11 @@ static int sdcardfs_create(struct inode *dir, struct dentry *dentry, err = -ENOMEM; goto out_unlock; } + copied_fs->umask = 0; + task_lock(current); current->fs = copied_fs; - current->fs->umask = 0; + task_unlock(current); + err = vfs_create2(lower_dentry_mnt, d_inode(lower_parent_dentry), lower_dentry, mode, want_excl); if (err) goto out; @@ -111,7 +114,9 @@ static int sdcardfs_create(struct inode *dir, struct dentry *dentry, fixup_lower_ownership(dentry, dentry->d_name.name); out: + task_lock(current); current->fs = saved_fs; + task_unlock(current); free_fs_struct(copied_fs); out_unlock: unlock_dir(lower_parent_dentry); @@ -249,8 +254,11 @@ static int sdcardfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode unlock_dir(lower_parent_dentry); goto out_unlock; } + copied_fs->umask = 0; + task_lock(current); current->fs = copied_fs; - current->fs->umask = 0; + task_unlock(current); + err = vfs_mkdir2(lower_mnt, d_inode(lower_parent_dentry), lower_dentry, mode); if (err) { @@ -318,7 +326,10 @@ static int sdcardfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode } } out: + task_lock(current); current->fs = saved_fs; + task_unlock(current); + free_fs_struct(copied_fs); out_unlock: sdcardfs_put_lower_path(dentry, &lower_path); -- GitLab From cf748a3e868e95a954e8e799782746c67f2add0a Mon Sep 17 00:00:00 2001 From: Todd Kjos Date: Wed, 3 Oct 2018 13:50:58 -0700 Subject: [PATCH 0574/2269] Revert "ANDROID: sdcardfs: Change current->fs under lock" This reverts commit 1e6b0ff01fc5cb2701734de69a93fb1c42d894b5. --- fs/sdcardfs/inode.c | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/fs/sdcardfs/inode.c b/fs/sdcardfs/inode.c index 4e80d62f9eda6..7fc2f083aaad5 100644 --- a/fs/sdcardfs/inode.c +++ b/fs/sdcardfs/inode.c @@ -96,11 +96,8 @@ static int sdcardfs_create(struct inode *dir, struct dentry *dentry, err = -ENOMEM; goto out_unlock; } - copied_fs->umask = 0; - task_lock(current); current->fs = copied_fs; - task_unlock(current); - + current->fs->umask = 0; err = vfs_create2(lower_dentry_mnt, d_inode(lower_parent_dentry), lower_dentry, mode, want_excl); if (err) goto out; @@ -114,9 +111,7 @@ static int sdcardfs_create(struct inode *dir, struct dentry *dentry, fixup_lower_ownership(dentry, dentry->d_name.name); out: - task_lock(current); current->fs = saved_fs; - task_unlock(current); free_fs_struct(copied_fs); out_unlock: unlock_dir(lower_parent_dentry); @@ -254,11 +249,8 @@ static int sdcardfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode unlock_dir(lower_parent_dentry); goto out_unlock; } - copied_fs->umask = 0; - task_lock(current); current->fs = copied_fs; - task_unlock(current); - + current->fs->umask = 0; err = vfs_mkdir2(lower_mnt, d_inode(lower_parent_dentry), lower_dentry, mode); if (err) { @@ -326,10 +318,7 @@ static int sdcardfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode } } out: - task_lock(current); current->fs = saved_fs; - task_unlock(current); - free_fs_struct(copied_fs); out_unlock: sdcardfs_put_lower_path(dentry, &lower_path); -- GitLab From 29db2772349dcc71ad3192dc290a87d888ed4b09 Mon Sep 17 00:00:00 2001 From: Stafford Horne Date: Mon, 25 Jun 2018 21:45:37 +0900 Subject: [PATCH 0575/2269] crypto: skcipher - Fix -Wstringop-truncation warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit cefd769fd0192c84d638f66da202459ed8ad63ba ] As of GCC 9.0.0 the build is reporting warnings like: crypto/ablkcipher.c: In function ‘crypto_ablkcipher_report’: crypto/ablkcipher.c:374:2: warning: ‘strncpy’ specified bound 64 equals destination size [-Wstringop-truncation] strncpy(rblkcipher.geniv, alg->cra_ablkcipher.geniv ?: "", ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ sizeof(rblkcipher.geniv)); ~~~~~~~~~~~~~~~~~~~~~~~~~ This means the strnycpy might create a non null terminated string. Fix this by explicitly performing '\0' termination. Cc: Greg Kroah-Hartman Cc: Arnd Bergmann Cc: Max Filippov Cc: Eric Biggers Cc: Nick Desaulniers Signed-off-by: Stafford Horne Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- crypto/ablkcipher.c | 2 ++ crypto/blkcipher.c | 1 + 2 files changed, 3 insertions(+) diff --git a/crypto/ablkcipher.c b/crypto/ablkcipher.c index 4ee7c041bb82b..8882e90e868ea 100644 --- a/crypto/ablkcipher.c +++ b/crypto/ablkcipher.c @@ -368,6 +368,7 @@ static int crypto_ablkcipher_report(struct sk_buff *skb, struct crypto_alg *alg) strncpy(rblkcipher.type, "ablkcipher", sizeof(rblkcipher.type)); strncpy(rblkcipher.geniv, alg->cra_ablkcipher.geniv ?: "", sizeof(rblkcipher.geniv)); + rblkcipher.geniv[sizeof(rblkcipher.geniv) - 1] = '\0'; rblkcipher.blocksize = alg->cra_blocksize; rblkcipher.min_keysize = alg->cra_ablkcipher.min_keysize; @@ -442,6 +443,7 @@ static int crypto_givcipher_report(struct sk_buff *skb, struct crypto_alg *alg) strncpy(rblkcipher.type, "givcipher", sizeof(rblkcipher.type)); strncpy(rblkcipher.geniv, alg->cra_ablkcipher.geniv ?: "", sizeof(rblkcipher.geniv)); + rblkcipher.geniv[sizeof(rblkcipher.geniv) - 1] = '\0'; rblkcipher.blocksize = alg->cra_blocksize; rblkcipher.min_keysize = alg->cra_ablkcipher.min_keysize; diff --git a/crypto/blkcipher.c b/crypto/blkcipher.c index d84c6920ada9f..830821f234d28 100644 --- a/crypto/blkcipher.c +++ b/crypto/blkcipher.c @@ -511,6 +511,7 @@ static int crypto_blkcipher_report(struct sk_buff *skb, struct crypto_alg *alg) strncpy(rblkcipher.type, "blkcipher", sizeof(rblkcipher.type)); strncpy(rblkcipher.geniv, alg->cra_blkcipher.geniv ?: "", sizeof(rblkcipher.geniv)); + rblkcipher.geniv[sizeof(rblkcipher.geniv) - 1] = '\0'; rblkcipher.blocksize = alg->cra_blocksize; rblkcipher.min_keysize = alg->cra_blkcipher.min_keysize; -- GitLab From db12e7d3e9bc593f784ee8602c38a2b5d8d45408 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Mon, 25 Jun 2018 00:05:21 +0900 Subject: [PATCH 0576/2269] iio: adc: ina2xx: avoid kthread_stop() with stale task_struct MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 7d6cd21d82bacab2d1786fe5e989e4815b75d9a3 ] When the buffer is enabled for ina2xx driver, a dedicated kthread is invoked to capture mesurement data. When the buffer is disabled, the kthread is stopped. However if the kthread gets register access errors, it immediately exits and when the malfunctional buffer is disabled, the stale task_struct pointer is accessed as there is no kthread to be stopped. A similar issue in the usbip driver is prevented by kthread_get_run and kthread_stop_put helpers by increasing usage count of the task_struct. This change applies the same solution. Cc: Stefan Brüns Cc: Jonathan Cameron Signed-off-by: Akinobu Mita Fixes: c43a102e67db ("iio: ina2xx: add support for TI INA2xx Power Monitors") Signed-off-by: Jonathan Cameron Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/iio/adc/ina2xx-adc.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/drivers/iio/adc/ina2xx-adc.c b/drivers/iio/adc/ina2xx-adc.c index 59f99b3a180d7..d5b9f831eba77 100644 --- a/drivers/iio/adc/ina2xx-adc.c +++ b/drivers/iio/adc/ina2xx-adc.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include @@ -701,6 +702,7 @@ static int ina2xx_buffer_enable(struct iio_dev *indio_dev) { struct ina2xx_chip_info *chip = iio_priv(indio_dev); unsigned int sampling_us = SAMPLING_PERIOD(chip); + struct task_struct *task; dev_dbg(&indio_dev->dev, "Enabling buffer w/ scan_mask %02x, freq = %d, avg =%u\n", (unsigned int)(*indio_dev->active_scan_mask), @@ -710,11 +712,17 @@ static int ina2xx_buffer_enable(struct iio_dev *indio_dev) dev_dbg(&indio_dev->dev, "Async readout mode: %d\n", chip->allow_async_readout); - chip->task = kthread_run(ina2xx_capture_thread, (void *)indio_dev, - "%s:%d-%uus", indio_dev->name, indio_dev->id, - sampling_us); + task = kthread_create(ina2xx_capture_thread, (void *)indio_dev, + "%s:%d-%uus", indio_dev->name, indio_dev->id, + sampling_us); + if (IS_ERR(task)) + return PTR_ERR(task); + + get_task_struct(task); + wake_up_process(task); + chip->task = task; - return PTR_ERR_OR_ZERO(chip->task); + return 0; } static int ina2xx_buffer_disable(struct iio_dev *indio_dev) @@ -723,6 +731,7 @@ static int ina2xx_buffer_disable(struct iio_dev *indio_dev) if (chip->task) { kthread_stop(chip->task); + put_task_struct(chip->task); chip->task = NULL; } -- GitLab From 4804f372b53f65a9e04348ccfa494f169bbeadd9 Mon Sep 17 00:00:00 2001 From: Matt Ranostay Date: Fri, 8 Jun 2018 23:58:15 -0700 Subject: [PATCH 0577/2269] tsl2550: fix lux1_input error in low light [ Upstream commit ce054546cc2c26891cefa2f284d90d93b52205de ] ADC channel 0 photodiode detects both infrared + visible light, but ADC channel 1 just detects infrared. However, the latter is a bit more sensitive in that range so complete darkness or low light causes a error condition in which the chan0 - chan1 is negative that results in a -EAGAIN. This patch changes the resulting lux1_input sysfs attribute message from "Resource temporarily unavailable" to a user-grokable lux value of 0. Cc: Arnd Bergmann Cc: Greg Kroah-Hartman Signed-off-by: Matt Ranostay Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/misc/tsl2550.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/tsl2550.c b/drivers/misc/tsl2550.c index adf46072cb37d..3fce3b6a36246 100644 --- a/drivers/misc/tsl2550.c +++ b/drivers/misc/tsl2550.c @@ -177,7 +177,7 @@ static int tsl2550_calculate_lux(u8 ch0, u8 ch1) } else lux = 0; else - return -EAGAIN; + return 0; /* LUX range check */ return lux > TSL2550_MAX_LUX ? TSL2550_MAX_LUX : lux; -- GitLab From 127cd4e23323f23ca2fe39abb8310e312af2f88f Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 4 Jul 2018 12:33:34 +0300 Subject: [PATCH 0578/2269] vmci: type promotion bug in qp_host_get_user_memory() [ Upstream commit 7fb2fd4e25fc1fb10dcb30b5519de257cfeae84c ] The problem is that if get_user_pages_fast() fails and returns a negative error code, it gets type promoted to a high positive value and treated as a success. Fixes: 06164d2b72aa ("VMCI: queue pairs implementation.") Signed-off-by: Dan Carpenter Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/misc/vmw_vmci/vmci_queue_pair.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/misc/vmw_vmci/vmci_queue_pair.c b/drivers/misc/vmw_vmci/vmci_queue_pair.c index 8af5c2672f71c..b4570d5c1fe7d 100644 --- a/drivers/misc/vmw_vmci/vmci_queue_pair.c +++ b/drivers/misc/vmw_vmci/vmci_queue_pair.c @@ -755,7 +755,7 @@ static int qp_host_get_user_memory(u64 produce_uva, retval = get_user_pages_fast((uintptr_t) produce_uva, produce_q->kernel_if->num_pages, 1, produce_q->kernel_if->u.h.header_page); - if (retval < produce_q->kernel_if->num_pages) { + if (retval < (int)produce_q->kernel_if->num_pages) { pr_debug("get_user_pages_fast(produce) failed (retval=%d)", retval); qp_release_pages(produce_q->kernel_if->u.h.header_page, @@ -767,7 +767,7 @@ static int qp_host_get_user_memory(u64 produce_uva, retval = get_user_pages_fast((uintptr_t) consume_uva, consume_q->kernel_if->num_pages, 1, consume_q->kernel_if->u.h.header_page); - if (retval < consume_q->kernel_if->num_pages) { + if (retval < (int)consume_q->kernel_if->num_pages) { pr_debug("get_user_pages_fast(consume) failed (retval=%d)", retval); qp_release_pages(consume_q->kernel_if->u.h.header_page, -- GitLab From 6ecd10b1aa2298de70dc25ded68feb1b12555538 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 6 Jul 2018 09:08:01 -0700 Subject: [PATCH 0579/2269] x86/numa_emulation: Fix emulated-to-physical node mapping [ Upstream commit 3b6c62f363a19ce82bf378187ab97c9dc01e3927 ] Without this change the distance table calculation for emulated nodes may use the wrong numa node and report an incorrect distance. Signed-off-by: Dan Williams Cc: David Rientjes Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Wei Yang Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/153089328103.27680.14778434392225818887.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/x86/mm/numa_emulation.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/mm/numa_emulation.c b/arch/x86/mm/numa_emulation.c index 34a2a3bfde9c1..22cbad56acab8 100644 --- a/arch/x86/mm/numa_emulation.c +++ b/arch/x86/mm/numa_emulation.c @@ -61,7 +61,7 @@ static int __init emu_setup_memblk(struct numa_meminfo *ei, eb->nid = nid; if (emu_nid_to_phys[nid] == NUMA_NO_NODE) - emu_nid_to_phys[nid] = nid; + emu_nid_to_phys[nid] = pb->nid; pb->start += size; if (pb->start >= pb->end) { -- GitLab From 2efa4bd5aa9ad8dda2fcc44d56ac8417cd7fc43d Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 2 Jul 2018 14:27:35 +0100 Subject: [PATCH 0580/2269] staging: rts5208: fix missing error check on call to rtsx_write_register [ Upstream commit c5fae4f4fd28189b1062fb8ef7b21fec37cb8b17 ] Currently the check on error return from the call to rtsx_write_register is checking the error status from the previous call. Fix this by adding in the missing assignment of retval. Detected by CoverityScan, CID#709877 Fixes: fa590c222fba ("staging: rts5208: add support for rts5208 and rts5288") Signed-off-by: Colin Ian King Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rts5208/sd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/rts5208/sd.c b/drivers/staging/rts5208/sd.c index 4033a2cf7ac9b..d98d5fe25a17d 100644 --- a/drivers/staging/rts5208/sd.c +++ b/drivers/staging/rts5208/sd.c @@ -5002,7 +5002,7 @@ int sd_execute_write_data(struct scsi_cmnd *srb, struct rtsx_chip *chip) goto sd_execute_write_cmd_failed; } - rtsx_write_register(chip, SD_BYTE_CNT_L, 0xFF, 0x00); + retval = rtsx_write_register(chip, SD_BYTE_CNT_L, 0xFF, 0x00); if (retval != STATUS_SUCCESS) { rtsx_trace(chip); goto sd_execute_write_cmd_failed; -- GitLab From 914b4daa9b6d00b520e136e5a10f5286b4d211f3 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 23 May 2018 15:33:21 +0200 Subject: [PATCH 0581/2269] power: supply: axp288_charger: Fix initial constant_charge_current value [ Upstream commit f2a42595f0865886a2d40524b0e9d15600848670 ] We should look at val which contains the value read from the register, not ret which is always 0 on a successful read. Signed-off-by: Hans de Goede Fixes: eac53b3664f59 ("power: supply: axp288_charger: Drop platform_data dependency") Signed-off-by: Sebastian Reichel Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/power/supply/axp288_charger.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/power/supply/axp288_charger.c b/drivers/power/supply/axp288_charger.c index 9dc7590e07cbe..4d016fbc3527c 100644 --- a/drivers/power/supply/axp288_charger.c +++ b/drivers/power/supply/axp288_charger.c @@ -771,7 +771,7 @@ static int charger_init_hw_regs(struct axp288_chrg_info *info) } /* Determine charge current limit */ - cc = (ret & CHRG_CCCV_CC_MASK) >> CHRG_CCCV_CC_BIT_POS; + cc = (val & CHRG_CCCV_CC_MASK) >> CHRG_CCCV_CC_BIT_POS; cc = (cc * CHRG_CCCV_CC_LSB_RES) + CHRG_CCCV_CC_OFFSET; info->cc = cc; -- GitLab From 0470189cd9b931a4a22e2d179a788abc47f6e7fc Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 3 Jul 2018 12:05:48 +0200 Subject: [PATCH 0582/2269] misc: sram: enable clock before registering regions [ Upstream commit d5b9653dd2bb7a2b1c8cc783c5d3b607bbb6b271 ] Make sure to enable the clock before registering regions and exporting partitions to user space at which point we must be prepared for I/O. Fixes: ee895ccdf776 ("misc: sram: fix enabled clock leak on error path") Signed-off-by: Johan Hovold Reviewed-by: Vladimir Zapolskiy Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/misc/sram.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/misc/sram.c b/drivers/misc/sram.c index 4dd0d868ff888..8daefb81ba294 100644 --- a/drivers/misc/sram.c +++ b/drivers/misc/sram.c @@ -391,23 +391,23 @@ static int sram_probe(struct platform_device *pdev) if (IS_ERR(sram->pool)) return PTR_ERR(sram->pool); - ret = sram_reserve_regions(sram, res); - if (ret) - return ret; - sram->clk = devm_clk_get(sram->dev, NULL); if (IS_ERR(sram->clk)) sram->clk = NULL; else clk_prepare_enable(sram->clk); + ret = sram_reserve_regions(sram, res); + if (ret) + goto err_disable_clk; + platform_set_drvdata(pdev, sram); init_func = of_device_get_match_data(&pdev->dev); if (init_func) { ret = init_func(); if (ret) - goto err_disable_clk; + goto err_free_partitions; } dev_dbg(sram->dev, "SRAM pool: %zu KiB @ 0x%p\n", @@ -415,10 +415,11 @@ static int sram_probe(struct platform_device *pdev) return 0; +err_free_partitions: + sram_free_partitions(sram); err_disable_clk: if (sram->clk) clk_disable_unprepare(sram->clk); - sram_free_partitions(sram); return ret; } -- GitLab From 72bad20e9316bf6203def57256a5ade158585558 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 6 Jul 2018 11:08:36 +0200 Subject: [PATCH 0583/2269] serial: sh-sci: Stop RX FIFO timer during port shutdown [ Upstream commit c5a9262fa8bfed0dddc7466ef10fcd292e2af61b ] The RX FIFO timer may be armed when the port is shut down, hence the timer function may still be called afterwards. Fix this race condition by deleting the timer during port shutdown. Fixes: 039403765e5da3c6 ("serial: sh-sci: SCIFA/B RX FIFO software timeout") Signed-off-by: Geert Uytterhoeven Reviewed-by: Simon Horman Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/sh-sci.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c index 8bc8fe2b75f7a..37dba940d8980 100644 --- a/drivers/tty/serial/sh-sci.c +++ b/drivers/tty/serial/sh-sci.c @@ -2060,6 +2060,8 @@ static void sci_shutdown(struct uart_port *port) } #endif + if (s->rx_trigger > 1 && s->rx_fifo_timeout > 0) + del_timer_sync(&s->rx_fifo_timer); sci_free_irq(s); sci_free_dma(port); } -- GitLab From c1a630680c8b62c3f34f7041fcc10441c6bd5c22 Mon Sep 17 00:00:00 2001 From: Anton Vasilyev Date: Fri, 6 Jul 2018 15:32:53 +0300 Subject: [PATCH 0584/2269] uwb: hwa-rc: fix memory leak at probe [ Upstream commit 11b71782c1d10d9bccc31825cf84291cd7588a1e ] hwarc_probe() allocates memory for hwarc, but does not free it if uwb_rc_add() or hwarc_get_version() fail. Found by Linux Driver Verification project (linuxtesting.org). Signed-off-by: Anton Vasilyev Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/uwb/hwa-rc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/uwb/hwa-rc.c b/drivers/uwb/hwa-rc.c index 9a53912bdfe9f..5d3ba747ae17a 100644 --- a/drivers/uwb/hwa-rc.c +++ b/drivers/uwb/hwa-rc.c @@ -873,6 +873,7 @@ error_get_version: error_rc_add: usb_put_intf(iface); usb_put_dev(hwarc->usb_dev); + kfree(hwarc); error_alloc: uwb_rc_put(uwb_rc); error_rc_alloc: -- GitLab From c7e653a24c18f15be7169f217b5a91a63116d5f9 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Mon, 18 Jun 2018 16:54:32 +0100 Subject: [PATCH 0585/2269] power: vexpress: fix corruption in notifier registration [ Upstream commit 09bebb1adb21ecd04adf7ccb3b06f73e3a851e93 ] Vexpress platforms provide two different restart handlers: SYS_REBOOT that restart the entire system, while DB_RESET only restarts the daughter board containing the CPU. DB_RESET is overridden by SYS_REBOOT if it exists. notifier_chain_register used in register_restart_handler by design relies on notifiers to be registered once only, however vexpress restart notifier can get registered twice. When this happen it corrupts list of notifiers, as result some notifiers can be not called on proper event, traverse on list can be cycled forever, and second unregister can access already freed memory. So far, since this was the only restart handler in the system, no issue was observed even if the same notifier was registered twice. However commit 6c5c0d48b686 ("watchdog: sp805: add restart handler") added support for SP805 restart handlers and since the system under test contains two vexpress restart and two SP805 watchdog instances, it was observed that during the boot traversing the restart handler list looped forever as there's a cycle in that list resulting in boot hang. This patch fixes the issues by ensuring that the notifier is installed only once. Cc: Sebastian Reichel Signed-off-by: Sudeep Holla Fixes: 46c99ac66222 ("power/reset: vexpress: Register with kernel restart handler") Signed-off-by: Sebastian Reichel Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/power/reset/vexpress-poweroff.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/power/reset/vexpress-poweroff.c b/drivers/power/reset/vexpress-poweroff.c index 102f95a094603..e9e749f87517d 100644 --- a/drivers/power/reset/vexpress-poweroff.c +++ b/drivers/power/reset/vexpress-poweroff.c @@ -35,6 +35,7 @@ static void vexpress_reset_do(struct device *dev, const char *what) } static struct device *vexpress_power_off_device; +static atomic_t vexpress_restart_nb_refcnt = ATOMIC_INIT(0); static void vexpress_power_off(void) { @@ -99,10 +100,13 @@ static int _vexpress_register_restart_handler(struct device *dev) int err; vexpress_restart_device = dev; - err = register_restart_handler(&vexpress_restart_nb); - if (err) { - dev_err(dev, "cannot register restart handler (err=%d)\n", err); - return err; + if (atomic_inc_return(&vexpress_restart_nb_refcnt) == 1) { + err = register_restart_handler(&vexpress_restart_nb); + if (err) { + dev_err(dev, "cannot register restart handler (err=%d)\n", err); + atomic_dec(&vexpress_restart_nb_refcnt); + return err; + } } device_create_file(dev, &dev_attr_active); -- GitLab From 834a9ef5f8318fd40741c4369600dd05d60f98d8 Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Wed, 6 Jun 2018 10:18:46 +0800 Subject: [PATCH 0586/2269] iommu/amd: make sure TLB to be flushed before IOVA freed [ Upstream commit 3c120143f584360a13614787e23ae2cdcb5e5ccd ] Although the mapping has already been removed in the page table, it maybe still exist in TLB. Suppose the freed IOVAs is reused by others before the flush operation completed, the new user can not correctly access to its meomory. Signed-off-by: Zhen Lei Fixes: b1516a14657a ('iommu/amd: Implement flush queue') Signed-off-by: Joerg Roedel Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/amd_iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 10190e361a13d..01746e7b90de2 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -2400,9 +2400,9 @@ static void __unmap_single(struct dma_ops_domain *dma_dom, } if (amd_iommu_unmap_flush) { - dma_ops_free_iova(dma_dom, dma_addr, pages); domain_flush_tlb(&dma_dom->domain); domain_flush_complete(&dma_dom->domain); + dma_ops_free_iova(dma_dom, dma_addr, pages); } else { pages = __roundup_pow_of_two(pages); queue_iova(&dma_dom->iovad, dma_addr >> PAGE_SHIFT, pages, 0); -- GitLab From 90de5688afc39cbbd736a5dea6c4e98f0e9e93cb Mon Sep 17 00:00:00 2001 From: Jian-Hong Pan Date: Fri, 25 May 2018 17:54:52 +0800 Subject: [PATCH 0587/2269] Bluetooth: Add a new Realtek 8723DE ID 0bda:b009 [ Upstream commit 45ae68b8cfc25bdbffc11248001c47ab1b76ff6e ] Without this patch we cannot turn on the Bluethooth adapter on HP 14-bs007la. T: Bus=01 Lev=02 Prnt=03 Port=00 Cnt=01 Dev#= 4 Spd=12 MxCh= 0 D: Ver= 1.10 Cls=e0(wlcon) Sub=01 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=0bda ProdID=b009 Rev= 2.00 S: Manufacturer=Realtek S: Product=802.11n WLAN Adapter S: SerialNumber=00e04c000001 C:* #Ifs= 2 Cfg#= 1 Atr=e0 MxPwr=500mA I:* If#= 0 Alt= 0 #EPs= 3 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=81(I) Atr=03(Int.) MxPS= 16 Ivl=1ms E: Ad=02(O) Atr=02(Bulk) MxPS= 64 Ivl=0ms E: Ad=82(I) Atr=02(Bulk) MxPS= 64 Ivl=0ms I:* If#= 1 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 0 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 0 Ivl=1ms I: If#= 1 Alt= 1 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 9 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 9 Ivl=1ms I: If#= 1 Alt= 2 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 17 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 17 Ivl=1ms I: If#= 1 Alt= 3 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 25 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 25 Ivl=1ms I: If#= 1 Alt= 4 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 33 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 33 Ivl=1ms I: If#= 1 Alt= 5 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 49 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 49 Ivl=1ms Signed-off-by: Jian-Hong Pan Signed-off-by: Marcel Holtmann Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/btusb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 819521d5895e1..b8dffe937f4ff 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -375,6 +375,7 @@ static const struct usb_device_id blacklist_table[] = { { USB_DEVICE(0x7392, 0xa611), .driver_info = BTUSB_REALTEK }, /* Additional Realtek 8723DE Bluetooth devices */ + { USB_DEVICE(0x0bda, 0xb009), .driver_info = BTUSB_REALTEK }, { USB_DEVICE(0x2ff8, 0xb011), .driver_info = BTUSB_REALTEK }, /* Additional Realtek 8821AE Bluetooth devices */ -- GitLab From 410534a34315b846a235542f886566a4908d25a4 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 4 Jul 2018 17:02:18 +0200 Subject: [PATCH 0588/2269] USB: serial: kobil_sct: fix modem-status error handling [ Upstream commit a420b5d939ee58f1d950f0ea782834056520aeaa ] Make sure to return -EIO in case of a short modem-status read request. While at it, split the debug message to not include the (zeroed) transfer-buffer content in case of errors. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Johan Hovold Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/kobil_sct.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/usb/serial/kobil_sct.c b/drivers/usb/serial/kobil_sct.c index 3024b9b253605..75181d3afd958 100644 --- a/drivers/usb/serial/kobil_sct.c +++ b/drivers/usb/serial/kobil_sct.c @@ -397,12 +397,20 @@ static int kobil_tiocmget(struct tty_struct *tty) transfer_buffer_length, KOBIL_TIMEOUT); - dev_dbg(&port->dev, "%s - Send get_status_line_state URB returns: %i. Statusline: %02x\n", - __func__, result, transfer_buffer[0]); + dev_dbg(&port->dev, "Send get_status_line_state URB returns: %i\n", + result); + if (result < 1) { + if (result >= 0) + result = -EIO; + goto out_free; + } + + dev_dbg(&port->dev, "Statusline: %02x\n", transfer_buffer[0]); result = 0; if ((transfer_buffer[0] & SUSBCR_GSL_DSR) != 0) result = TIOCM_DSR; +out_free: kfree(transfer_buffer); return result; } -- GitLab From 96e878907c9023acb28e1341a5c3686fee43d60e Mon Sep 17 00:00:00 2001 From: Michael Scott Date: Tue, 19 Jun 2018 16:44:06 -0700 Subject: [PATCH 0589/2269] 6lowpan: iphc: reset mac_header after decompress to fix panic [ Upstream commit 03bc05e1a4972f73b4eb8907aa373369e825c252 ] After decompression of 6lowpan socket data, an IPv6 header is inserted before the existing socket payload. After this, we reset the network_header value of the skb to account for the difference in payload size from prior to decompression + the addition of the IPv6 header. However, we fail to reset the mac_header value. Leaving the mac_header value untouched here, can cause a calculation error in net/packet/af_packet.c packet_rcv() function when an AF_PACKET socket is opened in SOCK_RAW mode for use on a 6lowpan interface. On line 2088, the data pointer is moved backward by the value returned from skb_mac_header(). If skb->data is adjusted so that it is before the skb->head pointer (which can happen when an old value of mac_header is left in place) the kernel generates a panic in net/core/skbuff.c line 1717. This panic can be generated by BLE 6lowpan interfaces (such as bt0) and 802.15.4 interfaces (such as lowpan0) as they both use the same 6lowpan sources for compression and decompression. Signed-off-by: Michael Scott Acked-by: Alexander Aring Acked-by: Jukka Rissanen Signed-off-by: Marcel Holtmann Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/6lowpan/iphc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/6lowpan/iphc.c b/net/6lowpan/iphc.c index 6b1042e216565..52fad5dad9f71 100644 --- a/net/6lowpan/iphc.c +++ b/net/6lowpan/iphc.c @@ -770,6 +770,7 @@ int lowpan_header_decompress(struct sk_buff *skb, const struct net_device *dev, hdr.hop_limit, &hdr.daddr); skb_push(skb, sizeof(hdr)); + skb_reset_mac_header(skb); skb_reset_network_header(skb); skb_copy_to_linear_data(skb, &hdr, sizeof(hdr)); -- GitLab From bc4ce060b30549bb56411e8443a45b55e276420f Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Tue, 12 Jun 2018 16:06:10 +0200 Subject: [PATCH 0590/2269] iommu/msm: Don't call iommu_device_{,un}link from atomic context [ Upstream commit 379521462e4add27f3514da8e4ab1fd7a54fe1c7 ] Fixes the following splat during boot: BUG: sleeping function called from invalid context at kernel/locking/mutex.c:747 in_atomic(): 1, irqs_disabled(): 128, pid: 77, name: kworker/2:1 4 locks held by kworker/2:1/77: #0: (ptrval) ((wq_completion)"events"){+.+.}, at: process_one_work+0x1fc/0x8fc #1: (ptrval) (deferred_probe_work){+.+.}, at: process_one_work+0x1fc/0x8fc #2: (ptrval) (&dev->mutex){....}, at: __device_attach+0x40/0x178 #3: (ptrval) (msm_iommu_lock){....}, at: msm_iommu_add_device+0x28/0xcc irq event stamp: 348 hardirqs last enabled at (347): [] kfree+0xe0/0x3c0 hardirqs last disabled at (348): [] _raw_spin_lock_irqsave+0x2c/0x68 softirqs last enabled at (0): [] copy_process.part.5+0x280/0x1a68 softirqs last disabled at (0): [<00000000>] (null) Preemption disabled at: [<00000000>] (null) CPU: 2 PID: 77 Comm: kworker/2:1 Not tainted 4.17.0-rc5-wt-ath-01075-gaca0516bb4cf #239 Hardware name: Generic DT based system Workqueue: events deferred_probe_work_func [] (unwind_backtrace) from [] (show_stack+0x20/0x24) [] (show_stack) from [] (dump_stack+0xa0/0xcc) [] (dump_stack) from [] (___might_sleep+0x1f8/0x2d4) ath10k_sdio mmc2:0001:1: Direct firmware load for ath10k/QCA9377/hw1.0/board-2.bin failed with error -2 [] (___might_sleep) from [] (__might_sleep+0x70/0xa8) [] (__might_sleep) from [] (__mutex_lock+0x50/0xb28) [] (__mutex_lock) from [] (mutex_lock_nested+0x2c/0x34) ath10k_sdio mmc2:0001:1: board_file api 1 bmi_id N/A crc32 544289f7 [] (mutex_lock_nested) from [] (kernfs_find_and_get_ns+0x30/0x5c) [] (kernfs_find_and_get_ns) from [] (sysfs_add_link_to_group+0x28/0x58) [] (sysfs_add_link_to_group) from [] (iommu_device_link+0x50/0xb4) [] (iommu_device_link) from [] (msm_iommu_add_device+0xa0/0xcc) [] (msm_iommu_add_device) from [] (add_iommu_group+0x3c/0x64) [] (add_iommu_group) from [] (bus_for_each_dev+0x84/0xc4) [] (bus_for_each_dev) from [] (bus_set_iommu+0xd0/0x10c) [] (bus_set_iommu) from [] (msm_iommu_probe+0x5b8/0x66c) [] (msm_iommu_probe) from [] (platform_drv_probe+0x60/0xbc) [] (platform_drv_probe) from [] (driver_probe_device+0x30c/0x4cc) [] (driver_probe_device) from [] (__device_attach_driver+0xac/0x14c) [] (__device_attach_driver) from [] (bus_for_each_drv+0x68/0xc8) [] (bus_for_each_drv) from [] (__device_attach+0xe4/0x178) [] (__device_attach) from [] (device_initial_probe+0x1c/0x20) [] (device_initial_probe) from [] (bus_probe_device+0x98/0xa0) [] (bus_probe_device) from [] (deferred_probe_work_func+0x74/0x198) [] (deferred_probe_work_func) from [] (process_one_work+0x2c4/0x8fc) [] (process_one_work) from [] (worker_thread+0x2c4/0x5cc) [] (worker_thread) from [] (kthread+0x180/0x188) [] (kthread) from [] (ret_from_fork+0x14/0x20) Fixes: 42df43b36163 ("iommu/msm: Make use of iommu_device_register interface") Signed-off-by: Niklas Cassel Reviewed-by: Vivek Gautam Signed-off-by: Joerg Roedel Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/msm_iommu.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c index 04f4d51ffacb1..92c8c83ce38c3 100644 --- a/drivers/iommu/msm_iommu.c +++ b/drivers/iommu/msm_iommu.c @@ -395,20 +395,15 @@ static int msm_iommu_add_device(struct device *dev) struct msm_iommu_dev *iommu; struct iommu_group *group; unsigned long flags; - int ret = 0; spin_lock_irqsave(&msm_iommu_lock, flags); - iommu = find_iommu_for_dev(dev); + spin_unlock_irqrestore(&msm_iommu_lock, flags); + if (iommu) iommu_device_link(&iommu->iommu, dev); else - ret = -ENODEV; - - spin_unlock_irqrestore(&msm_iommu_lock, flags); - - if (ret) - return ret; + return -ENODEV; group = iommu_group_get_for_dev(dev); if (IS_ERR(group)) @@ -425,13 +420,12 @@ static void msm_iommu_remove_device(struct device *dev) unsigned long flags; spin_lock_irqsave(&msm_iommu_lock, flags); - iommu = find_iommu_for_dev(dev); + spin_unlock_irqrestore(&msm_iommu_lock, flags); + if (iommu) iommu_device_unlink(&iommu->iommu, dev); - spin_unlock_irqrestore(&msm_iommu_lock, flags); - iommu_group_remove_device(dev); } -- GitLab From 1117e411a46c4df5230b469dc4641dcc46274191 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Sun, 24 Jun 2018 12:17:43 +0200 Subject: [PATCH 0591/2269] s390/mm: correct allocate_pgste proc_handler callback [ Upstream commit 5bedf8aa03c28cb8dc98bdd32a41b66d8f7d3eaa ] Since proc_dointvec does not perform value range control, proc_dointvec_minmax should be used to limit value range, which is clearly intended here, as the internal representation of the value: unsigned int alloc_pgste:1; In fact it currently works, since we have mm->context.alloc_pgste = page_table_allocate_pgste || ... ... since commit 23fefe119ceb5 ("s390/kvm: avoid global config of vm.alloc_pgste=1") Before that it was mm->context.alloc_pgste = page_table_allocate_pgste; which was broken. That was introduced with commit 0b46e0a3ec0d7 ("s390/kvm: remove delayed reallocation of page tables for KVM"). Fixes: 0b46e0a3ec0d7 ("s390/kvm: remove delayed reallocation of page tables for KVM") Acked-by: Christian Borntraeger Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/s390/mm/pgalloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c index 334b6d103cbd1..29653f713162d 100644 --- a/arch/s390/mm/pgalloc.c +++ b/arch/s390/mm/pgalloc.c @@ -27,7 +27,7 @@ static struct ctl_table page_table_sysctl[] = { .data = &page_table_allocate_pgste, .maxlen = sizeof(int), .mode = S_IRUGO | S_IWUSR, - .proc_handler = proc_dointvec, + .proc_handler = proc_dointvec_minmax, .extra1 = &page_table_allocate_pgste_min, .extra2 = &page_table_allocate_pgste_max, }, -- GitLab From e70f938a605a82c96760ee2afb6f9eac6d0e7088 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Mon, 25 Jun 2018 09:51:48 +0200 Subject: [PATCH 0592/2269] power: remove possible deadlock when unregistering power_supply [ Upstream commit 3ffa6583e24e1ad1abab836d24bfc9d2308074e5 ] If a device gets removed right after having registered a power_supply node, we might enter in a deadlock between the remove call (that has a lock on the parent device) and the deferred register work. Allow the deferred register work to exit without taking the lock when we are in the remove state. Stack trace on a Ubuntu 16.04: [16072.109121] INFO: task kworker/u16:2:1180 blocked for more than 120 seconds. [16072.109127] Not tainted 4.13.0-41-generic #46~16.04.1-Ubuntu [16072.109129] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [16072.109132] kworker/u16:2 D 0 1180 2 0x80000000 [16072.109142] Workqueue: events_power_efficient power_supply_deferred_register_work [16072.109144] Call Trace: [16072.109152] __schedule+0x3d6/0x8b0 [16072.109155] schedule+0x36/0x80 [16072.109158] schedule_preempt_disabled+0xe/0x10 [16072.109161] __mutex_lock.isra.2+0x2ab/0x4e0 [16072.109166] __mutex_lock_slowpath+0x13/0x20 [16072.109168] ? __mutex_lock_slowpath+0x13/0x20 [16072.109171] mutex_lock+0x2f/0x40 [16072.109174] power_supply_deferred_register_work+0x2b/0x50 [16072.109179] process_one_work+0x15b/0x410 [16072.109182] worker_thread+0x4b/0x460 [16072.109186] kthread+0x10c/0x140 [16072.109189] ? process_one_work+0x410/0x410 [16072.109191] ? kthread_create_on_node+0x70/0x70 [16072.109194] ret_from_fork+0x35/0x40 [16072.109199] INFO: task test:2257 blocked for more than 120 seconds. [16072.109202] Not tainted 4.13.0-41-generic #46~16.04.1-Ubuntu [16072.109204] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [16072.109206] test D 0 2257 2256 0x00000004 [16072.109208] Call Trace: [16072.109211] __schedule+0x3d6/0x8b0 [16072.109215] schedule+0x36/0x80 [16072.109218] schedule_timeout+0x1f3/0x360 [16072.109221] ? check_preempt_curr+0x5a/0xa0 [16072.109224] ? ttwu_do_wakeup+0x1e/0x150 [16072.109227] wait_for_completion+0xb4/0x140 [16072.109230] ? wait_for_completion+0xb4/0x140 [16072.109233] ? wake_up_q+0x70/0x70 [16072.109236] flush_work+0x129/0x1e0 [16072.109240] ? worker_detach_from_pool+0xb0/0xb0 [16072.109243] __cancel_work_timer+0x10f/0x190 [16072.109247] ? device_del+0x264/0x310 [16072.109250] ? __wake_up+0x44/0x50 [16072.109253] cancel_delayed_work_sync+0x13/0x20 [16072.109257] power_supply_unregister+0x37/0xb0 [16072.109260] devm_power_supply_release+0x11/0x20 [16072.109263] release_nodes+0x110/0x200 [16072.109266] devres_release_group+0x7c/0xb0 [16072.109274] wacom_remove+0xc2/0x110 [wacom] [16072.109279] hid_device_remove+0x6e/0xd0 [hid] [16072.109284] device_release_driver_internal+0x158/0x210 [16072.109288] device_release_driver+0x12/0x20 [16072.109291] bus_remove_device+0xec/0x160 [16072.109293] device_del+0x1de/0x310 [16072.109298] hid_destroy_device+0x27/0x60 [hid] [16072.109303] usbhid_disconnect+0x51/0x70 [usbhid] [16072.109308] usb_unbind_interface+0x77/0x270 [16072.109311] device_release_driver_internal+0x158/0x210 [16072.109315] device_release_driver+0x12/0x20 [16072.109318] usb_driver_release_interface+0x77/0x80 [16072.109321] proc_ioctl+0x20f/0x250 [16072.109325] usbdev_do_ioctl+0x57f/0x1140 [16072.109327] ? __wake_up+0x44/0x50 [16072.109331] usbdev_ioctl+0xe/0x20 [16072.109336] do_vfs_ioctl+0xa4/0x600 [16072.109339] ? vfs_write+0x15a/0x1b0 [16072.109343] SyS_ioctl+0x79/0x90 [16072.109347] entry_SYSCALL_64_fastpath+0x24/0xab [16072.109349] RIP: 0033:0x7f20da807f47 [16072.109351] RSP: 002b:00007ffc422ae398 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 [16072.109353] RAX: ffffffffffffffda RBX: 00000000010b8560 RCX: 00007f20da807f47 [16072.109355] RDX: 00007ffc422ae3a0 RSI: 00000000c0105512 RDI: 0000000000000009 [16072.109356] RBP: 0000000000000000 R08: 00007ffc422ae3e0 R09: 0000000000000010 [16072.109357] R10: 00000000000000a6 R11: 0000000000000246 R12: 0000000000000000 [16072.109359] R13: 00000000010b8560 R14: 00007ffc422ae2e0 R15: 0000000000000000 Reported-and-tested-by: Richard Hughes Tested-by: Aaron Skomra Signed-off-by: Benjamin Tissoires Fixes: 7f1a57fdd6cb ("power_supply: Fix possible NULL pointer dereference on early uevent") Signed-off-by: Sebastian Reichel Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/power/supply/power_supply_core.c | 11 +++++++++-- include/linux/power_supply.h | 1 + 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/power/supply/power_supply_core.c b/drivers/power/supply/power_supply_core.c index 02c6340ae36fe..3226faebe0a08 100644 --- a/drivers/power/supply/power_supply_core.c +++ b/drivers/power/supply/power_supply_core.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -139,8 +140,13 @@ static void power_supply_deferred_register_work(struct work_struct *work) struct power_supply *psy = container_of(work, struct power_supply, deferred_register_work.work); - if (psy->dev.parent) - mutex_lock(&psy->dev.parent->mutex); + if (psy->dev.parent) { + while (!mutex_trylock(&psy->dev.parent->mutex)) { + if (psy->removing) + return; + msleep(10); + } + } power_supply_changed(psy); @@ -1071,6 +1077,7 @@ EXPORT_SYMBOL_GPL(devm_power_supply_register_no_ws); void power_supply_unregister(struct power_supply *psy) { WARN_ON(atomic_dec_return(&psy->use_cnt)); + psy->removing = true; cancel_work_sync(&psy->changed_work); cancel_delayed_work_sync(&psy->deferred_register_work); sysfs_remove_link(&psy->dev.kobj, "powers"); diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index 79e90b3d32888..4617cf4f6c5b0 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -251,6 +251,7 @@ struct power_supply { spinlock_t changed_lock; bool changed; bool initialized; + bool removing; atomic_t use_cnt; #ifdef CONFIG_THERMAL struct thermal_zone_device *tzd; -- GitLab From e0ccd2360a474d6505c5a5fa7ea5036a2eecd665 Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Mon, 2 Jul 2018 16:26:24 +0800 Subject: [PATCH 0593/2269] md-cluster: clear another node's suspend_area after the copy is finished [ Upstream commit 010228e4a932ca1e8365e3b58c8e1e44c16ff793 ] When one node leaves cluster or stops the resyncing (resync or recovery) array, then other nodes need to call recover_bitmaps to continue the unfinished task. But we need to clear suspend_area later after other nodes copy the resync information to their bitmap (by call bitmap_copy_from_slot). Otherwise, all nodes could write to the suspend_area even the suspend_area is not handled by any node, because area_resyncing returns 0 at the beginning of raid1_write_request. Which means one node could write suspend_area while another node is resyncing the same area, then data could be inconsistent. So let's clear suspend_area later to avoid above issue with the protection of bm lock. Also it is straightforward to clear suspend_area after nodes have copied the resync info to bitmap. Signed-off-by: Guoqing Jiang Reviewed-by: NeilBrown Signed-off-by: Shaohua Li Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/md/md-cluster.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c index 72ce0bccc8655..717aaffc227df 100644 --- a/drivers/md/md-cluster.c +++ b/drivers/md/md-cluster.c @@ -304,15 +304,6 @@ static void recover_bitmaps(struct md_thread *thread) while (cinfo->recovery_map) { slot = fls64((u64)cinfo->recovery_map) - 1; - /* Clear suspend_area associated with the bitmap */ - spin_lock_irq(&cinfo->suspend_lock); - list_for_each_entry_safe(s, tmp, &cinfo->suspend_list, list) - if (slot == s->slot) { - list_del(&s->list); - kfree(s); - } - spin_unlock_irq(&cinfo->suspend_lock); - snprintf(str, 64, "bitmap%04d", slot); bm_lockres = lockres_init(mddev, str, NULL, 1); if (!bm_lockres) { @@ -331,6 +322,16 @@ static void recover_bitmaps(struct md_thread *thread) pr_err("md-cluster: Could not copy data from bitmap %d\n", slot); goto clear_bit; } + + /* Clear suspend_area associated with the bitmap */ + spin_lock_irq(&cinfo->suspend_lock); + list_for_each_entry_safe(s, tmp, &cinfo->suspend_list, list) + if (slot == s->slot) { + list_del(&s->list); + kfree(s); + } + spin_unlock_irq(&cinfo->suspend_lock); + if (hi > 0) { if (lo < mddev->recovery_cp) mddev->recovery_cp = lo; -- GitLab From e862ab6b69c487fa4df545a60203fef1133bda9f Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 4 Jul 2018 12:57:11 +0300 Subject: [PATCH 0594/2269] RDMA/bnxt_re: Fix a couple off by one bugs [ Upstream commit 474e5a86067e5f12c97d1db8b170c7f45b53097a ] The sgid_tbl->tbl[] array is allocated in bnxt_qplib_alloc_sgid_tbl(). It has sgid_tbl->max elements. So the > should be >= to prevent accessing one element beyond the end of the array. Fixes: 1ac5a4047975 ("RDMA/bnxt_re: Add bnxt_re RoCE driver") Signed-off-by: Dan Carpenter Acked-by: Selvin Xavier Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/bnxt_re/qplib_sp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c index 9536de8c5fb8b..124c8915b9eee 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c @@ -156,7 +156,7 @@ int bnxt_qplib_get_sgid(struct bnxt_qplib_res *res, struct bnxt_qplib_sgid_tbl *sgid_tbl, int index, struct bnxt_qplib_gid *gid) { - if (index > sgid_tbl->max) { + if (index >= sgid_tbl->max) { dev_err(&res->pdev->dev, "QPLIB: Index %d exceeded SGID table max (%d)", index, sgid_tbl->max); @@ -361,7 +361,7 @@ int bnxt_qplib_get_pkey(struct bnxt_qplib_res *res, *pkey = 0xFFFF; return 0; } - if (index > pkey_tbl->max) { + if (index >= pkey_tbl->max) { dev_err(&res->pdev->dev, "QPLIB: Index %d exceeded PKEY table max (%d)", index, pkey_tbl->max); -- GitLab From eca8598823596b740c6ef498a7c940a4ced45efa Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sun, 1 Jul 2018 19:36:24 +0300 Subject: [PATCH 0595/2269] RDMA/i40w: Hold read semaphore while looking after VMA [ Upstream commit 5d9a2b0e28759e319a623da33940dbb3ce952b7d ] VMA lookup is supposed to be performed while mmap_sem is held. Fixes: f26c7c83395b ("i40iw: Add 2MB page support") Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/i40iw/i40iw_verbs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index b7961f21b555b..39398dd074d66 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -1408,6 +1408,7 @@ static void i40iw_set_hugetlb_values(u64 addr, struct i40iw_mr *iwmr) struct vm_area_struct *vma; struct hstate *h; + down_read(¤t->mm->mmap_sem); vma = find_vma(current->mm, addr); if (vma && is_vm_hugetlb_page(vma)) { h = hstate_vma(vma); @@ -1416,6 +1417,7 @@ static void i40iw_set_hugetlb_values(u64 addr, struct i40iw_mr *iwmr) iwmr->page_msk = huge_page_mask(h); } } + up_read(¤t->mm->mmap_sem); } /** -- GitLab From 0ca45668ecdb274fa861f0e863d6c1f0250060c8 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 4 Jul 2018 12:32:12 +0300 Subject: [PATCH 0596/2269] IB/core: type promotion bug in rdma_rw_init_one_mr() [ Upstream commit c2d7c8ff89b22ddefb1ac2986c0d48444a667689 ] "nents" is an unsigned int, so if ib_map_mr_sg() returns a negative error code then it's type promoted to a high unsigned int which is treated as success. Fixes: a060b5629ab0 ("IB/core: generic RDMA READ/WRITE API") Signed-off-by: Dan Carpenter Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/core/rw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/core/rw.c b/drivers/infiniband/core/rw.c index 6ca607e8e293c..9939dcfb1b6a5 100644 --- a/drivers/infiniband/core/rw.c +++ b/drivers/infiniband/core/rw.c @@ -87,7 +87,7 @@ static int rdma_rw_init_one_mr(struct ib_qp *qp, u8 port_num, } ret = ib_map_mr_sg(reg->mr, sg, nents, &offset, PAGE_SIZE); - if (ret < nents) { + if (ret < 0 || ret < nents) { ib_mr_pool_put(qp, &qp->rdma_mrs, reg->mr); return -EINVAL; } -- GitLab From 1f94cf4c81cb0c654cb8d0710e4a5ddb60ae7a45 Mon Sep 17 00:00:00 2001 From: Sylwester Nawrocki Date: Tue, 15 May 2018 05:21:45 -0400 Subject: [PATCH 0597/2269] media: exynos4-is: Prevent NULL pointer dereference in __isp_video_try_fmt() [ Upstream commit 7c1b9a5aeed91bef98988ac0fcf38c8c1f4f9a3a ] This patch fixes potential NULL pointer dereference as indicated by the following static checker warning: drivers/media/platform/exynos4-is/fimc-isp-video.c:408 isp_video_try_fmt_mplane() error: NULL dereference inside function '__isp_video_try_fmt(isp, &f->fmt.pix_mp, (0))()'. Fixes: 34947b8aebe3: ("[media] exynos4-is: Add the FIMC-IS ISP capture DMA driver") Reported-by: Dan Carpenter Signed-off-by: Sylwester Nawrocki Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/media/platform/exynos4-is/fimc-isp-video.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/media/platform/exynos4-is/fimc-isp-video.c b/drivers/media/platform/exynos4-is/fimc-isp-video.c index 55ba696b8cf40..a920164f53f1f 100644 --- a/drivers/media/platform/exynos4-is/fimc-isp-video.c +++ b/drivers/media/platform/exynos4-is/fimc-isp-video.c @@ -384,12 +384,17 @@ static void __isp_video_try_fmt(struct fimc_isp *isp, struct v4l2_pix_format_mplane *pixm, const struct fimc_fmt **fmt) { - *fmt = fimc_isp_find_format(&pixm->pixelformat, NULL, 2); + const struct fimc_fmt *__fmt; + + __fmt = fimc_isp_find_format(&pixm->pixelformat, NULL, 2); + + if (fmt) + *fmt = __fmt; pixm->colorspace = V4L2_COLORSPACE_SRGB; pixm->field = V4L2_FIELD_NONE; - pixm->num_planes = (*fmt)->memplanes; - pixm->pixelformat = (*fmt)->fourcc; + pixm->num_planes = __fmt->memplanes; + pixm->pixelformat = __fmt->fourcc; /* * TODO: double check with the docmentation these width/height * constraints are correct. -- GitLab From 333cb98f393ba2edd0e2f8577d823c0ffd9fef4b Mon Sep 17 00:00:00 2001 From: Tarick Bedeir Date: Mon, 2 Jul 2018 14:02:34 -0700 Subject: [PATCH 0598/2269] IB/mlx4: Test port number before querying type. [ Upstream commit f1228867adaf8890826f2b59e4caddb1c5cc2df7 ] rdma_ah_find_type() can reach into ib_device->port_immutable with a potentially out-of-bounds port number, so check that the port number is valid first. Fixes: 44c58487d51a ("IB/core: Define 'ib' and 'roce' rdma_ah_attr types") Signed-off-by: Tarick Bedeir Reviewed-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/mlx4/qp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 9354fec8efe78..e10c3d915e389 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -4014,9 +4014,9 @@ static void to_rdma_ah_attr(struct mlx4_ib_dev *ibdev, u8 port_num = path->sched_queue & 0x40 ? 2 : 1; memset(ah_attr, 0, sizeof(*ah_attr)); - ah_attr->type = rdma_ah_find_type(&ibdev->ib_dev, port_num); if (port_num == 0 || port_num > dev->caps.num_ports) return; + ah_attr->type = rdma_ah_find_type(&ibdev->ib_dev, port_num); if (ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE) rdma_ah_set_sl(ah_attr, ((path->sched_queue >> 3) & 0x7) | -- GitLab From 769ae06e4442d9efc4c2a7494ed0f348b6efb7ba Mon Sep 17 00:00:00 2001 From: Hari Bathini Date: Thu, 28 Jun 2018 10:49:56 +0530 Subject: [PATCH 0599/2269] powerpc/kdump: Handle crashkernel memory reservation failure [ Upstream commit 8950329c4a64c6d3ca0bc34711a1afbd9ce05657 ] Memory reservation for crashkernel could fail if there are holes around kdump kernel offset (128M). Fail gracefully in such cases and print an error message. Signed-off-by: Hari Bathini Tested-by: David Gibson Reviewed-by: Dave Young Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/machine_kexec.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/machine_kexec.c b/arch/powerpc/kernel/machine_kexec.c index 2694d078741d0..9dafd7af39b8f 100644 --- a/arch/powerpc/kernel/machine_kexec.c +++ b/arch/powerpc/kernel/machine_kexec.c @@ -186,7 +186,12 @@ void __init reserve_crashkernel(void) (unsigned long)(crashk_res.start >> 20), (unsigned long)(memblock_phys_mem_size() >> 20)); - memblock_reserve(crashk_res.start, crash_size); + if (!memblock_is_region_memory(crashk_res.start, crash_size) || + memblock_reserve(crashk_res.start, crash_size)) { + pr_err("Failed to reserve memory for crashkernel!\n"); + crashk_res.start = crashk_res.end = 0; + return; + } } int overlaps_crashkernel(unsigned long start, unsigned long size) -- GitLab From 6fd38ba41e340b20ddd806ffa96da57c8ec2aa3e Mon Sep 17 00:00:00 2001 From: Alexey Khoroshilov Date: Fri, 29 Jun 2018 17:49:22 -0400 Subject: [PATCH 0600/2269] media: fsl-viu: fix error handling in viu_of_probe() [ Upstream commit 662a99e145661c2b35155cf375044deae9b79896 ] viu_of_probe() ignores fails in i2c_get_adapter(), tries to unlock uninitialized mutex on error path. The patch streamlining the error handling in viu_of_probe(). Found by Linux Driver Verification project (linuxtesting.org). Signed-off-by: Alexey Khoroshilov Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/media/platform/fsl-viu.c | 38 +++++++++++++++++++------------- 1 file changed, 23 insertions(+), 15 deletions(-) diff --git a/drivers/media/platform/fsl-viu.c b/drivers/media/platform/fsl-viu.c index fb43025df5737..254d696dffd89 100644 --- a/drivers/media/platform/fsl-viu.c +++ b/drivers/media/platform/fsl-viu.c @@ -1417,7 +1417,7 @@ static int viu_of_probe(struct platform_device *op) sizeof(struct viu_reg), DRV_NAME)) { dev_err(&op->dev, "Error while requesting mem region\n"); ret = -EBUSY; - goto err; + goto err_irq; } /* remap registers */ @@ -1425,7 +1425,7 @@ static int viu_of_probe(struct platform_device *op) if (!viu_regs) { dev_err(&op->dev, "Can't map register set\n"); ret = -ENOMEM; - goto err; + goto err_irq; } /* Prepare our private structure */ @@ -1433,7 +1433,7 @@ static int viu_of_probe(struct platform_device *op) if (!viu_dev) { dev_err(&op->dev, "Can't allocate private structure\n"); ret = -ENOMEM; - goto err; + goto err_irq; } viu_dev->vr = viu_regs; @@ -1449,16 +1449,21 @@ static int viu_of_probe(struct platform_device *op) ret = v4l2_device_register(viu_dev->dev, &viu_dev->v4l2_dev); if (ret < 0) { dev_err(&op->dev, "v4l2_device_register() failed: %d\n", ret); - goto err; + goto err_irq; } ad = i2c_get_adapter(0); + if (!ad) { + ret = -EFAULT; + dev_err(&op->dev, "couldn't get i2c adapter\n"); + goto err_v4l2; + } v4l2_ctrl_handler_init(&viu_dev->hdl, 5); if (viu_dev->hdl.error) { ret = viu_dev->hdl.error; dev_err(&op->dev, "couldn't register control\n"); - goto err_vdev; + goto err_i2c; } /* This control handler will inherit the control(s) from the sub-device(s). */ @@ -1475,7 +1480,7 @@ static int viu_of_probe(struct platform_device *op) vdev = video_device_alloc(); if (vdev == NULL) { ret = -ENOMEM; - goto err_vdev; + goto err_hdl; } *vdev = viu_template; @@ -1496,7 +1501,7 @@ static int viu_of_probe(struct platform_device *op) ret = video_register_device(viu_dev->vdev, VFL_TYPE_GRABBER, -1); if (ret < 0) { video_device_release(viu_dev->vdev); - goto err_vdev; + goto err_unlock; } /* enable VIU clock */ @@ -1504,12 +1509,12 @@ static int viu_of_probe(struct platform_device *op) if (IS_ERR(clk)) { dev_err(&op->dev, "failed to lookup the clock!\n"); ret = PTR_ERR(clk); - goto err_clk; + goto err_vdev; } ret = clk_prepare_enable(clk); if (ret) { dev_err(&op->dev, "failed to enable the clock!\n"); - goto err_clk; + goto err_vdev; } viu_dev->clk = clk; @@ -1520,7 +1525,7 @@ static int viu_of_probe(struct platform_device *op) if (request_irq(viu_dev->irq, viu_intr, 0, "viu", (void *)viu_dev)) { dev_err(&op->dev, "Request VIU IRQ failed.\n"); ret = -ENODEV; - goto err_irq; + goto err_clk; } mutex_unlock(&viu_dev->lock); @@ -1528,16 +1533,19 @@ static int viu_of_probe(struct platform_device *op) dev_info(&op->dev, "Freescale VIU Video Capture Board\n"); return ret; -err_irq: - clk_disable_unprepare(viu_dev->clk); err_clk: - video_unregister_device(viu_dev->vdev); + clk_disable_unprepare(viu_dev->clk); err_vdev: - v4l2_ctrl_handler_free(&viu_dev->hdl); + video_unregister_device(viu_dev->vdev); +err_unlock: mutex_unlock(&viu_dev->lock); +err_hdl: + v4l2_ctrl_handler_free(&viu_dev->hdl); +err_i2c: i2c_put_adapter(ad); +err_v4l2: v4l2_device_unregister(&viu_dev->v4l2_dev); -err: +err_irq: irq_dispose_mapping(viu_irq); return ret; } -- GitLab From 23e4ab4069d1d98e91c87f52dd9192bc412f8618 Mon Sep 17 00:00:00 2001 From: Peter Seiderer Date: Thu, 15 Mar 2018 15:13:22 -0400 Subject: [PATCH 0601/2269] media: staging/imx: fill vb2_v4l2_buffer field entry [ Upstream commit a38d4b71cb7a12b65317f4e3d59883a918957719 ] - fixes gstreamer v4l2src warning: 0:00:00.716640334 349 0x164f720 WARN v4l2bufferpool gstv4l2bufferpool.c:1195:gst_v4l2_buffer_pool_dqbuf: Driver should never set v4l2_buffer.field to ANY - fixes v4l2-compliance test failure: Streaming ioctls: test read/write: OK (Not Supported) Video Capture: Buffer: 0 Sequence: 0 Field: Any Timestamp: 58.383658s fail: v4l2-test-buffers.cpp(297): g_field() == V4L2_FIELD_ANY Signed-off-by: Peter Seiderer Reviewed-by: Steve Longerbeam Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/staging/media/imx/imx-ic-prpencvf.c | 1 + drivers/staging/media/imx/imx-media-csi.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/staging/media/imx/imx-ic-prpencvf.c b/drivers/staging/media/imx/imx-ic-prpencvf.c index 0790b3d9e2556..111afd34aa3c3 100644 --- a/drivers/staging/media/imx/imx-ic-prpencvf.c +++ b/drivers/staging/media/imx/imx-ic-prpencvf.c @@ -210,6 +210,7 @@ static void prp_vb2_buf_done(struct prp_priv *priv, struct ipuv3_channel *ch) done = priv->active_vb2_buf[priv->ipu_buf_num]; if (done) { + done->vbuf.field = vdev->fmt.fmt.pix.field; vb = &done->vbuf.vb2_buf; vb->timestamp = ktime_get_ns(); vb2_buffer_done(vb, priv->nfb4eof ? diff --git a/drivers/staging/media/imx/imx-media-csi.c b/drivers/staging/media/imx/imx-media-csi.c index 6d856118c2232..83ecb5b2fb9e1 100644 --- a/drivers/staging/media/imx/imx-media-csi.c +++ b/drivers/staging/media/imx/imx-media-csi.c @@ -171,6 +171,7 @@ static void csi_vb2_buf_done(struct csi_priv *priv) done = priv->active_vb2_buf[priv->ipu_buf_num]; if (done) { + done->vbuf.field = vdev->fmt.fmt.pix.field; vb = &done->vbuf.vb2_buf; vb->timestamp = ktime_get_ns(); vb2_buffer_done(vb, priv->nfb4eof ? -- GitLab From 0a29ab00339e641dad80db406343f007f4f8885d Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 29 Jun 2018 22:31:10 +0300 Subject: [PATCH 0602/2269] x86/tsc: Add missing header to tsc_msr.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit dbd0fbc76c77daac08ddd245afdcbade0d506e19 ] Add a missing header otherwise compiler warns about missed prototype: CC arch/x86/kernel/tsc_msr.o arch/x86/kernel/tsc_msr.c:73:15: warning: no previous prototype for ‘cpu_khz_from_msr’ [-Wmissing-prototypes] unsigned long cpu_khz_from_msr(void) ^~~~~~~~~~~~~~~~ Signed-off-by: Andy Shevchenko Signed-off-by: Thomas Gleixner Cc: "H. Peter Anvin" Cc: Pavel Tatashin Link: https://lkml.kernel.org/r/20180629193113.84425-4-andriy.shevchenko@linux.intel.com Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/tsc_msr.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kernel/tsc_msr.c b/arch/x86/kernel/tsc_msr.c index 19afdbd7d0a77..5532d1be76879 100644 --- a/arch/x86/kernel/tsc_msr.c +++ b/arch/x86/kernel/tsc_msr.c @@ -12,6 +12,7 @@ #include #include #include +#include #define MAX_NUM_FREQS 9 -- GitLab From 8e90c7ef50e243f370e981b3d79d991512d0ca5e Mon Sep 17 00:00:00 2001 From: Dave Gerlach Date: Thu, 21 Jun 2018 14:43:08 +0530 Subject: [PATCH 0603/2269] ARM: hwmod: RTC: Don't assume lock/unlock will be called with irq enabled [ Upstream commit 6d609b35c815ba20132b7b64bcca04516bb17c56 ] When the RTC lock and unlock functions were introduced it was likely assumed that they would always be called from irq enabled context, hence the use of local_irq_disable/enable. This is no longer true as the RTC+DDR path makes a late call during the suspend path after irqs have been disabled to enable the RTC hwmod which calls both unlock and lock, leading to IRQs being reenabled through the local_irq_enable call in omap_hwmod_rtc_lock call. To avoid this change the local_irq_disable/enable to local_irq_save/restore to ensure that from whatever context this is called the proper IRQ configuration is maintained. Signed-off-by: Dave Gerlach Signed-off-by: Keerthy Signed-off-by: Tony Lindgren Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-omap2/omap_hwmod_reset.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/arch/arm/mach-omap2/omap_hwmod_reset.c b/arch/arm/mach-omap2/omap_hwmod_reset.c index b68f9c0aff0b5..d5ddba00bb731 100644 --- a/arch/arm/mach-omap2/omap_hwmod_reset.c +++ b/arch/arm/mach-omap2/omap_hwmod_reset.c @@ -92,11 +92,13 @@ static void omap_rtc_wait_not_busy(struct omap_hwmod *oh) */ void omap_hwmod_rtc_unlock(struct omap_hwmod *oh) { - local_irq_disable(); + unsigned long flags; + + local_irq_save(flags); omap_rtc_wait_not_busy(oh); omap_hwmod_write(OMAP_RTC_KICK0_VALUE, oh, OMAP_RTC_KICK0_REG); omap_hwmod_write(OMAP_RTC_KICK1_VALUE, oh, OMAP_RTC_KICK1_REG); - local_irq_enable(); + local_irq_restore(flags); } /** @@ -110,9 +112,11 @@ void omap_hwmod_rtc_unlock(struct omap_hwmod *oh) */ void omap_hwmod_rtc_lock(struct omap_hwmod *oh) { - local_irq_disable(); + unsigned long flags; + + local_irq_save(flags); omap_rtc_wait_not_busy(oh); omap_hwmod_write(0x0, oh, OMAP_RTC_KICK0_REG); omap_hwmod_write(0x0, oh, OMAP_RTC_KICK1_REG); - local_irq_enable(); + local_irq_restore(flags); } -- GitLab From 8430918a04e3579fd513d8b2a289de6fb81b3ec5 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 2 Jul 2018 04:47:57 -0600 Subject: [PATCH 0604/2269] x86/entry/64: Add two more instruction suffixes [ Upstream commit 6709812f094d96543b443645c68daaa32d3d3e77 ] Sadly, other than claimed in: a368d7fd2a ("x86/entry/64: Add instruction suffix") ... there are two more instances which want to be adjusted. As said there, omitting suffixes from instructions in AT&T mode is bad practice when operand size cannot be determined by the assembler from register operands, and is likely going to be warned about by upstream gas in the future (mine does already). Add the other missing suffixes here as well. Signed-off-by: Jan Beulich Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/5B3A02DD02000078001CFB78@prv1-mh.provo.novell.com Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/entry_64.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 0fae7096ae23c..164cd7529f0b0 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -88,7 +88,7 @@ END(native_usergs_sysret64) .endm .macro TRACE_IRQS_IRETQ_DEBUG - bt $9, EFLAGS(%rsp) /* interrupts off? */ + btl $9, EFLAGS(%rsp) /* interrupts off? */ jnc 1f TRACE_IRQS_ON_DEBUG 1: @@ -630,7 +630,7 @@ retint_kernel: #ifdef CONFIG_PREEMPT /* Interrupts are off */ /* Check if we need preemption */ - bt $9, EFLAGS(%rsp) /* were interrupts off? */ + btl $9, EFLAGS(%rsp) /* were interrupts off? */ jnc 1f 0: cmpl $0, PER_CPU_VAR(__preempt_count) jnz 1f -- GitLab From 2cbead46fd4e9f937bed7c68fb759931b97c176e Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 25 May 2018 16:01:48 +0530 Subject: [PATCH 0605/2269] ARM: dts: ls1021a: Add missing cooling device properties for CPUs [ Upstream commit 47768f372eae030db6fab5225f9504a820d2c07f ] The cooling device properties, like "#cooling-cells" and "dynamic-power-coefficient", should either be present for all the CPUs of a cluster or none. If these are present only for a subset of CPUs of a cluster then things will start falling apart as soon as the CPUs are brought online in a different order. For example, this will happen because the operating system looks for such properties in the CPU node it is trying to bring up, so that it can register a cooling device. Add such missing properties. Signed-off-by: Viresh Kumar Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/ls1021a.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/boot/dts/ls1021a.dtsi b/arch/arm/boot/dts/ls1021a.dtsi index 379b4a03cfe2f..2d20f60947b95 100644 --- a/arch/arm/boot/dts/ls1021a.dtsi +++ b/arch/arm/boot/dts/ls1021a.dtsi @@ -84,6 +84,7 @@ device_type = "cpu"; reg = <0xf01>; clocks = <&clockgen 1 0>; + #cooling-cells = <2>; }; }; -- GitLab From bdfc40bc1b095c32ec519517755ab651582ea7c7 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 22 Jun 2018 14:53:01 -0700 Subject: [PATCH 0606/2269] scsi: target/iscsi: Make iscsit_ta_authentication() respect the output buffer size [ Upstream commit 35bea5c84fd13c643cce63f0b5cd4b148f8c901d ] Fixes: e48354ce078c ("iscsi-target: Add iSCSI fabric support for target v4.1") Signed-off-by: Bart Van Assche Reviewed-by: Mike Christie Cc: Mike Christie Cc: Christoph Hellwig Cc: Hannes Reinecke Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/target/iscsi/iscsi_target_tpg.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/target/iscsi/iscsi_target_tpg.c b/drivers/target/iscsi/iscsi_target_tpg.c index 594d07a1e995e..16e7516052a44 100644 --- a/drivers/target/iscsi/iscsi_target_tpg.c +++ b/drivers/target/iscsi/iscsi_target_tpg.c @@ -633,8 +633,7 @@ int iscsit_ta_authentication(struct iscsi_portal_group *tpg, u32 authentication) none = strstr(buf1, NONE); if (none) goto out; - strncat(buf1, ",", strlen(",")); - strncat(buf1, NONE, strlen(NONE)); + strlcat(buf1, "," NONE, sizeof(buf1)); if (iscsi_update_param_value(param, buf1) < 0) return -EINVAL; } -- GitLab From 1390c37d167044c2d7bcda95cc76770ebe6ac770 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 22 Jun 2018 14:54:49 -0700 Subject: [PATCH 0607/2269] scsi: klist: Make it safe to use klists in atomic context [ Upstream commit 624fa7790f80575a4ec28fbdb2034097dc18d051 ] In the scsi_transport_srp implementation it cannot be avoided to iterate over a klist from atomic context when using the legacy block layer instead of blk-mq. Hence this patch that makes it safe to use klists in atomic context. This patch avoids that lockdep reports the following: WARNING: SOFTIRQ-safe -> SOFTIRQ-unsafe lock order detected Possible interrupt unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&(&k->k_lock)->rlock); local_irq_disable(); lock(&(&q->__queue_lock)->rlock); lock(&(&k->k_lock)->rlock); lock(&(&q->__queue_lock)->rlock); stack backtrace: Workqueue: kblockd blk_timeout_work Call Trace: dump_stack+0xa4/0xf5 check_usage+0x6e6/0x700 __lock_acquire+0x185d/0x1b50 lock_acquire+0xd2/0x260 _raw_spin_lock+0x32/0x50 klist_next+0x47/0x190 device_for_each_child+0x8e/0x100 srp_timed_out+0xaf/0x1d0 [scsi_transport_srp] scsi_times_out+0xd4/0x410 [scsi_mod] blk_rq_timed_out+0x36/0x70 blk_timeout_work+0x1b5/0x220 process_one_work+0x4fe/0xad0 worker_thread+0x63/0x5a0 kthread+0x1c1/0x1e0 ret_from_fork+0x24/0x30 See also commit c9ddf73476ff ("scsi: scsi_transport_srp: Fix shost to rport translation"). Signed-off-by: Bart Van Assche Cc: Martin K. Petersen Cc: James Bottomley Acked-by: Greg Kroah-Hartman Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- lib/klist.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/lib/klist.c b/lib/klist.c index 0507fa5d84c53..f6b547812fe3d 100644 --- a/lib/klist.c +++ b/lib/klist.c @@ -336,8 +336,9 @@ struct klist_node *klist_prev(struct klist_iter *i) void (*put)(struct klist_node *) = i->i_klist->put; struct klist_node *last = i->i_cur; struct klist_node *prev; + unsigned long flags; - spin_lock(&i->i_klist->k_lock); + spin_lock_irqsave(&i->i_klist->k_lock, flags); if (last) { prev = to_klist_node(last->n_node.prev); @@ -356,7 +357,7 @@ struct klist_node *klist_prev(struct klist_iter *i) prev = to_klist_node(prev->n_node.prev); } - spin_unlock(&i->i_klist->k_lock); + spin_unlock_irqrestore(&i->i_klist->k_lock, flags); if (put && last) put(last); @@ -377,8 +378,9 @@ struct klist_node *klist_next(struct klist_iter *i) void (*put)(struct klist_node *) = i->i_klist->put; struct klist_node *last = i->i_cur; struct klist_node *next; + unsigned long flags; - spin_lock(&i->i_klist->k_lock); + spin_lock_irqsave(&i->i_klist->k_lock, flags); if (last) { next = to_klist_node(last->n_node.next); @@ -397,7 +399,7 @@ struct klist_node *klist_next(struct klist_iter *i) next = to_klist_node(next->n_node.next); } - spin_unlock(&i->i_klist->k_lock); + spin_unlock_irqrestore(&i->i_klist->k_lock, flags); if (put && last) put(last); -- GitLab From 336b73754169902d2a9fead2f736b23725b41d8c Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Tue, 26 Jun 2018 17:35:16 -0300 Subject: [PATCH 0608/2269] scsi: ibmvscsi: Improve strings handling [ Upstream commit 1262dc09dc9ae7bf4ad00b6a2c5ed6a6936bcd10 ] Currently an open firmware property is copied into partition_name variable without keeping a room for \0. Later one, this variable (partition_name), which is 97 bytes long, is strncpyed into ibmvcsci_host_data->madapter_info->partition_name, which is 96 bytes long, possibly truncating it 'again' and removing the \0. This patch simply decreases the partition name to 96 and just copy using strlcpy() which guarantees that the string is \0 terminated. I think there is no issue if this there is a truncation in this very first copy, i.e, when the open firmware property is read and copied into the driver for the very first time; This issue also causes the following warning on GCC 8: drivers/scsi/ibmvscsi/ibmvscsi.c:281:2: warning: strncpy output may be truncated copying 96 bytes from a string of length 96 [-Wstringop-truncation] ... inlined from ibmvscsi_probe at drivers/scsi/ibmvscsi/ibmvscsi.c:2221:7: drivers/scsi/ibmvscsi/ibmvscsi.c:265:3: warning: strncpy specified bound 97 equals destination size [-Wstringop-truncation] CC: Bart Van Assche CC: Tyrel Datwyler Signed-off-by: Breno Leitao Acked-by: Tyrel Datwyler Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/ibmvscsi/ibmvscsi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/ibmvscsi/ibmvscsi.c b/drivers/scsi/ibmvscsi/ibmvscsi.c index 7d156b1614829..53eb277313739 100644 --- a/drivers/scsi/ibmvscsi/ibmvscsi.c +++ b/drivers/scsi/ibmvscsi/ibmvscsi.c @@ -93,7 +93,7 @@ static int max_requests = IBMVSCSI_MAX_REQUESTS_DEFAULT; static int max_events = IBMVSCSI_MAX_REQUESTS_DEFAULT + 2; static int fast_fail = 1; static int client_reserve = 1; -static char partition_name[97] = "UNKNOWN"; +static char partition_name[96] = "UNKNOWN"; static unsigned int partition_number = -1; static LIST_HEAD(ibmvscsi_head); @@ -262,7 +262,7 @@ static void gather_partition_info(void) ppartition_name = of_get_property(of_root, "ibm,partition-name", NULL); if (ppartition_name) - strncpy(partition_name, ppartition_name, + strlcpy(partition_name, ppartition_name, sizeof(partition_name)); p_number_ptr = of_get_property(of_root, "ibm,partition-no", NULL); if (p_number_ptr) -- GitLab From ebee32dd8f04ee364d20c14c2f87a51293c625a4 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 28 Jun 2018 13:48:57 -0500 Subject: [PATCH 0609/2269] scsi: target: Avoid that EXTENDED COPY commands trigger lock inversion [ Upstream commit 36d4cb460bcbe2a1323732a6e4bb9dd783284368 ] The approach for adding a device to the devices_idr data structure and for removing it is as follows: * &dev->dev_group.cg_item is initialized before a device is added to devices_idr. * If the reference count of a device drops to zero then target_free_device() removes the device from devices_idr. * All devices_idr manipulations are protected by device_mutex. This means that increasing the reference count of a device is sufficient to prevent removal from devices_idr and also that it is safe access dev_group.cg_item for any device that is referenced by devices_idr. Use this to modify target_find_device() and target_for_each_device() such that these functions no longer introduce a dependency between device_mutex and the configfs root inode mutex. Note: it is safe to pass a NULL pointer to config_item_put() and also to config_item_get_unless_zero(). This patch prevents that lockdep reports the following complaint: ====================================================== WARNING: possible circular locking dependency detected 4.12.0-rc1-dbg+ #1 Not tainted ------------------------------------------------------ rmdir/12053 is trying to acquire lock: (device_mutex#2){+.+.+.}, at: [] target_free_device+0xae/0xf0 [target_core_mod] but task is already holding lock: (&sb->s_type->i_mutex_key#14){++++++}, at: [] vfs_rmdir+0x50/0x140 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #1 (&sb->s_type->i_mutex_key#14){++++++}: lock_acquire+0x59/0x80 down_write+0x36/0x70 configfs_depend_item+0x3a/0xb0 [configfs] target_depend_item+0x13/0x20 [target_core_mod] target_xcopy_locate_se_dev_e4_iter+0x87/0x100 [target_core_mod] target_devices_idr_iter+0x16/0x20 [target_core_mod] idr_for_each+0x39/0xc0 target_for_each_device+0x36/0x50 [target_core_mod] target_xcopy_locate_se_dev_e4+0x28/0x80 [target_core_mod] target_xcopy_do_work+0x2e9/0xdd0 [target_core_mod] process_one_work+0x1ca/0x3f0 worker_thread+0x49/0x3b0 kthread+0x109/0x140 ret_from_fork+0x31/0x40 -> #0 (device_mutex#2){+.+.+.}: __lock_acquire+0x101f/0x11d0 lock_acquire+0x59/0x80 __mutex_lock+0x7e/0x950 mutex_lock_nested+0x16/0x20 target_free_device+0xae/0xf0 [target_core_mod] target_core_dev_release+0x10/0x20 [target_core_mod] config_item_put+0x6e/0xb0 [configfs] configfs_rmdir+0x1a6/0x300 [configfs] vfs_rmdir+0xb7/0x140 do_rmdir+0x1f4/0x200 SyS_rmdir+0x11/0x20 entry_SYSCALL_64_fastpath+0x23/0xc2 other info that might help us debug this: Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&sb->s_type->i_mutex_key#14); lock(device_mutex#2); lock(&sb->s_type->i_mutex_key#14); lock(device_mutex#2); *** DEADLOCK *** 3 locks held by rmdir/12053: #0: (sb_writers#10){.+.+.+}, at: [] mnt_want_write+0x1f/0x50 #1: (&sb->s_type->i_mutex_key#14/1){+.+.+.}, at: [] do_rmdir+0x15e/0x200 #2: (&sb->s_type->i_mutex_key#14){++++++}, at: [] vfs_rmdir+0x50/0x140 stack backtrace: CPU: 3 PID: 12053 Comm: rmdir Not tainted 4.12.0-rc1-dbg+ #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.0.0-prebuilt.qemu-project.org 04/01/2014 Call Trace: dump_stack+0x86/0xcf print_circular_bug+0x1c7/0x220 __lock_acquire+0x101f/0x11d0 lock_acquire+0x59/0x80 __mutex_lock+0x7e/0x950 mutex_lock_nested+0x16/0x20 target_free_device+0xae/0xf0 [target_core_mod] target_core_dev_release+0x10/0x20 [target_core_mod] config_item_put+0x6e/0xb0 [configfs] configfs_rmdir+0x1a6/0x300 [configfs] vfs_rmdir+0xb7/0x140 do_rmdir+0x1f4/0x200 SyS_rmdir+0x11/0x20 entry_SYSCALL_64_fastpath+0x23/0xc2 Signed-off-by: Bart Van Assche [Rebased to handle conflict withe target_find_device removal] Signed-off-by: Mike Christie Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/target/target_core_device.c | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c index e8dd6da164b28..84742125f7730 100644 --- a/drivers/target/target_core_device.c +++ b/drivers/target/target_core_device.c @@ -904,14 +904,20 @@ struct se_device *target_find_device(int id, bool do_depend) EXPORT_SYMBOL(target_find_device); struct devices_idr_iter { + struct config_item *prev_item; int (*fn)(struct se_device *dev, void *data); void *data; }; static int target_devices_idr_iter(int id, void *p, void *data) + __must_hold(&device_mutex) { struct devices_idr_iter *iter = data; struct se_device *dev = p; + int ret; + + config_item_put(iter->prev_item); + iter->prev_item = NULL; /* * We add the device early to the idr, so it can be used @@ -922,7 +928,15 @@ static int target_devices_idr_iter(int id, void *p, void *data) if (!(dev->dev_flags & DF_CONFIGURED)) return 0; - return iter->fn(dev, iter->data); + iter->prev_item = config_item_get_unless_zero(&dev->dev_group.cg_item); + if (!iter->prev_item) + return 0; + mutex_unlock(&device_mutex); + + ret = iter->fn(dev, iter->data); + + mutex_lock(&device_mutex); + return ret; } /** @@ -936,15 +950,13 @@ static int target_devices_idr_iter(int id, void *p, void *data) int target_for_each_device(int (*fn)(struct se_device *dev, void *data), void *data) { - struct devices_idr_iter iter; + struct devices_idr_iter iter = { .fn = fn, .data = data }; int ret; - iter.fn = fn; - iter.data = data; - mutex_lock(&device_mutex); ret = idr_for_each(&devices_idr, target_devices_idr_iter, &iter); mutex_unlock(&device_mutex); + config_item_put(iter.prev_item); return ret; } -- GitLab From 149f530334f00ba493c084d9312d8be21ebd2d2a Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sun, 1 Jul 2018 19:32:04 +0200 Subject: [PATCH 0610/2269] usb: wusbcore: security: cast sizeof to int for comparison [ Upstream commit d3ac5598c5010a8999978ebbcca3b1c6188ca36b ] Comparing an int to a size, which is unsigned, causes the int to become unsigned, giving the wrong result. usb_get_descriptor can return a negative error code. A simplified version of the semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) // @@ int x; expression e,e1; identifier f; @@ *x = f(...); ... when != x = e1 when != if (x < 0 || ...) { ... return ...; } *x < sizeof(e) // Signed-off-by: Julia Lawall Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/usb/wusbcore/security.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/wusbcore/security.c b/drivers/usb/wusbcore/security.c index 170f2c38de9b5..5274aa7339b8f 100644 --- a/drivers/usb/wusbcore/security.c +++ b/drivers/usb/wusbcore/security.c @@ -230,7 +230,7 @@ int wusb_dev_sec_add(struct wusbhc *wusbhc, result = usb_get_descriptor(usb_dev, USB_DT_SECURITY, 0, secd, sizeof(*secd)); - if (result < sizeof(*secd)) { + if (result < (int)sizeof(*secd)) { dev_err(dev, "Can't read security descriptor or " "not enough data: %d\n", result); goto out; -- GitLab From b31f41e02c800b63841066c0e4f6e04cc2c5905b Mon Sep 17 00:00:00 2001 From: Alagu Sankar Date: Fri, 29 Jun 2018 16:27:56 +0300 Subject: [PATCH 0611/2269] ath10k: sdio: use same endpoint id for all packets in a bundle [ Upstream commit 679e1f07c86221b7183dd69df7068fd42d0041f6 ] All packets in a bundle should use the same endpoint id as the first lookahead. This matches how things are done is ath6kl, however, this patch can theoretically handle several bundles in ath10k_sdio_mbox_rx_process_packets(). Without this patch we get lots of errors about invalid endpoint id: ath10k_sdio mmc2:0001:1: invalid endpoint in look-ahead: 224 ath10k_sdio mmc2:0001:1: failed to get pending recv messages: -12 ath10k_sdio mmc2:0001:1: failed to process pending SDIO interrupts: -12 Co-Developed-by: Niklas Cassel Signed-off-by: Alagu Sankar Signed-off-by: Niklas Cassel Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/ath10k/sdio.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath10k/sdio.c b/drivers/net/wireless/ath/ath10k/sdio.c index 03a69e5b11165..895ccfb2141ff 100644 --- a/drivers/net/wireless/ath/ath10k/sdio.c +++ b/drivers/net/wireless/ath/ath10k/sdio.c @@ -434,12 +434,14 @@ static int ath10k_sdio_mbox_rx_process_packets(struct ath10k *ar, enum ath10k_htc_ep_id id; int ret, i, *n_lookahead_local; u32 *lookaheads_local; + int lookahead_idx = 0; for (i = 0; i < ar_sdio->n_rx_pkts; i++) { lookaheads_local = lookaheads; n_lookahead_local = n_lookahead; - id = ((struct ath10k_htc_hdr *)&lookaheads[i])->eid; + id = ((struct ath10k_htc_hdr *) + &lookaheads[lookahead_idx++])->eid; if (id >= ATH10K_HTC_EP_COUNT) { ath10k_warn(ar, "invalid endpoint in look-ahead: %d\n", @@ -462,6 +464,7 @@ static int ath10k_sdio_mbox_rx_process_packets(struct ath10k *ar, /* Only read lookahead's from RX trailers * for the last packet in a bundle. */ + lookahead_idx--; lookaheads_local = NULL; n_lookahead_local = NULL; } -- GitLab From 45d3d58f9739bf422b2ec9512797eb886af3a414 Mon Sep 17 00:00:00 2001 From: Alagu Sankar Date: Fri, 29 Jun 2018 16:28:00 +0300 Subject: [PATCH 0612/2269] ath10k: sdio: set skb len for all rx packets [ Upstream commit 8530b4e7b22bc3bd8240579f3844c73947cd5f71 ] Without this, packets larger than 1500 will silently be dropped. Easily reproduced by sending a ping packet with a size larger than 1500. Co-Developed-by: Niklas Cassel Signed-off-by: Alagu Sankar Signed-off-by: Niklas Cassel Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/ath10k/sdio.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/ath/ath10k/sdio.c b/drivers/net/wireless/ath/ath10k/sdio.c index 895ccfb2141ff..c5e6f4254e49c 100644 --- a/drivers/net/wireless/ath/ath10k/sdio.c +++ b/drivers/net/wireless/ath/ath10k/sdio.c @@ -396,6 +396,7 @@ static int ath10k_sdio_mbox_rx_process_packet(struct ath10k *ar, int ret; payload_len = le16_to_cpu(htc_hdr->len); + skb->len = payload_len + sizeof(struct ath10k_htc_hdr); if (trailer_present) { trailer = skb->data + sizeof(*htc_hdr) + -- GitLab From 8deb5801f1548be952372303c25de95c6a243cae Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Fri, 1 Jun 2018 18:06:16 +1000 Subject: [PATCH 0613/2269] powerpc/powernv/ioda2: Reduce upper limit for DMA window size [ Upstream commit d3d4ffaae439981e1e441ebb125aa3588627c5d8 ] We use PHB in mode1 which uses bit 59 to select a correct DMA window. However there is mode2 which uses bits 59:55 and allows up to 32 DMA windows per a PE. Even though documentation does not clearly specify that, it seems that the actual hardware does not support bits 59:55 even in mode1, in other words we can create a window as big as 1<<58 but DMA simply won't work. This reduces the upper limit from 59 to 55 bits to let the userspace know about the hardware limits. Fixes: 7aafac11e3 "powerpc/powernv/ioda2: Gracefully fail if too many TCE levels requested" Signed-off-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/platforms/powernv/pci-ioda.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index e919696c7137c..ddef22e00ddd7 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -2787,7 +2787,7 @@ static long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset, level_shift = entries_shift + 3; level_shift = max_t(unsigned, level_shift, PAGE_SHIFT); - if ((level_shift - 3) * levels + page_shift >= 60) + if ((level_shift - 3) * levels + page_shift >= 55) return -EINVAL; /* Allocate TCE table */ -- GitLab From 9374ffc6f3d3fcfe0cca6ed8139324d67b120719 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 2 Jul 2018 10:54:02 +0200 Subject: [PATCH 0614/2269] s390/sysinfo: add missing #ifdef CONFIG_PROC_FS [ Upstream commit 9f35b818a2f90fb6cb291aa0c9f835d4f0974a9a ] Get rid of this compile warning for !PROC_FS: CC arch/s390/kernel/sysinfo.o arch/s390/kernel/sysinfo.c:275:12: warning: 'sysinfo_show' defined but not used [-Wunused-function] static int sysinfo_show(struct seq_file *m, void *v) Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/s390/kernel/sysinfo.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c index a441cba8d165c..b0fad29c1427f 100644 --- a/arch/s390/kernel/sysinfo.c +++ b/arch/s390/kernel/sysinfo.c @@ -59,6 +59,8 @@ int stsi(void *sysinfo, int fc, int sel1, int sel2) } EXPORT_SYMBOL(stsi); +#ifdef CONFIG_PROC_FS + static bool convert_ext_name(unsigned char encoding, char *name, size_t len) { switch (encoding) { @@ -311,6 +313,8 @@ static int __init sysinfo_create_proc(void) } device_initcall(sysinfo_create_proc); +#endif /* CONFIG_PROC_FS */ + /* * Service levels interface. */ -- GitLab From a4dbaf7c2de0d622e0fe29840dd2bf4a281277a5 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 2 Jul 2018 09:34:29 +0200 Subject: [PATCH 0615/2269] alarmtimer: Prevent overflow for relative nanosleep [ Upstream commit 5f936e19cc0ef97dbe3a56e9498922ad5ba1edef ] Air Icy reported: UBSAN: Undefined behaviour in kernel/time/alarmtimer.c:811:7 signed integer overflow: 1529859276030040771 + 9223372036854775807 cannot be represented in type 'long long int' Call Trace: alarm_timer_nsleep+0x44c/0x510 kernel/time/alarmtimer.c:811 __do_sys_clock_nanosleep kernel/time/posix-timers.c:1235 [inline] __se_sys_clock_nanosleep kernel/time/posix-timers.c:1213 [inline] __x64_sys_clock_nanosleep+0x326/0x4e0 kernel/time/posix-timers.c:1213 do_syscall_64+0xb8/0x3a0 arch/x86/entry/common.c:290 alarm_timer_nsleep() uses ktime_add() to add the current time and the relative expiry value. ktime_add() has no sanity checks so the addition can overflow when the relative timeout is large enough. Use ktime_add_safe() which has the necessary sanity checks in place and limits the result to the valid range. Fixes: 9a7adcf5c6de ("timers: Posix interface for alarm-timers") Reported-by: Team OWL337 Signed-off-by: Thomas Gleixner Cc: John Stultz Link: https://lkml.kernel.org/r/alpine.DEB.2.21.1807020926360.1595@nanos.tec.linutronix.de Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- kernel/time/alarmtimer.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index 639321bf2e397..481bb6ca6ca04 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -808,7 +808,8 @@ static int alarm_timer_nsleep(const clockid_t which_clock, int flags, /* Convert (if necessary) to absolute time */ if (flags != TIMER_ABSTIME) { ktime_t now = alarm_bases[type].gettime(); - exp = ktime_add(now, exp); + + exp = ktime_add_safe(now, exp); } ret = alarmtimer_do_nsleep(&alarm, exp, type); -- GitLab From 98a34e26d93db505afe2fe0e26f47d755c85d40a Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Sun, 24 Jun 2018 09:21:59 +0200 Subject: [PATCH 0616/2269] s390/dasd: correct numa_node in dasd_alloc_queue [ Upstream commit b17e3abb0af404cb62ad4ef1a5962f58b06e2b78 ] The numa_node field of the tag_set struct has to be explicitly initialized, otherwise it stays as 0, which is a valid numa node id and cause memory allocation failure if node 0 is offline. Acked-by: Stefan Haberland Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/s390/block/dasd.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index d072f84a8535f..92c4f5180ad07 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -3190,6 +3190,7 @@ static int dasd_alloc_queue(struct dasd_block *block) block->tag_set.nr_hw_queues = DASD_NR_HW_QUEUES; block->tag_set.queue_depth = DASD_MAX_LCU_DEV * DASD_REQ_PER_DEV; block->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; + block->tag_set.numa_node = NUMA_NO_NODE; rc = blk_mq_alloc_tag_set(&block->tag_set); if (rc) -- GitLab From 33cd135ebc97e44140f76f625b9b3d99fd8019cb Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Mon, 25 Jun 2018 14:30:42 +0200 Subject: [PATCH 0617/2269] s390/scm_blk: correct numa_node in scm_blk_dev_setup [ Upstream commit d642d6262f4fcfa5d200ec6e218c17f0c15b3390 ] The numa_node field of the tag_set struct has to be explicitly initialized, otherwise it stays as 0, which is a valid numa node id and cause memory allocation failure if node 0 is offline. Acked-by: Sebastian Ott Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/s390/block/scm_blk.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/s390/block/scm_blk.c b/drivers/s390/block/scm_blk.c index eb51893c74a4b..5c944ee76ec14 100644 --- a/drivers/s390/block/scm_blk.c +++ b/drivers/s390/block/scm_blk.c @@ -454,6 +454,7 @@ int scm_blk_dev_setup(struct scm_blk_dev *bdev, struct scm_device *scmdev) bdev->tag_set.nr_hw_queues = nr_requests; bdev->tag_set.queue_depth = nr_requests_per_io * nr_requests; bdev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; + bdev->tag_set.numa_node = NUMA_NO_NODE; ret = blk_mq_alloc_tag_set(&bdev->tag_set); if (ret) -- GitLab From a838008bb11f96f14bfc48a22e88116eb74351ab Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Sun, 17 Jun 2018 00:30:43 +0200 Subject: [PATCH 0618/2269] s390/extmem: fix gcc 8 stringop-overflow warning [ Upstream commit 6b2ddf33baec23dace85bd647e3fc4ac070963e8 ] arch/s390/mm/extmem.c: In function '__segment_load': arch/s390/mm/extmem.c:436:2: warning: 'strncat' specified bound 7 equals source length [-Wstringop-overflow=] strncat(seg->res_name, " (DCSS)", 7); What gcc complains about here is the misuse of strncat function, which in this case does not limit a number of bytes taken from "src", so it is in the end the same as strcat(seg->res_name, " (DCSS)"); Keeping in mind that a res_name is 15 bytes, strncat in this case would overflow the buffer and write 0 into alignment byte between the fields in the struct. To avoid that increasing res_name size to 16, and reusing strlcat. Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/s390/mm/extmem.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/s390/mm/extmem.c b/arch/s390/mm/extmem.c index 920d408945359..290e71e57541c 100644 --- a/arch/s390/mm/extmem.c +++ b/arch/s390/mm/extmem.c @@ -80,7 +80,7 @@ struct qin64 { struct dcss_segment { struct list_head list; char dcss_name[8]; - char res_name[15]; + char res_name[16]; unsigned long start_addr; unsigned long end; atomic_t ref_count; @@ -433,7 +433,7 @@ __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long memcpy(&seg->res_name, seg->dcss_name, 8); EBCASC(seg->res_name, 8); seg->res_name[8] = '\0'; - strncat(seg->res_name, " (DCSS)", 7); + strlcat(seg->res_name, " (DCSS)", sizeof(seg->res_name)); seg->res->name = seg->res_name; rc = seg->vm_segtype; if (rc == SEG_TYPE_SC || -- GitLab From 8cbb2f74c0939901d8b96b8fa820afe619790758 Mon Sep 17 00:00:00 2001 From: Peter Rosin Date: Thu, 29 Mar 2018 15:10:54 +0200 Subject: [PATCH 0619/2269] mtd: rawnand: atmel: add module param to avoid using dma [ Upstream commit efc6362c6f8c1e74b340e2611f1b35e7d557ce7b ] On a sama5d31 with a Full-HD dual LVDS panel (132MHz pixel clock) NAND flash accesses have a tendency to cause display disturbances. Add a module param to disable DMA from the NAND controller, since that fixes the display problem for me. Signed-off-by: Peter Rosin Reviewed-by: Boris Brezillon Signed-off-by: Miquel Raynal Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/nand/atmel/nand-controller.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/mtd/nand/atmel/nand-controller.c b/drivers/mtd/nand/atmel/nand-controller.c index 68c9d98a3347c..148744418e82b 100644 --- a/drivers/mtd/nand/atmel/nand-controller.c +++ b/drivers/mtd/nand/atmel/nand-controller.c @@ -129,6 +129,11 @@ #define DEFAULT_TIMEOUT_MS 1000 #define MIN_DMA_LEN 128 +static bool atmel_nand_avoid_dma __read_mostly; + +MODULE_PARM_DESC(avoiddma, "Avoid using DMA"); +module_param_named(avoiddma, atmel_nand_avoid_dma, bool, 0400); + enum atmel_nand_rb_type { ATMEL_NAND_NO_RB, ATMEL_NAND_NATIVE_RB, @@ -1975,7 +1980,7 @@ static int atmel_nand_controller_init(struct atmel_nand_controller *nc, return ret; } - if (nc->caps->has_dma) { + if (nc->caps->has_dma && !atmel_nand_avoid_dma) { dma_cap_mask_t mask; dma_cap_zero(mask); -- GitLab From cf373da100393aa7d6a303646c0a9a58ff8d7459 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Tue, 26 Jun 2018 00:22:41 +0900 Subject: [PATCH 0620/2269] iio: accel: adxl345: convert address field usage in iio_chan_spec [ Upstream commit 9048f1f18a70a01eaa3c8e7166fdb2538929d780 ] Currently the address field in iio_chan_spec is filled with an accel data register address for the corresponding axis. In preparation for adding calibration offset support, this sets the address field to the index of accel data registers instead of the actual register address. This change makes it easier to access both accel registers and calibration offset registers with fewer lines of code as these are located in X-axis, Y-axis, Z-axis order. Cc: Eva Rachel Retuya Cc: Andy Shevchenko Cc: Jonathan Cameron Signed-off-by: Akinobu Mita Signed-off-by: Jonathan Cameron Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/iio/accel/adxl345_core.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/drivers/iio/accel/adxl345_core.c b/drivers/iio/accel/adxl345_core.c index 9ccb5828db986..3dda7afe8a116 100644 --- a/drivers/iio/accel/adxl345_core.c +++ b/drivers/iio/accel/adxl345_core.c @@ -21,6 +21,8 @@ #define ADXL345_REG_DATAX0 0x32 #define ADXL345_REG_DATAY0 0x34 #define ADXL345_REG_DATAZ0 0x36 +#define ADXL345_REG_DATA_AXIS(index) \ + (ADXL345_REG_DATAX0 + (index) * sizeof(__le16)) #define ADXL345_POWER_CTL_MEASURE BIT(3) #define ADXL345_POWER_CTL_STANDBY 0x00 @@ -47,19 +49,19 @@ struct adxl345_data { u8 data_range; }; -#define ADXL345_CHANNEL(reg, axis) { \ +#define ADXL345_CHANNEL(index, axis) { \ .type = IIO_ACCEL, \ .modified = 1, \ .channel2 = IIO_MOD_##axis, \ - .address = reg, \ + .address = index, \ .info_mask_separate = BIT(IIO_CHAN_INFO_RAW), \ .info_mask_shared_by_type = BIT(IIO_CHAN_INFO_SCALE), \ } static const struct iio_chan_spec adxl345_channels[] = { - ADXL345_CHANNEL(ADXL345_REG_DATAX0, X), - ADXL345_CHANNEL(ADXL345_REG_DATAY0, Y), - ADXL345_CHANNEL(ADXL345_REG_DATAZ0, Z), + ADXL345_CHANNEL(0, X), + ADXL345_CHANNEL(1, Y), + ADXL345_CHANNEL(2, Z), }; static int adxl345_read_raw(struct iio_dev *indio_dev, @@ -67,7 +69,7 @@ static int adxl345_read_raw(struct iio_dev *indio_dev, int *val, int *val2, long mask) { struct adxl345_data *data = iio_priv(indio_dev); - __le16 regval; + __le16 accel; int ret; switch (mask) { @@ -77,12 +79,13 @@ static int adxl345_read_raw(struct iio_dev *indio_dev, * ADXL345_REG_DATA(X0/Y0/Z0) contain the least significant byte * and ADXL345_REG_DATA(X0/Y0/Z0) + 1 the most significant byte */ - ret = regmap_bulk_read(data->regmap, chan->address, ®val, - sizeof(regval)); + ret = regmap_bulk_read(data->regmap, + ADXL345_REG_DATA_AXIS(chan->address), + &accel, sizeof(accel)); if (ret < 0) return ret; - *val = sign_extend32(le16_to_cpu(regval), 12); + *val = sign_extend32(le16_to_cpu(accel), 12); return IIO_VAL_INT; case IIO_CHAN_INFO_SCALE: *val = 0; -- GitLab From a05bd4ba655f5737ead5494733846a87cc80bd36 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 26 Jun 2018 15:21:31 +0200 Subject: [PATCH 0621/2269] posix-timers: Make forward callback return s64 [ Upstream commit 6fec64e1c92d5c715c6d0f50786daa7708266bde ] The posix timer ti_overrun handling is broken because the forwarding functions can return a huge number of overruns which does not fit in an int. As a consequence timer_getoverrun(2) and siginfo::si_overrun can turn into random number generators. As a first step to address that let the timer_forward() callbacks return the full 64 bit value. Cast it to (int) temporarily until k_itimer::ti_overrun is converted to 64bit and the conversion to user space visible values is sanitized. Reported-by: Team OWL337 Signed-off-by: Thomas Gleixner Acked-by: John Stultz Cc: Peter Zijlstra Cc: Michael Kerrisk Link: https://lkml.kernel.org/r/20180626132704.922098090@linutronix.de Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- kernel/time/alarmtimer.c | 4 ++-- kernel/time/posix-timers.c | 6 +++--- kernel/time/posix-timers.h | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index 481bb6ca6ca04..fa5de5e8de61d 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -581,11 +581,11 @@ static void alarm_timer_rearm(struct k_itimer *timr) * @timr: Pointer to the posixtimer data struct * @now: Current time to forward the timer against */ -static int alarm_timer_forward(struct k_itimer *timr, ktime_t now) +static s64 alarm_timer_forward(struct k_itimer *timr, ktime_t now) { struct alarm *alarm = &timr->it.alarm.alarmtimer; - return (int) alarm_forward(alarm, timr->it_interval, now); + return alarm_forward(alarm, timr->it_interval, now); } /** diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index 7089927083325..fb0935612d4e3 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -645,11 +645,11 @@ static ktime_t common_hrtimer_remaining(struct k_itimer *timr, ktime_t now) return __hrtimer_expires_remaining_adjusted(timer, now); } -static int common_hrtimer_forward(struct k_itimer *timr, ktime_t now) +static s64 common_hrtimer_forward(struct k_itimer *timr, ktime_t now) { struct hrtimer *timer = &timr->it.real.timer; - return (int)hrtimer_forward(timer, now, timr->it_interval); + return hrtimer_forward(timer, now, timr->it_interval); } /* @@ -702,7 +702,7 @@ void common_timer_get(struct k_itimer *timr, struct itimerspec64 *cur_setting) * expiry time forward by intervals, so expiry is > now. */ if (iv && (timr->it_requeue_pending & REQUEUE_PENDING || sig_none)) - timr->it_overrun += kc->timer_forward(timr, now); + timr->it_overrun += (int)kc->timer_forward(timr, now); remaining = kc->timer_remaining(timr, now); /* Return 0 only, when the timer is expired and not pending */ diff --git a/kernel/time/posix-timers.h b/kernel/time/posix-timers.h index 151e28f5bf304..ddb21145211a0 100644 --- a/kernel/time/posix-timers.h +++ b/kernel/time/posix-timers.h @@ -19,7 +19,7 @@ struct k_clock { void (*timer_get)(struct k_itimer *timr, struct itimerspec64 *cur_setting); void (*timer_rearm)(struct k_itimer *timr); - int (*timer_forward)(struct k_itimer *timr, ktime_t now); + s64 (*timer_forward)(struct k_itimer *timr, ktime_t now); ktime_t (*timer_remaining)(struct k_itimer *timr, ktime_t now); int (*timer_try_to_cancel)(struct k_itimer *timr); void (*timer_arm)(struct k_itimer *timr, ktime_t expires, -- GitLab From 3e3f075f72bd2dfcd5211bd1ff3919bc118ad4cd Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 26 Jun 2018 15:21:32 +0200 Subject: [PATCH 0622/2269] posix-timers: Sanitize overrun handling [ Upstream commit 78c9c4dfbf8c04883941445a195276bb4bb92c76 ] The posix timer overrun handling is broken because the forwarding functions can return a huge number of overruns which does not fit in an int. As a consequence timer_getoverrun(2) and siginfo::si_overrun can turn into random number generators. The k_clock::timer_forward() callbacks return a 64 bit value now. Make k_itimer::ti_overrun[_last] 64bit as well, so the kernel internal accounting is correct. 3Remove the temporary (int) casts. Add a helper function which clamps the overrun value returned to user space via timer_getoverrun(2) or siginfo::si_overrun limited to a positive value between 0 and INT_MAX. INT_MAX is an indicator for user space that the overrun value has been clamped. Reported-by: Team OWL337 Signed-off-by: Thomas Gleixner Acked-by: John Stultz Cc: Peter Zijlstra Cc: Michael Kerrisk Link: https://lkml.kernel.org/r/20180626132705.018623573@linutronix.de Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- include/linux/posix-timers.h | 4 ++-- kernel/time/posix-cpu-timers.c | 2 +- kernel/time/posix-timers.c | 31 ++++++++++++++++++++----------- 3 files changed, 23 insertions(+), 14 deletions(-) diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index 672c4f32311e2..437a539898aeb 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -82,8 +82,8 @@ struct k_itimer { clockid_t it_clock; timer_t it_id; int it_active; - int it_overrun; - int it_overrun_last; + s64 it_overrun; + s64 it_overrun_last; int it_requeue_pending; int it_sigev_notify; ktime_t it_interval; diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c index 5b117110b55b5..2da660d53a4ba 100644 --- a/kernel/time/posix-cpu-timers.c +++ b/kernel/time/posix-cpu-timers.c @@ -84,7 +84,7 @@ static void bump_cpu_timer(struct k_itimer *timer, u64 now) continue; timer->it.cpu.expires += incr; - timer->it_overrun += 1 << i; + timer->it_overrun += 1LL << i; delta -= incr; } } diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index fb0935612d4e3..55d45fe2cc173 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -283,6 +283,17 @@ static __init int init_posix_timers(void) } __initcall(init_posix_timers); +/* + * The siginfo si_overrun field and the return value of timer_getoverrun(2) + * are of type int. Clamp the overrun value to INT_MAX + */ +static inline int timer_overrun_to_int(struct k_itimer *timr, int baseval) +{ + s64 sum = timr->it_overrun_last + (s64)baseval; + + return sum > (s64)INT_MAX ? INT_MAX : (int)sum; +} + static void common_hrtimer_rearm(struct k_itimer *timr) { struct hrtimer *timer = &timr->it.real.timer; @@ -290,9 +301,8 @@ static void common_hrtimer_rearm(struct k_itimer *timr) if (!timr->it_interval) return; - timr->it_overrun += (unsigned int) hrtimer_forward(timer, - timer->base->get_time(), - timr->it_interval); + timr->it_overrun += hrtimer_forward(timer, timer->base->get_time(), + timr->it_interval); hrtimer_restart(timer); } @@ -321,10 +331,10 @@ void posixtimer_rearm(struct siginfo *info) timr->it_active = 1; timr->it_overrun_last = timr->it_overrun; - timr->it_overrun = -1; + timr->it_overrun = -1LL; ++timr->it_requeue_pending; - info->si_overrun += timr->it_overrun_last; + info->si_overrun = timer_overrun_to_int(timr, info->si_overrun); } unlock_timer(timr, flags); @@ -418,9 +428,8 @@ static enum hrtimer_restart posix_timer_fn(struct hrtimer *timer) now = ktime_add(now, kj); } #endif - timr->it_overrun += (unsigned int) - hrtimer_forward(timer, now, - timr->it_interval); + timr->it_overrun += hrtimer_forward(timer, now, + timr->it_interval); ret = HRTIMER_RESTART; ++timr->it_requeue_pending; timr->it_active = 1; @@ -524,7 +533,7 @@ static int do_timer_create(clockid_t which_clock, struct sigevent *event, new_timer->it_id = (timer_t) new_timer_id; new_timer->it_clock = which_clock; new_timer->kclock = kc; - new_timer->it_overrun = -1; + new_timer->it_overrun = -1LL; if (event) { rcu_read_lock(); @@ -702,7 +711,7 @@ void common_timer_get(struct k_itimer *timr, struct itimerspec64 *cur_setting) * expiry time forward by intervals, so expiry is > now. */ if (iv && (timr->it_requeue_pending & REQUEUE_PENDING || sig_none)) - timr->it_overrun += (int)kc->timer_forward(timr, now); + timr->it_overrun += kc->timer_forward(timr, now); remaining = kc->timer_remaining(timr, now); /* Return 0 only, when the timer is expired and not pending */ @@ -789,7 +798,7 @@ SYSCALL_DEFINE1(timer_getoverrun, timer_t, timer_id) if (!timr) return -EINVAL; - overrun = timr->it_overrun_last; + overrun = timer_overrun_to_int(timr, 0); unlock_timer(timr, flags); return overrun; -- GitLab From 85d3dbd8e7f2af3083f8f68db468c9fdebe9d8d2 Mon Sep 17 00:00:00 2001 From: Nicholas Mc Guire Date: Fri, 29 Jun 2018 19:07:42 +0200 Subject: [PATCH 0623/2269] ALSA: snd-aoa: add of_node_put() in error path [ Upstream commit 222bce5eb88d1af656419db04bcd84b2419fb900 ] Both calls to of_find_node_by_name() and of_get_next_child() return a node pointer with refcount incremented thus it must be explicidly decremented here after the last usage. As we are assured to have a refcounted np either from the initial of_find_node_by_name(NULL, name); or from the of_get_next_child(gpio, np) in the while loop if we reached the error code path below, an x of_node_put(np) is needed. Signed-off-by: Nicholas Mc Guire Fixes: commit f3d9478b2ce4 ("[ALSA] snd-aoa: add snd-aoa") Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- sound/aoa/core/gpio-feature.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sound/aoa/core/gpio-feature.c b/sound/aoa/core/gpio-feature.c index 71960089e207b..65557421fe0ba 100644 --- a/sound/aoa/core/gpio-feature.c +++ b/sound/aoa/core/gpio-feature.c @@ -88,8 +88,10 @@ static struct device_node *get_gpio(char *name, } reg = of_get_property(np, "reg", NULL); - if (!reg) + if (!reg) { + of_node_put(np); return NULL; + } *gpioptr = *reg; -- GitLab From 381f8d235dd89d2cd5a90f5662e6e30c56b06d8c Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Sun, 10 Jun 2018 11:42:01 -0400 Subject: [PATCH 0624/2269] media: s3c-camif: ignore -ENOIOCTLCMD from v4l2_subdev_call for s_power [ Upstream commit 30ed2b83343bd1e07884ca7355dac70d25ffc158 ] When the subdevice doesn't provide s_power core ops callback, the v4l2_subdev_call for s_power returns -ENOIOCTLCMD. If the subdevice doesn't have the special handling for its power saving mode, the s_power isn't required. So -ENOIOCTLCMD from the v4l2_subdev_call should be ignored. Cc: Hans Verkuil Signed-off-by: Akinobu Mita Acked-by: Sylwester Nawrocki Signed-off-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/media/platform/s3c-camif/camif-capture.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/media/platform/s3c-camif/camif-capture.c b/drivers/media/platform/s3c-camif/camif-capture.c index 0f0324a14d515..85d26713cedb9 100644 --- a/drivers/media/platform/s3c-camif/camif-capture.c +++ b/drivers/media/platform/s3c-camif/camif-capture.c @@ -117,6 +117,8 @@ static int sensor_set_power(struct camif_dev *camif, int on) if (camif->sensor.power_count == !on) err = v4l2_subdev_call(sensor->sd, core, s_power, on); + if (err == -ENOIOCTLCMD) + err = 0; if (!err) sensor->power_count += on ? 1 : -1; -- GitLab From daefaacc6e027a9973c30fc34babca73e8e58094 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Sun, 10 Jun 2018 11:42:26 -0400 Subject: [PATCH 0625/2269] media: soc_camera: ov772x: correct setting of banding filter [ Upstream commit 22216ec41e919682c15345e95928f266e8ba6f9e ] The banding filter ON/OFF is controlled via bit 5 of COM8 register. It is attempted to be enabled in ov772x_set_params() by the following line. ret = ov772x_mask_set(client, COM8, BNDF_ON_OFF, 1); But this unexpectedly results disabling the banding filter, because the mask and set bits are exclusive. On the other hand, ov772x_s_ctrl() correctly sets the bit by: ret = ov772x_mask_set(client, COM8, BNDF_ON_OFF, BNDF_ON_OFF); The same fix was already applied to non-soc_camera version of ov772x driver in the commit commit a024ee14cd36 ("media: ov772x: correct setting of banding filter") Cc: Jacopo Mondi Cc: Laurent Pinchart Cc: Hans Verkuil Signed-off-by: Akinobu Mita Signed-off-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/media/i2c/soc_camera/ov772x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/i2c/soc_camera/ov772x.c b/drivers/media/i2c/soc_camera/ov772x.c index 806383500313e..14377af7c8888 100644 --- a/drivers/media/i2c/soc_camera/ov772x.c +++ b/drivers/media/i2c/soc_camera/ov772x.c @@ -834,7 +834,7 @@ static int ov772x_set_params(struct ov772x_priv *priv, * set COM8 */ if (priv->band_filter) { - ret = ov772x_mask_set(client, COM8, BNDF_ON_OFF, 1); + ret = ov772x_mask_set(client, COM8, BNDF_ON_OFF, BNDF_ON_OFF); if (!ret) ret = ov772x_mask_set(client, BDBASE, 0xff, 256 - priv->band_filter); -- GitLab From 1b16d06a9e271d73b4549ed9c3655d7b4d7594c5 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Sat, 9 Jun 2018 08:22:45 -0400 Subject: [PATCH 0626/2269] media: omap3isp: zero-initialize the isp cam_xclk{a,b} initial data [ Upstream commit 2ec7debd44b49927a6e2861521994cc075a389ed ] The struct clk_init_data init variable is declared in the isp_xclk_init() function so is an automatic variable allocated in the stack. But it's not explicitly zero-initialized, so some init fields are left uninitialized. This causes the data structure to have undefined values that may confuse the common clock framework when the clock is registered. For example, the uninitialized .flags field could have the CLK_IS_CRITICAL bit set, causing the framework to wrongly prepare the clk on registration. This leads to the isp_xclk_prepare() callback being called, which in turn calls to the omap3isp_get() function that increments the isp dev refcount. Since this omap3isp_get() call is unexpected, this leads to an unbalanced omap3isp_get() call that prevents the requested IRQ to be later enabled, due the refcount not being 0 when the correct omap3isp_get() call happens. Fixes: 9b28ee3c9122 ("[media] omap3isp: Use the common clock framework") Signed-off-by: Javier Martinez Canillas Reviewed-by: Sebastian Reichel Signed-off-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/media/platform/omap3isp/isp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/platform/omap3isp/isp.c b/drivers/media/platform/omap3isp/isp.c index 9f023bc6e1b7e..6e6e978263b02 100644 --- a/drivers/media/platform/omap3isp/isp.c +++ b/drivers/media/platform/omap3isp/isp.c @@ -305,7 +305,7 @@ static struct clk *isp_xclk_src_get(struct of_phandle_args *clkspec, void *data) static int isp_xclk_init(struct isp_device *isp) { struct device_node *np = isp->dev->of_node; - struct clk_init_data init; + struct clk_init_data init = { 0 }; unsigned int i; for (i = 0; i < ARRAY_SIZE(isp->xclks); ++i) -- GitLab From 3af342f5ddbd2ee31aa9e3ae2c50869f304ffe4d Mon Sep 17 00:00:00 2001 From: Alistair Strachan Date: Tue, 19 Jun 2018 17:57:35 -0700 Subject: [PATCH 0627/2269] staging: android: ashmem: Fix mmap size validation [ Upstream commit 8632c614565d0c5fdde527889601c018e97b6384 ] The ashmem driver did not check that the size/offset of the vma passed to its .mmap() function was not larger than the ashmem object being mapped. This could cause mmap() to succeed, even though accessing parts of the mapping would later fail with a segmentation fault. Ensure an error is returned by the ashmem_mmap() function if the vma size is larger than the ashmem object size. This enables safer handling of the problem in userspace. Cc: Todd Kjos Cc: devel@driverdev.osuosl.org Cc: linux-kernel@vger.kernel.org Cc: kernel-team@android.com Cc: Joel Fernandes Signed-off-by: Alistair Strachan Acked-by: Joel Fernandes (Google) Reviewed-by: Martijn Coenen Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/staging/android/ashmem.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/staging/android/ashmem.c b/drivers/staging/android/ashmem.c index 893b2836089c3..4151bb44a4103 100644 --- a/drivers/staging/android/ashmem.c +++ b/drivers/staging/android/ashmem.c @@ -374,6 +374,12 @@ static int ashmem_mmap(struct file *file, struct vm_area_struct *vma) goto out; } + /* requested mapping size larger than object size */ + if (vma->vm_end - vma->vm_start > PAGE_ALIGN(asma->size)) { + ret = -EINVAL; + goto out; + } + /* requested protection bits must match our allowed protection mask */ if (unlikely((vma->vm_flags & ~calc_vm_prot_bits(asma->prot_mask, 0)) & calc_vm_prot_bits(PROT_MASK, 0))) { -- GitLab From 0091a4ede7834c92751cb56fe4a9bc3b62d347ed Mon Sep 17 00:00:00 2001 From: Zhouyang Jia Date: Tue, 12 Jun 2018 12:36:25 +0800 Subject: [PATCH 0628/2269] drivers/tty: add error handling for pcmcia_loop_config [ Upstream commit 85c634e919bd6ef17427f26a52920aeba12e16ee ] When pcmcia_loop_config fails, the lack of error-handling code may cause unexpected results. This patch adds error-handling code after calling pcmcia_loop_config. Signed-off-by: Zhouyang Jia Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/serial_cs.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/8250/serial_cs.c b/drivers/tty/serial/8250/serial_cs.c index 933c2688dd7ea..8106353ce7aa0 100644 --- a/drivers/tty/serial/8250/serial_cs.c +++ b/drivers/tty/serial/8250/serial_cs.c @@ -637,8 +637,10 @@ static int serial_config(struct pcmcia_device *link) (link->has_func_id) && (link->socket->pcmcia_pfc == 0) && ((link->func_id == CISTPL_FUNCID_MULTI) || - (link->func_id == CISTPL_FUNCID_SERIAL))) - pcmcia_loop_config(link, serial_check_for_multi, info); + (link->func_id == CISTPL_FUNCID_SERIAL))) { + if (pcmcia_loop_config(link, serial_check_for_multi, info)) + goto failed; + } /* * Apply any multi-port quirk. -- GitLab From 0ebe95dee2f23419f4d5a379590d1f889bf22467 Mon Sep 17 00:00:00 2001 From: Zhouyang Jia Date: Mon, 11 Jun 2018 00:39:20 -0400 Subject: [PATCH 0629/2269] media: tm6000: add error handling for dvb_register_adapter [ Upstream commit e95d7c6eb94c634852eaa5ff4caf3db05b5d2e86 ] When dvb_register_adapter fails, the lack of error-handling code may cause unexpected results. This patch adds error-handling code after calling dvb_register_adapter. Signed-off-by: Zhouyang Jia [hans.verkuil@cisco.com: use pr_err and fix typo: adater -> adapter] Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/media/usb/tm6000/tm6000-dvb.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/media/usb/tm6000/tm6000-dvb.c b/drivers/media/usb/tm6000/tm6000-dvb.c index 097ac321b7e1e..349f578273b6e 100644 --- a/drivers/media/usb/tm6000/tm6000-dvb.c +++ b/drivers/media/usb/tm6000/tm6000-dvb.c @@ -267,6 +267,11 @@ static int register_dvb(struct tm6000_core *dev) ret = dvb_register_adapter(&dvb->adapter, "Trident TVMaster 6000 DVB-T", THIS_MODULE, &dev->udev->dev, adapter_nr); + if (ret < 0) { + pr_err("tm6000: couldn't register the adapter!\n"); + goto err; + } + dvb->adapter.priv = dev; if (dvb->frontend) { -- GitLab From 8d9fd12b1eefa8e9138e28184d63443f36fb22f9 Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Thu, 28 Jun 2018 15:28:24 +0800 Subject: [PATCH 0630/2269] ALSA: hda: Add AZX_DCAPS_PM_RUNTIME for AMD Raven Ridge [ Upstream commit 1adca4b0cd65c14cb8b8c9c257720385869c3d5f ] This patch can make audio controller in AMD Raven Ridge gets runtime suspended to D3, to save ~1W power when it's not in use. Cc: Vijendar Mukunda Signed-off-by: Kai-Heng Feng Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/hda_intel.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 22c13ad6a9ae4..873d9824fbcff 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -2510,7 +2510,8 @@ static const struct pci_device_id azx_ids[] = { .driver_data = AZX_DRIVER_GENERIC | AZX_DCAPS_PRESET_ATI_SB }, /* AMD Raven */ { PCI_DEVICE(0x1022, 0x15e3), - .driver_data = AZX_DRIVER_GENERIC | AZX_DCAPS_PRESET_ATI_SB }, + .driver_data = AZX_DRIVER_GENERIC | AZX_DCAPS_PRESET_ATI_SB | + AZX_DCAPS_PM_RUNTIME }, /* ATI HDMI */ { PCI_DEVICE(0x1002, 0x0002), .driver_data = AZX_DRIVER_ATIHDMI_NS | AZX_DCAPS_PRESET_ATI_HDMI_NS }, -- GitLab From 0f4ca55e441cf3bdf3b3d6467690d58adc681d44 Mon Sep 17 00:00:00 2001 From: Brandon Maier Date: Tue, 26 Jun 2018 12:50:50 -0500 Subject: [PATCH 0631/2269] net: phy: xgmiitorgmii: Check read_status results [ Upstream commit 8d0752d11312be830c33e84dfd1016e6a47c2938 ] We're ignoring the result of the attached phy device's read_status(). Return it so we can detect errors. Signed-off-by: Brandon Maier Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/phy/xilinx_gmii2rgmii.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/xilinx_gmii2rgmii.c b/drivers/net/phy/xilinx_gmii2rgmii.c index 2e5150b0b8d52..449b313190109 100644 --- a/drivers/net/phy/xilinx_gmii2rgmii.c +++ b/drivers/net/phy/xilinx_gmii2rgmii.c @@ -40,8 +40,11 @@ static int xgmiitorgmii_read_status(struct phy_device *phydev) { struct gmii2rgmii *priv = phydev->priv; u16 val = 0; + int err; - priv->phy_drv->read_status(phydev); + err = priv->phy_drv->read_status(phydev); + if (err < 0) + return err; val = mdiobus_read(phydev->mdio.bus, priv->addr, XILINX_GMII2RGMII_REG); val &= ~XILINX_GMII2RGMII_SPEED_MASK; -- GitLab From accb431813bf94cf619044e8cdc8edfdcc613cd9 Mon Sep 17 00:00:00 2001 From: Ben Greear Date: Mon, 18 Jun 2018 17:00:56 +0300 Subject: [PATCH 0632/2269] ath10k: protect ath10k_htt_rx_ring_free with rx_ring.lock [ Upstream commit 168f75f11fe68455e0d058a818ebccfc329d8685 ] While debugging driver crashes related to a buggy firmware crashing under load, I noticed that ath10k_htt_rx_ring_free could be called without being under lock. I'm not sure if this is the root cause of the crash or not, but it seems prudent to protect it. Originally tested on 4.16+ kernel with ath10k-ct 10.4 firmware running on 9984 NIC. Signed-off-by: Ben Greear Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/ath10k/htt_rx.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath10k/htt_rx.c b/drivers/net/wireless/ath/ath10k/htt_rx.c index 0aeeb233af780..21642bab485a1 100644 --- a/drivers/net/wireless/ath/ath10k/htt_rx.c +++ b/drivers/net/wireless/ath/ath10k/htt_rx.c @@ -215,11 +215,12 @@ int ath10k_htt_rx_ring_refill(struct ath10k *ar) spin_lock_bh(&htt->rx_ring.lock); ret = ath10k_htt_rx_ring_fill_n(htt, (htt->rx_ring.fill_level - htt->rx_ring.fill_cnt)); - spin_unlock_bh(&htt->rx_ring.lock); if (ret) ath10k_htt_rx_ring_free(htt); + spin_unlock_bh(&htt->rx_ring.lock); + return ret; } @@ -231,7 +232,9 @@ void ath10k_htt_rx_free(struct ath10k_htt *htt) skb_queue_purge(&htt->rx_in_ord_compl_q); skb_queue_purge(&htt->tx_fetch_ind_q); + spin_lock_bh(&htt->rx_ring.lock); ath10k_htt_rx_ring_free(htt); + spin_unlock_bh(&htt->rx_ring.lock); dma_free_coherent(htt->ar->dev, (htt->rx_ring.size * -- GitLab From 3f7056e1822d648f8022997497edc6cad2ad1e73 Mon Sep 17 00:00:00 2001 From: Brandon Maier Date: Tue, 26 Jun 2018 12:50:48 -0500 Subject: [PATCH 0633/2269] net: phy: xgmiitorgmii: Check phy_driver ready before accessing [ Upstream commit ab4e6ee578e88a659938db8fbf33720bc048d29c ] Since a phy_device is added to the global mdio_bus list during phy_device_register(), but a phy_device's phy_driver doesn't get attached until phy_probe(). It's possible of_phy_find_device() in xgmiitorgmii will return a valid phy with a NULL phy_driver. Leading to a NULL pointer access during the memcpy(). Fixes this Oops: Unable to handle kernel NULL pointer dereference at virtual address 00000000 pgd = c0004000 [00000000] *pgd=00000000 Internal error: Oops: 5 [#1] PREEMPT SMP ARM Modules linked in: CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.14.40 #1 Hardware name: Xilinx Zynq Platform task: ce4c8d00 task.stack: ce4ca000 PC is at memcpy+0x48/0x330 LR is at xgmiitorgmii_probe+0x90/0xe8 pc : [] lr : [] psr: 20000013 sp : ce4cbb54 ip : 00000000 fp : ce4cbb8c r10: 00000000 r9 : 00000000 r8 : c0c49178 r7 : 00000000 r6 : cdc14718 r5 : ce762800 r4 : cdc14710 r3 : 00000000 r2 : 00000054 r1 : 00000000 r0 : cdc14718 Flags: nzCv IRQs on FIQs on Mode SVC_32 ISA ARM Segment none Control: 18c5387d Table: 0000404a DAC: 00000051 Process swapper/0 (pid: 1, stack limit = 0xce4ca210) ... [] (memcpy) from [] (xgmiitorgmii_probe+0x90/0xe8) [] (xgmiitorgmii_probe) from [] (mdio_probe+0x28/0x34) [] (mdio_probe) from [] (driver_probe_device+0x254/0x414) [] (driver_probe_device) from [] (__device_attach_driver+0xac/0x10c) [] (__device_attach_driver) from [] (bus_for_each_drv+0x84/0xc8) [] (bus_for_each_drv) from [] (__device_attach+0xd0/0x134) [] (__device_attach) from [] (device_initial_probe+0x1c/0x20) [] (device_initial_probe) from [] (bus_probe_device+0x98/0xa0) [] (bus_probe_device) from [] (device_add+0x43c/0x5d0) [] (device_add) from [] (mdio_device_register+0x34/0x80) [] (mdio_device_register) from [] (of_mdiobus_register+0x170/0x30c) [] (of_mdiobus_register) from [] (macb_probe+0x710/0xc00) [] (macb_probe) from [] (platform_drv_probe+0x44/0x80) [] (platform_drv_probe) from [] (driver_probe_device+0x254/0x414) [] (driver_probe_device) from [] (__driver_attach+0x10c/0x118) [] (__driver_attach) from [] (bus_for_each_dev+0x8c/0xd0) [] (bus_for_each_dev) from [] (driver_attach+0x2c/0x30) [] (driver_attach) from [] (bus_add_driver+0x50/0x260) [] (bus_add_driver) from [] (driver_register+0x88/0x108) [] (driver_register) from [] (__platform_driver_register+0x50/0x58) [] (__platform_driver_register) from [] (macb_driver_init+0x24/0x28) [] (macb_driver_init) from [] (do_one_initcall+0x60/0x1a4) [] (do_one_initcall) from [] (kernel_init_freeable+0x15c/0x1f8) [] (kernel_init_freeable) from [] (kernel_init+0x18/0x124) [] (kernel_init) from [] (ret_from_fork+0x14/0x20) Code: ba000002 f5d1f03c f5d1f05c f5d1f07c (e8b151f8) ---[ end trace 3e4ec21905820a1f ]--- Signed-off-by: Brandon Maier Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/phy/xilinx_gmii2rgmii.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/phy/xilinx_gmii2rgmii.c b/drivers/net/phy/xilinx_gmii2rgmii.c index 449b313190109..7a14e8170e826 100644 --- a/drivers/net/phy/xilinx_gmii2rgmii.c +++ b/drivers/net/phy/xilinx_gmii2rgmii.c @@ -84,6 +84,11 @@ static int xgmiitorgmii_probe(struct mdio_device *mdiodev) return -EPROBE_DEFER; } + if (!priv->phy_dev->drv) { + dev_info(dev, "Attached phy not ready\n"); + return -EPROBE_DEFER; + } + priv->addr = mdiodev->addr; priv->phy_drv = priv->phy_dev->drv; memcpy(&priv->conv_phy_drv, priv->phy_dev->drv, -- GitLab From c1283a6270a257a3f0658b6b2cef6332e2649df8 Mon Sep 17 00:00:00 2001 From: Jernej Skrabec Date: Mon, 25 Jun 2018 14:02:46 +0200 Subject: [PATCH 0634/2269] drm/sun4i: Fix releasing node when enumerating enpoints [ Upstream commit 367c359aa8637b15ee8df6335c5a29b7623966ec ] sun4i_drv_add_endpoints() has a memory leak since it uses of_node_put() when remote is equal to NULL and does nothing when remote has a valid pointer. Invert the logic to fix memory leak. Signed-off-by: Jernej Skrabec Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20180625120304.7543-7-jernej.skrabec@siol.net Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/sun4i/sun4i_drv.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/sun4i/sun4i_drv.c b/drivers/gpu/drm/sun4i/sun4i_drv.c index ace59651892fb..8d3c8070ed862 100644 --- a/drivers/gpu/drm/sun4i/sun4i_drv.c +++ b/drivers/gpu/drm/sun4i/sun4i_drv.c @@ -241,7 +241,6 @@ static int sun4i_drv_add_endpoints(struct device *dev, remote = of_graph_get_remote_port_parent(ep); if (!remote) { DRM_DEBUG_DRIVER("Error retrieving the output node\n"); - of_node_put(remote); continue; } @@ -255,11 +254,13 @@ static int sun4i_drv_add_endpoints(struct device *dev, if (of_graph_parse_endpoint(ep, &endpoint)) { DRM_DEBUG_DRIVER("Couldn't parse endpoint\n"); + of_node_put(remote); continue; } if (!endpoint.id) { DRM_DEBUG_DRIVER("Endpoint is our panel... skipping\n"); + of_node_put(remote); continue; } } -- GitLab From 3c7f6b2cf6d696baf651125ba6a208fb755882b6 Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Mon, 18 Jun 2018 17:00:49 +0300 Subject: [PATCH 0635/2269] ath10k: transmit queued frames after processing rx packets [ Upstream commit 3f04950f32d5d592ab4fcaecac2178558a6f7437 ] When running iperf on ath10k SDIO, TX can stop working: iperf -c 192.168.1.1 -i 1 -t 20 -w 10K [ 3] 0.0- 1.0 sec 2.00 MBytes 16.8 Mbits/sec [ 3] 1.0- 2.0 sec 3.12 MBytes 26.2 Mbits/sec [ 3] 2.0- 3.0 sec 3.25 MBytes 27.3 Mbits/sec [ 3] 3.0- 4.0 sec 655 KBytes 5.36 Mbits/sec [ 3] 4.0- 5.0 sec 0.00 Bytes 0.00 bits/sec [ 3] 5.0- 6.0 sec 0.00 Bytes 0.00 bits/sec [ 3] 6.0- 7.0 sec 0.00 Bytes 0.00 bits/sec [ 3] 7.0- 8.0 sec 0.00 Bytes 0.00 bits/sec [ 3] 8.0- 9.0 sec 0.00 Bytes 0.00 bits/sec [ 3] 9.0-10.0 sec 0.00 Bytes 0.00 bits/sec [ 3] 0.0-10.3 sec 9.01 MBytes 7.32 Mbits/sec There are frames in the ieee80211_txq and there are frames that have been removed from from this queue, but haven't yet been sent on the wire (num_pending_tx). When num_pending_tx reaches max_num_pending_tx, we will stop the queues by calling ieee80211_stop_queues(). As frames that have previously been sent for transmission (num_pending_tx) are completed, we will decrease num_pending_tx and wake the queues by calling ieee80211_wake_queue(). ieee80211_wake_queue() does not call wake_tx_queue, so we might still have frames in the queue at this point. While the queues were stopped, the socket buffer might have filled up, and in order for user space to write more, we need to free the frames in the queue, since they are accounted to the socket. In order to free them, we first need to transmit them. This problem cannot be reproduced on low-latency devices, e.g. pci, since they call ath10k_mac_tx_push_pending() from ath10k_htt_txrx_compl_task(). ath10k_htt_txrx_compl_task() is not called on high-latency devices. Fix the problem by calling ath10k_mac_tx_push_pending(), after processing rx packets, just like for low-latency devices, also in the SDIO case. Since we are calling ath10k_mac_tx_push_pending() directly, we also need to export it. Signed-off-by: Niklas Cassel Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/ath10k/mac.c | 1 + drivers/net/wireless/ath/ath10k/sdio.c | 3 +++ 2 files changed, 4 insertions(+) diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c index 6fa9c223ff93b..cdcfb175ad9b8 100644 --- a/drivers/net/wireless/ath/ath10k/mac.c +++ b/drivers/net/wireless/ath/ath10k/mac.c @@ -4015,6 +4015,7 @@ void ath10k_mac_tx_push_pending(struct ath10k *ar) rcu_read_unlock(); spin_unlock_bh(&ar->txqs_lock); } +EXPORT_SYMBOL(ath10k_mac_tx_push_pending); /************/ /* Scanning */ diff --git a/drivers/net/wireless/ath/ath10k/sdio.c b/drivers/net/wireless/ath/ath10k/sdio.c index c5e6f4254e49c..da9dbf3ddaa5e 100644 --- a/drivers/net/wireless/ath/ath10k/sdio.c +++ b/drivers/net/wireless/ath/ath10k/sdio.c @@ -30,6 +30,7 @@ #include "debug.h" #include "hif.h" #include "htc.h" +#include "mac.h" #include "targaddrs.h" #include "trace.h" #include "sdio.h" @@ -1346,6 +1347,8 @@ static void ath10k_sdio_irq_handler(struct sdio_func *func) break; } while (time_before(jiffies, timeout) && !done); + ath10k_mac_tx_push_pending(ar); + sdio_claim_host(ar_sdio->func); if (ret && ret != -ECANCELED) -- GitLab From 62bd8064fa8820d4fc5dc3c504e25522f9f18f90 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 5 Jun 2018 14:31:39 +0300 Subject: [PATCH 0636/2269] rndis_wlan: potential buffer overflow in rndis_wlan_auth_indication() [ Upstream commit ae636fb1554833ee5133ca47bf4b2791b6739c52 ] This is a static checker fix, not something I have tested. The issue is that on the second iteration through the loop, we jump forward by le32_to_cpu(auth_req->length) bytes. The problem is that if the length is more than "buflen" then we end up with a negative "buflen". A negative buflen is type promoted to a high positive value and the loop continues but it's accessing beyond the end of the buffer. I believe the "auth_req->length" comes from the firmware and if the firmware is malicious or buggy, you're already toasted so the impact of this bug is probably not very severe. Fixes: 030645aceb3d ("rndis_wlan: handle 802.11 indications from device") Signed-off-by: Dan Carpenter Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/rndis_wlan.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wireless/rndis_wlan.c b/drivers/net/wireless/rndis_wlan.c index 9935bd09db1fb..d4947e3a909ec 100644 --- a/drivers/net/wireless/rndis_wlan.c +++ b/drivers/net/wireless/rndis_wlan.c @@ -2928,6 +2928,8 @@ static void rndis_wlan_auth_indication(struct usbnet *usbdev, while (buflen >= sizeof(*auth_req)) { auth_req = (void *)buf; + if (buflen < le32_to_cpu(auth_req->length)) + return; type = "unknown"; flags = le32_to_cpu(auth_req->flags); pairwise_error = false; -- GitLab From dad01c56989a91c1bbce8b8ae7eab124bb11dac7 Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Sun, 17 Jun 2018 12:33:50 +0200 Subject: [PATCH 0637/2269] brcmsmac: fix wrap around in conversion from constant to s16 [ Upstream commit c9a61469fc97672a08b2f798830a55ea6e03dc4a ] The last value in the log_table wraps around to a negative value since s16 has a value range of -32768 to 32767. This is not what the table intends to represent. Use the closest positive value 32767. This fixes a warning seen with clang: drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_qmath.c:216:2: warning: implicit conversion from 'int' to 's16' (aka 'short') changes value from 32768 to -32768 [-Wconstant-conversion] 32768 ^~~~~ 1 warning generated. Fixes: 4c0bfeaae9f9 ("brcmsmac: fix array out-of-bounds access in qm_log10") Cc: Tobias Regnery Signed-off-by: Stefan Agner Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- .../net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_qmath.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_qmath.c b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_qmath.c index b9672da24a9d3..b24bc57ca91b8 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_qmath.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_qmath.c @@ -213,7 +213,7 @@ static const s16 log_table[] = { 30498, 31267, 32024, - 32768 + 32767 }; #define LOG_TABLE_SIZE 32 /* log_table size */ -- GitLab From e87efc44dd36ba3db59847c418354711ebad779b Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Tue, 19 Jun 2018 02:43:35 -0700 Subject: [PATCH 0638/2269] wlcore: Add missing PM call for wlcore_cmd_wait_for_event_or_timeout() [ Upstream commit 4ec7cece87b3ed21ffcd407c62fb2f151a366bc1 ] Otherwise we can get: WARNING: CPU: 0 PID: 55 at drivers/net/wireless/ti/wlcore/io.h:84 I've only seen this few times with the runtime PM patches enabled so this one is probably not needed before that. This seems to work currently based on the current PM implementation timer. Let's apply this separately though in case others are hitting this issue. Signed-off-by: Tony Lindgren Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ti/wlcore/cmd.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/wireless/ti/wlcore/cmd.c b/drivers/net/wireless/ti/wlcore/cmd.c index 761cf8573a805..f48c3f62966d4 100644 --- a/drivers/net/wireless/ti/wlcore/cmd.c +++ b/drivers/net/wireless/ti/wlcore/cmd.c @@ -35,6 +35,7 @@ #include "wl12xx_80211.h" #include "cmd.h" #include "event.h" +#include "ps.h" #include "tx.h" #include "hw_ops.h" @@ -191,6 +192,10 @@ int wlcore_cmd_wait_for_event_or_timeout(struct wl1271 *wl, timeout_time = jiffies + msecs_to_jiffies(WL1271_EVENT_TIMEOUT); + ret = wl1271_ps_elp_wakeup(wl); + if (ret < 0) + return ret; + do { if (time_after(jiffies, timeout_time)) { wl1271_debug(DEBUG_CMD, "timeout waiting for event %d", @@ -222,6 +227,7 @@ int wlcore_cmd_wait_for_event_or_timeout(struct wl1271 *wl, } while (!event); out: + wl1271_ps_elp_sleep(wl); kfree(events_vector); return ret; } -- GitLab From 5ef7a3782de84729d021a702341e33cccd06f32c Mon Sep 17 00:00:00 2001 From: Ethan Tuttle Date: Tue, 19 Jun 2018 21:31:08 -0700 Subject: [PATCH 0639/2269] ARM: mvebu: declare asm symbols as character arrays in pmsu.c [ Upstream commit d0d378ff451a66e486488eec842e507d28145813 ] With CONFIG_FORTIFY_SOURCE, memcpy uses the declared size of operands to detect buffer overflows. If src or dest is declared as a char, attempts to copy more than byte will result in a fortify_panic(). Address this problem in mvebu_setup_boot_addr_wa() by declaring mvebu_boot_wa_start and mvebu_boot_wa_end as character arrays. Also remove a couple addressof operators to avoid "arithmetic on pointer to an incomplete type" compiler error. See commit 54a7d50b9205 ("x86: mark kprobe templates as character arrays, not single characters") for a similar fix. Fixes "detected buffer overflow in memcpy" error during init on some mvebu systems (armada-370-xp, armada-375): (fortify_panic) from (mvebu_setup_boot_addr_wa+0xb0/0xb4) (mvebu_setup_boot_addr_wa) from (mvebu_v7_cpu_pm_init+0x154/0x204) (mvebu_v7_cpu_pm_init) from (do_one_initcall+0x7c/0x1a8) (do_one_initcall) from (kernel_init_freeable+0x1bc/0x254) (kernel_init_freeable) from (kernel_init+0x8/0x114) (kernel_init) from (ret_from_fork+0x14/0x2c) Signed-off-by: Ethan Tuttle Tested-by: Ethan Tuttle Signed-off-by: Gregory CLEMENT Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-mvebu/pmsu.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm/mach-mvebu/pmsu.c b/arch/arm/mach-mvebu/pmsu.c index 27a78c80e5b17..73d5d72dfc3e5 100644 --- a/arch/arm/mach-mvebu/pmsu.c +++ b/arch/arm/mach-mvebu/pmsu.c @@ -116,8 +116,8 @@ void mvebu_pmsu_set_cpu_boot_addr(int hw_cpu, void *boot_addr) PMSU_BOOT_ADDR_REDIRECT_OFFSET(hw_cpu)); } -extern unsigned char mvebu_boot_wa_start; -extern unsigned char mvebu_boot_wa_end; +extern unsigned char mvebu_boot_wa_start[]; +extern unsigned char mvebu_boot_wa_end[]; /* * This function sets up the boot address workaround needed for SMP @@ -130,7 +130,7 @@ int mvebu_setup_boot_addr_wa(unsigned int crypto_eng_target, phys_addr_t resume_addr_reg) { void __iomem *sram_virt_base; - u32 code_len = &mvebu_boot_wa_end - &mvebu_boot_wa_start; + u32 code_len = mvebu_boot_wa_end - mvebu_boot_wa_start; mvebu_mbus_del_window(BOOTROM_BASE, BOOTROM_SIZE); mvebu_mbus_add_window_by_id(crypto_eng_target, crypto_eng_attribute, -- GitLab From 69cb15d6596de0e91877b19af8353a4f0192e971 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 25 May 2018 16:01:49 +0530 Subject: [PATCH 0640/2269] arm: dts: mediatek: Add missing cooling device properties for CPUs [ Upstream commit 0c7f7a5150023f3c6f0b27c4d4940ce3dfaf62cc ] The cooling device properties, like "#cooling-cells" and "dynamic-power-coefficient", should either be present for all the CPUs of a cluster or none. If these are present only for a subset of CPUs of a cluster then things will start falling apart as soon as the CPUs are brought online in a different order. For example, this will happen because the operating system looks for such properties in the CPU node it is trying to bring up, so that it can register a cooling device. Add such missing properties. Signed-off-by: Viresh Kumar Signed-off-by: Matthias Brugger Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/mt7623.dtsi | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm/boot/dts/mt7623.dtsi b/arch/arm/boot/dts/mt7623.dtsi index 1853573235724..028cf4a5887fc 100644 --- a/arch/arm/boot/dts/mt7623.dtsi +++ b/arch/arm/boot/dts/mt7623.dtsi @@ -98,6 +98,7 @@ compatible = "arm,cortex-a7"; reg = <0x1>; operating-points-v2 = <&cpu_opp_table>; + #cooling-cells = <2>; clock-frequency = <1300000000>; }; @@ -106,6 +107,7 @@ compatible = "arm,cortex-a7"; reg = <0x2>; operating-points-v2 = <&cpu_opp_table>; + #cooling-cells = <2>; clock-frequency = <1300000000>; }; @@ -114,6 +116,7 @@ compatible = "arm,cortex-a7"; reg = <0x3>; operating-points-v2 = <&cpu_opp_table>; + #cooling-cells = <2>; clock-frequency = <1300000000>; }; }; -- GitLab From b8e30b822d08fbacd4e023336d9035dac2736bc8 Mon Sep 17 00:00:00 2001 From: Zhouyang Jia Date: Thu, 14 Jun 2018 21:37:17 +0800 Subject: [PATCH 0641/2269] HID: hid-ntrig: add error handling for sysfs_create_group [ Upstream commit 44d4d51de9a3534a2b63d69efda02a10e66541e4 ] When sysfs_create_group fails, the lack of error-handling code may cause unexpected results. This patch adds error-handling code after calling sysfs_create_group. Signed-off-by: Zhouyang Jia Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/hid/hid-ntrig.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/hid/hid-ntrig.c b/drivers/hid/hid-ntrig.c index 3d121d8ee980b..5d2d746e35f60 100644 --- a/drivers/hid/hid-ntrig.c +++ b/drivers/hid/hid-ntrig.c @@ -955,6 +955,8 @@ static int ntrig_probe(struct hid_device *hdev, const struct hid_device_id *id) ret = sysfs_create_group(&hdev->dev.kobj, &ntrig_attribute_group); + if (ret) + hid_err(hdev, "cannot create sysfs group\n"); return 0; err_free: -- GitLab From 85222eb56f2a5a92fdf1cf95cfd8ea57cc3864ff Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 16 Apr 2018 23:47:43 +0900 Subject: [PATCH 0642/2269] MIPS: boot: fix build rule of vmlinux.its.S [ Upstream commit 67e09db507db3e1642ddce512a4313d20addd6e5 ] As Documentation/kbuild/makefile.txt says, it is a typical mistake to forget the FORCE prerequisite for the rule invoked by if_changed. Add the FORCE to the prerequisite, but it must be filtered-out from the files passed to the 'cat' command. Because this rule generates .vmlinux.its.S.cmd, vmlinux.its.S must be specified as targets so that the .cmd file is included. Signed-off-by: Masahiro Yamada Patchwork: https://patchwork.linux-mips.org/patch/19097/ Signed-off-by: Paul Burton Cc: Kees Cook Cc: linux-kernel@vger.kernel.org Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/mips/boot/Makefile | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/mips/boot/Makefile b/arch/mips/boot/Makefile index c22da16d67b82..5c7bfa8478e75 100644 --- a/arch/mips/boot/Makefile +++ b/arch/mips/boot/Makefile @@ -118,10 +118,12 @@ ifeq ($(ADDR_BITS),64) itb_addr_cells = 2 endif +targets += vmlinux.its.S + quiet_cmd_its_cat = CAT $@ - cmd_its_cat = cat $^ >$@ + cmd_its_cat = cat $(filter-out $(PHONY), $^) >$@ -$(obj)/vmlinux.its.S: $(addprefix $(srctree)/arch/mips/$(PLATFORM)/,$(ITS_INPUTS)) +$(obj)/vmlinux.its.S: $(addprefix $(srctree)/arch/mips/$(PLATFORM)/,$(ITS_INPUTS)) FORCE $(call if_changed,its_cat) quiet_cmd_cpp_its_S = ITS $@ -- GitLab From d5963fae7f3691e58fe7eeb02bbcf6eaac981676 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 5 Jun 2018 08:38:45 -0700 Subject: [PATCH 0643/2269] perf/x86/intel/lbr: Fix incomplete LBR call stack [ Upstream commit 0592e57b24e7e05ec1f4c50b9666c013abff7017 ] LBR has a limited stack size. If a task has a deeper call stack than LBR's stack size, only the overflowed part is reported. A complete call stack may not be reconstructed by perf tool. Current code doesn't access all LBR registers. It only read the ones below the TOS. The LBR registers above the TOS will be discarded unconditionally. When a CALL is captured, the TOS is incremented by 1 , modulo max LBR stack size. The LBR HW only records the call stack information to the register which the TOS points to. It will not touch other LBR registers. So the registers above the TOS probably still store the valid call stack information for an overflowed call stack, which need to be reported. To retrieve complete call stack information, we need to start from TOS, read all LBR registers until an invalid entry is detected. 0s can be used to detect the invalid entry, because: - When a RET is captured, the HW zeros the LBR register which TOS points to, then decreases the TOS. - The LBR registers are reset to 0 when adding a new LBR event or scheduling an existing LBR event. - A taken branch at IP 0 is not expected The context switch code is also modified to save/restore all valid LBR registers. Furthermore, the LBR registers, which don't have valid call stack information, need to be reset in restore, because they may be polluted while swapped out. Here is a small test program, tchain_deep. Its call stack is deeper than 32. noinline void f33(void) { int i; for (i = 0; i < 10000000;) { if (i%2) i++; else i++; } } noinline void f32(void) { f33(); } noinline void f31(void) { f32(); } ... ... noinline void f1(void) { f2(); } int main() { f1(); } Here is the test result on SKX. The max stack size of SKX is 32. Without the patch: $ perf record -e cycles --call-graph lbr -- ./tchain_deep $ perf report --stdio # # Children Self Command Shared Object Symbol # ........ ........ ........... ................ ................. # 100.00% 99.99% tchain_deep tchain_deep [.] f33 | --99.99%--f30 f31 f32 f33 With the patch: $ perf record -e cycles --call-graph lbr -- ./tchain_deep $ perf report --stdio # Children Self Command Shared Object Symbol # ........ ........ ........... ................ .................. # 99.99% 0.00% tchain_deep tchain_deep [.] f1 | ---f1 f2 f3 f4 f5 f6 f7 f8 f9 f10 f11 f12 f13 f14 f15 f16 f17 f18 f19 f20 f21 f22 f23 f24 f25 f26 f27 f28 f29 f30 f31 f32 f33 Signed-off-by: Kan Liang Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Stephane Eranian Cc: Vince Weaver Cc: Alexander Shishkin Cc: Thomas Gleixner Cc: acme@kernel.org Cc: eranian@google.com Link: https://lore.kernel.org/lkml/1528213126-4312-1-git-send-email-kan.liang@linux.intel.com Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/x86/events/intel/lbr.c | 32 ++++++++++++++++++++++++++------ arch/x86/events/perf_event.h | 1 + 2 files changed, 27 insertions(+), 6 deletions(-) diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c index cf372b90557ed..a4170048a30bc 100644 --- a/arch/x86/events/intel/lbr.c +++ b/arch/x86/events/intel/lbr.c @@ -346,7 +346,7 @@ static void __intel_pmu_lbr_restore(struct x86_perf_task_context *task_ctx) mask = x86_pmu.lbr_nr - 1; tos = task_ctx->tos; - for (i = 0; i < tos; i++) { + for (i = 0; i < task_ctx->valid_lbrs; i++) { lbr_idx = (tos - i) & mask; wrlbr_from(lbr_idx, task_ctx->lbr_from[i]); wrlbr_to (lbr_idx, task_ctx->lbr_to[i]); @@ -354,6 +354,15 @@ static void __intel_pmu_lbr_restore(struct x86_perf_task_context *task_ctx) if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO) wrmsrl(MSR_LBR_INFO_0 + lbr_idx, task_ctx->lbr_info[i]); } + + for (; i < x86_pmu.lbr_nr; i++) { + lbr_idx = (tos - i) & mask; + wrlbr_from(lbr_idx, 0); + wrlbr_to(lbr_idx, 0); + if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO) + wrmsrl(MSR_LBR_INFO_0 + lbr_idx, 0); + } + wrmsrl(x86_pmu.lbr_tos, tos); task_ctx->lbr_stack_state = LBR_NONE; } @@ -361,7 +370,7 @@ static void __intel_pmu_lbr_restore(struct x86_perf_task_context *task_ctx) static void __intel_pmu_lbr_save(struct x86_perf_task_context *task_ctx) { unsigned lbr_idx, mask; - u64 tos; + u64 tos, from; int i; if (task_ctx->lbr_callstack_users == 0) { @@ -371,13 +380,17 @@ static void __intel_pmu_lbr_save(struct x86_perf_task_context *task_ctx) mask = x86_pmu.lbr_nr - 1; tos = intel_pmu_lbr_tos(); - for (i = 0; i < tos; i++) { + for (i = 0; i < x86_pmu.lbr_nr; i++) { lbr_idx = (tos - i) & mask; - task_ctx->lbr_from[i] = rdlbr_from(lbr_idx); + from = rdlbr_from(lbr_idx); + if (!from) + break; + task_ctx->lbr_from[i] = from; task_ctx->lbr_to[i] = rdlbr_to(lbr_idx); if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO) rdmsrl(MSR_LBR_INFO_0 + lbr_idx, task_ctx->lbr_info[i]); } + task_ctx->valid_lbrs = i; task_ctx->tos = tos; task_ctx->lbr_stack_state = LBR_VALID; } @@ -531,7 +544,7 @@ static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc) */ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc) { - bool need_info = false; + bool need_info = false, call_stack = false; unsigned long mask = x86_pmu.lbr_nr - 1; int lbr_format = x86_pmu.intel_cap.lbr_format; u64 tos = intel_pmu_lbr_tos(); @@ -542,7 +555,7 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc) if (cpuc->lbr_sel) { need_info = !(cpuc->lbr_sel->config & LBR_NO_INFO); if (cpuc->lbr_sel->config & LBR_CALL_STACK) - num = tos; + call_stack = true; } for (i = 0; i < num; i++) { @@ -555,6 +568,13 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc) from = rdlbr_from(lbr_idx); to = rdlbr_to(lbr_idx); + /* + * Read LBR call stack entries + * until invalid entry (0s) is detected. + */ + if (call_stack && !from) + break; + if (lbr_format == LBR_FORMAT_INFO && need_info) { u64 info; diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index dc4728eccfd86..c6698c63c047b 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -646,6 +646,7 @@ struct x86_perf_task_context { u64 lbr_to[MAX_LBR_ENTRIES]; u64 lbr_info[MAX_LBR_ENTRIES]; int tos; + int valid_lbrs; int lbr_callstack_users; int lbr_stack_state; }; -- GitLab From 13ab355240a9dc5be2ca9e5aba56932c64ee66ff Mon Sep 17 00:00:00 2001 From: Zhouyang Jia Date: Tue, 12 Jun 2018 11:13:00 +0800 Subject: [PATCH 0644/2269] scsi: bnx2i: add error handling for ioremap_nocache [ Upstream commit aa154ea885eb0c2407457ce9c1538d78c95456fa ] When ioremap_nocache fails, the lack of error-handling code may cause unexpected results. This patch adds error-handling code after calling ioremap_nocache. Signed-off-by: Zhouyang Jia Reviewed-by: Johannes Thumshirn Acked-by: Manish Rangankar Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/bnx2i/bnx2i_hwi.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/scsi/bnx2i/bnx2i_hwi.c b/drivers/scsi/bnx2i/bnx2i_hwi.c index 42921dbba9272..4ca10501647b3 100644 --- a/drivers/scsi/bnx2i/bnx2i_hwi.c +++ b/drivers/scsi/bnx2i/bnx2i_hwi.c @@ -2742,6 +2742,8 @@ int bnx2i_map_ep_dbell_regs(struct bnx2i_endpoint *ep) BNX2X_DOORBELL_PCI_BAR); reg_off = (1 << BNX2X_DB_SHIFT) * (cid_num & 0x1FFFF); ep->qp.ctx_base = ioremap_nocache(reg_base + reg_off, 4); + if (!ep->qp.ctx_base) + return -ENOMEM; goto arm_cq; } -- GitLab From a56b97a2fc2d6a2490f8aa1980738b41825cc704 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 19 Jun 2018 15:10:55 -0700 Subject: [PATCH 0645/2269] iomap: complete partial direct I/O writes synchronously [ Upstream commit ebf00be37de35788cad72f4f20b4a39e30c0be4a ] According to xfstest generic/240, applications seem to expect direct I/O writes to either complete as a whole or to fail; short direct I/O writes are apparently not appreciated. This means that when only part of an asynchronous direct I/O write succeeds, we can either fail the entire write, or we can wait for the partial write to complete and retry the remaining write as buffered I/O. The old __blockdev_direct_IO helper has code for waiting for partial writes to complete; the new iomap_dio_rw iomap helper does not. The above mentioned fallback mode is needed for gfs2, which doesn't allow block allocations under direct I/O to avoid taking cluster-wide exclusive locks. As a consequence, an asynchronous direct I/O write to a file range that contains a hole will result in a short write. In that case, wait for the short write to complete to allow gfs2 to recover. Signed-off-by: Andreas Gruenbacher Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/iomap.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/fs/iomap.c b/fs/iomap.c index d4801f8dd4fd5..8f7673a692736 100644 --- a/fs/iomap.c +++ b/fs/iomap.c @@ -693,6 +693,7 @@ struct iomap_dio { atomic_t ref; unsigned flags; int error; + bool wait_for_completion; union { /* used during submission and for synchronous completion: */ @@ -793,9 +794,8 @@ static void iomap_dio_bio_end_io(struct bio *bio) iomap_dio_set_error(dio, blk_status_to_errno(bio->bi_status)); if (atomic_dec_and_test(&dio->ref)) { - if (is_sync_kiocb(dio->iocb)) { + if (dio->wait_for_completion) { struct task_struct *waiter = dio->submit.waiter; - WRITE_ONCE(dio->submit.waiter, NULL); wake_up_process(waiter); } else if (dio->flags & IOMAP_DIO_WRITE) { @@ -980,13 +980,12 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, dio->end_io = end_io; dio->error = 0; dio->flags = 0; + dio->wait_for_completion = is_sync_kiocb(iocb); dio->submit.iter = iter; - if (is_sync_kiocb(iocb)) { - dio->submit.waiter = current; - dio->submit.cookie = BLK_QC_T_NONE; - dio->submit.last_queue = NULL; - } + dio->submit.waiter = current; + dio->submit.cookie = BLK_QC_T_NONE; + dio->submit.last_queue = NULL; if (iov_iter_rw(iter) == READ) { if (pos >= dio->i_size) @@ -1016,7 +1015,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, WARN_ON_ONCE(ret); ret = 0; - if (iov_iter_rw(iter) == WRITE && !is_sync_kiocb(iocb) && + if (iov_iter_rw(iter) == WRITE && !dio->wait_for_completion && !inode->i_sb->s_dio_done_wq) { ret = sb_init_dio_done_wq(inode->i_sb); if (ret < 0) @@ -1031,8 +1030,10 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, iomap_dio_actor); if (ret <= 0) { /* magic error code to fall back to buffered I/O */ - if (ret == -ENOTBLK) + if (ret == -ENOTBLK) { + dio->wait_for_completion = true; ret = 0; + } break; } pos += ret; @@ -1046,7 +1047,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, iomap_dio_set_error(dio, ret); if (!atomic_dec_and_test(&dio->ref)) { - if (!is_sync_kiocb(iocb)) + if (!dio->wait_for_completion) return -EIOCBQUEUED; for (;;) { -- GitLab From c96c2f2b11b6ac4e048e1529084a77c90b626967 Mon Sep 17 00:00:00 2001 From: Shivasharan S Date: Mon, 4 Jun 2018 03:45:10 -0700 Subject: [PATCH 0646/2269] scsi: megaraid_sas: Update controller info during resume [ Upstream commit c3b10a55abc943a526aaecd7e860b15671beb906 ] There is a possibility that firmware on the controller was upgraded before system was suspended. During resume, driver needs to read updated controller properties. Signed-off-by: Shivasharan S Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/megaraid/megaraid_sas_base.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index 985378e4bb6ff..d55c365be2388 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -6597,6 +6597,9 @@ megasas_resume(struct pci_dev *pdev) goto fail_init_mfi; } + if (megasas_get_ctrl_info(instance) != DCMD_SUCCESS) + goto fail_init_mfi; + tasklet_init(&instance->isr_tasklet, instance->instancet->tasklet, (unsigned long)instance); -- GitLab From 3fd534a5480ec33913c3f76cc0c2de61f5db46e3 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 12 Jun 2018 14:43:35 +0200 Subject: [PATCH 0647/2269] EDAC, i7core: Fix memleaks and use-after-free on probe and remove [ Upstream commit 6c974d4dfafe5e9ee754f2a6fba0eb1864f1649e ] Make sure to free and deregister the addrmatch and chancounts devices allocated during probe in all error paths. Also fix use-after-free in a probe error path and in the remove success path where the devices were being put before before deregistration. Signed-off-by: Johan Hovold Cc: Mauro Carvalho Chehab Cc: linux-edac Fixes: 356f0a30860d ("i7core_edac: change the mem allocation scheme to make Documentation/kobject.txt happy") Link: http://lkml.kernel.org/r/20180612124335.6420-2-johan@kernel.org Signed-off-by: Borislav Petkov Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/edac/i7core_edac.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/drivers/edac/i7core_edac.c b/drivers/edac/i7core_edac.c index c16c3b931b3d0..6c7d5f20eacbd 100644 --- a/drivers/edac/i7core_edac.c +++ b/drivers/edac/i7core_edac.c @@ -1177,15 +1177,14 @@ static int i7core_create_sysfs_devices(struct mem_ctl_info *mci) rc = device_add(pvt->addrmatch_dev); if (rc < 0) - return rc; + goto err_put_addrmatch; if (!pvt->is_registered) { pvt->chancounts_dev = kzalloc(sizeof(*pvt->chancounts_dev), GFP_KERNEL); if (!pvt->chancounts_dev) { - put_device(pvt->addrmatch_dev); - device_del(pvt->addrmatch_dev); - return -ENOMEM; + rc = -ENOMEM; + goto err_del_addrmatch; } pvt->chancounts_dev->type = &all_channel_counts_type; @@ -1199,9 +1198,18 @@ static int i7core_create_sysfs_devices(struct mem_ctl_info *mci) rc = device_add(pvt->chancounts_dev); if (rc < 0) - return rc; + goto err_put_chancounts; } return 0; + +err_put_chancounts: + put_device(pvt->chancounts_dev); +err_del_addrmatch: + device_del(pvt->addrmatch_dev); +err_put_addrmatch: + put_device(pvt->addrmatch_dev); + + return rc; } static void i7core_delete_sysfs_devices(struct mem_ctl_info *mci) @@ -1211,11 +1219,11 @@ static void i7core_delete_sysfs_devices(struct mem_ctl_info *mci) edac_dbg(1, "\n"); if (!pvt->is_registered) { - put_device(pvt->chancounts_dev); device_del(pvt->chancounts_dev); + put_device(pvt->chancounts_dev); } - put_device(pvt->addrmatch_dev); device_del(pvt->addrmatch_dev); + put_device(pvt->addrmatch_dev); } /**************************************************************************** -- GitLab From 05f78b1a0e0c7d76bd97159e9a575165e7fdc6c6 Mon Sep 17 00:00:00 2001 From: Liam Girdwood Date: Thu, 14 Jun 2018 20:26:42 +0100 Subject: [PATCH 0648/2269] ASoC: dapm: Fix potential DAI widget pointer deref when linking DAIs [ Upstream commit e01b4f624278d5efe5fb5da585ca371947b16680 ] Sometime a component or topology may configure a DAI widget with no private data leading to a dev_dbg() dereferencne of this data. Fix this to check for non NULL private data and let users know if widget is missing DAI. Signed-off-by: Liam Girdwood Signed-off-by: Mark Brown Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- sound/soc/soc-dapm.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c index 1c9f6a0d234f0..53c9d75256393 100644 --- a/sound/soc/soc-dapm.c +++ b/sound/soc/soc-dapm.c @@ -4005,6 +4005,13 @@ int snd_soc_dapm_link_dai_widgets(struct snd_soc_card *card) continue; } + /* let users know there is no DAI to link */ + if (!dai_w->priv) { + dev_dbg(card->dev, "dai widget %s has no DAI\n", + dai_w->name); + continue; + } + dai = dai_w->priv; /* ...find all widgets with the same stream and link them */ -- GitLab From 5bcbbadf6ac54568019720be6d434bf7020fe3ed Mon Sep 17 00:00:00 2001 From: Jessica Yu Date: Tue, 5 Jun 2018 10:22:52 +0200 Subject: [PATCH 0649/2269] module: exclude SHN_UNDEF symbols from kallsyms api [ Upstream commit 9f2d1e68cf4d641def734adaccfc3823d3575e6c ] Livepatch modules are special in that we preserve their entire symbol tables in order to be able to apply relocations after module load. The unwanted side effect of this is that undefined (SHN_UNDEF) symbols of livepatch modules are accessible via the kallsyms api and this can confuse symbol resolution in livepatch (klp_find_object_symbol()) and cause subtle bugs in livepatch. Have the module kallsyms api skip over SHN_UNDEF symbols. These symbols are usually not available for normal modules anyway as we cut down their symbol tables to just the core (non-undefined) symbols, so this should really just affect livepatch modules. Note that this patch doesn't affect the display of undefined symbols in /proc/kallsyms. Reported-by: Josh Poimboeuf Tested-by: Josh Poimboeuf Reviewed-by: Josh Poimboeuf Signed-off-by: Jessica Yu Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- kernel/module.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/kernel/module.c b/kernel/module.c index 321b0b1f87e7a..2a44c515f0d74 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -4058,7 +4058,7 @@ static unsigned long mod_find_symname(struct module *mod, const char *name) for (i = 0; i < kallsyms->num_symtab; i++) if (strcmp(name, symname(kallsyms, i)) == 0 && - kallsyms->symtab[i].st_info != 'U') + kallsyms->symtab[i].st_shndx != SHN_UNDEF) return kallsyms->symtab[i].st_value; return 0; } @@ -4104,6 +4104,10 @@ int module_kallsyms_on_each_symbol(int (*fn)(void *, const char *, if (mod->state == MODULE_STATE_UNFORMED) continue; for (i = 0; i < kallsyms->num_symtab; i++) { + + if (kallsyms->symtab[i].st_shndx == SHN_UNDEF) + continue; + ret = fn(data, symname(kallsyms, i), mod, kallsyms->symtab[i].st_value); if (ret != 0) -- GitLab From de6ccdbd77345349b0134590a71dfcee5e4da791 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Mon, 4 Jun 2018 06:58:14 -0700 Subject: [PATCH 0650/2269] gpio: Fix wrong rounding in gpio-menz127 [ Upstream commit 7279d9917560bbd0d82813d6bf00490a82c06783 ] men_z127_debounce() tries to round up and down, but uses functions which are only suitable when the divider is a power of two, which is not the case. Use the appropriate ones. Found by static check. Compile tested. Fixes: f436bc2726c64 ("gpio: add driver for MEN 16Z127 GPIO controller") Signed-off-by: Nadav Amit Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpio/gpio-menz127.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpio/gpio-menz127.c b/drivers/gpio/gpio-menz127.c index e1037582e34d7..b2635326546e7 100644 --- a/drivers/gpio/gpio-menz127.c +++ b/drivers/gpio/gpio-menz127.c @@ -56,9 +56,9 @@ static int men_z127_debounce(struct gpio_chip *gc, unsigned gpio, rnd = fls(debounce) - 1; if (rnd && (debounce & BIT(rnd - 1))) - debounce = round_up(debounce, MEN_Z127_DB_MIN_US); + debounce = roundup(debounce, MEN_Z127_DB_MIN_US); else - debounce = round_down(debounce, MEN_Z127_DB_MIN_US); + debounce = rounddown(debounce, MEN_Z127_DB_MIN_US); if (debounce > MEN_Z127_DB_MAX_US) debounce = MEN_Z127_DB_MAX_US; -- GitLab From a4f7bea878871cffe39618696462e6e4881f4d6e Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 13 Jun 2018 15:21:35 -0400 Subject: [PATCH 0651/2269] nfsd: fix corrupted reply to badly ordered compound [ Upstream commit 5b7b15aee641904ae269be9846610a3950cbd64c ] We're encoding a single op in the reply but leaving the number of ops zero, so the reply makes no sense. Somewhat academic as this isn't a case any real client will hit, though in theory perhaps that could change in a future protocol extension. Reviewed-by: Jeff Layton Signed-off-by: J. Bruce Fields Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4proc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 5b6ff168d11aa..6d16399a350e0 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1725,6 +1725,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp) if (status) { op = &args->ops[0]; op->status = status; + resp->opcnt = 1; goto encode_op; } -- GitLab From 0c4439c4441608e79d4a0dcb3cd81b9becf918c2 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 12 Jun 2018 14:43:34 +0200 Subject: [PATCH 0652/2269] EDAC: Fix memleak in module init error path [ Upstream commit 4708aa85d50cc6e962dfa8acf5ad4e0d290a21db ] Make sure to use put_device() to free the initialised struct device so that resources managed by driver core also gets released in the event of a registration failure. Signed-off-by: Johan Hovold Cc: Denis Kirjanov Cc: Mauro Carvalho Chehab Cc: linux-edac Fixes: 2d56b109e3a5 ("EDAC: Handle error path in edac_mc_sysfs_init() properly") Link: http://lkml.kernel.org/r/20180612124335.6420-1-johan@kernel.org Signed-off-by: Borislav Petkov Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/edac/edac_mc_sysfs.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c index e4fcfa84fbd35..79c13301bf416 100644 --- a/drivers/edac/edac_mc_sysfs.c +++ b/drivers/edac/edac_mc_sysfs.c @@ -1097,14 +1097,14 @@ int __init edac_mc_sysfs_init(void) err = device_add(mci_pdev); if (err < 0) - goto out_dev_free; + goto out_put_device; edac_dbg(0, "device %s created\n", dev_name(mci_pdev)); return 0; - out_dev_free: - kfree(mci_pdev); + out_put_device: + put_device(mci_pdev); out: return err; } -- GitLab From 91e30cae8903ad56b44e36e97b2eddb77675151c Mon Sep 17 00:00:00 2001 From: Konstantin Khorenko Date: Fri, 8 Jun 2018 17:27:11 +0300 Subject: [PATCH 0653/2269] fs/lock: skip lock owner pid translation in case we are in init_pid_ns [ Upstream commit 826d7bc9f013d01e92997883d2fd0c25f4af1f1c ] If the flock owner process is dead and its pid has been already freed, pid translation won't work, but we still want to show flock owner pid number when expecting /proc/$PID/fdinfo/$FD in init pidns. Reproducer: process A process A1 process A2 fork()---------> exit() open() flock() fork()---------> exit() sleep() Before the patch: ================ (root@vz7)/: cat /proc/${PID_A2}/fdinfo/3 pos: 4 flags: 02100002 mnt_id: 257 lock: (root@vz7)/: After the patch: =============== (root@vz7)/:cat /proc/${PID_A2}/fdinfo/3 pos: 4 flags: 02100002 mnt_id: 295 lock: 1: FLOCK ADVISORY WRITE ${PID_A1} b6:f8a61:529946 0 EOF Fixes: 9d5b86ac13c5 ("fs/locks: Remove fl_nspid and use fs-specific l_pid for remote locks") Signed-off-by: Konstantin Khorenko Acked-by: Andrey Vagin Reviewed-by: Benjamin Coddington Signed-off-by: Jeff Layton Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/locks.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/locks.c b/fs/locks.c index 1bd71c4d663a8..665e3ce9ab476 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -2074,6 +2074,13 @@ static pid_t locks_translate_pid(struct file_lock *fl, struct pid_namespace *ns) return -1; if (IS_REMOTELCK(fl)) return fl->fl_pid; + /* + * If the flock owner process is dead and its pid has been already + * freed, the translation below won't work, but we still want to show + * flock owner pid number in init pidns. + */ + if (ns == &init_pid_ns) + return (pid_t)fl->fl_pid; rcu_read_lock(); pid = find_pid_ns(fl->fl_pid, &init_pid_ns); -- GitLab From a82a772da750843c30efdd3434cd5a0937193cf6 Mon Sep 17 00:00:00 2001 From: Oleksandr Andrushchenko Date: Tue, 12 Jun 2018 15:03:36 -0700 Subject: [PATCH 0654/2269] Input: xen-kbdfront - fix multi-touch XenStore node's locations [ Upstream commit ce6f7d087e2b037f47349c1c36ac97678d02e394 ] kbdif protocol describes multi-touch device parameters as a part of frontend's XenBus configuration nodes while they belong to backend's configuration. Fix this by reading the parameters as defined by the protocol. Fixes: 49aac8204da5 ("Input: xen-kbdfront - add multi-touch support") Signed-off-by: Oleksandr Andrushchenko Reviewed-by: Juergen Gross Signed-off-by: Dmitry Torokhov Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/input/misc/xen-kbdfront.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/input/misc/xen-kbdfront.c b/drivers/input/misc/xen-kbdfront.c index d91f3b1c53755..92d7396490225 100644 --- a/drivers/input/misc/xen-kbdfront.c +++ b/drivers/input/misc/xen-kbdfront.c @@ -229,7 +229,7 @@ static int xenkbd_probe(struct xenbus_device *dev, } } - touch = xenbus_read_unsigned(dev->nodename, + touch = xenbus_read_unsigned(dev->otherend, XENKBD_FIELD_FEAT_MTOUCH, 0); if (touch) { ret = xenbus_write(XBT_NIL, dev->nodename, @@ -304,13 +304,13 @@ static int xenkbd_probe(struct xenbus_device *dev, if (!mtouch) goto error_nomem; - num_cont = xenbus_read_unsigned(info->xbdev->nodename, + num_cont = xenbus_read_unsigned(info->xbdev->otherend, XENKBD_FIELD_MT_NUM_CONTACTS, 1); - width = xenbus_read_unsigned(info->xbdev->nodename, + width = xenbus_read_unsigned(info->xbdev->otherend, XENKBD_FIELD_MT_WIDTH, XENFB_WIDTH); - height = xenbus_read_unsigned(info->xbdev->nodename, + height = xenbus_read_unsigned(info->xbdev->otherend, XENKBD_FIELD_MT_HEIGHT, XENFB_HEIGHT); -- GitLab From 99795ed0c62d9aa81c6930838e4a8ab6132bc7b4 Mon Sep 17 00:00:00 2001 From: William Breathitt Gray Date: Thu, 24 May 2018 16:37:46 -0400 Subject: [PATCH 0655/2269] iio: 104-quad-8: Fix off-by-one error in register selection [ Upstream commit 2873c3f0e2bd12a7612e905c920c058855f4072a ] The reset flags operation is selected by bit 2 in the "Reset and Load Signals Decoders" register, not bit 1. Fixes: 28e5d3bb0325 ("iio: 104-quad-8: Add IIO support for the ACCES 104-QUAD-8") Signed-off-by: William Breathitt Gray Signed-off-by: Jonathan Cameron Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/iio/counter/104-quad-8.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/counter/104-quad-8.c b/drivers/iio/counter/104-quad-8.c index ba3d9030cd514..181585ae6e171 100644 --- a/drivers/iio/counter/104-quad-8.c +++ b/drivers/iio/counter/104-quad-8.c @@ -138,7 +138,7 @@ static int quad8_write_raw(struct iio_dev *indio_dev, outb(val >> (8 * i), base_offset); /* Reset Borrow, Carry, Compare, and Sign flags */ - outb(0x02, base_offset + 1); + outb(0x04, base_offset + 1); /* Reset Error flag */ outb(0x06, base_offset + 1); -- GitLab From f88e50ea03000bba631833540a654c7692f6410f Mon Sep 17 00:00:00 2001 From: Kevin Hilman Date: Mon, 21 May 2018 13:08:32 -0700 Subject: [PATCH 0656/2269] ARM: dts: dra7: fix DCAN node addresses [ Upstream commit 949bdcc8a97c6078f21c8d4966436b117f2e4cd3 ] Fix the DT node addresses to match the reg property addresses, which were verified to match the TRM: http://www.ti.com/lit/pdf/sprui30 Cc: Roger Quadros Signed-off-by: Kevin Hilman Acked-by: Roger Quadros Signed-off-by: Tony Lindgren Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/dra7.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/dra7.dtsi b/arch/arm/boot/dts/dra7.dtsi index 02a136a4661aa..a5bd8f0205e8e 100644 --- a/arch/arm/boot/dts/dra7.dtsi +++ b/arch/arm/boot/dts/dra7.dtsi @@ -1818,7 +1818,7 @@ }; }; - dcan1: can@481cc000 { + dcan1: can@4ae3c000 { compatible = "ti,dra7-d_can"; ti,hwmods = "dcan1"; reg = <0x4ae3c000 0x2000>; @@ -1828,7 +1828,7 @@ status = "disabled"; }; - dcan2: can@481d0000 { + dcan2: can@48480000 { compatible = "ti,dra7-d_can"; ti,hwmods = "dcan2"; reg = <0x48480000 0x2000>; -- GitLab From 04bc4dd86d0f2b166640c8ea5b7a030d92a3d993 Mon Sep 17 00:00:00 2001 From: Andy Whitcroft Date: Thu, 20 Sep 2018 09:09:48 -0600 Subject: [PATCH 0657/2269] floppy: Do not copy a kernel pointer to user memory in FDGETPRM ioctl commit 65eea8edc315589d6c993cf12dbb5d0e9ef1fe4e upstream. The final field of a floppy_struct is the field "name", which is a pointer to a string in kernel memory. The kernel pointer should not be copied to user memory. The FDGETPRM ioctl copies a floppy_struct to user memory, including this "name" field. This pointer cannot be used by the user and it will leak a kernel address to user-space, which will reveal the location of kernel code and data and undermine KASLR protection. Model this code after the compat ioctl which copies the returned data to a previously cleared temporary structure on the stack (excluding the name pointer) and copy out to userspace from there. As we already have an inparam union with an appropriate member and that memory is already cleared even for read only calls make use of that as a temporary store. Based on an initial patch by Brian Belleville. CVE-2018-7755 Signed-off-by: Andy Whitcroft Broke up long line. Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/block/floppy.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 60c086a536094..3d0287e212fe5 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -3462,6 +3462,9 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, unsigned int (struct floppy_struct **)&outparam); if (ret) return ret; + memcpy(&inparam.g, outparam, + offsetof(struct floppy_struct, name)); + outparam = &inparam.g; break; case FDMSGON: UDP->flags |= FTD_MSG; -- GitLab From 4fe780c1baec24dcc098cbc848fdc589b62a8ce0 Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Thu, 20 Sep 2018 10:58:28 +0800 Subject: [PATCH 0658/2269] x86/mm: Expand static page table for fixmap space commit 05ab1d8a4b36ee912b7087c6da127439ed0a903e upstream. We met a kernel panic when enabling earlycon, which is due to the fixmap address of earlycon is not statically setup. Currently the static fixmap setup in head_64.S only covers 2M virtual address space, while it actually could be in 4M space with different kernel configurations, e.g. when VSYSCALL emulation is disabled. So increase the static space to 4M for now by defining FIXMAP_PMD_NUM to 2, and add a build time check to ensure that the fixmap is covered by the initial static page tables. Fixes: 1ad83c858c7d ("x86_64,vsyscall: Make vsyscall emulation configurable") Suggested-by: Thomas Gleixner Signed-off-by: Feng Tang Signed-off-by: Thomas Gleixner Tested-by: kernel test robot Reviewed-by: Juergen Gross (Xen parts) Cc: H Peter Anvin Cc: Peter Zijlstra Cc: Michal Hocko Cc: Yinghai Lu Cc: Dave Hansen Cc: Andi Kleen Cc: Andy Lutomirsky Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20180920025828.23699-1-feng.tang@intel.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/fixmap.h | 10 ++++++++++ arch/x86/include/asm/pgtable_64.h | 3 ++- arch/x86/kernel/head64.c | 4 +++- arch/x86/kernel/head_64.S | 16 ++++++++++++---- arch/x86/mm/pgtable.c | 9 +++++++++ arch/x86/xen/mmu_pv.c | 8 ++++++-- 6 files changed, 42 insertions(+), 8 deletions(-) diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h index e203169931c72..6390bd8c141b4 100644 --- a/arch/x86/include/asm/fixmap.h +++ b/arch/x86/include/asm/fixmap.h @@ -14,6 +14,16 @@ #ifndef _ASM_X86_FIXMAP_H #define _ASM_X86_FIXMAP_H +/* + * Exposed to assembly code for setting up initial page tables. Cannot be + * calculated in assembly code (fixmap entries are an enum), but is sanity + * checked in the actual fixmap C code to make sure that the fixmap is + * covered fully. + */ +#define FIXMAP_PMD_NUM 2 +/* fixmap starts downwards from the 507th entry in level2_fixmap_pgt */ +#define FIXMAP_PMD_TOP 507 + #ifndef __ASSEMBLY__ #include #include diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 4ecb728319384..ef938583147ea 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -14,6 +14,7 @@ #include #include #include +#include extern p4d_t level4_kernel_pgt[512]; extern p4d_t level4_ident_pgt[512]; @@ -22,7 +23,7 @@ extern pud_t level3_ident_pgt[512]; extern pmd_t level2_kernel_pgt[512]; extern pmd_t level2_fixmap_pgt[512]; extern pmd_t level2_ident_pgt[512]; -extern pte_t level1_fixmap_pgt[512]; +extern pte_t level1_fixmap_pgt[512 * FIXMAP_PMD_NUM]; extern pgd_t init_top_pgt[]; #define swapper_pg_dir init_top_pgt diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 7ba5d819ebe3b..45b5c6c4a55ed 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -31,6 +31,7 @@ #include #include #include +#include /* * Manage page tables very early on. @@ -93,7 +94,8 @@ unsigned long __head __startup_64(unsigned long physaddr, pud[511] += load_delta; pmd = fixup_pointer(level2_fixmap_pgt, physaddr); - pmd[506] += load_delta; + for (i = FIXMAP_PMD_TOP; i > FIXMAP_PMD_TOP - FIXMAP_PMD_NUM; i--) + pmd[i] += load_delta; /* * Set up the identity mapping for the switchover. These diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 0f545b3cf9267..8d59dfe629a96 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -24,6 +24,7 @@ #include "../entry/calling.h" #include #include +#include #ifdef CONFIG_PARAVIRT #include @@ -438,13 +439,20 @@ NEXT_PAGE(level2_kernel_pgt) KERNEL_IMAGE_SIZE/PMD_SIZE) NEXT_PAGE(level2_fixmap_pgt) - .fill 506,8,0 - .quad level1_fixmap_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC - /* 8MB reserved for vsyscalls + a 2MB hole = 4 + 1 entries */ - .fill 5,8,0 + .fill (512 - 4 - FIXMAP_PMD_NUM),8,0 + pgtno = 0 + .rept (FIXMAP_PMD_NUM) + .quad level1_fixmap_pgt + (pgtno << PAGE_SHIFT) - __START_KERNEL_map \ + + _PAGE_TABLE_NOENC; + pgtno = pgtno + 1 + .endr + /* 6 MB reserved space + a 2MB hole */ + .fill 4,8,0 NEXT_PAGE(level1_fixmap_pgt) + .rept (FIXMAP_PMD_NUM) .fill 512,8,0 + .endr #undef PMDS diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 2bdb8e8a9d7c8..aafd4edfa2ac6 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -573,6 +573,15 @@ void __native_set_fixmap(enum fixed_addresses idx, pte_t pte) { unsigned long address = __fix_to_virt(idx); +#ifdef CONFIG_X86_64 + /* + * Ensure that the static initial page tables are covering the + * fixmap completely. + */ + BUILD_BUG_ON(__end_of_permanent_fixed_addresses > + (FIXMAP_PMD_NUM * PTRS_PER_PTE)); +#endif + if (idx >= __end_of_fixed_addresses) { BUG(); return; diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index 42cfad67b6acd..8ed11a5b1a9d8 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -1879,7 +1879,7 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) /* L3_k[511] -> level2_fixmap_pgt */ convert_pfn_mfn(level3_kernel_pgt); - /* L3_k[511][506] -> level1_fixmap_pgt */ + /* L3_k[511][508-FIXMAP_PMD_NUM ... 507] -> level1_fixmap_pgt */ convert_pfn_mfn(level2_fixmap_pgt); /* We get [511][511] and have Xen's version of level2_kernel_pgt */ @@ -1924,7 +1924,11 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) set_page_prot(level2_ident_pgt, PAGE_KERNEL_RO); set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO); set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO); - set_page_prot(level1_fixmap_pgt, PAGE_KERNEL_RO); + + for (i = 0; i < FIXMAP_PMD_NUM; i++) { + set_page_prot(level1_fixmap_pgt + i * PTRS_PER_PTE, + PAGE_KERNEL_RO); + } /* Pin down new L4 */ pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE, -- GitLab From 2b7ba104769b4b4c5abe2f6d23284a2aaa495197 Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Tue, 28 Aug 2018 12:44:24 +0200 Subject: [PATCH 0659/2269] tty: serial: lpuart: avoid leaking struct tty_struct commit 3216c622a24b0ebb9c159a8d1daf7f17a106b3f5 upstream. The function tty_port_tty_get() gets a reference to the tty. Since the code is not using tty_port_tty_set(), the reference is kept even after closing the tty. Avoid using tty_port_tty_get() by directly access the tty instance. Since lpuart_start_rx_dma() is called from the .startup() and .set_termios() callback, it is safe to assume the tty instance is valid. Cc: stable@vger.kernel.org # v4.9+ Fixes: 5887ad43ee02 ("tty: serial: fsl_lpuart: Use cyclic DMA for Rx") Signed-off-by: Stefan Agner Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/fsl_lpuart.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/tty/serial/fsl_lpuart.c b/drivers/tty/serial/fsl_lpuart.c index 7a3db9378fa38..fd64ac2c1a748 100644 --- a/drivers/tty/serial/fsl_lpuart.c +++ b/drivers/tty/serial/fsl_lpuart.c @@ -983,7 +983,8 @@ static inline int lpuart_start_rx_dma(struct lpuart_port *sport) struct circ_buf *ring = &sport->rx_ring; int ret, nent; int bits, baud; - struct tty_struct *tty = tty_port_tty_get(&sport->port.state->port); + struct tty_port *port = &sport->port.state->port; + struct tty_struct *tty = port->tty; struct ktermios *termios = &tty->termios; baud = tty_get_baud_rate(tty); -- GitLab From 247cc73cd8f5e0707f19ac56affadad4300095a6 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 14 Sep 2018 10:32:50 +0000 Subject: [PATCH 0660/2269] serial: cpm_uart: return immediately from console poll commit be28c1e3ca29887e207f0cbcd294cefe5074bab6 upstream. kgdb expects poll function to return immediately and returning NO_POLL_CHAR when no character is available. Fixes: f5316b4aea024 ("kgdb,8250,pl011: Return immediately from console poll") Cc: Jason Wessel Cc: Signed-off-by: Christophe Leroy Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/cpm_uart/cpm_uart_core.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/tty/serial/cpm_uart/cpm_uart_core.c b/drivers/tty/serial/cpm_uart/cpm_uart_core.c index 9ac142cfc1f1b..8b2b694334ec0 100644 --- a/drivers/tty/serial/cpm_uart/cpm_uart_core.c +++ b/drivers/tty/serial/cpm_uart/cpm_uart_core.c @@ -1068,8 +1068,8 @@ static int poll_wait_key(char *obuf, struct uart_cpm_port *pinfo) /* Get the address of the host memory buffer. */ bdp = pinfo->rx_cur; - while (bdp->cbd_sc & BD_SC_EMPTY) - ; + if (bdp->cbd_sc & BD_SC_EMPTY) + return NO_POLL_CHAR; /* If the buffer address is in the CPM DPRAM, don't * convert it. @@ -1104,7 +1104,11 @@ static int cpm_get_poll_char(struct uart_port *port) poll_chars = 0; } if (poll_chars <= 0) { - poll_chars = poll_wait_key(poll_buf, pinfo); + int ret = poll_wait_key(poll_buf, pinfo); + + if (ret == NO_POLL_CHAR) + return ret; + poll_chars = ret; pollp = poll_buf; } poll_chars--; -- GitLab From dc89d37f9098ce6855457a86769f111e9190bd16 Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Tue, 18 Sep 2018 16:10:47 +0300 Subject: [PATCH 0661/2269] intel_th: Fix device removal logic commit 8801922cd94c918e4dc3a108ecaa500c4d40583f upstream. Commit a753bfcfdb1f ("intel_th: Make the switch allocate its subdevices") brings in new subdevice addition/removal logic that's broken for "host mode": the SWITCH device has no children to begin with, which is not handled in the code. This results in a null dereference bug later down the path. This patch fixes the subdevice removal code to handle host mode correctly. Signed-off-by: Alexander Shishkin Fixes: a753bfcfdb1f ("intel_th: Make the switch allocate its subdevices") CC: stable@vger.kernel.org # v4.14+ Signed-off-by: Greg Kroah-Hartman --- drivers/hwtracing/intel_th/core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/hwtracing/intel_th/core.c b/drivers/hwtracing/intel_th/core.c index c1793313bb087..757801d276043 100644 --- a/drivers/hwtracing/intel_th/core.c +++ b/drivers/hwtracing/intel_th/core.c @@ -147,7 +147,8 @@ static int intel_th_remove(struct device *dev) th->thdev[i] = NULL; } - th->num_thdevs = lowest; + if (lowest >= 0) + th->num_thdevs = lowest; } if (thdrv->attr_group) -- GitLab From 429773341c34cde3caacbc3992e2dcb1d7ccea86 Mon Sep 17 00:00:00 2001 From: Marcel Ziswiler Date: Wed, 29 Aug 2018 08:47:57 +0200 Subject: [PATCH 0662/2269] spi: tegra20-slink: explicitly enable/disable clock commit 7001cab1dabc0b72b2b672ef58a90ab64f5e2343 upstream. Depending on the SPI instance one may get an interrupt storm upon requesting resp. interrupt unless the clock is explicitly enabled beforehand. This has been observed trying to bring up instance 4 on T20. Signed-off-by: Marcel Ziswiler Signed-off-by: Mark Brown Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi-tegra20-slink.c | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/drivers/spi/spi-tegra20-slink.c b/drivers/spi/spi-tegra20-slink.c index 3e12d5f87ee44..9831c1106945e 100644 --- a/drivers/spi/spi-tegra20-slink.c +++ b/drivers/spi/spi-tegra20-slink.c @@ -1063,6 +1063,24 @@ static int tegra_slink_probe(struct platform_device *pdev) goto exit_free_master; } + /* disabled clock may cause interrupt storm upon request */ + tspi->clk = devm_clk_get(&pdev->dev, NULL); + if (IS_ERR(tspi->clk)) { + ret = PTR_ERR(tspi->clk); + dev_err(&pdev->dev, "Can not get clock %d\n", ret); + goto exit_free_master; + } + ret = clk_prepare(tspi->clk); + if (ret < 0) { + dev_err(&pdev->dev, "Clock prepare failed %d\n", ret); + goto exit_free_master; + } + ret = clk_enable(tspi->clk); + if (ret < 0) { + dev_err(&pdev->dev, "Clock enable failed %d\n", ret); + goto exit_free_master; + } + spi_irq = platform_get_irq(pdev, 0); tspi->irq = spi_irq; ret = request_threaded_irq(tspi->irq, tegra_slink_isr, @@ -1071,14 +1089,7 @@ static int tegra_slink_probe(struct platform_device *pdev) if (ret < 0) { dev_err(&pdev->dev, "Failed to register ISR for IRQ %d\n", tspi->irq); - goto exit_free_master; - } - - tspi->clk = devm_clk_get(&pdev->dev, NULL); - if (IS_ERR(tspi->clk)) { - dev_err(&pdev->dev, "can not get clock\n"); - ret = PTR_ERR(tspi->clk); - goto exit_free_irq; + goto exit_clk_disable; } tspi->rst = devm_reset_control_get_exclusive(&pdev->dev, "spi"); @@ -1138,6 +1149,8 @@ exit_rx_dma_free: tegra_slink_deinit_dma_param(tspi, true); exit_free_irq: free_irq(spi_irq, tspi); +exit_clk_disable: + clk_disable(tspi->clk); exit_free_master: spi_master_put(master); return ret; @@ -1150,6 +1163,8 @@ static int tegra_slink_remove(struct platform_device *pdev) free_irq(tspi->irq, tspi); + clk_disable(tspi->clk); + if (tspi->tx_dma_chan) tegra_slink_deinit_dma_param(tspi, false); -- GitLab From d120858fca5f631195fbb7e2c2768d525d1c80da Mon Sep 17 00:00:00 2001 From: Gaku Inami Date: Wed, 5 Sep 2018 10:49:36 +0200 Subject: [PATCH 0663/2269] spi: sh-msiof: Fix invalid SPI use during system suspend commit ffa69d6a16f686efe45269342474e421f2aa58b2 upstream. If the SPI queue is running during system suspend, the system may lock up. Fix this by stopping/restarting the queue during system suspend/resume by calling spi_master_suspend()/spi_master_resume() from the PM callbacks. In-kernel users will receive an -ESHUTDOWN error while system suspend/resume is in progress. Signed-off-by: Gaku Inami Signed-off-by: Hiromitsu Yamasaki [geert: Cleanup, reword] Signed-off-by: Geert Uytterhoeven Signed-off-by: Mark Brown Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi-sh-msiof.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/drivers/spi/spi-sh-msiof.c b/drivers/spi/spi-sh-msiof.c index 0fea18ab970e3..380b10c2a2446 100644 --- a/drivers/spi/spi-sh-msiof.c +++ b/drivers/spi/spi-sh-msiof.c @@ -1361,12 +1361,37 @@ static const struct platform_device_id spi_driver_ids[] = { }; MODULE_DEVICE_TABLE(platform, spi_driver_ids); +#ifdef CONFIG_PM_SLEEP +static int sh_msiof_spi_suspend(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct sh_msiof_spi_priv *p = platform_get_drvdata(pdev); + + return spi_master_suspend(p->master); +} + +static int sh_msiof_spi_resume(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct sh_msiof_spi_priv *p = platform_get_drvdata(pdev); + + return spi_master_resume(p->master); +} + +static SIMPLE_DEV_PM_OPS(sh_msiof_spi_pm_ops, sh_msiof_spi_suspend, + sh_msiof_spi_resume); +#define DEV_PM_OPS &sh_msiof_spi_pm_ops +#else +#define DEV_PM_OPS NULL +#endif /* CONFIG_PM_SLEEP */ + static struct platform_driver sh_msiof_spi_drv = { .probe = sh_msiof_spi_probe, .remove = sh_msiof_spi_remove, .id_table = spi_driver_ids, .driver = { .name = "spi_sh_msiof", + .pm = DEV_PM_OPS, .of_match_table = of_match_ptr(sh_msiof_match), }, }; -- GitLab From 6074b71d617ddfafc15164f4b6e320c7db4d24d7 Mon Sep 17 00:00:00 2001 From: Hiromitsu Yamasaki Date: Wed, 5 Sep 2018 10:49:37 +0200 Subject: [PATCH 0664/2269] spi: sh-msiof: Fix handling of write value for SISTR register commit 31a5fae4c5a009898da6d177901d5328051641ff upstream. This patch changes writing to the SISTR register according to the H/W user's manual. The TDREQ bit and RDREQ bits of SISTR are read-only, and must be written their initial values of zero. Signed-off-by: Hiromitsu Yamasaki [geert: reword] Signed-off-by: Geert Uytterhoeven Signed-off-by: Mark Brown Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi-sh-msiof.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/spi/spi-sh-msiof.c b/drivers/spi/spi-sh-msiof.c index 380b10c2a2446..db2a529accae8 100644 --- a/drivers/spi/spi-sh-msiof.c +++ b/drivers/spi/spi-sh-msiof.c @@ -384,7 +384,8 @@ static void sh_msiof_spi_set_mode_regs(struct sh_msiof_spi_priv *p, static void sh_msiof_reset_str(struct sh_msiof_spi_priv *p) { - sh_msiof_write(p, STR, sh_msiof_read(p, STR)); + sh_msiof_write(p, STR, + sh_msiof_read(p, STR) & ~(STR_TDREQ | STR_RDREQ)); } static void sh_msiof_spi_write_fifo_8(struct sh_msiof_spi_priv *p, -- GitLab From 082e34f367a5493b46658ff8bee1b9fba48c3fc0 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 5 Sep 2018 10:49:38 +0200 Subject: [PATCH 0665/2269] spi: rspi: Fix invalid SPI use during system suspend commit c1ca59c22c56930b377a665fdd1b43351887830b upstream. If the SPI queue is running during system suspend, the system may lock up. Fix this by stopping/restarting the queue during system suspend/resume, by calling spi_master_suspend()/spi_master_resume() from the PM callbacks. In-kernel users will receive an -ESHUTDOWN error while system suspend/resume is in progress. Based on a patch for sh-msiof by Gaku Inami. Signed-off-by: Geert Uytterhoeven Signed-off-by: Mark Brown Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi-rspi.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/drivers/spi/spi-rspi.c b/drivers/spi/spi-rspi.c index 2a10b3f94ff72..efabb3c6ec1e8 100644 --- a/drivers/spi/spi-rspi.c +++ b/drivers/spi/spi-rspi.c @@ -1352,12 +1352,36 @@ static const struct platform_device_id spi_driver_ids[] = { MODULE_DEVICE_TABLE(platform, spi_driver_ids); +#ifdef CONFIG_PM_SLEEP +static int rspi_suspend(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct rspi_data *rspi = platform_get_drvdata(pdev); + + return spi_master_suspend(rspi->master); +} + +static int rspi_resume(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct rspi_data *rspi = platform_get_drvdata(pdev); + + return spi_master_resume(rspi->master); +} + +static SIMPLE_DEV_PM_OPS(rspi_pm_ops, rspi_suspend, rspi_resume); +#define DEV_PM_OPS &rspi_pm_ops +#else +#define DEV_PM_OPS NULL +#endif /* CONFIG_PM_SLEEP */ + static struct platform_driver rspi_driver = { .probe = rspi_probe, .remove = rspi_remove, .id_table = spi_driver_ids, .driver = { .name = "renesas_spi", + .pm = DEV_PM_OPS, .of_match_table = of_match_ptr(rspi_of_match), }, }; -- GitLab From b6adc1f24bb356c5601391b79ea689f06a55a17f Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 5 Sep 2018 10:49:39 +0200 Subject: [PATCH 0666/2269] spi: rspi: Fix interrupted DMA transfers commit 8dbbaa47b96f6ea5f09f922b4effff3c505cd8cf upstream. When interrupted, wait_event_interruptible_timeout() returns -ERESTARTSYS, and the SPI transfer in progress will fail, as expected: m25p80 spi0.0: SPI transfer failed: -512 spi_master spi0: failed to transfer one message from queue However, as the underlying DMA transfers may not have completed, all subsequent SPI transfers may start to fail: spi_master spi0: receive timeout qspi_transfer_out_in() returned -110 m25p80 spi0.0: SPI transfer failed: -110 spi_master spi0: failed to transfer one message from queue Fix this by calling dmaengine_terminate_all() not only for timeouts, but also for errors. This can be reproduced on r8a7991/koelsch, using "hd /dev/mtd0" followed by CTRL-C. Signed-off-by: Geert Uytterhoeven Signed-off-by: Mark Brown Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi-rspi.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/spi/spi-rspi.c b/drivers/spi/spi-rspi.c index efabb3c6ec1e8..20981e08ee975 100644 --- a/drivers/spi/spi-rspi.c +++ b/drivers/spi/spi-rspi.c @@ -598,11 +598,13 @@ static int rspi_dma_transfer(struct rspi_data *rspi, struct sg_table *tx, ret = wait_event_interruptible_timeout(rspi->wait, rspi->dma_callbacked, HZ); - if (ret > 0 && rspi->dma_callbacked) + if (ret > 0 && rspi->dma_callbacked) { ret = 0; - else if (!ret) { - dev_err(&rspi->master->dev, "DMA timeout\n"); - ret = -ETIMEDOUT; + } else { + if (!ret) { + dev_err(&rspi->master->dev, "DMA timeout\n"); + ret = -ETIMEDOUT; + } if (tx) dmaengine_terminate_all(rspi->master->dma_tx); if (rx) -- GitLab From 5eaaa5e9bd56813deef7afd5b70f2d62f51187f4 Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Wed, 19 Sep 2018 15:30:51 -0600 Subject: [PATCH 0667/2269] regulator: fix crash caused by null driver data commit fb6de923ca3358a91525552b4907d4cb38730bdd upstream. dev_set_drvdata() needs to be called before device_register() exposes device to userspace. Otherwise kernel crashes after it gets null pointer from dev_get_drvdata() when userspace tries to access sysfs entries. [Removed backtrace for length -- broonie] Signed-off-by: Yu Zhao Signed-off-by: Mark Brown Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/regulator/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index b64b7916507f2..b2cb4f497ef67 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -4115,13 +4115,13 @@ regulator_register(const struct regulator_desc *regulator_desc, !rdev->desc->fixed_uV) rdev->is_switch = true; + dev_set_drvdata(&rdev->dev, rdev); ret = device_register(&rdev->dev); if (ret != 0) { put_device(&rdev->dev); goto unset_supplies; } - dev_set_drvdata(&rdev->dev, rdev); rdev_init_debugfs(rdev); /* try to resolve regulators supply since a new one was registered */ -- GitLab From 4253abe6a3aac68012b5906317803a331a472f5e Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Mon, 10 Sep 2018 13:59:59 -0400 Subject: [PATCH 0668/2269] USB: fix error handling in usb_driver_claim_interface() commit bd729f9d67aa9a303d8925bb8c4f06af25f407d1 upstream. The syzbot fuzzing project found a use-after-free bug in the USB core. The bug was caused by usbfs not unbinding from an interface when the USB device file was closed, which led another process to attempt the unbind later on, after the private data structure had been deallocated. The reason usbfs did not unbind the interface at the appropriate time was because it thought the interface had never been claimed in the first place. This was caused by the fact that usb_driver_claim_interface() does not clean up properly when device_bind_driver() returns an error. Although the error code gets passed back to the caller, the iface->dev.driver pointer remains set and iface->condition remains equal to USB_INTERFACE_BOUND. This patch adds proper error handling to usb_driver_claim_interface(). Signed-off-by: Alan Stern Reported-by: syzbot+f84aa7209ccec829536f@syzkaller.appspotmail.com CC: Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/driver.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/usb/core/driver.c b/drivers/usb/core/driver.c index eb87a259d55c3..1d5ccc2d74dcd 100644 --- a/drivers/usb/core/driver.c +++ b/drivers/usb/core/driver.c @@ -565,6 +565,21 @@ int usb_driver_claim_interface(struct usb_driver *driver, if (!lpm_disable_error) usb_unlocked_enable_lpm(udev); + if (retval) { + dev->driver = NULL; + usb_set_intfdata(iface, NULL); + iface->needs_remote_wakeup = 0; + iface->condition = USB_INTERFACE_UNBOUND; + + /* + * Unbound interfaces are always runtime-PM-disabled + * and runtime-PM-suspended + */ + if (driver->supports_autosuspend) + pm_runtime_disable(dev); + pm_runtime_set_suspended(dev); + } + return retval; } EXPORT_SYMBOL_GPL(usb_driver_claim_interface); -- GitLab From 5b6717c6a3c0c92fe08a439717c19fa61c8c0099 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Mon, 10 Sep 2018 14:00:53 -0400 Subject: [PATCH 0669/2269] USB: handle NULL config in usb_find_alt_setting() commit c9a4cb204e9eb7fa7dfbe3f7d3a674fa530aa193 upstream. usb_find_alt_setting() takes a pointer to a struct usb_host_config as an argument; it searches for an interface with specified interface and alternate setting numbers in that config. However, it crashes if the usb_host_config pointer argument is NULL. Since this is a general-purpose routine, available for use in many places, we want to to be more robust. This patch makes it return NULL whenever the config argument is NULL. Signed-off-by: Alan Stern Reported-by: syzbot+19c3aaef85a89d451eac@syzkaller.appspotmail.com CC: Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/usb.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/core/usb.c b/drivers/usb/core/usb.c index 17681d5638ac3..f8b50eaf6d1ee 100644 --- a/drivers/usb/core/usb.c +++ b/drivers/usb/core/usb.c @@ -228,6 +228,8 @@ struct usb_host_interface *usb_find_alt_setting( struct usb_interface_cache *intf_cache = NULL; int i; + if (!config) + return NULL; for (i = 0; i < config->desc.bNumInterfaces; i++) { if (config->intf_cache[i]->altsetting[0].desc.bInterfaceNumber == iface_num) { -- GitLab From e75c01761a11c445246bb59e15b0d2dea428f6a5 Mon Sep 17 00:00:00 2001 From: Bin Liu Date: Mon, 17 Sep 2018 11:40:22 -0500 Subject: [PATCH 0670/2269] usb: musb: dsps: do not disable CPPI41 irq in driver teardown commit 783f3b4e9ec50491c21746e7e05ec6c39c21f563 upstream. TI AM335x CPPI 4.1 module uses a single register bit for CPPI interrupts in both musb controllers. So disabling the CPPI irq in one musb driver breaks the other musb module. Since musb is already disabled before tearing down dma controller in musb_remove(), it is safe to not disable CPPI irq in musb_dma_controller_destroy(). Fixes: 255348289f71 ("usb: musb: dsps: Manage CPPI 4.1 DMA interrupt in DSPS") Cc: stable@vger.kernel.org Signed-off-by: Bin Liu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/musb_dsps.c | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/drivers/usb/musb/musb_dsps.c b/drivers/usb/musb/musb_dsps.c index f6b526606ad10..dbb482b7e0ba6 100644 --- a/drivers/usb/musb/musb_dsps.c +++ b/drivers/usb/musb/musb_dsps.c @@ -684,16 +684,6 @@ dsps_dma_controller_create(struct musb *musb, void __iomem *base) return controller; } -static void dsps_dma_controller_destroy(struct dma_controller *c) -{ - struct musb *musb = c->musb; - struct dsps_glue *glue = dev_get_drvdata(musb->controller->parent); - void __iomem *usbss_base = glue->usbss_base; - - musb_writel(usbss_base, USBSS_IRQ_CLEARR, USBSS_IRQ_PD_COMP); - cppi41_dma_controller_destroy(c); -} - #ifdef CONFIG_PM_SLEEP static void dsps_dma_controller_suspend(struct dsps_glue *glue) { @@ -723,7 +713,7 @@ static struct musb_platform_ops dsps_ops = { #ifdef CONFIG_USB_TI_CPPI41_DMA .dma_init = dsps_dma_controller_create, - .dma_exit = dsps_dma_controller_destroy, + .dma_exit = cppi41_dma_controller_destroy, #endif .enable = dsps_musb_enable, .disable = dsps_musb_disable, -- GitLab From 1ddc0781c0ce451805b4ee6a5b7521bf9c49d5aa Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 5 Apr 2018 16:21:10 -0700 Subject: [PATCH 0671/2269] slub: make ->cpu_partial unsigned int commit e5d9998f3e09359b372a037a6ac55ba235d95d57 upstream. /* * cpu_partial determined the maximum number of objects * kept in the per cpu partial lists of a processor. */ Can't be negative. Link: http://lkml.kernel.org/r/20180305200730.15812-15-adobriyan@gmail.com Signed-off-by: Alexey Dobriyan Acked-by: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: zhong jiang Signed-off-by: Greg Kroah-Hartman --- include/linux/slub_def.h | 3 ++- mm/slub.c | 6 +++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index 2038ab5316161..f8ced87a2efea 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -88,7 +88,8 @@ struct kmem_cache { int object_size; /* The size of an object without meta data */ int offset; /* Free pointer offset. */ #ifdef CONFIG_SLUB_CPU_PARTIAL - int cpu_partial; /* Number of per cpu partial objects to keep around */ + /* Number of per cpu partial objects to keep around */ + unsigned int cpu_partial; #endif struct kmem_cache_order_objects oo; diff --git a/mm/slub.c b/mm/slub.c index 10e54c4acd199..220d42e592ef5 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1807,7 +1807,7 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n, { struct page *page, *page2; void *object = NULL; - int available = 0; + unsigned int available = 0; int objects; /* @@ -4942,10 +4942,10 @@ static ssize_t cpu_partial_show(struct kmem_cache *s, char *buf) static ssize_t cpu_partial_store(struct kmem_cache *s, const char *buf, size_t length) { - unsigned long objects; + unsigned int objects; int err; - err = kstrtoul(buf, 10, &objects); + err = kstrtouint(buf, 10, &objects); if (err) return err; if (objects && !kmem_cache_has_cpu_partial(s)) -- GitLab From 67d8e231759fe4ba5fdbae73a94df963816e7652 Mon Sep 17 00:00:00 2001 From: ming_qian Date: Tue, 8 May 2018 22:13:08 -0400 Subject: [PATCH 0672/2269] media: uvcvideo: Support realtek's UVC 1.5 device commit f620d1d7afc7db57ab59f35000752840c91f67e7 upstream. media: uvcvideo: Support UVC 1.5 video probe & commit controls The length of UVC 1.5 video control is 48, and it is 34 for UVC 1.1. Change it to 48 for UVC 1.5 device, and the UVC 1.5 device can be recognized. More changes to the driver are needed for full UVC 1.5 compatibility. However, at least the UVC 1.5 Realtek RTS5847/RTS5852 cameras have been reported to work well. [laurent.pinchart@ideasonboard.com: Factor out code to helper function, update size checks] Cc: stable@vger.kernel.org Signed-off-by: ming_qian Signed-off-by: Laurent Pinchart Tested-by: Kai-Heng Feng Tested-by: Ana Guerrero Lopez Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/usb/uvc/uvc_video.c | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/drivers/media/usb/uvc/uvc_video.c b/drivers/media/usb/uvc/uvc_video.c index fb86d6af398d3..a6d8002918831 100644 --- a/drivers/media/usb/uvc/uvc_video.c +++ b/drivers/media/usb/uvc/uvc_video.c @@ -163,14 +163,27 @@ static void uvc_fixup_video_ctrl(struct uvc_streaming *stream, } } +static size_t uvc_video_ctrl_size(struct uvc_streaming *stream) +{ + /* + * Return the size of the video probe and commit controls, which depends + * on the protocol version. + */ + if (stream->dev->uvc_version < 0x0110) + return 26; + else if (stream->dev->uvc_version < 0x0150) + return 34; + else + return 48; +} + static int uvc_get_video_ctrl(struct uvc_streaming *stream, struct uvc_streaming_control *ctrl, int probe, __u8 query) { + __u16 size = uvc_video_ctrl_size(stream); __u8 *data; - __u16 size; int ret; - size = stream->dev->uvc_version >= 0x0110 ? 34 : 26; if ((stream->dev->quirks & UVC_QUIRK_PROBE_DEF) && query == UVC_GET_DEF) return -EIO; @@ -225,7 +238,7 @@ static int uvc_get_video_ctrl(struct uvc_streaming *stream, ctrl->dwMaxVideoFrameSize = get_unaligned_le32(&data[18]); ctrl->dwMaxPayloadTransferSize = get_unaligned_le32(&data[22]); - if (size == 34) { + if (size >= 34) { ctrl->dwClockFrequency = get_unaligned_le32(&data[26]); ctrl->bmFramingInfo = data[30]; ctrl->bPreferedVersion = data[31]; @@ -254,11 +267,10 @@ out: static int uvc_set_video_ctrl(struct uvc_streaming *stream, struct uvc_streaming_control *ctrl, int probe) { + __u16 size = uvc_video_ctrl_size(stream); __u8 *data; - __u16 size; int ret; - size = stream->dev->uvc_version >= 0x0110 ? 34 : 26; data = kzalloc(size, GFP_KERNEL); if (data == NULL) return -ENOMEM; @@ -275,7 +287,7 @@ static int uvc_set_video_ctrl(struct uvc_streaming *stream, put_unaligned_le32(ctrl->dwMaxVideoFrameSize, &data[18]); put_unaligned_le32(ctrl->dwMaxPayloadTransferSize, &data[22]); - if (size == 34) { + if (size >= 34) { put_unaligned_le32(ctrl->dwClockFrequency, &data[26]); data[30] = ctrl->bmFramingInfo; data[31] = ctrl->bPreferedVersion; -- GitLab From 25a8d4825165174dfcc256cf92f7bf63c4a8812f Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Wed, 5 Sep 2018 12:07:02 +0200 Subject: [PATCH 0673/2269] USB: usbdevfs: sanitize flags more commit 7a68d9fb851012829c29e770621905529bd9490b upstream. Requesting a ZERO_PACKET or not is sensible only for output. In the input direction the device decides. Likewise accepting short packets makes sense only for input. This allows operation with panic_on_warn without opening up a local DOS. Signed-off-by: Oliver Neukum Reported-by: syzbot+843efa30c8821bd69f53@syzkaller.appspotmail.com Fixes: 0cb54a3e47cb ("USB: debugging code shouldn't alter control flow") Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/devio.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c index ab245352f102a..fc722dd110053 100644 --- a/drivers/usb/core/devio.c +++ b/drivers/usb/core/devio.c @@ -1451,10 +1451,13 @@ static int proc_do_submiturb(struct usb_dev_state *ps, struct usbdevfs_urb *uurb struct async *as = NULL; struct usb_ctrlrequest *dr = NULL; unsigned int u, totlen, isofrmlen; - int i, ret, is_in, num_sgs = 0, ifnum = -1; + int i, ret, num_sgs = 0, ifnum = -1; int number_of_packets = 0; unsigned int stream_id = 0; void *buf; + bool is_in; + bool allow_short = false; + bool allow_zero = false; unsigned long mask = USBDEVFS_URB_SHORT_NOT_OK | USBDEVFS_URB_BULK_CONTINUATION | USBDEVFS_URB_NO_FSBR | @@ -1488,6 +1491,8 @@ static int proc_do_submiturb(struct usb_dev_state *ps, struct usbdevfs_urb *uurb u = 0; switch (uurb->type) { case USBDEVFS_URB_TYPE_CONTROL: + if (is_in) + allow_short = true; if (!usb_endpoint_xfer_control(&ep->desc)) return -EINVAL; /* min 8 byte setup packet */ @@ -1528,6 +1533,10 @@ static int proc_do_submiturb(struct usb_dev_state *ps, struct usbdevfs_urb *uurb break; case USBDEVFS_URB_TYPE_BULK: + if (!is_in) + allow_zero = true; + else + allow_short = true; switch (usb_endpoint_type(&ep->desc)) { case USB_ENDPOINT_XFER_CONTROL: case USB_ENDPOINT_XFER_ISOC: @@ -1548,6 +1557,10 @@ static int proc_do_submiturb(struct usb_dev_state *ps, struct usbdevfs_urb *uurb if (!usb_endpoint_xfer_int(&ep->desc)) return -EINVAL; interrupt_urb: + if (!is_in) + allow_zero = true; + else + allow_short = true; break; case USBDEVFS_URB_TYPE_ISO: @@ -1692,11 +1705,11 @@ static int proc_do_submiturb(struct usb_dev_state *ps, struct usbdevfs_urb *uurb u = (is_in ? URB_DIR_IN : URB_DIR_OUT); if (uurb->flags & USBDEVFS_URB_ISO_ASAP) u |= URB_ISO_ASAP; - if (uurb->flags & USBDEVFS_URB_SHORT_NOT_OK && is_in) + if (allow_short && uurb->flags & USBDEVFS_URB_SHORT_NOT_OK) u |= URB_SHORT_NOT_OK; if (uurb->flags & USBDEVFS_URB_NO_FSBR) u |= URB_NO_FSBR; - if (uurb->flags & USBDEVFS_URB_ZERO_PACKET) + if (allow_zero && uurb->flags & USBDEVFS_URB_ZERO_PACKET) u |= URB_ZERO_PACKET; if (uurb->flags & USBDEVFS_URB_NO_INTERRUPT) u |= URB_NO_INTERRUPT; -- GitLab From ec6dc4b61c3312e6d2de4186ccca2bf1daa1d640 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Wed, 5 Sep 2018 12:07:03 +0200 Subject: [PATCH 0674/2269] USB: usbdevfs: restore warning for nonsensical flags commit 81e0403b26d94360abd1f6a57311337973bc82cd upstream. If we filter flags before they reach the core we need to generate our own warnings. Signed-off-by: Oliver Neukum Fixes: 0cb54a3e47cb ("USB: debugging code shouldn't alter control flow") Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/devio.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c index fc722dd110053..76cb9b3649b49 100644 --- a/drivers/usb/core/devio.c +++ b/drivers/usb/core/devio.c @@ -1715,6 +1715,11 @@ static int proc_do_submiturb(struct usb_dev_state *ps, struct usbdevfs_urb *uurb u |= URB_NO_INTERRUPT; as->urb->transfer_flags = u; + if (!allow_short && uurb->flags & USBDEVFS_URB_SHORT_NOT_OK) + dev_warn(&ps->dev->dev, "Requested nonsensical USBDEVFS_URB_SHORT_NOT_OK.\n"); + if (!allow_zero && uurb->flags & USBDEVFS_URB_ZERO_PACKET) + dev_warn(&ps->dev->dev, "Requested nonsensical USBDEVFS_URB_ZERO_PACKET.\n"); + as->urb->transfer_buffer_length = uurb->buffer_length; as->urb->setup_packet = (unsigned char *)dr; dr = NULL; -- GitLab From be2360ed2d22761b8cfa5e891fd05ef08736dc71 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 11 Sep 2018 10:00:44 +0200 Subject: [PATCH 0675/2269] Revert "usb: cdc-wdm: Fix a sleep-in-atomic-context bug in service_outstanding_interrupt()" commit e871db8d78df1c411032cbb3acfdf8930509360e upstream. This reverts commit 6e22e3af7bb3a7b9dc53cb4687659f6e63fca427. The bug the patch describes to, has been already fixed in commit 2df6948428542 ("USB: cdc-wdm: don't enable interrupts in USB-giveback") so need to this, revert it. Fixes: 6e22e3af7bb3 ("usb: cdc-wdm: Fix a sleep-in-atomic-context bug in service_outstanding_interrupt()") Cc: stable Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/cdc-wdm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/class/cdc-wdm.c b/drivers/usb/class/cdc-wdm.c index a9509ecccedba..3e865dbf878c7 100644 --- a/drivers/usb/class/cdc-wdm.c +++ b/drivers/usb/class/cdc-wdm.c @@ -457,7 +457,7 @@ static int service_outstanding_interrupt(struct wdm_device *desc) set_bit(WDM_RESPONDING, &desc->flags); spin_unlock_irq(&desc->iuspin); - rv = usb_submit_urb(desc->response, GFP_ATOMIC); + rv = usb_submit_urb(desc->response, GFP_KERNEL); spin_lock_irq(&desc->iuspin); if (rv) { dev_err(&desc->intf->dev, -- GitLab From 9691f745e17a17f8ee7d7ea18dfe0bbd2a090cbd Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Mon, 10 Sep 2018 13:58:51 -0400 Subject: [PATCH 0676/2269] USB: remove LPM management from usb_driver_claim_interface() commit c183813fcee44a249339b7c46e1ad271ca1870aa upstream. usb_driver_claim_interface() disables and re-enables Link Power Management, but it shouldn't do either one, for the reasons listed below. This patch removes the two LPM-related function calls from the routine. The reason for disabling LPM in the analogous function usb_probe_interface() is so that drivers won't have to deal with unwanted LPM transitions in their probe routine. But usb_driver_claim_interface() doesn't call the driver's probe routine (or any other callbacks), so that reason doesn't apply here. Furthermore, no driver other than usbfs will ever call usb_driver_claim_interface() unless it is already bound to another interface in the same device, which means disabling LPM here would be redundant. usbfs doesn't interact with LPM at all. Lastly, the error return from usb_unlocked_disable_lpm() isn't handled properly; the code doesn't clean up its earlier actions before returning. Signed-off-by: Alan Stern Fixes: 8306095fd2c1 ("USB: Disable USB 3.0 LPM in critical sections.") CC: Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/driver.c | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/drivers/usb/core/driver.c b/drivers/usb/core/driver.c index 1d5ccc2d74dcd..2f3dbf1c3c2d6 100644 --- a/drivers/usb/core/driver.c +++ b/drivers/usb/core/driver.c @@ -512,7 +512,6 @@ int usb_driver_claim_interface(struct usb_driver *driver, struct device *dev; struct usb_device *udev; int retval = 0; - int lpm_disable_error = -ENODEV; if (!iface) return -ENODEV; @@ -533,16 +532,6 @@ int usb_driver_claim_interface(struct usb_driver *driver, iface->condition = USB_INTERFACE_BOUND; - /* See the comment about disabling LPM in usb_probe_interface(). */ - if (driver->disable_hub_initiated_lpm) { - lpm_disable_error = usb_unlocked_disable_lpm(udev); - if (lpm_disable_error) { - dev_err(&iface->dev, "%s Failed to disable LPM for driver %s\n.", - __func__, driver->name); - return -ENOMEM; - } - } - /* Claimed interfaces are initially inactive (suspended) and * runtime-PM-enabled, but only if the driver has autosuspend * support. Otherwise they are marked active, to prevent the @@ -561,10 +550,6 @@ int usb_driver_claim_interface(struct usb_driver *driver, if (device_is_registered(dev)) retval = device_bind_driver(dev); - /* Attempt to re-enable USB3 LPM, if the disable was successful. */ - if (!lpm_disable_error) - usb_unlocked_enable_lpm(udev); - if (retval) { dev->driver = NULL; usb_set_intfdata(iface, NULL); -- GitLab From 3011b91478ffa64cae02bf08dffbd8efb328458a Mon Sep 17 00:00:00 2001 From: Aaron Ma Date: Tue, 18 Sep 2018 09:32:22 -0700 Subject: [PATCH 0677/2269] Input: elantech - enable middle button of touchpad on ThinkPad P72 commit 91a97507323e1ad4bfc10f4a5922e67cdaf8b3cd upstream. Adding 2 new touchpad IDs to support middle button support. Cc: stable@vger.kernel.org Signed-off-by: Aaron Ma Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/mouse/elantech.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/input/mouse/elantech.c b/drivers/input/mouse/elantech.c index a250f433eb968..84c69e962230b 100644 --- a/drivers/input/mouse/elantech.c +++ b/drivers/input/mouse/elantech.c @@ -1180,6 +1180,8 @@ static const struct dmi_system_id elantech_dmi_has_middle_button[] = { static const char * const middle_button_pnp_ids[] = { "LEN2131", /* ThinkPad P52 w/ NFC */ "LEN2132", /* ThinkPad P52 */ + "LEN2133", /* ThinkPad P72 w/ NFC */ + "LEN2134", /* ThinkPad P72 */ NULL }; -- GitLab From fcbe49c82b8242fd8a45309c30c6f887f14ab83b Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 17 Sep 2018 18:10:05 -0700 Subject: [PATCH 0678/2269] IB/srp: Avoid that sg_reset -d ${srp_device} triggers an infinite loop commit ee92efe41cf358f4b99e73509f2bfd4733609f26 upstream. Use different loop variables for the inner and outer loop. This avoids that an infinite loop occurs if there are more RDMA channels than target->req_ring_size. Fixes: d92c0da71a35 ("IB/srp: Add multichannel support") Cc: Signed-off-by: Bart Van Assche Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/ulp/srp/ib_srp.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 299a97b7e17ff..ade98c234dcb3 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -2669,7 +2669,7 @@ static int srp_reset_device(struct scsi_cmnd *scmnd) { struct srp_target_port *target = host_to_target(scmnd->device->host); struct srp_rdma_ch *ch; - int i; + int i, j; u8 status; shost_printk(KERN_ERR, target->scsi_host, "SRP reset_device called\n"); @@ -2683,8 +2683,8 @@ static int srp_reset_device(struct scsi_cmnd *scmnd) for (i = 0; i < target->ch_count; i++) { ch = &target->ch[i]; - for (i = 0; i < target->req_ring_size; ++i) { - struct srp_request *req = &ch->req_ring[i]; + for (j = 0; j < target->req_ring_size; ++j) { + struct srp_request *req = &ch->req_ring[j]; srp_finish_req(ch, req, scmnd->device, DID_RESET << 16); } -- GitLab From d9e49e9ed8d6f59dfc7c34f904d99462829fecc1 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Thu, 20 Sep 2018 12:58:46 -0700 Subject: [PATCH 0679/2269] IB/hfi1: Fix SL array bounds check commit 0dbfaa9f2813787679e296eb5476e40938ab48c8 upstream. The SL specified by a user needs to be a valid SL. Add a range check to the user specified SL value which protects from running off the end of the SL to SC table. CC: stable@vger.kernel.org Fixes: 7724105686e7 ("IB/hfi1: add driver files") Signed-off-by: Ira Weiny Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/hfi1/verbs.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index e232f3c608b41..63d404a6752a1 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -1573,6 +1573,7 @@ static int hfi1_check_ah(struct ib_device *ibdev, struct rdma_ah_attr *ah_attr) struct hfi1_pportdata *ppd; struct hfi1_devdata *dd; u8 sc5; + u8 sl; if (hfi1_check_mcast(rdma_ah_get_dlid(ah_attr)) && !(rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH)) @@ -1581,8 +1582,13 @@ static int hfi1_check_ah(struct ib_device *ibdev, struct rdma_ah_attr *ah_attr) /* test the mapping for validity */ ibp = to_iport(ibdev, rdma_ah_get_port_num(ah_attr)); ppd = ppd_from_ibp(ibp); - sc5 = ibp->sl_to_sc[rdma_ah_get_sl(ah_attr)]; dd = dd_from_ppd(ppd); + + sl = rdma_ah_get_sl(ah_attr); + if (sl >= ARRAY_SIZE(ibp->sl_to_sc)) + return -EINVAL; + + sc5 = ibp->sl_to_sc[sl]; if (sc_to_vlt(dd, sc5) > num_vls && sc_to_vlt(dd, sc5) != 0xf) return -EINVAL; return 0; -- GitLab From 412a4b4db1a6754d9141538e54a15841b121a02c Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Thu, 20 Sep 2018 12:58:56 -0700 Subject: [PATCH 0680/2269] IB/hfi1: Invalid user input can result in crash commit 94694d18cf27a6faad91487a38ce516c2b16e7d9 upstream. If the number of packets in a user sdma request does not match the actual iovectors being sent, sdma_cleanup can be called on an uninitialized request structure, resulting in a crash similar to this: BUG: unable to handle kernel NULL pointer dereference at 0000000000000008 IP: [] __sdma_txclean+0x57/0x1e0 [hfi1] PGD 8000001044f61067 PUD 1052706067 PMD 0 Oops: 0000 [#1] SMP CPU: 30 PID: 69912 Comm: upsm Kdump: loaded Tainted: G OE ------------ 3.10.0-862.el7.x86_64 #1 Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0019.101220160604 10/12/2016 task: ffff8b331c890000 ti: ffff8b2ed1f98000 task.ti: ffff8b2ed1f98000 RIP: 0010:[] [] __sdma_txclean+0x57/0x1e0 [hfi1] RSP: 0018:ffff8b2ed1f9bab0 EFLAGS: 00010286 RAX: 0000000000008b2b RBX: ffff8b2adf6e0000 RCX: 0000000000000000 RDX: 00000000000000a0 RSI: ffff8b2e9eedc540 RDI: ffff8b2adf6e0000 RBP: ffff8b2ed1f9bad8 R08: 0000000000000000 R09: ffffffffc0b04a06 R10: ffff8b331c890190 R11: ffffe6ed00bf1840 R12: ffff8b3315480000 R13: ffff8b33154800f0 R14: 00000000fffffff2 R15: ffff8b2e9eedc540 FS: 00007f035ac47740(0000) GS:ffff8b331e100000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000008 CR3: 0000000c03fe6000 CR4: 00000000001607e0 Call Trace: [] user_sdma_send_pkts+0xdcd/0x1990 [hfi1] [] ? gup_pud_range+0x140/0x290 [] ? hfi1_mmu_rb_insert+0x155/0x1b0 [hfi1] [] hfi1_user_sdma_process_request+0xc5b/0x11b0 [hfi1] [] hfi1_aio_write+0xba/0x110 [hfi1] [] do_sync_readv_writev+0x7b/0xd0 [] do_readv_writev+0xce/0x260 [] ? tty_ldisc_deref+0x19/0x20 [] ? n_tty_ioctl+0xe0/0xe0 [] vfs_writev+0x35/0x60 [] SyS_writev+0x7f/0x110 [] system_call_fastpath+0x1c/0x21 Code: 06 49 c7 47 18 00 00 00 00 0f 87 89 01 00 00 5b 41 5c 41 5d 41 5e 41 5f 5d c3 66 2e 0f 1f 84 00 00 00 00 00 48 8b 4e 10 48 89 fb <48> 8b 51 08 49 89 d4 83 e2 0c 41 81 e4 00 e0 00 00 48 c1 ea 02 RIP [] __sdma_txclean+0x57/0x1e0 [hfi1] RSP CR2: 0000000000000008 There are two exit points from user_sdma_send_pkts(). One (free_tx) merely frees the slab entry and one (free_txreq) cleans the sdma_txreq prior to freeing the slab entry. The free_txreq variation can only be called after one of the sdma_init*() variations has been called. In the panic case, the slab entry had been allocated but not inited. Fix the issue by exiting through free_tx thus avoiding sdma_clean(). Cc: # 4.9.x+ Fixes: 7724105686e7 ("IB/hfi1: add driver files") Reviewed-by: Mike Marciniszyn Reviewed-by: Lukasz Odzioba Signed-off-by: Michael J. Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Greg Kroah-Hartman Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/user_sdma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index c0c0e0445cbfb..8c954a0ae3b69 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -828,7 +828,7 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts) if (ACCESS_ONCE(iovec->offset) == iovec->iov.iov_len) { if (++req->iov_idx == req->data_iovs) { ret = -EFAULT; - goto free_txreq; + goto free_tx; } iovec = &req->iovs[req->iov_idx]; WARN_ON(iovec->offset); -- GitLab From 693536a7ce39a44a424acc7937fc8ebfcc7a72ee Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Thu, 20 Sep 2018 12:59:05 -0700 Subject: [PATCH 0681/2269] IB/hfi1: Fix context recovery when PBC has an UnsupportedVL commit d623500b3c4efd8d4e945ac9003c6b87b469a9ab upstream. If a packet stream uses an UnsupportedVL (virtual lane), the send engine will not send the packet, and it will not indicate that an error has occurred. This will cause the packet stream to block. HFI has 8 virtual lanes available for packet streams. Each lane can be enabled or disabled using the UnsupportedVL mask. If a lane is disabled, adding a packet to the send context must be disallowed. The current mask for determining unsupported VLs defaults to 0 (allow all). This is incorrect. Only the VLs that are defined should be allowed. Determine which VLs are disabled (mtu == 0), and set the appropriate unsupported bit in the mask. The correct mask will allow the send engine to error on the invalid VL, and error recovery will work correctly. Cc: # 4.9.x+ Fixes: 7724105686e7 ("IB/hfi1: add driver files") Reviewed-by: Mike Marciniszyn Reviewed-by: Lukasz Odzioba Signed-off-by: Michael J. Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/hfi1/pio.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c index a95ac62465592..19a8e6052820f 100644 --- a/drivers/infiniband/hw/hfi1/pio.c +++ b/drivers/infiniband/hw/hfi1/pio.c @@ -86,6 +86,7 @@ void pio_send_control(struct hfi1_devdata *dd, int op) unsigned long flags; int write = 1; /* write sendctrl back */ int flush = 0; /* re-read sendctrl to make sure it is flushed */ + int i; spin_lock_irqsave(&dd->sendctrl_lock, flags); @@ -95,9 +96,13 @@ void pio_send_control(struct hfi1_devdata *dd, int op) reg |= SEND_CTRL_SEND_ENABLE_SMASK; /* Fall through */ case PSC_DATA_VL_ENABLE: + mask = 0; + for (i = 0; i < ARRAY_SIZE(dd->vld); i++) + if (!dd->vld[i].mtu) + mask |= BIT_ULL(i); /* Disallow sending on VLs not enabled */ - mask = (((~0ull) << num_vls) & SEND_CTRL_UNSUPPORTED_VL_MASK) << - SEND_CTRL_UNSUPPORTED_VL_SHIFT; + mask = (mask & SEND_CTRL_UNSUPPORTED_VL_MASK) << + SEND_CTRL_UNSUPPORTED_VL_SHIFT; reg = (reg & ~SEND_CTRL_UNSUPPORTED_VL_SMASK) | mask; break; case PSC_GLOBAL_DISABLE: -- GitLab From 105470069de32b48fb8a9784ee6a8fc7b76c9791 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Fri, 31 Aug 2018 07:16:03 -0700 Subject: [PATCH 0682/2269] RDMA/uverbs: Atomically flush and mark closed the comp event queue commit 67e3816842fe6414d629c7515b955952ec40c7d7 upstream. Currently a uverbs completion event queue is flushed of events in ib_uverbs_comp_event_close() with the queue spinlock held and then released. Yet setting ev_queue->is_closed is not set until later in uverbs_hot_unplug_completion_event_file(). In between the time ib_uverbs_comp_event_close() releases the lock and uverbs_hot_unplug_completion_event_file() acquires the lock, a completion event can arrive and be inserted into the event queue by ib_uverbs_comp_handler(). This can cause a "double add" list_add warning or crash depending on the kernel configuration, or a memory leak because the event is never dequeued since the queue is already closed down. So add setting ev_queue->is_closed = 1 to ib_uverbs_comp_event_close(). Cc: stable@vger.kernel.org Fixes: 1e7710f3f656 ("IB/core: Change completion channel to use the reworked objects schema") Signed-off-by: Steve Wise Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/core/uverbs_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 0f70ff91276ef..aff6ef3ad52c2 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -424,6 +424,7 @@ static int ib_uverbs_comp_event_close(struct inode *inode, struct file *filp) list_del(&entry->obj_list); kfree(entry); } + file->ev_queue.is_closed = 1; spin_unlock_irq(&file->ev_queue.lock); uverbs_close_fd(filp); -- GitLab From d134e9170417c7d4ee71d8a1d195997abeee783c Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Sun, 4 Feb 2018 15:35:09 +0200 Subject: [PATCH 0683/2269] ovl: hash non-dir by lower inode for fsnotify commit 764baba80168ad3adafb521d2ab483ccbc49e344 upstream. Commit 31747eda41ef ("ovl: hash directory inodes for fsnotify") fixed an issue of inotify watch on directory that stops getting events after dropping dentry caches. A similar issue exists for non-dir non-upper files, for example: $ mkdir -p lower upper work merged $ touch lower/foo $ mount -t overlay -o lowerdir=lower,workdir=work,upperdir=upper none merged $ inotifywait merged/foo & $ echo 2 > /proc/sys/vm/drop_caches $ cat merged/foo inotifywait doesn't get the OPEN event, because ovl_lookup() called from 'cat' allocates a new overlay inode and does not reuse the watched inode. Fix this by hashing non-dir overlay inodes by lower real inode in the following cases that were not hashed before this change: - A non-upper overlay mount - A lower non-hardlink when index=off A helper ovl_hash_bylower() was added to put all the logic and documentation about which real inode an overlay inode is hashed by into one place. The issue dates back to initial version of overlayfs, but this patch depends on ovl_inode code that was introduced in kernel v4.13. Cc: #v4.13 Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi Signed-off-by: Mark Salyzyn #4.14 Signed-off-by: Greg Kroah-Hartman --- fs/overlayfs/inode.c | 62 +++++++++++++++++++++++++++++++------------- 1 file changed, 44 insertions(+), 18 deletions(-) diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index d60900b615f9c..efed50304b497 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -14,6 +14,7 @@ #include #include #include "overlayfs.h" +#include "ovl_entry.h" int ovl_setattr(struct dentry *dentry, struct iattr *attr) { @@ -608,39 +609,63 @@ static bool ovl_verify_inode(struct inode *inode, struct dentry *lowerdentry, return true; } +/* + * Does overlay inode need to be hashed by lower inode? + */ +static bool ovl_hash_bylower(struct super_block *sb, struct dentry *upper, + struct dentry *lower, struct dentry *index) +{ + struct ovl_fs *ofs = sb->s_fs_info; + + /* No, if pure upper */ + if (!lower) + return false; + + /* Yes, if already indexed */ + if (index) + return true; + + /* Yes, if won't be copied up */ + if (!ofs->upper_mnt) + return true; + + /* No, if lower hardlink is or will be broken on copy up */ + if ((upper || !ovl_indexdir(sb)) && + !d_is_dir(lower) && d_inode(lower)->i_nlink > 1) + return false; + + /* No, if non-indexed upper with NFS export */ + if (sb->s_export_op && upper) + return false; + + /* Otherwise, hash by lower inode for fsnotify */ + return true; +} + struct inode *ovl_get_inode(struct dentry *dentry, struct dentry *upperdentry, struct dentry *index) { + struct super_block *sb = dentry->d_sb; struct dentry *lowerdentry = ovl_dentry_lower(dentry); struct inode *realinode = upperdentry ? d_inode(upperdentry) : NULL; struct inode *inode; - /* Already indexed or could be indexed on copy up? */ - bool indexed = (index || (ovl_indexdir(dentry->d_sb) && !upperdentry)); - struct dentry *origin = indexed ? lowerdentry : NULL; + bool bylower = ovl_hash_bylower(sb, upperdentry, lowerdentry, index); bool is_dir; - if (WARN_ON(upperdentry && indexed && !lowerdentry)) - return ERR_PTR(-EIO); - if (!realinode) realinode = d_inode(lowerdentry); /* - * Copy up origin (lower) may exist for non-indexed non-dir upper, but - * we must not use lower as hash key in that case. - * Hash non-dir that is or could be indexed by origin inode. - * Hash dir that is or could be merged by origin inode. - * Hash pure upper and non-indexed non-dir by upper inode. + * Copy up origin (lower) may exist for non-indexed upper, but we must + * not use lower as hash key if this is a broken hardlink. */ is_dir = S_ISDIR(realinode->i_mode); - if (is_dir) - origin = lowerdentry; - - if (upperdentry || origin) { - struct inode *key = d_inode(origin ?: upperdentry); + if (upperdentry || bylower) { + struct inode *key = d_inode(bylower ? lowerdentry : + upperdentry); unsigned int nlink = is_dir ? 1 : realinode->i_nlink; - inode = iget5_locked(dentry->d_sb, (unsigned long) key, + inode = iget5_locked(sb, (unsigned long) key, ovl_inode_test, ovl_inode_set, key); if (!inode) goto out_nomem; @@ -664,7 +689,8 @@ struct inode *ovl_get_inode(struct dentry *dentry, struct dentry *upperdentry, nlink = ovl_get_nlink(lowerdentry, upperdentry, nlink); set_nlink(inode, nlink); } else { - inode = new_inode(dentry->d_sb); + /* Lower hardlink that will be broken on copy up */ + inode = new_inode(sb); if (!inode) goto out_nomem; } -- GitLab From 016d4aae9d849aa871707df58bedb5fee1d1a31c Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 6 Dec 2017 12:49:13 +0000 Subject: [PATCH 0684/2269] drm/i915: Remove vma from object on destroy, not close commit 010e3e68cd9cb65ea50c0af605e966cda333cb2a upstream. Originally we translated from the object to the vma by walking obj->vma_list to find the matching vm (for user lookups). Now we process user lookups using the rbtree, and we only use obj->vma_list itself for maintaining state (e.g. ensuring that all vma are flushed or rebound). As such maintenance needs to go on beyond the user's awareness of the vma, defer removal of the vma from the obj->vma_list from i915_vma_close() to i915_vma_destroy() Fixes: 5888fc9eac3c ("drm/i915: Flush pending GTT writes before unbinding") Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=104155 Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20171206124914.19960-1-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen Cc: Jani Nikula Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/i915_gem.c | 3 ++- drivers/gpu/drm/i915/i915_vma.c | 4 +++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 3c0ce3ee07108..f354cfe63f7be 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3608,7 +3608,8 @@ restart: return -EBUSY; } - if (i915_gem_valid_gtt_space(vma, cache_level)) + if (!i915_vma_is_closed(vma) && + i915_gem_valid_gtt_space(vma, cache_level)) continue; ret = i915_vma_unbind(vma); diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 02d1a5eacb00e..76eed1fdac096 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -430,6 +430,7 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) u64 start, end; int ret; + GEM_BUG_ON(i915_vma_is_closed(vma)); GEM_BUG_ON(vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND)); GEM_BUG_ON(drm_mm_node_allocated(&vma->node)); @@ -590,7 +591,9 @@ static void i915_vma_destroy(struct i915_vma *vma) GEM_BUG_ON(i915_gem_active_isset(&vma->last_read[i])); GEM_BUG_ON(i915_gem_active_isset(&vma->last_fence)); + list_del(&vma->obj_link); list_del(&vma->vm_link); + if (!i915_vma_is_ggtt(vma)) i915_ppgtt_put(i915_vm_to_ppgtt(vma->vm)); @@ -602,7 +605,6 @@ void i915_vma_close(struct i915_vma *vma) GEM_BUG_ON(i915_vma_is_closed(vma)); vma->flags |= I915_VMA_CLOSED; - list_del(&vma->obj_link); rb_erase(&vma->obj_node, &vma->obj->vma_tree); if (!i915_vma_is_active(vma) && !i915_vma_is_pinned(vma)) -- GitLab From d428e43eb684af5f3aaad1b2f67e55276ffd6bd4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Thu, 20 Sep 2018 14:11:17 +0200 Subject: [PATCH 0685/2269] serial: imx: restore handshaking irq for imx1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 7e620984b62532783912312e334f3c48cdacbd5d upstream. Back in 2015 when irda was dropped from the driver imx1 was broken. This change reintroduces the support for the third interrupt of the UART. Fixes: afe9cbb1a6ad ("serial: imx: drop support for IRDA") Cc: stable Signed-off-by: Uwe Kleine-König Reviewed-by: Leonard Crestez Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/imx.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/tty/serial/imx.c b/drivers/tty/serial/imx.c index 8deaf2ad8b34a..4e827e5a52a36 100644 --- a/drivers/tty/serial/imx.c +++ b/drivers/tty/serial/imx.c @@ -2213,6 +2213,14 @@ static int serial_imx_probe(struct platform_device *pdev) ret); return ret; } + + ret = devm_request_irq(&pdev->dev, rtsirq, imx_rtsint, 0, + dev_name(&pdev->dev), sport); + if (ret) { + dev_err(&pdev->dev, "failed to request rts irq: %d\n", + ret); + return ret; + } } else { ret = devm_request_irq(&pdev->dev, rxirq, imx_int, 0, dev_name(&pdev->dev), sport); -- GitLab From 38d070f9090af15b5bdb26fc0e084b22f34eabd9 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Thu, 27 Sep 2018 16:53:21 +0100 Subject: [PATCH 0686/2269] arm64: KVM: Tighten guest core register access from userspace commit d26c25a9d19b5976b319af528886f89cf455692d upstream. We currently allow userspace to access the core register file in about any possible way, including straddling multiple registers and doing unaligned accesses. This is not the expected use of the ABI, and nobody is actually using it that way. Let's tighten it by explicitly checking the size and alignment for each field of the register file. Cc: Fixes: 2f4a07c5f9fe ("arm64: KVM: guest one-reg interface") Reviewed-by: Christoffer Dall Reviewed-by: Mark Rutland Signed-off-by: Dave Martin [maz: rewrote Dave's initial patch to be more easily backported] Signed-off-by: Marc Zyngier Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kvm/guest.c | 45 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 811f04c5760e4..3f4817dd69ae3 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -57,6 +57,45 @@ static u64 core_reg_offset_from_id(u64 id) return id & ~(KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_ARM_CORE); } +static int validate_core_offset(const struct kvm_one_reg *reg) +{ + u64 off = core_reg_offset_from_id(reg->id); + int size; + + switch (off) { + case KVM_REG_ARM_CORE_REG(regs.regs[0]) ... + KVM_REG_ARM_CORE_REG(regs.regs[30]): + case KVM_REG_ARM_CORE_REG(regs.sp): + case KVM_REG_ARM_CORE_REG(regs.pc): + case KVM_REG_ARM_CORE_REG(regs.pstate): + case KVM_REG_ARM_CORE_REG(sp_el1): + case KVM_REG_ARM_CORE_REG(elr_el1): + case KVM_REG_ARM_CORE_REG(spsr[0]) ... + KVM_REG_ARM_CORE_REG(spsr[KVM_NR_SPSR - 1]): + size = sizeof(__u64); + break; + + case KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]) ... + KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]): + size = sizeof(__uint128_t); + break; + + case KVM_REG_ARM_CORE_REG(fp_regs.fpsr): + case KVM_REG_ARM_CORE_REG(fp_regs.fpcr): + size = sizeof(__u32); + break; + + default: + return -EINVAL; + } + + if (KVM_REG_SIZE(reg->id) == size && + IS_ALIGNED(off, size / sizeof(__u32))) + return 0; + + return -EINVAL; +} + static int get_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) { /* @@ -76,6 +115,9 @@ static int get_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) (off + (KVM_REG_SIZE(reg->id) / sizeof(__u32))) >= nr_regs) return -ENOENT; + if (validate_core_offset(reg)) + return -EINVAL; + if (copy_to_user(uaddr, ((u32 *)regs) + off, KVM_REG_SIZE(reg->id))) return -EFAULT; @@ -98,6 +140,9 @@ static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) (off + (KVM_REG_SIZE(reg->id) / sizeof(__u32))) >= nr_regs) return -ENOENT; + if (validate_core_offset(reg)) + return -EINVAL; + if (KVM_REG_SIZE(reg->id) > sizeof(tmp)) return -EINVAL; -- GitLab From 33906ae926e0634e8eea89ec36858ab4a36bcd28 Mon Sep 17 00:00:00 2001 From: Tomer Tayar Date: Mon, 20 Aug 2018 00:01:42 +0300 Subject: [PATCH 0687/2269] qed: Wait for ready indication before rereading the shmem [ Upstream commit f00d25f3154b676fcea4502a25b94bd7f142ca74 ] The MFW might be reset and re-update its shared memory. Upon the detection of such a reset the driver rereads this memory, but it has to wait till the data is valid. This patch adds the missing wait for a data ready indication. Signed-off-by: Tomer Tayar Signed-off-by: Ariel Elior Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/qlogic/qed/qed_mcp.c | 50 +++++++++++++++++++---- 1 file changed, 41 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c index 3c469355f5a43..9348d367cfdfb 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c @@ -182,18 +182,57 @@ int qed_mcp_free(struct qed_hwfn *p_hwfn) return 0; } +/* Maximum of 1 sec to wait for the SHMEM ready indication */ +#define QED_MCP_SHMEM_RDY_MAX_RETRIES 20 +#define QED_MCP_SHMEM_RDY_ITER_MS 50 + static int qed_load_mcp_offsets(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) { struct qed_mcp_info *p_info = p_hwfn->mcp_info; + u8 cnt = QED_MCP_SHMEM_RDY_MAX_RETRIES; + u8 msec = QED_MCP_SHMEM_RDY_ITER_MS; u32 drv_mb_offsize, mfw_mb_offsize; u32 mcp_pf_id = MCP_PF_ID(p_hwfn); p_info->public_base = qed_rd(p_hwfn, p_ptt, MISC_REG_SHARED_MEM_ADDR); - if (!p_info->public_base) - return 0; + if (!p_info->public_base) { + DP_NOTICE(p_hwfn, + "The address of the MCP scratch-pad is not configured\n"); + return -EINVAL; + } p_info->public_base |= GRCBASE_MCP; + /* Get the MFW MB address and number of supported messages */ + mfw_mb_offsize = qed_rd(p_hwfn, p_ptt, + SECTION_OFFSIZE_ADDR(p_info->public_base, + PUBLIC_MFW_MB)); + p_info->mfw_mb_addr = SECTION_ADDR(mfw_mb_offsize, mcp_pf_id); + p_info->mfw_mb_length = (u16)qed_rd(p_hwfn, p_ptt, + p_info->mfw_mb_addr + + offsetof(struct public_mfw_mb, + sup_msgs)); + + /* The driver can notify that there was an MCP reset, and might read the + * SHMEM values before the MFW has completed initializing them. + * To avoid this, the "sup_msgs" field in the MFW mailbox is used as a + * data ready indication. + */ + while (!p_info->mfw_mb_length && --cnt) { + msleep(msec); + p_info->mfw_mb_length = + (u16)qed_rd(p_hwfn, p_ptt, + p_info->mfw_mb_addr + + offsetof(struct public_mfw_mb, sup_msgs)); + } + + if (!cnt) { + DP_NOTICE(p_hwfn, + "Failed to get the SHMEM ready notification after %d msec\n", + QED_MCP_SHMEM_RDY_MAX_RETRIES * msec); + return -EBUSY; + } + /* Calculate the driver and MFW mailbox address */ drv_mb_offsize = qed_rd(p_hwfn, p_ptt, SECTION_OFFSIZE_ADDR(p_info->public_base, @@ -203,13 +242,6 @@ static int qed_load_mcp_offsets(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) "drv_mb_offsiz = 0x%x, drv_mb_addr = 0x%x mcp_pf_id = 0x%x\n", drv_mb_offsize, p_info->drv_mb_addr, mcp_pf_id); - /* Set the MFW MB address */ - mfw_mb_offsize = qed_rd(p_hwfn, p_ptt, - SECTION_OFFSIZE_ADDR(p_info->public_base, - PUBLIC_MFW_MB)); - p_info->mfw_mb_addr = SECTION_ADDR(mfw_mb_offsize, mcp_pf_id); - p_info->mfw_mb_length = (u16)qed_rd(p_hwfn, p_ptt, p_info->mfw_mb_addr); - /* Get the current driver mailbox sequence before sending * the first command */ -- GitLab From 73046b822c4ce80847da902af458c30cb8b1558b Mon Sep 17 00:00:00 2001 From: Tomer Tayar Date: Mon, 20 Aug 2018 00:01:43 +0300 Subject: [PATCH 0688/2269] qed: Wait for MCP halt and resume commands to take place [ Upstream commit 76271809f49056f079e202bf6513d17b0d6dd34d ] Successive iterations of halting and resuming the management chip (MCP) might fail, since currently the driver doesn't wait for these operations to actually take place. This patch prevents the driver from moving forward before the operations are reflected in the state register. Signed-off-by: Tomer Tayar Signed-off-by: Ariel Elior Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/qlogic/qed/qed_mcp.c | 46 +++++++++++++++---- .../net/ethernet/qlogic/qed/qed_reg_addr.h | 1 + 2 files changed, 39 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c index 9348d367cfdfb..5748bcb26d40e 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c @@ -1998,31 +1998,61 @@ qed_mcp_send_drv_version(struct qed_hwfn *p_hwfn, return rc; } +/* A maximal 100 msec waiting time for the MCP to halt */ +#define QED_MCP_HALT_SLEEP_MS 10 +#define QED_MCP_HALT_MAX_RETRIES 10 + int qed_mcp_halt(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) { - u32 resp = 0, param = 0; + u32 resp = 0, param = 0, cpu_state, cnt = 0; int rc; rc = qed_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_MCP_HALT, 0, &resp, ¶m); - if (rc) + if (rc) { DP_ERR(p_hwfn, "MCP response failure, aborting\n"); + return rc; + } - return rc; + do { + msleep(QED_MCP_HALT_SLEEP_MS); + cpu_state = qed_rd(p_hwfn, p_ptt, MCP_REG_CPU_STATE); + if (cpu_state & MCP_REG_CPU_STATE_SOFT_HALTED) + break; + } while (++cnt < QED_MCP_HALT_MAX_RETRIES); + + if (cnt == QED_MCP_HALT_MAX_RETRIES) { + DP_NOTICE(p_hwfn, + "Failed to halt the MCP [CPU_MODE = 0x%08x, CPU_STATE = 0x%08x]\n", + qed_rd(p_hwfn, p_ptt, MCP_REG_CPU_MODE), cpu_state); + return -EBUSY; + } + + return 0; } +#define QED_MCP_RESUME_SLEEP_MS 10 + int qed_mcp_resume(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) { - u32 value, cpu_mode; + u32 cpu_mode, cpu_state; qed_wr(p_hwfn, p_ptt, MCP_REG_CPU_STATE, 0xffffffff); - value = qed_rd(p_hwfn, p_ptt, MCP_REG_CPU_MODE); - value &= ~MCP_REG_CPU_MODE_SOFT_HALT; - qed_wr(p_hwfn, p_ptt, MCP_REG_CPU_MODE, value); cpu_mode = qed_rd(p_hwfn, p_ptt, MCP_REG_CPU_MODE); + cpu_mode &= ~MCP_REG_CPU_MODE_SOFT_HALT; + qed_wr(p_hwfn, p_ptt, MCP_REG_CPU_MODE, cpu_mode); + msleep(QED_MCP_RESUME_SLEEP_MS); + cpu_state = qed_rd(p_hwfn, p_ptt, MCP_REG_CPU_STATE); - return (cpu_mode & MCP_REG_CPU_MODE_SOFT_HALT) ? -EAGAIN : 0; + if (cpu_state & MCP_REG_CPU_STATE_SOFT_HALTED) { + DP_NOTICE(p_hwfn, + "Failed to resume the MCP [CPU_MODE = 0x%08x, CPU_STATE = 0x%08x]\n", + cpu_mode, cpu_state); + return -EBUSY; + } + + return 0; } int qed_mcp_ov_update_current_config(struct qed_hwfn *p_hwfn, diff --git a/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h b/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h index 0cdb4337b3a05..887cfb16b53e2 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h +++ b/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h @@ -554,6 +554,7 @@ 0 #define MCP_REG_CPU_STATE \ 0xe05004UL +#define MCP_REG_CPU_STATE_SOFT_HALTED (0x1UL << 10) #define MCP_REG_CPU_EVENT_MASK \ 0xe05008UL #define PGLUE_B_REG_PF_BAR0_SIZE \ -- GitLab From 583f866501c1f4294670d29b242e57e8470e7bf9 Mon Sep 17 00:00:00 2001 From: Tomer Tayar Date: Mon, 20 Aug 2018 00:01:44 +0300 Subject: [PATCH 0689/2269] qed: Prevent a possible deadlock during driver load and unload [ Upstream commit eaa50fc59e5841910987e90b0438b2643041f508 ] The MFW manages an internal lock to prevent concurrent hardware (de)initialization of different PFs. This, together with the busy-waiting for the MFW's responses for commands, might lead to a deadlock during concurrent load or unload of PFs. This patch adds the option to sleep within the busy-waiting, and uses it for the (un)load requests (which are not sent from an interrupt context) to prevent the possible deadlock. Signed-off-by: Tomer Tayar Signed-off-by: Ariel Elior Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/qlogic/qed/qed_mcp.c | 43 ++++++++++++++++------- drivers/net/ethernet/qlogic/qed/qed_mcp.h | 21 ++++++----- 2 files changed, 44 insertions(+), 20 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c index 5748bcb26d40e..a1e1092fa3ce3 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c @@ -47,7 +47,7 @@ #include "qed_reg_addr.h" #include "qed_sriov.h" -#define CHIP_MCP_RESP_ITER_US 10 +#define QED_MCP_RESP_ITER_US 10 #define QED_DRV_MB_MAX_RETRIES (500 * 1000) /* Account for 5 sec */ #define QED_MCP_RESET_RETRIES (50 * 1000) /* Account for 500 msec */ @@ -316,7 +316,7 @@ static void qed_mcp_reread_offsets(struct qed_hwfn *p_hwfn, int qed_mcp_reset(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) { - u32 org_mcp_reset_seq, seq, delay = CHIP_MCP_RESP_ITER_US, cnt = 0; + u32 org_mcp_reset_seq, seq, delay = QED_MCP_RESP_ITER_US, cnt = 0; int rc = 0; /* Ensure that only a single thread is accessing the mailbox */ @@ -448,10 +448,10 @@ static int _qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, struct qed_mcp_mb_params *p_mb_params, - u32 max_retries, u32 delay) + u32 max_retries, u32 usecs) { + u32 cnt = 0, msecs = DIV_ROUND_UP(usecs, 1000); struct qed_mcp_cmd_elem *p_cmd_elem; - u32 cnt = 0; u16 seq_num; int rc = 0; @@ -474,7 +474,11 @@ _qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn, goto err; spin_unlock_bh(&p_hwfn->mcp_info->cmd_lock); - udelay(delay); + + if (QED_MB_FLAGS_IS_SET(p_mb_params, CAN_SLEEP)) + msleep(msecs); + else + udelay(usecs); } while (++cnt < max_retries); if (cnt >= max_retries) { @@ -503,7 +507,11 @@ _qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn, * The spinlock stays locked until the list element is removed. */ - udelay(delay); + if (QED_MB_FLAGS_IS_SET(p_mb_params, CAN_SLEEP)) + msleep(msecs); + else + udelay(usecs); + spin_lock_bh(&p_hwfn->mcp_info->cmd_lock); if (p_cmd_elem->b_is_completed) @@ -538,7 +546,7 @@ _qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn, "MFW mailbox: response 0x%08x param 0x%08x [after %d.%03d ms]\n", p_mb_params->mcp_resp, p_mb_params->mcp_param, - (cnt * delay) / 1000, (cnt * delay) % 1000); + (cnt * usecs) / 1000, (cnt * usecs) % 1000); /* Clear the sequence number from the MFW response */ p_mb_params->mcp_resp &= FW_MSG_CODE_MASK; @@ -556,7 +564,7 @@ static int qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn, { size_t union_data_size = sizeof(union drv_union_data); u32 max_retries = QED_DRV_MB_MAX_RETRIES; - u32 delay = CHIP_MCP_RESP_ITER_US; + u32 usecs = QED_MCP_RESP_ITER_US; /* MCP not initialized */ if (!qed_mcp_is_init(p_hwfn)) { @@ -573,8 +581,13 @@ static int qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn, return -EINVAL; } + if (QED_MB_FLAGS_IS_SET(p_mb_params, CAN_SLEEP)) { + max_retries = DIV_ROUND_UP(max_retries, 1000); + usecs *= 1000; + } + return _qed_mcp_cmd_and_union(p_hwfn, p_ptt, p_mb_params, max_retries, - delay); + usecs); } int qed_mcp_cmd(struct qed_hwfn *p_hwfn, @@ -763,6 +776,7 @@ __qed_mcp_load_req(struct qed_hwfn *p_hwfn, mb_params.data_src_size = sizeof(load_req); mb_params.p_data_dst = &load_rsp; mb_params.data_dst_size = sizeof(load_rsp); + mb_params.flags = QED_MB_FLAG_CAN_SLEEP; DP_VERBOSE(p_hwfn, QED_MSG_SP, "Load Request: param 0x%08x [init_hw %d, drv_type %d, hsi_ver %d, pda 0x%04x]\n", @@ -984,7 +998,8 @@ int qed_mcp_load_req(struct qed_hwfn *p_hwfn, int qed_mcp_unload_req(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) { - u32 wol_param, mcp_resp, mcp_param; + struct qed_mcp_mb_params mb_params; + u32 wol_param; switch (p_hwfn->cdev->wol_config) { case QED_OV_WOL_DISABLED: @@ -1002,8 +1017,12 @@ int qed_mcp_unload_req(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) wol_param = DRV_MB_PARAM_UNLOAD_WOL_MCP; } - return qed_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_UNLOAD_REQ, wol_param, - &mcp_resp, &mcp_param); + memset(&mb_params, 0, sizeof(mb_params)); + mb_params.cmd = DRV_MSG_CODE_UNLOAD_REQ; + mb_params.param = wol_param; + mb_params.flags = QED_MB_FLAG_CAN_SLEEP; + + return qed_mcp_cmd_and_union(p_hwfn, p_ptt, &mb_params); } int qed_mcp_unload_done(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.h b/drivers/net/ethernet/qlogic/qed/qed_mcp.h index c7ec2395d1ceb..e5d0483f3c440 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.h +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.h @@ -565,14 +565,19 @@ struct qed_mcp_info { }; struct qed_mcp_mb_params { - u32 cmd; - u32 param; - void *p_data_src; - u8 data_src_size; - void *p_data_dst; - u8 data_dst_size; - u32 mcp_resp; - u32 mcp_param; + u32 cmd; + u32 param; + void *p_data_src; + void *p_data_dst; + u8 data_src_size; + u8 data_dst_size; + u32 mcp_resp; + u32 mcp_param; + u32 flags; +#define QED_MB_FLAG_CAN_SLEEP (0x1 << 0) +#define QED_MB_FLAGS_IS_SET(params, flag) \ + ({ typeof(params) __params = (params); \ + (__params && (__params->flags & QED_MB_FLAG_ ## flag)); }) }; /** -- GitLab From 308206bd277099607d490b4c5337c1eee7d7a1dd Mon Sep 17 00:00:00 2001 From: Tomer Tayar Date: Mon, 20 Aug 2018 00:01:45 +0300 Subject: [PATCH 0690/2269] qed: Avoid sending mailbox commands when MFW is not responsive [ Upstream commit b310974e041913231b6e3d5d475d4df55c312301 ] Keep sending mailbox commands to the MFW when it is not responsive ends up with a redundant amount of timeout expiries. This patch prints the MCP status on the first command which is not responded, and blocks the following commands. Since the (un)load request commands might be not responded due to other PFs, the patch also adds the option to skip the blocking upon a failure. Signed-off-by: Tomer Tayar Signed-off-by: Ariel Elior Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/qlogic/qed/qed_mcp.c | 52 ++++++++++++++++++- drivers/net/ethernet/qlogic/qed/qed_mcp.h | 6 ++- .../net/ethernet/qlogic/qed/qed_reg_addr.h | 1 + 3 files changed, 56 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c index a1e1092fa3ce3..7938abe9a3010 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c @@ -319,6 +319,12 @@ int qed_mcp_reset(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) u32 org_mcp_reset_seq, seq, delay = QED_MCP_RESP_ITER_US, cnt = 0; int rc = 0; + if (p_hwfn->mcp_info->b_block_cmd) { + DP_NOTICE(p_hwfn, + "The MFW is not responsive. Avoid sending MCP_RESET mailbox command.\n"); + return -EBUSY; + } + /* Ensure that only a single thread is accessing the mailbox */ spin_lock_bh(&p_hwfn->mcp_info->cmd_lock); @@ -444,6 +450,33 @@ static void __qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn, (p_mb_params->cmd | seq_num), p_mb_params->param); } +static void qed_mcp_cmd_set_blocking(struct qed_hwfn *p_hwfn, bool block_cmd) +{ + p_hwfn->mcp_info->b_block_cmd = block_cmd; + + DP_INFO(p_hwfn, "%s sending of mailbox commands to the MFW\n", + block_cmd ? "Block" : "Unblock"); +} + +static void qed_mcp_print_cpu_info(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt) +{ + u32 cpu_mode, cpu_state, cpu_pc_0, cpu_pc_1, cpu_pc_2; + u32 delay = QED_MCP_RESP_ITER_US; + + cpu_mode = qed_rd(p_hwfn, p_ptt, MCP_REG_CPU_MODE); + cpu_state = qed_rd(p_hwfn, p_ptt, MCP_REG_CPU_STATE); + cpu_pc_0 = qed_rd(p_hwfn, p_ptt, MCP_REG_CPU_PROGRAM_COUNTER); + udelay(delay); + cpu_pc_1 = qed_rd(p_hwfn, p_ptt, MCP_REG_CPU_PROGRAM_COUNTER); + udelay(delay); + cpu_pc_2 = qed_rd(p_hwfn, p_ptt, MCP_REG_CPU_PROGRAM_COUNTER); + + DP_NOTICE(p_hwfn, + "MCP CPU info: mode 0x%08x, state 0x%08x, pc {0x%08x, 0x%08x, 0x%08x}\n", + cpu_mode, cpu_state, cpu_pc_0, cpu_pc_1, cpu_pc_2); +} + static int _qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, @@ -530,11 +563,15 @@ _qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn, DP_NOTICE(p_hwfn, "The MFW failed to respond to command 0x%08x [param 0x%08x].\n", p_mb_params->cmd, p_mb_params->param); + qed_mcp_print_cpu_info(p_hwfn, p_ptt); spin_lock_bh(&p_hwfn->mcp_info->cmd_lock); qed_mcp_cmd_del_elem(p_hwfn, p_cmd_elem); spin_unlock_bh(&p_hwfn->mcp_info->cmd_lock); + if (!QED_MB_FLAGS_IS_SET(p_mb_params, AVOID_BLOCK)) + qed_mcp_cmd_set_blocking(p_hwfn, true); + return -EAGAIN; } @@ -572,6 +609,13 @@ static int qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn, return -EBUSY; } + if (p_hwfn->mcp_info->b_block_cmd) { + DP_NOTICE(p_hwfn, + "The MFW is not responsive. Avoid sending mailbox command 0x%08x [param 0x%08x].\n", + p_mb_params->cmd, p_mb_params->param); + return -EBUSY; + } + if (p_mb_params->data_src_size > union_data_size || p_mb_params->data_dst_size > union_data_size) { DP_ERR(p_hwfn, @@ -776,7 +820,7 @@ __qed_mcp_load_req(struct qed_hwfn *p_hwfn, mb_params.data_src_size = sizeof(load_req); mb_params.p_data_dst = &load_rsp; mb_params.data_dst_size = sizeof(load_rsp); - mb_params.flags = QED_MB_FLAG_CAN_SLEEP; + mb_params.flags = QED_MB_FLAG_CAN_SLEEP | QED_MB_FLAG_AVOID_BLOCK; DP_VERBOSE(p_hwfn, QED_MSG_SP, "Load Request: param 0x%08x [init_hw %d, drv_type %d, hsi_ver %d, pda 0x%04x]\n", @@ -1020,7 +1064,7 @@ int qed_mcp_unload_req(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) memset(&mb_params, 0, sizeof(mb_params)); mb_params.cmd = DRV_MSG_CODE_UNLOAD_REQ; mb_params.param = wol_param; - mb_params.flags = QED_MB_FLAG_CAN_SLEEP; + mb_params.flags = QED_MB_FLAG_CAN_SLEEP | QED_MB_FLAG_AVOID_BLOCK; return qed_mcp_cmd_and_union(p_hwfn, p_ptt, &mb_params); } @@ -2047,6 +2091,8 @@ int qed_mcp_halt(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) return -EBUSY; } + qed_mcp_cmd_set_blocking(p_hwfn, true); + return 0; } @@ -2071,6 +2117,8 @@ int qed_mcp_resume(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) return -EBUSY; } + qed_mcp_cmd_set_blocking(p_hwfn, false); + return 0; } diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.h b/drivers/net/ethernet/qlogic/qed/qed_mcp.h index e5d0483f3c440..f1fe5e3427ea5 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.h +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.h @@ -540,11 +540,14 @@ struct qed_mcp_info { */ spinlock_t cmd_lock; + /* Flag to indicate whether sending a MFW mailbox command is blocked */ + bool b_block_cmd; + /* Spinlock used for syncing SW link-changes and link-changes * originating from attention context. */ spinlock_t link_lock; - bool block_mb_sending; + u32 public_base; u32 drv_mb_addr; u32 mfw_mb_addr; @@ -575,6 +578,7 @@ struct qed_mcp_mb_params { u32 mcp_param; u32 flags; #define QED_MB_FLAG_CAN_SLEEP (0x1 << 0) +#define QED_MB_FLAG_AVOID_BLOCK (0x1 << 1) #define QED_MB_FLAGS_IS_SET(params, flag) \ ({ typeof(params) __params = (params); \ (__params && (__params->flags & QED_MB_FLAG_ ## flag)); }) diff --git a/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h b/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h index 887cfb16b53e2..d1201bb2d4bb7 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h +++ b/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h @@ -557,6 +557,7 @@ #define MCP_REG_CPU_STATE_SOFT_HALTED (0x1UL << 10) #define MCP_REG_CPU_EVENT_MASK \ 0xe05008UL +#define MCP_REG_CPU_PROGRAM_COUNTER 0xe0501cUL #define PGLUE_B_REG_PF_BAR0_SIZE \ 0x2aae60UL #define PGLUE_B_REG_PF_BAR1_SIZE \ -- GitLab From 083be6fbfdcb06b59792fd5b3468069daa1bf8c2 Mon Sep 17 00:00:00 2001 From: Anson Huang Date: Tue, 31 Jul 2018 00:56:49 +0800 Subject: [PATCH 0691/2269] thermal: of-thermal: disable passive polling when thermal zone is disabled [ Upstream commit 152395fd03d4ce1e535a75cdbf58105e50587611 ] When thermal zone is in passive mode, disabling its mode from sysfs is NOT taking effect at all, it is still polling the temperature of the disabled thermal zone and handling all thermal trips, it makes user confused. The disabling operation should disable the thermal zone behavior completely, for both active and passive mode, this patch clears the passive_delay when thermal zone is disabled and restores it when it is enabled. Signed-off-by: Anson Huang Signed-off-by: Eduardo Valentin Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/thermal/of-thermal.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/thermal/of-thermal.c b/drivers/thermal/of-thermal.c index d04ec3b9e5ff2..8a70b57d129cf 100644 --- a/drivers/thermal/of-thermal.c +++ b/drivers/thermal/of-thermal.c @@ -278,10 +278,13 @@ static int of_thermal_set_mode(struct thermal_zone_device *tz, mutex_lock(&tz->lock); - if (mode == THERMAL_DEVICE_ENABLED) + if (mode == THERMAL_DEVICE_ENABLED) { tz->polling_delay = data->polling_delay; - else + tz->passive_delay = data->passive_delay; + } else { tz->polling_delay = 0; + tz->passive_delay = 0; + } mutex_unlock(&tz->lock); -- GitLab From 97ee8505c63706e28d1354ea5fc721a47ee84850 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Thu, 16 Aug 2018 21:44:02 -0500 Subject: [PATCH 0692/2269] isofs: reject hardware sector size > 2048 bytes [ Upstream commit 09a4e0be5826aa66c4ce9954841f110ffe63ef4f ] The largest block size supported by isofs is ISOFS_BLOCK_SIZE (2048), but isofs_fill_super calls sb_min_blocksize and sets the blocksize to the device's logical block size if it's larger than what we ended up with after option parsing. If for some reason we try to mount a hard 4k device as an isofs filesystem, we'll set opt.blocksize to 4096, and when we try to read the superblock we found via: block = iso_blknum << (ISOFS_BLOCK_BITS - s->s_blocksize_bits) with s_blocksize_bits greater than ISOFS_BLOCK_BITS, we'll have a negative shift and the bread will fail somewhat cryptically: isofs_fill_super: bread failed, dev=sda, iso_blknum=17, block=-2147483648 It seems best to just catch and clearly reject mounts of such a device. Reported-by: Bryan Gurney Signed-off-by: Eric Sandeen Signed-off-by: Jan Kara Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/isofs/inode.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index ed4edcd2bc56d..a4994e25e19e7 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -24,6 +24,7 @@ #include #include #include +#include #include "isofs.h" #include "zisofs.h" @@ -653,6 +654,12 @@ static int isofs_fill_super(struct super_block *s, void *data, int silent) /* * What if bugger tells us to go beyond page size? */ + if (bdev_logical_block_size(s->s_bdev) > 2048) { + printk(KERN_WARNING + "ISOFS: unsupported/invalid hardware sector size %d\n", + bdev_logical_block_size(s->s_bdev)); + goto out_freesbi; + } opt.blocksize = sb_min_blocksize(s, opt.blocksize); sbi->s_high_sierra = 0; /* default is iso9660 */ -- GitLab From f0a8c1257fc3e8310a14c8f9d654620370cf568f Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Wed, 22 Aug 2018 08:37:32 -0700 Subject: [PATCH 0693/2269] tls: possible hang when do_tcp_sendpages hits sndbuf is full case [ Upstream commit 67db7cd249e71f64346f481b629724376d063e08 ] Currently, the lower protocols sk_write_space handler is not called if TLS is sending a scatterlist via tls_push_sg. However, normally tls_push_sg calls do_tcp_sendpage, which may be under memory pressure, that in turn may trigger a wait via sk_wait_event. Typically, this happens when the in-flight bytes exceed the sdnbuf size. In the normal case when enough ACKs are received sk_write_space() will be called and the sk_wait_event will be woken up allowing it to send more data and/or return to the user. But, in the TLS case because the sk_write_space() handler does not wake up the events the above send will wait until the sndtimeo is exceeded. By default this is MAX_SCHEDULE_TIMEOUT so it look like a hang to the user (especially this impatient user). To fix this pass the sk_write_space event to the lower layers sk_write_space event which in the TCP case will wake any pending events. I observed the above while integrating sockmap and ktls. It initially appeared as test_sockmap (modified to use ktls) occasionally hanging. To reliably reproduce this reduce the sndbuf size and stress the tls layer by sending many 1B sends. This results in every byte needing a header and each byte individually being sent to the crypto layer. Signed-off-by: John Fastabend Acked-by: Dave Watson Signed-off-by: Daniel Borkmann Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/tls/tls_main.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index efa2cdba99d30..4f2971f528dbd 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -195,9 +195,14 @@ static void tls_write_space(struct sock *sk) { struct tls_context *ctx = tls_get_ctx(sk); - /* We are already sending pages, ignore notification */ - if (ctx->in_tcp_sendpages) + /* If in_tcp_sendpages call lower protocol write space handler + * to ensure we wake up any waiting operations there. For example + * if do_tcp_sendpages where to call sk_wait_event. + */ + if (ctx->in_tcp_sendpages) { + ctx->sk_write_space(sk); return; + } if (!sk->sk_write_pending && tls_is_pending_closed_record(ctx)) { gfp_t sk_allocation = sk->sk_allocation; -- GitLab From 92935e1c2a7e27795456f5058759dbb3acf67e2c Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Wed, 22 Aug 2018 08:37:37 -0700 Subject: [PATCH 0694/2269] bpf: sockmap: write_space events need to be passed to TCP handler [ Upstream commit 9b2e0388bec8ec5427403e23faff3b58dd1c3200 ] When sockmap code is using the stream parser it also handles the write space events in order to handle the case where (a) verdict redirects skb to another socket and (b) the sockmap then sends the skb but due to memory constraints (or other EAGAIN errors) needs to do a retry. But the initial code missed a third case where the skb_send_sock_locked() triggers an sk_wait_event(). A typically case would be when sndbuf size is exceeded. If this happens because we do not pass the write_space event to the lower layers we never wake up the event and it will wait for sndtimeo. Which as noted in ktls fix may be rather large and look like a hang to the user. To reproduce the best test is to reduce the sndbuf size and send 1B data chunks to stress the memory handling. To fix this pass the event from the upper layer to the lower layer. Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- kernel/bpf/sockmap.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c index 53a4787c08d80..20eaddfa691c2 100644 --- a/kernel/bpf/sockmap.c +++ b/kernel/bpf/sockmap.c @@ -313,12 +313,15 @@ out: static void smap_write_space(struct sock *sk) { struct smap_psock *psock; + void (*write_space)(struct sock *sk); rcu_read_lock(); psock = smap_psock_sk(sk); if (likely(psock && test_bit(SMAP_TX_RUNNING, &psock->state))) schedule_work(&psock->tx_work); + write_space = psock->save_write_space; rcu_read_unlock(); + write_space(sk); } static void smap_stop_sock(struct smap_psock *psock, struct sock *sk) -- GitLab From 333f26129fd9f932ed50fdb71983ce3d950f8a70 Mon Sep 17 00:00:00 2001 From: Huazhong Tan Date: Thu, 23 Aug 2018 11:10:10 +0800 Subject: [PATCH 0695/2269] net: hns: fix length and page_offset overflow when CONFIG_ARM64_64K_PAGES [ Upstream commit 3ed614dce3ca9912d22be215ff0f11104b69fe62 ] When enable the config item "CONFIG_ARM64_64K_PAGES", the size of PAGE_SIZE is 65536(64K). But the type of length and page_offset are u16, they will overflow. So change them to u32. Fixes: 6fe6611ff275 ("net: add Hisilicon Network Subsystem hnae framework support") Signed-off-by: Huazhong Tan Signed-off-by: Salil Mehta Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/hisilicon/hns/hnae.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns/hnae.h b/drivers/net/ethernet/hisilicon/hns/hnae.h index fa5b30f547f66..cad52bd331f7b 100644 --- a/drivers/net/ethernet/hisilicon/hns/hnae.h +++ b/drivers/net/ethernet/hisilicon/hns/hnae.h @@ -220,10 +220,10 @@ struct hnae_desc_cb { /* priv data for the desc, e.g. skb when use with ip stack*/ void *priv; - u16 page_offset; - u16 reuse_flag; + u32 page_offset; + u32 length; /* length of the buffer */ - u16 length; /* length of the buffer */ + u16 reuse_flag; /* desc type, used by the ring user to mark the type of the priv data */ u16 type; -- GitLab From e8baff89bc3f79fa1977108c55a02e41d876132b Mon Sep 17 00:00:00 2001 From: Huazhong Tan Date: Thu, 23 Aug 2018 11:10:12 +0800 Subject: [PATCH 0696/2269] net: hns: fix skb->truesize underestimation [ Upstream commit b1ccd4c0ab6ef499f47dd84ed4920502a7147bba ] skb->truesize is not meant to be tracking amount of used bytes in a skb, but amount of reserved/consumed bytes in memory. For instance, if we use a single byte in last page fragment, we have to account the full size of the fragment. So skb_add_rx_frag needs to calculate the length of the entire buffer into turesize. Fixes: 9cbe9fd5214e ("net: hns: optimize XGE capability by reducing cpu usage") Signed-off-by: Huazhong tan Signed-off-by: Salil Mehta Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/hisilicon/hns/hns_enet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c index e77192683dbad..25a9732afc842 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c @@ -530,7 +530,7 @@ static void hns_nic_reuse_page(struct sk_buff *skb, int i, } skb_add_rx_frag(skb, i, desc_cb->priv, desc_cb->page_offset + pull_len, - size - pull_len, truesize - pull_len); + size - pull_len, truesize); /* avoid re-using remote pages,flag default unreuse */ if (unlikely(page_to_nid(desc_cb->priv) != numa_node_id())) -- GitLab From 716865940461b64a42e637992483708ca90d51f7 Mon Sep 17 00:00:00 2001 From: Bo Chen Date: Mon, 23 Jul 2018 09:01:29 -0700 Subject: [PATCH 0697/2269] e1000: check on netif_running() before calling e1000_up() [ Upstream commit cf1acec008f8d7761aa3fd7c4bca7e17b2d2512d ] When the device is not up, the call to 'e1000_up()' from the error handling path of 'e1000_set_ringparam()' causes a kernel oops with a null-pointer dereference. The null-pointer dereference is triggered in function 'e1000_alloc_rx_buffers()' at line 'buffer_info = &rx_ring->buffer_info[i]'. This bug was reported by COD, a tool for testing kernel module binaries I am building. This bug was also detected by KFI from Dr. Kai Cong. This patch fixes the bug by checking on 'netif_running()' before calling 'e1000_up()' in 'e1000_set_ringparam()'. Signed-off-by: Bo Chen Acked-by: Alexander Duyck Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/intel/e1000/e1000_ethtool.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/e1000/e1000_ethtool.c b/drivers/net/ethernet/intel/e1000/e1000_ethtool.c index 3b3983a1ffbba..d44d3643677dc 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_ethtool.c +++ b/drivers/net/ethernet/intel/e1000/e1000_ethtool.c @@ -664,7 +664,8 @@ err_setup_rx: err_alloc_rx: kfree(txdr); err_alloc_tx: - e1000_up(adapter); + if (netif_running(adapter->netdev)) + e1000_up(adapter); err_setup: clear_bit(__E1000_RESETTING, &adapter->flags); return err; -- GitLab From 243af256387c60f4ccdbe6b009425bb4efa0f00e Mon Sep 17 00:00:00 2001 From: Bo Chen Date: Mon, 23 Jul 2018 09:01:30 -0700 Subject: [PATCH 0698/2269] e1000: ensure to free old tx/rx rings in set_ringparam() [ Upstream commit ee400a3f1bfe7004a3e14b81c38ccc5583c26295 ] In 'e1000_set_ringparam()', the tx_ring and rx_ring are updated with new value and the old tx/rx rings are freed only when the device is up. There are resource leaks on old tx/rx rings when the device is not up. This bug is reported by COD, a tool for testing kernel module binaries I am building. This patch fixes the bug by always calling 'kfree()' on old tx/rx rings in 'e1000_set_ringparam()'. Signed-off-by: Bo Chen Reviewed-by: Alexander Duyck Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/intel/e1000/e1000_ethtool.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000/e1000_ethtool.c b/drivers/net/ethernet/intel/e1000/e1000_ethtool.c index d44d3643677dc..10df2d60c1814 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_ethtool.c +++ b/drivers/net/ethernet/intel/e1000/e1000_ethtool.c @@ -644,14 +644,14 @@ static int e1000_set_ringparam(struct net_device *netdev, adapter->tx_ring = tx_old; e1000_free_all_rx_resources(adapter); e1000_free_all_tx_resources(adapter); - kfree(tx_old); - kfree(rx_old); adapter->rx_ring = rxdr; adapter->tx_ring = txdr; err = e1000_up(adapter); if (err) goto err_setup; } + kfree(tx_old); + kfree(rx_old); clear_bit(__E1000_RESETTING, &adapter->flags); return 0; -- GitLab From 59f5838cc95070135fe5137125d4ab7fdb471800 Mon Sep 17 00:00:00 2001 From: Srikanth Jampala Date: Wed, 22 Aug 2018 12:40:52 +0530 Subject: [PATCH 0699/2269] crypto: cavium/nitrox - fix for command corruption in queue full case with backlog submissions. [ Upstream commit 3d7c82060d1fe65bde4023aac41a0b1bd7718e07 ] Earlier used to post the current command without checking queue full after backlog submissions. So, post the current command only after confirming the space in queue after backlog submissions. Maintain host write index instead of reading device registers to get the next free slot to post the command. Return -ENOSPC in queue full case. Signed-off-by: Srikanth Jampala Reviewed-by: Gadam Sreerama Tested-by: Jha, Chandan Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/cavium/nitrox/nitrox_dev.h | 3 +- drivers/crypto/cavium/nitrox/nitrox_lib.c | 1 + drivers/crypto/cavium/nitrox/nitrox_reqmgr.c | 57 +++++++++++--------- 3 files changed, 35 insertions(+), 26 deletions(-) diff --git a/drivers/crypto/cavium/nitrox/nitrox_dev.h b/drivers/crypto/cavium/nitrox/nitrox_dev.h index 9a476bb6d4c7e..af596455b420f 100644 --- a/drivers/crypto/cavium/nitrox/nitrox_dev.h +++ b/drivers/crypto/cavium/nitrox/nitrox_dev.h @@ -35,6 +35,7 @@ struct nitrox_cmdq { /* requests in backlog queues */ atomic_t backlog_count; + int write_idx; /* command size 32B/64B */ u8 instr_size; u8 qno; @@ -87,7 +88,7 @@ struct nitrox_bh { struct bh_data *slc; }; -/* NITROX-5 driver state */ +/* NITROX-V driver state */ #define NITROX_UCODE_LOADED 0 #define NITROX_READY 1 diff --git a/drivers/crypto/cavium/nitrox/nitrox_lib.c b/drivers/crypto/cavium/nitrox/nitrox_lib.c index 4fdc921ba611b..9906c00866476 100644 --- a/drivers/crypto/cavium/nitrox/nitrox_lib.c +++ b/drivers/crypto/cavium/nitrox/nitrox_lib.c @@ -36,6 +36,7 @@ static int cmdq_common_init(struct nitrox_cmdq *cmdq) cmdq->head = PTR_ALIGN(cmdq->head_unaligned, PKT_IN_ALIGN); cmdq->dma = PTR_ALIGN(cmdq->dma_unaligned, PKT_IN_ALIGN); cmdq->qsize = (qsize + PKT_IN_ALIGN); + cmdq->write_idx = 0; spin_lock_init(&cmdq->response_lock); spin_lock_init(&cmdq->cmdq_lock); diff --git a/drivers/crypto/cavium/nitrox/nitrox_reqmgr.c b/drivers/crypto/cavium/nitrox/nitrox_reqmgr.c index 4addc238a6ef3..4adf28176a4e4 100644 --- a/drivers/crypto/cavium/nitrox/nitrox_reqmgr.c +++ b/drivers/crypto/cavium/nitrox/nitrox_reqmgr.c @@ -43,6 +43,16 @@ * Invalid flag options in AES-CCM IV. */ +static inline int incr_index(int index, int count, int max) +{ + if ((index + count) >= max) + index = index + count - max; + else + index += count; + + return index; +} + /** * dma_free_sglist - unmap and free the sg lists. * @ndev: N5 device @@ -427,30 +437,29 @@ static void post_se_instr(struct nitrox_softreq *sr, struct nitrox_cmdq *cmdq) { struct nitrox_device *ndev = sr->ndev; - union nps_pkt_in_instr_baoff_dbell pkt_in_baoff_dbell; - u64 offset; + int idx; u8 *ent; spin_lock_bh(&cmdq->cmdq_lock); - /* get the next write offset */ - offset = NPS_PKT_IN_INSTR_BAOFF_DBELLX(cmdq->qno); - pkt_in_baoff_dbell.value = nitrox_read_csr(ndev, offset); + idx = cmdq->write_idx; /* copy the instruction */ - ent = cmdq->head + pkt_in_baoff_dbell.s.aoff; + ent = cmdq->head + (idx * cmdq->instr_size); memcpy(ent, &sr->instr, cmdq->instr_size); - /* flush the command queue updates */ - dma_wmb(); - sr->tstamp = jiffies; atomic_set(&sr->status, REQ_POSTED); response_list_add(sr, cmdq); + sr->tstamp = jiffies; + /* flush the command queue updates */ + dma_wmb(); /* Ring doorbell with count 1 */ writeq(1, cmdq->dbell_csr_addr); /* orders the doorbell rings */ mmiowb(); + cmdq->write_idx = incr_index(idx, 1, ndev->qlen); + spin_unlock_bh(&cmdq->cmdq_lock); } @@ -460,6 +469,9 @@ static int post_backlog_cmds(struct nitrox_cmdq *cmdq) struct nitrox_softreq *sr, *tmp; int ret = 0; + if (!atomic_read(&cmdq->backlog_count)) + return 0; + spin_lock_bh(&cmdq->backlog_lock); list_for_each_entry_safe(sr, tmp, &cmdq->backlog_head, backlog) { @@ -467,7 +479,7 @@ static int post_backlog_cmds(struct nitrox_cmdq *cmdq) /* submit until space available */ if (unlikely(cmdq_full(cmdq, ndev->qlen))) { - ret = -EBUSY; + ret = -ENOSPC; break; } /* delete from backlog list */ @@ -492,23 +504,20 @@ static int nitrox_enqueue_request(struct nitrox_softreq *sr) { struct nitrox_cmdq *cmdq = sr->cmdq; struct nitrox_device *ndev = sr->ndev; - int ret = -EBUSY; + + /* try to post backlog requests */ + post_backlog_cmds(cmdq); if (unlikely(cmdq_full(cmdq, ndev->qlen))) { if (!(sr->flags & CRYPTO_TFM_REQ_MAY_BACKLOG)) - return -EAGAIN; - + return -ENOSPC; + /* add to backlog list */ backlog_list_add(sr, cmdq); - } else { - ret = post_backlog_cmds(cmdq); - if (ret) { - backlog_list_add(sr, cmdq); - return ret; - } - post_se_instr(sr, cmdq); - ret = -EINPROGRESS; + return -EBUSY; } - return ret; + post_se_instr(sr, cmdq); + + return -EINPROGRESS; } /** @@ -625,11 +634,9 @@ int nitrox_process_se_request(struct nitrox_device *ndev, */ sr->instr.fdata[0] = *((u64 *)&req->gph); sr->instr.fdata[1] = 0; - /* flush the soft_req changes before posting the cmd */ - wmb(); ret = nitrox_enqueue_request(sr); - if (ret == -EAGAIN) + if (ret == -ENOSPC) goto send_fail; return ret; -- GitLab From 0647ce03bd48aac37b055355c168b4a889bcc36b Mon Sep 17 00:00:00 2001 From: Lothar Felten Date: Tue, 14 Aug 2018 09:09:37 +0200 Subject: [PATCH 0700/2269] hwmon: (ina2xx) fix sysfs shunt resistor read access [ Upstream commit 3ad867001c91657c46dcf6656d52eb6080286fd5 ] fix the sysfs shunt resistor read access: return the shunt resistor value, not the calibration register contents. update email address Signed-off-by: Lothar Felten Signed-off-by: Guenter Roeck Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- Documentation/hwmon/ina2xx | 2 +- drivers/hwmon/ina2xx.c | 13 +++++++++++-- include/linux/platform_data/ina2xx.h | 2 +- 3 files changed, 13 insertions(+), 4 deletions(-) diff --git a/Documentation/hwmon/ina2xx b/Documentation/hwmon/ina2xx index cfd31d94c8727..f8bf14055c2f3 100644 --- a/Documentation/hwmon/ina2xx +++ b/Documentation/hwmon/ina2xx @@ -32,7 +32,7 @@ Supported chips: Datasheet: Publicly available at the Texas Instruments website http://www.ti.com/ -Author: Lothar Felten +Author: Lothar Felten Description ----------- diff --git a/drivers/hwmon/ina2xx.c b/drivers/hwmon/ina2xx.c index e9e6aeabbf842..71d3445ba869c 100644 --- a/drivers/hwmon/ina2xx.c +++ b/drivers/hwmon/ina2xx.c @@ -17,7 +17,7 @@ * Bi-directional Current/Power Monitor with I2C Interface * Datasheet: http://www.ti.com/product/ina230 * - * Copyright (C) 2012 Lothar Felten + * Copyright (C) 2012 Lothar Felten * Thanks to Jan Volkering * * This program is free software; you can redistribute it and/or modify @@ -329,6 +329,15 @@ static int ina2xx_set_shunt(struct ina2xx_data *data, long val) return 0; } +static ssize_t ina2xx_show_shunt(struct device *dev, + struct device_attribute *da, + char *buf) +{ + struct ina2xx_data *data = dev_get_drvdata(dev); + + return snprintf(buf, PAGE_SIZE, "%li\n", data->rshunt); +} + static ssize_t ina2xx_store_shunt(struct device *dev, struct device_attribute *da, const char *buf, size_t count) @@ -403,7 +412,7 @@ static SENSOR_DEVICE_ATTR(power1_input, S_IRUGO, ina2xx_show_value, NULL, /* shunt resistance */ static SENSOR_DEVICE_ATTR(shunt_resistor, S_IRUGO | S_IWUSR, - ina2xx_show_value, ina2xx_store_shunt, + ina2xx_show_shunt, ina2xx_store_shunt, INA2XX_CALIBRATION); /* update interval (ina226 only) */ diff --git a/include/linux/platform_data/ina2xx.h b/include/linux/platform_data/ina2xx.h index 9abc0ca7259b7..9f0aa1b48c784 100644 --- a/include/linux/platform_data/ina2xx.h +++ b/include/linux/platform_data/ina2xx.h @@ -1,7 +1,7 @@ /* * Driver for Texas Instruments INA219, INA226 power monitor chips * - * Copyright (C) 2012 Lothar Felten + * Copyright (C) 2012 Lothar Felten * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as -- GitLab From d3ddd8e16cab439c0abbf08a33a6a0465c8e67da Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 14 Aug 2018 13:07:47 +0300 Subject: [PATCH 0701/2269] hwmon: (adt7475) Make adt7475_read_word() return errors [ Upstream commit f196dec6d50abb2e65fb54a0621b2f1b4d922995 ] The adt7475_read_word() function was meant to return negative error codes on failure. Signed-off-by: Dan Carpenter Reviewed-by: Tokunori Ikegami Signed-off-by: Guenter Roeck Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/hwmon/adt7475.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/hwmon/adt7475.c b/drivers/hwmon/adt7475.c index 9ef84998c7f38..37db2eb66ed79 100644 --- a/drivers/hwmon/adt7475.c +++ b/drivers/hwmon/adt7475.c @@ -303,14 +303,18 @@ static inline u16 volt2reg(int channel, long volt, u8 bypass_attn) return clamp_val(reg, 0, 1023) & (0xff << 2); } -static u16 adt7475_read_word(struct i2c_client *client, int reg) +static int adt7475_read_word(struct i2c_client *client, int reg) { - u16 val; + int val1, val2; - val = i2c_smbus_read_byte_data(client, reg); - val |= (i2c_smbus_read_byte_data(client, reg + 1) << 8); + val1 = i2c_smbus_read_byte_data(client, reg); + if (val1 < 0) + return val1; + val2 = i2c_smbus_read_byte_data(client, reg + 1); + if (val2 < 0) + return val2; - return val; + return val1 | (val2 << 8); } static void adt7475_write_word(struct i2c_client *client, int reg, u16 val) -- GitLab From 9190a7ea313ffbc94531b8c9551fb5f04bbba405 Mon Sep 17 00:00:00 2001 From: Leonard Crestez Date: Tue, 24 Jul 2018 19:14:19 +0300 Subject: [PATCH 0702/2269] Revert "ARM: dts: imx7d: Invert legacy PCI irq mapping" [ Upstream commit 538d6e9d597584e80514698e24321645debde78f ] This reverts commit 1c86c9dd82f859b474474a7fee0d5195da2c9c1d. That commit followed the reference manual but unfortunately the imx7d manual is incorrect. Tested with ath9k pcie card and confirmed internally. Signed-off-by: Leonard Crestez Acked-by: Lucas Stach Fixes: 1c86c9dd82f8 ("ARM: dts: imx7d: Invert legacy PCI irq mapping") Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/imx7d.dtsi | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/arch/arm/boot/dts/imx7d.dtsi b/arch/arm/boot/dts/imx7d.dtsi index 4d308d17f040c..119b63ffb0fec 100644 --- a/arch/arm/boot/dts/imx7d.dtsi +++ b/arch/arm/boot/dts/imx7d.dtsi @@ -144,10 +144,14 @@ interrupt-names = "msi"; #interrupt-cells = <1>; interrupt-map-mask = <0 0 0 0x7>; - interrupt-map = <0 0 0 1 &intc GIC_SPI 122 IRQ_TYPE_LEVEL_HIGH>, - <0 0 0 2 &intc GIC_SPI 123 IRQ_TYPE_LEVEL_HIGH>, - <0 0 0 3 &intc GIC_SPI 124 IRQ_TYPE_LEVEL_HIGH>, - <0 0 0 4 &intc GIC_SPI 125 IRQ_TYPE_LEVEL_HIGH>; + /* + * Reference manual lists pci irqs incorrectly + * Real hardware ordering is same as imx6: D+MSI, C, B, A + */ + interrupt-map = <0 0 0 1 &intc GIC_SPI 125 IRQ_TYPE_LEVEL_HIGH>, + <0 0 0 2 &intc GIC_SPI 124 IRQ_TYPE_LEVEL_HIGH>, + <0 0 0 3 &intc GIC_SPI 123 IRQ_TYPE_LEVEL_HIGH>, + <0 0 0 4 &intc GIC_SPI 122 IRQ_TYPE_LEVEL_HIGH>; clocks = <&clks IMX7D_PCIE_CTRL_ROOT_CLK>, <&clks IMX7D_PLL_ENET_MAIN_100M_CLK>, <&clks IMX7D_PCIE_PHY_ROOT_CLK>; -- GitLab From 50850b432cc512551d48e2d3b49447b2eb275406 Mon Sep 17 00:00:00 2001 From: Rex Zhu Date: Fri, 24 Aug 2018 17:26:23 +0800 Subject: [PATCH 0703/2269] drm/amdgpu: Enable/disable gfx PG feature in rlc safe mode [ Upstream commit 8ef23364b654d44244400d79988e677e504b21ba ] This is required by gfx hw and can fix the rlc hang when do s3 stree test on Cz/St. Reviewed-by: Alex Deucher Signed-off-by: Hang Zhou Signed-off-by: Rex Zhu Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index a7e54820a330d..85bcd236890ec 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -5479,6 +5479,11 @@ static int gfx_v8_0_set_powergating_state(void *handle, if (amdgpu_sriov_vf(adev)) return 0; + if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG | + AMD_PG_SUPPORT_RLC_SMU_HS | + AMD_PG_SUPPORT_CP | + AMD_PG_SUPPORT_GFX_DMG)) + adev->gfx.rlc.funcs->enter_safe_mode(adev); switch (adev->asic_type) { case CHIP_CARRIZO: case CHIP_STONEY: @@ -5527,7 +5532,11 @@ static int gfx_v8_0_set_powergating_state(void *handle, default: break; } - + if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG | + AMD_PG_SUPPORT_RLC_SMU_HS | + AMD_PG_SUPPORT_CP | + AMD_PG_SUPPORT_GFX_DMG)) + adev->gfx.rlc.funcs->exit_safe_mode(adev); return 0; } -- GitLab From 46cb720a8a3ece894eff4e1b85b3925efb71afd2 Mon Sep 17 00:00:00 2001 From: Rex Zhu Date: Fri, 24 Aug 2018 16:17:54 +0800 Subject: [PATCH 0704/2269] drm/amdgpu: Update power state at the end of smu hw_init. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 2ab4d0e74256fc49b7b270f63c1d1e47c2455abc ] For SI/Kv, the power state is managed by function amdgpu_pm_compute_clocks. when dpm enabled, we should call amdgpu_pm_compute_clocks to update current power state instand of set boot state. this change can fix the oops when kfd driver was enabled on Kv. Reviewed-by: Alex Deucher Tested-by: Michel Dänzer Signed-off-by: Rex Zhu Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/kv_dpm.c | 4 +--- drivers/gpu/drm/amd/amdgpu/si_dpm.c | 3 +-- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c index 3bbf2ccfca89c..c76073b422d6c 100644 --- a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c @@ -1352,8 +1352,6 @@ static int kv_dpm_enable(struct amdgpu_device *adev) return ret; } - kv_update_current_ps(adev, adev->pm.dpm.boot_ps); - if (adev->irq.installed && amdgpu_is_internal_thermal_sensor(adev->pm.int_thermal_type)) { ret = kv_set_thermal_temperature_range(adev, KV_TEMP_RANGE_MIN, KV_TEMP_RANGE_MAX); @@ -3054,7 +3052,7 @@ static int kv_dpm_hw_init(void *handle) else adev->pm.dpm_enabled = true; mutex_unlock(&adev->pm.mutex); - + amdgpu_pm_compute_clocks(adev); return ret; } diff --git a/drivers/gpu/drm/amd/amdgpu/si_dpm.c b/drivers/gpu/drm/amd/amdgpu/si_dpm.c index 6f1dea157a775..55613f425931d 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/si_dpm.c @@ -6884,7 +6884,6 @@ static int si_dpm_enable(struct amdgpu_device *adev) si_enable_auto_throttle_source(adev, AMDGPU_DPM_AUTO_THROTTLE_SRC_THERMAL, true); si_thermal_start_thermal_controller(adev); - ni_update_current_ps(adev, boot_ps); return 0; } @@ -7758,7 +7757,7 @@ static int si_dpm_hw_init(void *handle) else adev->pm.dpm_enabled = true; mutex_unlock(&adev->pm.mutex); - + amdgpu_pm_compute_clocks(adev); return ret; } -- GitLab From 516b72e36dedaac451a307f8e9a0ef82272d92fb Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Sun, 15 Jul 2018 22:09:29 +0200 Subject: [PATCH 0705/2269] ata: ftide010: Add a quirk for SQ201 [ Upstream commit 46cb52ad414ac829680d0bb8cc7090ac2b577ca7 ] The DMA is broken on this specific device for some unknown reason (probably badly designed or plain broken interface electronics) and will only work with PIO. Other users of the same hardware does not have this problem. Add a specific quirk so that this Gemini device gets DMA turned off. Also fix up some code around passing the port information around in probe while we're at it. Signed-off-by: Linus Walleij Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/ata/pata_ftide010.c | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/drivers/ata/pata_ftide010.c b/drivers/ata/pata_ftide010.c index 5d4b72e21161a..569a4a662dcd4 100644 --- a/drivers/ata/pata_ftide010.c +++ b/drivers/ata/pata_ftide010.c @@ -256,14 +256,12 @@ static struct ata_port_operations pata_ftide010_port_ops = { .qc_issue = ftide010_qc_issue, }; -static struct ata_port_info ftide010_port_info[] = { - { - .flags = ATA_FLAG_SLAVE_POSS, - .mwdma_mask = ATA_MWDMA2, - .udma_mask = ATA_UDMA6, - .pio_mask = ATA_PIO4, - .port_ops = &pata_ftide010_port_ops, - }, +static struct ata_port_info ftide010_port_info = { + .flags = ATA_FLAG_SLAVE_POSS, + .mwdma_mask = ATA_MWDMA2, + .udma_mask = ATA_UDMA6, + .pio_mask = ATA_PIO4, + .port_ops = &pata_ftide010_port_ops, }; #if IS_ENABLED(CONFIG_SATA_GEMINI) @@ -349,6 +347,7 @@ static int pata_ftide010_gemini_cable_detect(struct ata_port *ap) } static int pata_ftide010_gemini_init(struct ftide010 *ftide, + struct ata_port_info *pi, bool is_ata1) { struct device *dev = ftide->dev; @@ -373,7 +372,13 @@ static int pata_ftide010_gemini_init(struct ftide010 *ftide, /* Flag port as SATA-capable */ if (gemini_sata_bridge_enabled(sg, is_ata1)) - ftide010_port_info[0].flags |= ATA_FLAG_SATA; + pi->flags |= ATA_FLAG_SATA; + + /* This device has broken DMA, only PIO works */ + if (of_machine_is_compatible("itian,sq201")) { + pi->mwdma_mask = 0; + pi->udma_mask = 0; + } /* * We assume that a simple 40-wire cable is used in the PATA mode. @@ -435,6 +440,7 @@ static int pata_ftide010_gemini_init(struct ftide010 *ftide, } #else static int pata_ftide010_gemini_init(struct ftide010 *ftide, + struct ata_port_info *pi, bool is_ata1) { return -ENOTSUPP; @@ -446,7 +452,7 @@ static int pata_ftide010_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; struct device_node *np = dev->of_node; - const struct ata_port_info pi = ftide010_port_info[0]; + struct ata_port_info pi = ftide010_port_info; const struct ata_port_info *ppi[] = { &pi, NULL }; struct ftide010 *ftide; struct resource *res; @@ -490,6 +496,7 @@ static int pata_ftide010_probe(struct platform_device *pdev) * are ATA0. This will also set up the cable types. */ ret = pata_ftide010_gemini_init(ftide, + &pi, (res->start == 0x63400000)); if (ret) goto err_dis_clk; -- GitLab From d11237bdcf9570edc0c562d93df219a4a8a4c9f0 Mon Sep 17 00:00:00 2001 From: James Smart Date: Thu, 9 Aug 2018 16:00:14 -0700 Subject: [PATCH 0706/2269] nvme-fcloop: Fix dropped LS's to removed target port [ Upstream commit afd299ca996929f4f98ac20da0044c0cdc124879 ] When a targetport is removed from the config, fcloop will avoid calling the LS done() routine thinking the targetport is gone. This leaves the initiator reset/reconnect hanging as it waits for a status on the Create_Association LS for the reconnect. Change the filter in the LS callback path. If tport null (set when failed validation before "sending to remote port"), be sure to call done. This was the main bug. But, continue the logic that only calls done if tport was set but there is no remoteport (e.g. case where remoteport has been removed, thus host doesn't expect a completion). Signed-off-by: James Smart Signed-off-by: Christoph Hellwig Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/nvme/target/fcloop.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c index c0080f6ab2f5b..0b0a4825b3eb1 100644 --- a/drivers/nvme/target/fcloop.c +++ b/drivers/nvme/target/fcloop.c @@ -300,7 +300,7 @@ fcloop_tgt_lsrqst_done_work(struct work_struct *work) struct fcloop_tport *tport = tls_req->tport; struct nvmefc_ls_req *lsreq = tls_req->lsreq; - if (tport->remoteport) + if (!tport || tport->remoteport) lsreq->done(lsreq, tls_req->status); } @@ -318,6 +318,7 @@ fcloop_ls_req(struct nvme_fc_local_port *localport, if (!rport->targetport) { tls_req->status = -ECONNREFUSED; + tls_req->tport = NULL; schedule_work(&tls_req->work); return ret; } -- GitLab From 75b3054d6807134741601872fd0f91be6a373bd5 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Mon, 27 Aug 2018 19:18:21 -0700 Subject: [PATCH 0707/2269] ARM: dts: omap4-droid4: Fix emmc errors seen on some devices [ Upstream commit 2d59bb602314a4b2593fde267734266b5e872dd0 ] Otherwise we can get the following errors occasionally on some devices: mmc1: tried to HW reset card, got error -110 mmcblk1: error -110 requesting status mmcblk1: recovery failed! print_req_error: I/O error, dev mmcblk1, sector 14329 ... I have one device that hits this error almost on every boot, and another one that hits it only rarely with the other ones I've used behave without problems. I'm not sure if the issue is related to a particular eMMC card model, but in case it is, both of the machines with issues have: # cat /sys/class/mmc_host/mmc1/mmc1:0001/manfid \ /sys/class/mmc_host/mmc1/mmc1:0001/oemid \ /sys/class/mmc_host/mmc1/mmc1:0001/name 0x000045 0x0100 SEM16G and the working ones have: 0x000011 0x0100 016G92 Note that "ti,non-removable" is different as omap_hsmmc_reg_get() does not call omap_hsmmc_disable_boot_regulators() if no_regulator_off_init is set. And currently we set no_regulator_off_init only for "ti,non-removable" and not for "non-removable". It seems that we should have "non-removable" with some other mmc generic property behave in the same way instead of having to use a non-generic property. But let's fix the issue first. Fixes: 7e2f8c0ae670 ("ARM: dts: Add minimal support for motorola droid 4 xt894") Cc: Marcel Partap Cc: Merlijn Wajer Cc: Michael Scott Cc: NeKit Cc: Pavel Machek Cc: Sebastian Reichel Signed-off-by: Tony Lindgren Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/omap4-droid4-xt894.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/omap4-droid4-xt894.dts b/arch/arm/boot/dts/omap4-droid4-xt894.dts index 8b93d37310f28..bad690b23081b 100644 --- a/arch/arm/boot/dts/omap4-droid4-xt894.dts +++ b/arch/arm/boot/dts/omap4-droid4-xt894.dts @@ -314,7 +314,7 @@ &mmc2 { vmmc-supply = <&vsdio>; bus-width = <8>; - non-removable; + ti,non-removable; }; &mmc3 { -- GitLab From 826d8678cde2a4329d19b96b93e093fa0f4d0883 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 24 Aug 2018 15:08:29 +0100 Subject: [PATCH 0708/2269] arm/arm64: smccc-1.1: Make return values unsigned long [ Upstream commit 1d8f574708a3fb6f18c85486d0c5217df893c0cf ] An unfortunate consequence of having a strong typing for the input values to the SMC call is that it also affects the type of the return values, limiting r0 to 32 bits and r{1,2,3} to whatever was passed as an input. Let's turn everything into "unsigned long", which satisfies the requirements of both architectures, and allows for the full range of return values. Reported-by: Julien Grall Signed-off-by: Marc Zyngier Signed-off-by: Will Deacon Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- include/linux/arm-smccc.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h index ca1d2cc2cdfa0..5a91ff33720b2 100644 --- a/include/linux/arm-smccc.h +++ b/include/linux/arm-smccc.h @@ -199,31 +199,31 @@ asmlinkage void __arm_smccc_hvc(unsigned long a0, unsigned long a1, #define __declare_arg_0(a0, res) \ struct arm_smccc_res *___res = res; \ - register u32 r0 asm("r0") = a0; \ + register unsigned long r0 asm("r0") = (u32)a0; \ register unsigned long r1 asm("r1"); \ register unsigned long r2 asm("r2"); \ register unsigned long r3 asm("r3") #define __declare_arg_1(a0, a1, res) \ struct arm_smccc_res *___res = res; \ - register u32 r0 asm("r0") = a0; \ - register typeof(a1) r1 asm("r1") = a1; \ + register unsigned long r0 asm("r0") = (u32)a0; \ + register unsigned long r1 asm("r1") = a1; \ register unsigned long r2 asm("r2"); \ register unsigned long r3 asm("r3") #define __declare_arg_2(a0, a1, a2, res) \ struct arm_smccc_res *___res = res; \ - register u32 r0 asm("r0") = a0; \ - register typeof(a1) r1 asm("r1") = a1; \ - register typeof(a2) r2 asm("r2") = a2; \ + register unsigned long r0 asm("r0") = (u32)a0; \ + register unsigned long r1 asm("r1") = a1; \ + register unsigned long r2 asm("r2") = a2; \ register unsigned long r3 asm("r3") #define __declare_arg_3(a0, a1, a2, a3, res) \ struct arm_smccc_res *___res = res; \ - register u32 r0 asm("r0") = a0; \ - register typeof(a1) r1 asm("r1") = a1; \ - register typeof(a2) r2 asm("r2") = a2; \ - register typeof(a3) r3 asm("r3") = a3 + register unsigned long r0 asm("r0") = (u32)a0; \ + register unsigned long r1 asm("r1") = a1; \ + register unsigned long r2 asm("r2") = a2; \ + register unsigned long r3 asm("r3") = a3 #define __declare_arg_4(a0, a1, a2, a3, a4, res) \ __declare_arg_3(a0, a1, a2, a3, res); \ -- GitLab From 647b6d4ff699f841db598f378b0deec2e0b41c2f Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 24 Aug 2018 15:08:30 +0100 Subject: [PATCH 0709/2269] arm/arm64: smccc-1.1: Handle function result as parameters [ Upstream commit 755a8bf5579d22eb5636685c516d8dede799e27b ] If someone has the silly idea to write something along those lines: extern u64 foo(void); void bar(struct arm_smccc_res *res) { arm_smccc_1_1_smc(0xbad, foo(), res); } they are in for a surprise, as this gets compiled as: 0000000000000588 : 588: a9be7bfd stp x29, x30, [sp, #-32]! 58c: 910003fd mov x29, sp 590: f9000bf3 str x19, [sp, #16] 594: aa0003f3 mov x19, x0 598: aa1e03e0 mov x0, x30 59c: 94000000 bl 0 <_mcount> 5a0: 94000000 bl 0 5a4: aa0003e1 mov x1, x0 5a8: d4000003 smc #0x0 5ac: b4000073 cbz x19, 5b8 5b0: a9000660 stp x0, x1, [x19] 5b4: a9010e62 stp x2, x3, [x19, #16] 5b8: f9400bf3 ldr x19, [sp, #16] 5bc: a8c27bfd ldp x29, x30, [sp], #32 5c0: d65f03c0 ret 5c4: d503201f nop The call to foo "overwrites" the x0 register for the return value, and we end up calling the wrong secure service. A solution is to evaluate all the parameters before assigning anything to specific registers, leading to the expected result: 0000000000000588 : 588: a9be7bfd stp x29, x30, [sp, #-32]! 58c: 910003fd mov x29, sp 590: f9000bf3 str x19, [sp, #16] 594: aa0003f3 mov x19, x0 598: aa1e03e0 mov x0, x30 59c: 94000000 bl 0 <_mcount> 5a0: 94000000 bl 0 5a4: aa0003e1 mov x1, x0 5a8: d28175a0 mov x0, #0xbad 5ac: d4000003 smc #0x0 5b0: b4000073 cbz x19, 5bc 5b4: a9000660 stp x0, x1, [x19] 5b8: a9010e62 stp x2, x3, [x19, #16] 5bc: f9400bf3 ldr x19, [sp, #16] 5c0: a8c27bfd ldp x29, x30, [sp], #32 5c4: d65f03c0 ret Reported-by: Julien Grall Signed-off-by: Marc Zyngier Signed-off-by: Will Deacon Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- include/linux/arm-smccc.h | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h index 5a91ff33720b2..18863d56273cc 100644 --- a/include/linux/arm-smccc.h +++ b/include/linux/arm-smccc.h @@ -205,41 +205,51 @@ asmlinkage void __arm_smccc_hvc(unsigned long a0, unsigned long a1, register unsigned long r3 asm("r3") #define __declare_arg_1(a0, a1, res) \ + typeof(a1) __a1 = a1; \ struct arm_smccc_res *___res = res; \ register unsigned long r0 asm("r0") = (u32)a0; \ - register unsigned long r1 asm("r1") = a1; \ + register unsigned long r1 asm("r1") = __a1; \ register unsigned long r2 asm("r2"); \ register unsigned long r3 asm("r3") #define __declare_arg_2(a0, a1, a2, res) \ + typeof(a1) __a1 = a1; \ + typeof(a2) __a2 = a2; \ struct arm_smccc_res *___res = res; \ register unsigned long r0 asm("r0") = (u32)a0; \ - register unsigned long r1 asm("r1") = a1; \ - register unsigned long r2 asm("r2") = a2; \ + register unsigned long r1 asm("r1") = __a1; \ + register unsigned long r2 asm("r2") = __a2; \ register unsigned long r3 asm("r3") #define __declare_arg_3(a0, a1, a2, a3, res) \ + typeof(a1) __a1 = a1; \ + typeof(a2) __a2 = a2; \ + typeof(a3) __a3 = a3; \ struct arm_smccc_res *___res = res; \ register unsigned long r0 asm("r0") = (u32)a0; \ - register unsigned long r1 asm("r1") = a1; \ - register unsigned long r2 asm("r2") = a2; \ - register unsigned long r3 asm("r3") = a3 + register unsigned long r1 asm("r1") = __a1; \ + register unsigned long r2 asm("r2") = __a2; \ + register unsigned long r3 asm("r3") = __a3 #define __declare_arg_4(a0, a1, a2, a3, a4, res) \ + typeof(a4) __a4 = a4; \ __declare_arg_3(a0, a1, a2, a3, res); \ - register typeof(a4) r4 asm("r4") = a4 + register unsigned long r4 asm("r4") = __a4 #define __declare_arg_5(a0, a1, a2, a3, a4, a5, res) \ + typeof(a5) __a5 = a5; \ __declare_arg_4(a0, a1, a2, a3, a4, res); \ - register typeof(a5) r5 asm("r5") = a5 + register unsigned long r5 asm("r5") = __a5 #define __declare_arg_6(a0, a1, a2, a3, a4, a5, a6, res) \ + typeof(a6) __a6 = a6; \ __declare_arg_5(a0, a1, a2, a3, a4, a5, res); \ - register typeof(a6) r6 asm("r6") = a6 + register unsigned long r6 asm("r6") = __a6 #define __declare_arg_7(a0, a1, a2, a3, a4, a5, a6, a7, res) \ + typeof(a7) __a7 = a7; \ __declare_arg_6(a0, a1, a2, a3, a4, a5, a6, res); \ - register typeof(a7) r7 asm("r7") = a7 + register unsigned long r7 asm("r7") = __a7 #define ___declare_args(count, ...) __declare_arg_ ## count(__VA_ARGS__) #define __declare_args(count, ...) ___declare_args(count, __VA_ARGS__) -- GitLab From 23210d92f617d44e6b8322d22bf59af3fd208bb9 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Thu, 30 Aug 2018 11:50:13 +0300 Subject: [PATCH 0710/2269] i2c: i801: Allow ACPI AML access I/O ports not reserved for SMBus [ Upstream commit 7fd6d98b89f382d414e1db528e29a67bbd749457 ] Commit 7ae81952cda ("i2c: i801: Allow ACPI SystemIO OpRegion to conflict with PCI BAR") made it possible for AML code to access SMBus I/O ports by installing custom SystemIO OpRegion handler and blocking i80i driver access upon first AML read/write to this OpRegion. However, while ThinkPad T560 does have SystemIO OpRegion declared under the SMBus device, it does not access any of the SMBus registers: Device (SMBU) { ... OperationRegion (SMBP, PCI_Config, 0x50, 0x04) Field (SMBP, DWordAcc, NoLock, Preserve) { , 5, TCOB, 11, Offset (0x04) } Name (TCBV, 0x00) Method (TCBS, 0, NotSerialized) { If ((TCBV == 0x00)) { TCBV = (\_SB.PCI0.SMBU.TCOB << 0x05) } Return (TCBV) /* \_SB_.PCI0.SMBU.TCBV */ } OperationRegion (TCBA, SystemIO, TCBS (), 0x10) Field (TCBA, ByteAcc, NoLock, Preserve) { Offset (0x04), , 9, CPSC, 1 } } Problem with the current approach is that it blocks all I/O port access and because this system has touchpad connected to the SMBus controller after first AML access (happens during suspend/resume cycle) the touchpad fails to work anymore. Fix this so that we allow ACPI AML I/O port access if it does not touch the region reserved for the SMBus. Fixes: 7ae81952cda ("i2c: i801: Allow ACPI SystemIO OpRegion to conflict with PCI BAR") Link: https://bugzilla.kernel.org/show_bug.cgi?id=200737 Reported-by: Yussuf Khalil Signed-off-by: Mika Westerberg Reviewed-by: Jean Delvare Signed-off-by: Wolfram Sang Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/busses/i2c-i801.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c index 67cbd9f61acce..06debfa903b9c 100644 --- a/drivers/i2c/busses/i2c-i801.c +++ b/drivers/i2c/busses/i2c-i801.c @@ -1416,6 +1416,13 @@ static void i801_add_tco(struct i801_priv *priv) } #ifdef CONFIG_ACPI +static bool i801_acpi_is_smbus_ioport(const struct i801_priv *priv, + acpi_physical_address address) +{ + return address >= priv->smba && + address <= pci_resource_end(priv->pci_dev, SMBBAR); +} + static acpi_status i801_acpi_io_handler(u32 function, acpi_physical_address address, u32 bits, u64 *value, void *handler_context, void *region_context) @@ -1431,7 +1438,7 @@ i801_acpi_io_handler(u32 function, acpi_physical_address address, u32 bits, */ mutex_lock(&priv->acpi_lock); - if (!priv->acpi_reserved) { + if (!priv->acpi_reserved && i801_acpi_is_smbus_ioport(priv, address)) { priv->acpi_reserved = true; dev_warn(&pdev->dev, "BIOS is accessing SMBus registers\n"); -- GitLab From 4fff53acff152dcdf5d8758e273847f28c3b8e46 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 1 Sep 2018 21:01:28 -0700 Subject: [PATCH 0711/2269] x86/pti: Fix section mismatch warning/error [ Upstream commit ff924c5a1ec7548825cc2d07980b03be4224ffac ] Fix the section mismatch warning in arch/x86/mm/pti.c: WARNING: vmlinux.o(.text+0x6972a): Section mismatch in reference from the function pti_clone_pgtable() to the function .init.text:pti_user_pagetable_walk_pte() The function pti_clone_pgtable() references the function __init pti_user_pagetable_walk_pte(). This is often because pti_clone_pgtable lacks a __init annotation or the annotation of pti_user_pagetable_walk_pte is wrong. FATAL: modpost: Section mismatches detected. Fixes: 85900ea51577 ("x86/pti: Map the vsyscall page if needed") Reported-by: kbuild test robot Signed-off-by: Randy Dunlap Signed-off-by: Thomas Gleixner Cc: Andy Lutomirski Link: https://lkml.kernel.org/r/43a6d6a3-d69d-5eda-da09-0b1c88215a2a@infradead.org Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/x86/mm/pti.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c index b07e3ffc5ac50..60c48f5d6b0ee 100644 --- a/arch/x86/mm/pti.c +++ b/arch/x86/mm/pti.c @@ -224,7 +224,7 @@ static __init pmd_t *pti_user_pagetable_walk_pmd(unsigned long address) * * Returns a pointer to a PTE on success, or NULL on failure. */ -static __init pte_t *pti_user_pagetable_walk_pte(unsigned long address) +static pte_t *pti_user_pagetable_walk_pte(unsigned long address) { gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO); pmd_t *pmd; -- GitLab From fcaca557760f4fd78e793d8bfc4364fb58d7fb85 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 27 Sep 2018 16:53:22 +0100 Subject: [PATCH 0712/2269] arm64: KVM: Sanitize PSTATE.M when being set from userspace commit 2a3f93459d689d990b3ecfbe782fec89b97d3279 upstream. Not all execution modes are valid for a guest, and some of them depend on what the HW actually supports. Let's verify that what userspace provides is compatible with both the VM settings and the HW capabilities. Cc: Fixes: 0d854a60b1d7 ("arm64: KVM: enable initialization of a 32bit vcpu") Reviewed-by: Christoffer Dall Reviewed-by: Mark Rutland Reviewed-by: Dave Martin Signed-off-by: Marc Zyngier Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/kvm_emulate.h | 5 +++++ arch/arm64/kvm/guest.c | 10 +++++++++- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index e5df3fce00082..2b55aee7c0518 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -42,6 +42,11 @@ void kvm_inject_vabt(struct kvm_vcpu *vcpu); void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr); void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr); +static inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu) +{ + return !(vcpu->arch.hcr_el2 & HCR_RW); +} + static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu) { vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS; diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 3f4817dd69ae3..76d27edf33cbf 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -152,17 +152,25 @@ static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) } if (off == KVM_REG_ARM_CORE_REG(regs.pstate)) { - u32 mode = (*(u32 *)valp) & COMPAT_PSR_MODE_MASK; + u64 mode = (*(u64 *)valp) & COMPAT_PSR_MODE_MASK; switch (mode) { case COMPAT_PSR_MODE_USR: + if (!system_supports_32bit_el0()) + return -EINVAL; + break; case COMPAT_PSR_MODE_FIQ: case COMPAT_PSR_MODE_IRQ: case COMPAT_PSR_MODE_SVC: case COMPAT_PSR_MODE_ABT: case COMPAT_PSR_MODE_UND: + if (!vcpu_el1_is_32bit(vcpu)) + return -EINVAL; + break; case PSR_MODE_EL0t: case PSR_MODE_EL1t: case PSR_MODE_EL1h: + if (vcpu_el1_is_32bit(vcpu)) + return -EINVAL; break; default: err = -EINVAL; -- GitLab From d61ba3417e4fb71963441aa0c2e9c26f4568215b Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Tue, 11 Sep 2018 05:32:37 -0400 Subject: [PATCH 0713/2269] media: v4l: event: Prevent freeing event subscriptions while accessed commit ad608fbcf166fec809e402d548761768f602702c upstream. The event subscriptions are added to the subscribed event list while holding a spinlock, but that lock is subsequently released while still accessing the subscription object. This makes it possible to unsubscribe the event --- and freeing the subscription object's memory --- while the subscription object is simultaneously accessed. Prevent this by adding a mutex to serialise the event subscription and unsubscription. This also gives a guarantee to the callback ops that the add op has returned before the del op is called. This change also results in making the elems field less special: subscriptions are only added to the event list once they are fully initialised. Signed-off-by: Sakari Ailus Reviewed-by: Hans Verkuil Reviewed-by: Laurent Pinchart Cc: stable@vger.kernel.org # for 4.14 and up Fixes: c3b5b0241f62 ("V4L/DVB: V4L: Events: Add backend") Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/v4l2-core/v4l2-event.c | 38 +++++++++++++++------------- drivers/media/v4l2-core/v4l2-fh.c | 2 ++ include/media/v4l2-fh.h | 4 +++ 3 files changed, 26 insertions(+), 18 deletions(-) diff --git a/drivers/media/v4l2-core/v4l2-event.c b/drivers/media/v4l2-core/v4l2-event.c index 968c2eb08b5ab..568dd4affb337 100644 --- a/drivers/media/v4l2-core/v4l2-event.c +++ b/drivers/media/v4l2-core/v4l2-event.c @@ -115,14 +115,6 @@ static void __v4l2_event_queue_fh(struct v4l2_fh *fh, const struct v4l2_event *e if (sev == NULL) return; - /* - * If the event has been added to the fh->subscribed list, but its - * add op has not completed yet elems will be 0, treat this as - * not being subscribed. - */ - if (!sev->elems) - return; - /* Increase event sequence number on fh. */ fh->sequence++; @@ -208,6 +200,7 @@ int v4l2_event_subscribe(struct v4l2_fh *fh, struct v4l2_subscribed_event *sev, *found_ev; unsigned long flags; unsigned i; + int ret = 0; if (sub->type == V4L2_EVENT_ALL) return -EINVAL; @@ -226,31 +219,36 @@ int v4l2_event_subscribe(struct v4l2_fh *fh, sev->flags = sub->flags; sev->fh = fh; sev->ops = ops; + sev->elems = elems; + + mutex_lock(&fh->subscribe_lock); spin_lock_irqsave(&fh->vdev->fh_lock, flags); found_ev = v4l2_event_subscribed(fh, sub->type, sub->id); - if (!found_ev) - list_add(&sev->list, &fh->subscribed); spin_unlock_irqrestore(&fh->vdev->fh_lock, flags); if (found_ev) { + /* Already listening */ kvfree(sev); - return 0; /* Already listening */ + goto out_unlock; } if (sev->ops && sev->ops->add) { - int ret = sev->ops->add(sev, elems); + ret = sev->ops->add(sev, elems); if (ret) { - sev->ops = NULL; - v4l2_event_unsubscribe(fh, sub); - return ret; + kvfree(sev); + goto out_unlock; } } - /* Mark as ready for use */ - sev->elems = elems; + spin_lock_irqsave(&fh->vdev->fh_lock, flags); + list_add(&sev->list, &fh->subscribed); + spin_unlock_irqrestore(&fh->vdev->fh_lock, flags); - return 0; +out_unlock: + mutex_unlock(&fh->subscribe_lock); + + return ret; } EXPORT_SYMBOL_GPL(v4l2_event_subscribe); @@ -289,6 +287,8 @@ int v4l2_event_unsubscribe(struct v4l2_fh *fh, return 0; } + mutex_lock(&fh->subscribe_lock); + spin_lock_irqsave(&fh->vdev->fh_lock, flags); sev = v4l2_event_subscribed(fh, sub->type, sub->id); @@ -306,6 +306,8 @@ int v4l2_event_unsubscribe(struct v4l2_fh *fh, if (sev && sev->ops && sev->ops->del) sev->ops->del(sev); + mutex_unlock(&fh->subscribe_lock); + kvfree(sev); return 0; diff --git a/drivers/media/v4l2-core/v4l2-fh.c b/drivers/media/v4l2-core/v4l2-fh.c index 3895999bf8805..c91a7bd3ecfc7 100644 --- a/drivers/media/v4l2-core/v4l2-fh.c +++ b/drivers/media/v4l2-core/v4l2-fh.c @@ -45,6 +45,7 @@ void v4l2_fh_init(struct v4l2_fh *fh, struct video_device *vdev) INIT_LIST_HEAD(&fh->available); INIT_LIST_HEAD(&fh->subscribed); fh->sequence = -1; + mutex_init(&fh->subscribe_lock); } EXPORT_SYMBOL_GPL(v4l2_fh_init); @@ -90,6 +91,7 @@ void v4l2_fh_exit(struct v4l2_fh *fh) return; v4l_disable_media_source(fh->vdev); v4l2_event_unsubscribe_all(fh); + mutex_destroy(&fh->subscribe_lock); fh->vdev = NULL; } EXPORT_SYMBOL_GPL(v4l2_fh_exit); diff --git a/include/media/v4l2-fh.h b/include/media/v4l2-fh.h index 62633e7d2630c..0f22a5eda4cc5 100644 --- a/include/media/v4l2-fh.h +++ b/include/media/v4l2-fh.h @@ -37,10 +37,13 @@ struct v4l2_ctrl_handler; * @prio: priority of the file handler, as defined by &enum v4l2_priority * * @wait: event' s wait queue + * @subscribe_lock: serialise changes to the subscribed list; guarantee that + * the add and del event callbacks are orderly called * @subscribed: list of subscribed events * @available: list of events waiting to be dequeued * @navailable: number of available events at @available list * @sequence: event sequence number + * * @m2m_ctx: pointer to &struct v4l2_m2m_ctx */ struct v4l2_fh { @@ -51,6 +54,7 @@ struct v4l2_fh { /* Events */ wait_queue_head_t wait; + struct mutex subscribe_lock; struct list_head subscribed; struct list_head available; unsigned int navailable; -- GitLab From e6abbe80c8838e9c0bdb51835e6218008fa49386 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 3 Oct 2018 17:01:00 -0700 Subject: [PATCH 0714/2269] Linux 4.14.74 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 89f30ca964b67..cc0e65a8d7bf3 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 73 +SUBLEVEL = 74 EXTRAVERSION = NAME = Petit Gorille -- GitLab From 8c958cd74663aa1fc3751a28170f6ffdcf734615 Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Fri, 20 Jul 2018 16:11:40 -0700 Subject: [PATCH 0715/2269] ANDROID: sdcardfs: Change current->fs under lock Adjusted from previous version to add missing include bug: 111641492 Change-Id: I321d83f5d599efb3abdfaf2f3a4900ac512beca6 Reported-by: Jann Horn Signed-off-by: Daniel Rosenberg --- fs/sdcardfs/inode.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/fs/sdcardfs/inode.c b/fs/sdcardfs/inode.c index 7fc2f083aaad5..4dd681e0d59d9 100644 --- a/fs/sdcardfs/inode.c +++ b/fs/sdcardfs/inode.c @@ -21,6 +21,7 @@ #include "sdcardfs.h" #include #include +#include const struct cred *override_fsids(struct sdcardfs_sb_info *sbi, struct sdcardfs_inode_data *data) @@ -96,8 +97,11 @@ static int sdcardfs_create(struct inode *dir, struct dentry *dentry, err = -ENOMEM; goto out_unlock; } + copied_fs->umask = 0; + task_lock(current); current->fs = copied_fs; - current->fs->umask = 0; + task_unlock(current); + err = vfs_create2(lower_dentry_mnt, d_inode(lower_parent_dentry), lower_dentry, mode, want_excl); if (err) goto out; @@ -111,7 +115,9 @@ static int sdcardfs_create(struct inode *dir, struct dentry *dentry, fixup_lower_ownership(dentry, dentry->d_name.name); out: + task_lock(current); current->fs = saved_fs; + task_unlock(current); free_fs_struct(copied_fs); out_unlock: unlock_dir(lower_parent_dentry); @@ -249,8 +255,11 @@ static int sdcardfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode unlock_dir(lower_parent_dentry); goto out_unlock; } + copied_fs->umask = 0; + task_lock(current); current->fs = copied_fs; - current->fs->umask = 0; + task_unlock(current); + err = vfs_mkdir2(lower_mnt, d_inode(lower_parent_dentry), lower_dentry, mode); if (err) { @@ -318,7 +327,10 @@ static int sdcardfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode } } out: + task_lock(current); current->fs = saved_fs; + task_unlock(current); + free_fs_struct(copied_fs); out_unlock: sdcardfs_put_lower_path(dentry, &lower_path); -- GitLab From d9e61345652beaafc371092fd7a4532ef351b759 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 6 Jun 2018 13:18:31 +0100 Subject: [PATCH 0716/2269] drm/amd/pp: initialize result to before or'ing in data commit c4ff91dd40e2253ab6dd028011469c2c694e1e19 upstream. The current use of result is or'ing in values and checking for a non-zero result, however, result is not initialized to zero so it potentially contains garbage to start with. Fix this by initializing it to the first return from the call to vega10_program_didt_config_registers. Detected by cppcheck: "(error) Uninitialized variable: result" Fixes: 9b7b8154cdb8 ("drm/amd/powerplay: added didt support for vega10") Signed-off-by: Colin Ian King Acked-by: Huang Rui [Fix the subject as Colin's comment] Signed-off-by: Huang Rui Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/powerplay/hwmgr/vega10_powertune.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_powertune.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_powertune.c index e7fa67063cdcb..cb9e1cd456b8c 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_powertune.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_powertune.c @@ -1142,7 +1142,7 @@ static int vega10_enable_psm_gc_edc_config(struct pp_hwmgr *hwmgr) for (count = 0; count < num_se; count++) { data = GRBM_GFX_INDEX__INSTANCE_BROADCAST_WRITES_MASK | GRBM_GFX_INDEX__SH_BROADCAST_WRITES_MASK | ( count << GRBM_GFX_INDEX__SE_INDEX__SHIFT); cgs_write_register(hwmgr->device, reg, data); - result |= vega10_program_didt_config_registers(hwmgr, PSMSEEDCStallPatternConfig_Vega10, VEGA10_CONFIGREG_DIDT); + result = vega10_program_didt_config_registers(hwmgr, PSMSEEDCStallPatternConfig_Vega10, VEGA10_CONFIGREG_DIDT); result |= vega10_program_didt_config_registers(hwmgr, PSMSEEDCStallDelayConfig_Vega10, VEGA10_CONFIGREG_DIDT); result |= vega10_program_didt_config_registers(hwmgr, PSMSEEDCCtrlResetConfig_Vega10, VEGA10_CONFIGREG_DIDT); result |= vega10_program_didt_config_registers(hwmgr, PSMSEEDCCtrlConfig_Vega10, VEGA10_CONFIGREG_DIDT); -- GitLab From a17e2a72e714ca20c31e41827ab6088df896c190 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 17 Jul 2018 10:52:29 -0500 Subject: [PATCH 0717/2269] drm/amdgpu: add another ATPX quirk for TOPAZ commit b3fc2ab37e27f8d6588a4755382346ba2335a7c7 upstream. Needs ATPX rather than _PR3. Bug: https://bugzilla.kernel.org/show_bug.cgi?id=200517 Reviewed-by: Junwei Zhang Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c index 1ae5ae8c45a45..1a75a6b9ab2fb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c @@ -569,6 +569,7 @@ static const struct amdgpu_px_quirk amdgpu_px_quirk_list[] = { { 0x1002, 0x6900, 0x1002, 0x0124, AMDGPU_PX_QUIRK_FORCE_ATPX }, { 0x1002, 0x6900, 0x1028, 0x0812, AMDGPU_PX_QUIRK_FORCE_ATPX }, { 0x1002, 0x6900, 0x1028, 0x0813, AMDGPU_PX_QUIRK_FORCE_ATPX }, + { 0x1002, 0x6900, 0x1025, 0x125A, AMDGPU_PX_QUIRK_FORCE_ATPX }, { 0x1002, 0x67DF, 0x1028, 0x0774, AMDGPU_PX_QUIRK_FORCE_ATPX }, { 0, 0, 0, 0, 0 }, }; -- GitLab From cdb2d37d345d694fcf83291c1a5b5465f743fd73 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Sun, 26 Aug 2018 19:49:32 +0200 Subject: [PATCH 0718/2269] serial: mvebu-uart: Fix reporting of effective CSIZE to userspace commit e0bf2d4982fe7d9ddaf550dd023803ea286f47fc upstream. Apparently, this driver (or the hardware) does not support character length settings. It's apparently running in 8-bit mode, but it makes userspace believe it's in 5-bit mode. That makes tcsetattr with CS8 incorrectly fail, breaking e.g. getty from busybox, thus the login shell on ttyMVx. Fix by hard-wiring CS8 into c_cflag. Signed-off-by: Jan Kiszka Fixes: 30530791a7a0 ("serial: mvebu-uart: initial support for Armada-3700 serial port") Cc: stable # 4.6+ Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/mvebu-uart.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/tty/serial/mvebu-uart.c b/drivers/tty/serial/mvebu-uart.c index 45b57c294d13b..401c983ec5f36 100644 --- a/drivers/tty/serial/mvebu-uart.c +++ b/drivers/tty/serial/mvebu-uart.c @@ -327,8 +327,10 @@ static void mvebu_uart_set_termios(struct uart_port *port, if ((termios->c_cflag & CREAD) == 0) port->ignore_status_mask |= STAT_RX_RDY | STAT_BRK_ERR; - if (old) + if (old) { tty_termios_copy_hw(termios, old); + termios->c_cflag |= CS8; + } baud = uart_get_baud_rate(port, termios, old, 0, 460800); uart_update_timeout(port, termios->c_cflag, baud); -- GitLab From 3a738e7f734ce0308377fbed37683bf355f0c12f Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 8 Dec 2017 17:38:17 -0500 Subject: [PATCH 0719/2269] tools/power turbostat: fix possible sprintf buffer overflow commit 46c2797826cc6d1ae36fcbd966e76f9fa1907eef upstream. Signed-off-by: Len Brown Cc: Alakesh Haloi Signed-off-by: Greg Kroah-Hartman --- tools/power/x86/turbostat/turbostat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 1512086c8cb85..7a1b20ec52164 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -1485,7 +1485,7 @@ int get_mp(int cpu, struct msr_counter *mp, unsigned long long *counterp) if (get_msr(cpu, mp->msr_num, counterp)) return -1; } else { - char path[128]; + char path[128 + PATH_BYTES]; if (mp->flags & SYSFS_PERCPU) { sprintf(path, "/sys/devices/system/cpu/cpu%d/%s", -- GitLab From 4ae9a73be7ac5805a841a6da2bc792c046936c1c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Mon, 13 Aug 2018 14:16:25 +0200 Subject: [PATCH 0720/2269] mac80211: Run TXQ teardown code before de-registering interfaces MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 77cfaf52eca5cac30ed029507e0cab065f888995 ] The TXQ teardown code can reference the vif data structures that are stored in the netdev private memory area if there are still packets on the queue when it is being freed. Since the TXQ teardown code is run after the netdevs are freed, this can lead to a use-after-free. Fix this by moving the TXQ teardown code to earlier in ieee80211_unregister_hw(). Reported-by: Ben Greear Tested-by: Ben Greear Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/mac80211/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 8aa1f5b6a0514..cb5b22b613884 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -1171,6 +1171,7 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw) #if IS_ENABLED(CONFIG_IPV6) unregister_inet6addr_notifier(&local->ifa6_notifier); #endif + ieee80211_txq_teardown_flows(local); rtnl_lock(); @@ -1199,7 +1200,6 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw) skb_queue_purge(&local->skb_queue); skb_queue_purge(&local->skb_queue_unreliable); skb_queue_purge(&local->skb_queue_tdls_chsw); - ieee80211_txq_teardown_flows(local); destroy_workqueue(local->workqueue); wiphy_unregister(local->hw.wiphy); -- GitLab From 381538ae75cfdcbda1ea7e3735ddb0cd369be67f Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 15 Aug 2018 18:17:03 +0200 Subject: [PATCH 0721/2269] mac80211_hwsim: require at least one channel [ Upstream commit 484004339d4514fde425f6e8a9f6a6cc979bb0c3 ] Syzbot continues to try to create mac80211_hwsim radios, and manages to pass parameters that are later checked with WARN_ON in cfg80211 - catch another one in hwsim directly. Reported-by: syzbot+2a12f11c306afe871c1f@syzkaller.appspotmail.com Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/mac80211_hwsim.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index d686ba10fecc1..62fcf57e22999 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -3124,6 +3124,11 @@ static int hwsim_new_radio_nl(struct sk_buff *msg, struct genl_info *info) if (info->attrs[HWSIM_ATTR_CHANNELS]) param.channels = nla_get_u32(info->attrs[HWSIM_ATTR_CHANNELS]); + if (param.channels < 1) { + GENL_SET_ERR_MSG(info, "must have at least one channel"); + return -EINVAL; + } + if (param.channels > CFG80211_MAX_NUM_DIFFERENT_CHANNELS) { GENL_SET_ERR_MSG(info, "too many channels specified"); return -EINVAL; -- GitLab From 9e685bec07ae0348a895c777d593c3a1b1954745 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 20 Aug 2018 16:05:45 +1000 Subject: [PATCH 0722/2269] KVM: PPC: Book3S HV: Don't truncate HPTE index in xlate function [ Upstream commit 46dec40fb741f00f1864580130779aeeaf24fb3d ] This fixes a bug which causes guest virtual addresses to get translated to guest real addresses incorrectly when the guest is using the HPT MMU and has more than 256GB of RAM, or more specifically has a HPT larger than 2GB. This has showed up in testing as a failure of the host to emulate doorbell instructions correctly on POWER9 for HPT guests with more than 256GB of RAM. The bug is that the HPTE index in kvmppc_mmu_book3s_64_hv_xlate() is stored as an int, and in forming the HPTE address, the index gets shifted left 4 bits as an int before being signed-extended to 64 bits. The simple fix is to make the variable a long int, matching the return type of kvmppc_hv_find_lock_hpte(), which is what calculates the index. Fixes: 697d3899dcb4 ("KVM: PPC: Implement MMIO emulation support for Book3S HV guests") Signed-off-by: Paul Mackerras Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kvm/book3s_64_mmu_hv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index df9b53f40b1eb..7ac7e21b137e2 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -355,7 +355,7 @@ static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, unsigned long pp, key; unsigned long v, orig_v, gr; __be64 *hptep; - int index; + long int index; int virtmode = vcpu->arch.shregs.msr & (data ? MSR_DR : MSR_IR); /* Get SLB entry */ -- GitLab From 4e380c50cf12f07d01262b81f5c61249503d2ce3 Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Mon, 6 Aug 2018 18:12:37 +0800 Subject: [PATCH 0723/2269] btrfs: btrfs_shrink_device should call commit transaction at the end [ Upstream commit 801660b040d132f67fac6a95910ad307c5929b49 ] Test case btrfs/164 reports use-after-free: [ 6712.084324] general protection fault: 0000 [#1] PREEMPT SMP .. [ 6712.195423] btrfs_update_commit_device_size+0x75/0xf0 [btrfs] [ 6712.201424] btrfs_commit_transaction+0x57d/0xa90 [btrfs] [ 6712.206999] btrfs_rm_device+0x627/0x850 [btrfs] [ 6712.211800] btrfs_ioctl+0x2b03/0x3120 [btrfs] Reason for this is that btrfs_shrink_device adds the resized device to the fs_devices::resized_devices after it has called the last commit transaction. So the list fs_devices::resized_devices is not empty when btrfs_shrink_device returns. Now the parent function btrfs_rm_device calls: btrfs_close_bdev(device); call_rcu(&device->rcu, free_device_rcu); and then does the transactio ncommit. It goes through the fs_devices::resized_devices in btrfs_update_commit_device_size and leads to use-after-free. Fix this by making sure btrfs_shrink_device calls the last needed btrfs_commit_transaction before the return. This is consistent with what the grow counterpart does and this makes sure the on-disk state is persistent when the function returns. Reported-by: Lu Fengqi Tested-by: Lu Fengqi Signed-off-by: Anand Jain Reviewed-by: David Sterba [ update changelog ] Signed-off-by: David Sterba Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/volumes.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index a39b1f0b06066..a0947f4a3e87f 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -4517,7 +4517,12 @@ again: /* Now btrfs_update_device() will change the on-disk size. */ ret = btrfs_update_device(trans, device); - btrfs_end_transaction(trans); + if (ret < 0) { + btrfs_abort_transaction(trans, ret); + btrfs_end_transaction(trans); + } else { + ret = btrfs_commit_transaction(trans); + } done: btrfs_free_path(path); if (ret) { -- GitLab From 6054817c5e07702922603efc64c9568d60e3ee78 Mon Sep 17 00:00:00 2001 From: Varun Prakash Date: Sat, 11 Aug 2018 21:03:58 +0530 Subject: [PATCH 0724/2269] scsi: csiostor: add a check for NULL pointer after kmalloc() [ Upstream commit 89809b028b6f54187b7d81a0c69b35d394c52e62 ] Reported-by: Colin Ian King Signed-off-by: Varun Prakash Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/csiostor/csio_hw.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/scsi/csiostor/csio_hw.c b/drivers/scsi/csiostor/csio_hw.c index 5be0086142cac..ab30db8c36c6f 100644 --- a/drivers/scsi/csiostor/csio_hw.c +++ b/drivers/scsi/csiostor/csio_hw.c @@ -2010,8 +2010,8 @@ bye: } /* - * Returns -EINVAL if attempts to flash the firmware failed - * else returns 0, + * Returns -EINVAL if attempts to flash the firmware failed, + * -ENOMEM if memory allocation failed else returns 0, * if flashing was not attempted because the card had the * latest firmware ECANCELED is returned */ @@ -2039,6 +2039,13 @@ csio_hw_flash_fw(struct csio_hw *hw, int *reset) return -EINVAL; } + /* allocate memory to read the header of the firmware on the + * card + */ + card_fw = kmalloc(sizeof(*card_fw), GFP_KERNEL); + if (!card_fw) + return -ENOMEM; + if (csio_is_t5(pci_dev->device & CSIO_HW_CHIP_MASK)) fw_bin_file = FW_FNAME_T5; else @@ -2052,11 +2059,6 @@ csio_hw_flash_fw(struct csio_hw *hw, int *reset) fw_size = fw->size; } - /* allocate memory to read the header of the firmware on the - * card - */ - card_fw = kmalloc(sizeof(*card_fw), GFP_KERNEL); - /* upgrade FW logic */ ret = csio_hw_prep_fw(hw, fw_info, fw_data, fw_size, card_fw, hw->fw_state, reset); -- GitLab From 7c209ebc7f152d3440c00f5dda4dab06a9b4c39b Mon Sep 17 00:00:00 2001 From: Danek Duvall Date: Wed, 22 Aug 2018 16:01:04 -0700 Subject: [PATCH 0725/2269] mac80211: correct use of IEEE80211_VHT_CAP_RXSTBC_X [ Upstream commit 67d1ba8a6dc83d90cd58b89fa6cbf9ae35a0cf7f ] The mod mask for VHT capabilities intends to say that you can override the number of STBC receive streams, and it does, but only by accident. The IEEE80211_VHT_CAP_RXSTBC_X aren't bits to be set, but values (albeit left-shifted). ORing the bits together gets the right answer, but we should use the _MASK macro here instead. Signed-off-by: Danek Duvall Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/mac80211/main.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/net/mac80211/main.c b/net/mac80211/main.c index cb5b22b613884..9a3dd1467e24e 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -467,10 +467,7 @@ static const struct ieee80211_vht_cap mac80211_vht_capa_mod_mask = { cpu_to_le32(IEEE80211_VHT_CAP_RXLDPC | IEEE80211_VHT_CAP_SHORT_GI_80 | IEEE80211_VHT_CAP_SHORT_GI_160 | - IEEE80211_VHT_CAP_RXSTBC_1 | - IEEE80211_VHT_CAP_RXSTBC_2 | - IEEE80211_VHT_CAP_RXSTBC_3 | - IEEE80211_VHT_CAP_RXSTBC_4 | + IEEE80211_VHT_CAP_RXSTBC_MASK | IEEE80211_VHT_CAP_TXSTBC | IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE | IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE | -- GitLab From 0081e67083edc3acce6fd3c6d086e8f790077b70 Mon Sep 17 00:00:00 2001 From: Danek Duvall Date: Wed, 22 Aug 2018 16:01:05 -0700 Subject: [PATCH 0726/2269] mac80211_hwsim: correct use of IEEE80211_VHT_CAP_RXSTBC_X [ Upstream commit d7c863a2f65e48f442379f4ee1846d52e0c5d24d ] The mac80211_hwsim driver intends to say that it supports up to four STBC receive streams, but instead it ends up saying something undefined. The IEEE80211_VHT_CAP_RXSTBC_X macros aren't independent bits that can be ORed together, but values. In this case, _4 is the appropriate one to use. Signed-off-by: Danek Duvall Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/mac80211_hwsim.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 62fcf57e22999..aafa7aa18fbd7 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -2632,9 +2632,6 @@ static int mac80211_hwsim_new_radio(struct genl_info *info, IEEE80211_VHT_CAP_SHORT_GI_80 | IEEE80211_VHT_CAP_SHORT_GI_160 | IEEE80211_VHT_CAP_TXSTBC | - IEEE80211_VHT_CAP_RXSTBC_1 | - IEEE80211_VHT_CAP_RXSTBC_2 | - IEEE80211_VHT_CAP_RXSTBC_3 | IEEE80211_VHT_CAP_RXSTBC_4 | IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_MASK; sband->vht_cap.vht_mcs.rx_mcs_map = -- GitLab From 34bec4daf88c2c5ca6294e844318c190ea37a1a4 Mon Sep 17 00:00:00 2001 From: Michael Hennerich Date: Mon, 13 Aug 2018 15:57:44 +0200 Subject: [PATCH 0727/2269] gpio: adp5588: Fix sleep-in-atomic-context bug [ Upstream commit 6537886cdc9a637711fd6da980dbb87c2c87c9aa ] This fixes: [BUG] gpio: gpio-adp5588: A possible sleep-in-atomic-context bug in adp5588_gpio_write() [BUG] gpio: gpio-adp5588: A possible sleep-in-atomic-context bug in adp5588_gpio_direction_input() Reported-by: Jia-Ju Bai Signed-off-by: Michael Hennerich Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpio/gpio-adp5588.c | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/drivers/gpio/gpio-adp5588.c b/drivers/gpio/gpio-adp5588.c index e717f8dc39667..202d367a21e4b 100644 --- a/drivers/gpio/gpio-adp5588.c +++ b/drivers/gpio/gpio-adp5588.c @@ -41,6 +41,8 @@ struct adp5588_gpio { uint8_t int_en[3]; uint8_t irq_mask[3]; uint8_t irq_stat[3]; + uint8_t int_input_en[3]; + uint8_t int_lvl_cached[3]; }; static int adp5588_gpio_read(struct i2c_client *client, u8 reg) @@ -173,12 +175,28 @@ static void adp5588_irq_bus_sync_unlock(struct irq_data *d) struct adp5588_gpio *dev = irq_data_get_irq_chip_data(d); int i; - for (i = 0; i <= ADP5588_BANK(ADP5588_MAXGPIO); i++) + for (i = 0; i <= ADP5588_BANK(ADP5588_MAXGPIO); i++) { + if (dev->int_input_en[i]) { + mutex_lock(&dev->lock); + dev->dir[i] &= ~dev->int_input_en[i]; + dev->int_input_en[i] = 0; + adp5588_gpio_write(dev->client, GPIO_DIR1 + i, + dev->dir[i]); + mutex_unlock(&dev->lock); + } + + if (dev->int_lvl_cached[i] != dev->int_lvl[i]) { + dev->int_lvl_cached[i] = dev->int_lvl[i]; + adp5588_gpio_write(dev->client, GPIO_INT_LVL1 + i, + dev->int_lvl[i]); + } + if (dev->int_en[i] ^ dev->irq_mask[i]) { dev->int_en[i] = dev->irq_mask[i]; adp5588_gpio_write(dev->client, GPIO_INT_EN1 + i, dev->int_en[i]); } + } mutex_unlock(&dev->irq_lock); } @@ -221,9 +239,7 @@ static int adp5588_irq_set_type(struct irq_data *d, unsigned int type) else return -EINVAL; - adp5588_gpio_direction_input(&dev->gpio_chip, gpio); - adp5588_gpio_write(dev->client, GPIO_INT_LVL1 + bank, - dev->int_lvl[bank]); + dev->int_input_en[bank] |= bit; return 0; } -- GitLab From 79448960e3d701f498accd3aa493c5e0851c639c Mon Sep 17 00:00:00 2001 From: Yuan-Chi Pang Date: Wed, 29 Aug 2018 09:30:08 +0800 Subject: [PATCH 0728/2269] mac80211: mesh: fix HWMP sequence numbering to follow standard [ Upstream commit 1f631c3201fe5491808df143d8fcba81b3197ffd ] IEEE 802.11-2016 14.10.8.3 HWMP sequence numbering says: If it is a target mesh STA, it shall update its own HWMP SN to maximum (current HWMP SN, target HWMP SN in the PREQ element) + 1 immediately before it generates a PREP element in response to a PREQ element. Signed-off-by: Yuan-Chi Pang Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/mac80211/mesh_hwmp.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index d6d3f316de4cf..055ea36ff27b3 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -572,6 +572,10 @@ static void hwmp_preq_frame_process(struct ieee80211_sub_if_data *sdata, forward = false; reply = true; target_metric = 0; + + if (SN_GT(target_sn, ifmsh->sn)) + ifmsh->sn = target_sn; + if (time_after(jiffies, ifmsh->last_sn_update + net_traversal_jiffies(sdata)) || time_before(jiffies, ifmsh->last_sn_update)) { -- GitLab From 9a5d353908db44d639108dee9fe05c0a8803b572 Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Wed, 29 Aug 2018 08:57:02 +0200 Subject: [PATCH 0729/2269] mac80211: avoid kernel panic when building AMSDU from non-linear SKB [ Upstream commit 166ac9d55b0ab70b644e429be1f217fe8393cbd7 ] When building building AMSDU from non-linear SKB, we hit a kernel panic when trying to push the padding to the tail. Instead, put the padding at the head of the next subframe. This also fixes the A-MSDU subframes to not have the padding accounted in the length field and not have pad at all for the last subframe, both required by the spec. Fixes: 6e0456b54545 ("mac80211: add A-MSDU tx support") Signed-off-by: Sara Sharon Reviewed-by: Lorenzo Bianconi Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/mac80211/tx.c | 38 +++++++++++++++++++++----------------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index ccb65f18df5d7..2064234500cfd 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -3022,27 +3022,18 @@ void ieee80211_clear_fast_xmit(struct sta_info *sta) } static bool ieee80211_amsdu_realloc_pad(struct ieee80211_local *local, - struct sk_buff *skb, int headroom, - int *subframe_len) + struct sk_buff *skb, int headroom) { - int amsdu_len = *subframe_len + sizeof(struct ethhdr); - int padding = (4 - amsdu_len) & 3; - - if (skb_headroom(skb) < headroom || skb_tailroom(skb) < padding) { + if (skb_headroom(skb) < headroom) { I802_DEBUG_INC(local->tx_expand_skb_head); - if (pskb_expand_head(skb, headroom, padding, GFP_ATOMIC)) { + if (pskb_expand_head(skb, headroom, 0, GFP_ATOMIC)) { wiphy_debug(local->hw.wiphy, "failed to reallocate TX buffer\n"); return false; } } - if (padding) { - *subframe_len += padding; - skb_put_zero(skb, padding); - } - return true; } @@ -3066,8 +3057,7 @@ static bool ieee80211_amsdu_prepare_head(struct ieee80211_sub_if_data *sdata, if (info->control.flags & IEEE80211_TX_CTRL_AMSDU) return true; - if (!ieee80211_amsdu_realloc_pad(local, skb, sizeof(*amsdu_hdr), - &subframe_len)) + if (!ieee80211_amsdu_realloc_pad(local, skb, sizeof(*amsdu_hdr))) return false; data = skb_push(skb, sizeof(*amsdu_hdr)); @@ -3133,7 +3123,8 @@ static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata, void *data; bool ret = false; unsigned int orig_len; - int n = 1, nfrags; + int n = 1, nfrags, pad = 0; + u16 hdrlen; if (!ieee80211_hw_check(&local->hw, TX_AMSDU)) return false; @@ -3184,8 +3175,19 @@ static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata, if (max_frags && nfrags > max_frags) goto out; - if (!ieee80211_amsdu_realloc_pad(local, skb, sizeof(rfc1042_header) + 2, - &subframe_len)) + /* + * Pad out the previous subframe to a multiple of 4 by adding the + * padding to the next one, that's being added. Note that head->len + * is the length of the full A-MSDU, but that works since each time + * we add a new subframe we pad out the previous one to a multiple + * of 4 and thus it no longer matters in the next round. + */ + hdrlen = fast_tx->hdr_len - sizeof(rfc1042_header); + if ((head->len - hdrlen) & 3) + pad = 4 - ((head->len - hdrlen) & 3); + + if (!ieee80211_amsdu_realloc_pad(local, skb, sizeof(rfc1042_header) + + 2 + pad)) goto out; ret = true; @@ -3197,6 +3199,8 @@ static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata, memcpy(data, &len, 2); memcpy(data + 2, rfc1042_header, sizeof(rfc1042_header)); + memset(skb_push(skb, pad), 0, pad); + head->len += skb->len; head->data_len += skb->len; *frag_tail = skb; -- GitLab From 73bfec0a6bde1765d7ad377d390db7e13e714ba9 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 13 Aug 2018 19:00:27 +0300 Subject: [PATCH 0730/2269] gpiolib: acpi: Switch to cansleep version of GPIO library call [ Upstream commit 993b9bc5c47fda86f8ab4e53d68c6fea5ff2764a ] The commit ca876c7483b6 ("gpiolib-acpi: make sure we trigger edge events at least once on boot") added a initial value check for pin which is about to be locked as IRQ. Unfortunately, not all GPIO drivers can do that atomically. Thus, switch to cansleep version of the call. Otherwise we have a warning: ... WARNING: CPU: 2 PID: 1408 at drivers/gpio/gpiolib.c:2883 gpiod_get_value+0x46/0x50 ... RIP: 0010:gpiod_get_value+0x46/0x50 ... The change tested on Intel Broxton with Whiskey Cove PMIC GPIO controller. Fixes: ca876c7483b6 ("gpiolib-acpi: make sure we trigger edge events at least once on boot") Signed-off-by: Andy Shevchenko Cc: Hans de Goede Cc: Benjamin Tissoires Acked-by: Mika Westerberg Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpio/gpiolib-acpi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c index 70b3c556f6cf2..582dde355bded 100644 --- a/drivers/gpio/gpiolib-acpi.c +++ b/drivers/gpio/gpiolib-acpi.c @@ -247,7 +247,7 @@ static acpi_status acpi_gpiochip_request_interrupt(struct acpi_resource *ares, gpiod_direction_input(desc); - value = gpiod_get_value(desc); + value = gpiod_get_value_cansleep(desc); ret = gpiochip_lock_as_irq(chip, pin); if (ret) { -- GitLab From bdd29365a74cda76fd60445af113ea8e5299d409 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 14 Aug 2018 16:07:03 +0200 Subject: [PATCH 0731/2269] gpiolib-acpi: Register GpioInt ACPI event handlers from a late_initcall [ Upstream commit 78d3a92edbfb02e8cb83173cad84c3f2d5e1f070 ] GpioInt ACPI event handlers may see there IRQ triggered immediately after requesting the IRQ (esp. level triggered ones). This means that they may run before any other (builtin) drivers have had a chance to register their OpRegion handlers, leading to errors like this: [ 1.133274] ACPI Error: No handler for Region [PMOP] ((____ptrval____)) [UserDefinedRegion] (20180531/evregion-132) [ 1.133286] ACPI Error: Region UserDefinedRegion (ID=141) has no handler (20180531/exfldio-265) [ 1.133297] ACPI Error: Method parse/execution failed \_SB.GPO2._L01, AE_NOT_EXIST (20180531/psparse-516) We already defer the manual initial trigger of edge triggered interrupts by running it from a late_initcall handler, this commit replaces this with deferring the entire acpi_gpiochip_request_interrupts() call till then, fixing the problem of some OpRegions not being registered yet. Note that this removes the need to have a list of edge triggered handlers which need to run, since the entire acpi_gpiochip_request_interrupts() call is now delayed, acpi_gpiochip_request_interrupt() can call these directly now. Acked-by: Mika Westerberg Signed-off-by: Hans de Goede Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpio/gpiolib-acpi.c | 84 +++++++++++++++++++++---------------- 1 file changed, 49 insertions(+), 35 deletions(-) diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c index 582dde355bded..33d4bd505b5b8 100644 --- a/drivers/gpio/gpiolib-acpi.c +++ b/drivers/gpio/gpiolib-acpi.c @@ -25,7 +25,6 @@ struct acpi_gpio_event { struct list_head node; - struct list_head initial_sync_list; acpi_handle handle; unsigned int pin; unsigned int irq; @@ -49,10 +48,19 @@ struct acpi_gpio_chip { struct mutex conn_lock; struct gpio_chip *chip; struct list_head events; + struct list_head deferred_req_irqs_list_entry; }; -static LIST_HEAD(acpi_gpio_initial_sync_list); -static DEFINE_MUTEX(acpi_gpio_initial_sync_list_lock); +/* + * For gpiochips which call acpi_gpiochip_request_interrupts() before late_init + * (so builtin drivers) we register the ACPI GpioInt event handlers from a + * late_initcall_sync handler, so that other builtin drivers can register their + * OpRegions before the event handlers can run. This list contains gpiochips + * for which the acpi_gpiochip_request_interrupts() has been deferred. + */ +static DEFINE_MUTEX(acpi_gpio_deferred_req_irqs_lock); +static LIST_HEAD(acpi_gpio_deferred_req_irqs_list); +static bool acpi_gpio_deferred_req_irqs_done; static int acpi_gpiochip_find(struct gpio_chip *gc, void *data) { @@ -146,21 +154,6 @@ static struct gpio_desc *acpi_get_gpiod(char *path, int pin) return gpiochip_get_desc(chip, offset); } -static void acpi_gpio_add_to_initial_sync_list(struct acpi_gpio_event *event) -{ - mutex_lock(&acpi_gpio_initial_sync_list_lock); - list_add(&event->initial_sync_list, &acpi_gpio_initial_sync_list); - mutex_unlock(&acpi_gpio_initial_sync_list_lock); -} - -static void acpi_gpio_del_from_initial_sync_list(struct acpi_gpio_event *event) -{ - mutex_lock(&acpi_gpio_initial_sync_list_lock); - if (!list_empty(&event->initial_sync_list)) - list_del_init(&event->initial_sync_list); - mutex_unlock(&acpi_gpio_initial_sync_list_lock); -} - static irqreturn_t acpi_gpio_irq_handler(int irq, void *data) { struct acpi_gpio_event *event = data; @@ -290,7 +283,6 @@ static acpi_status acpi_gpiochip_request_interrupt(struct acpi_resource *ares, event->irq = irq; event->pin = pin; event->desc = desc; - INIT_LIST_HEAD(&event->initial_sync_list); ret = request_threaded_irq(event->irq, NULL, handler, irqflags, "ACPI:Event", event); @@ -312,10 +304,9 @@ static acpi_status acpi_gpiochip_request_interrupt(struct acpi_resource *ares, * may refer to OperationRegions from other (builtin) drivers which * may be probed after us. */ - if (handler == acpi_gpio_irq_handler && - (((irqflags & IRQF_TRIGGER_RISING) && value == 1) || - ((irqflags & IRQF_TRIGGER_FALLING) && value == 0))) - acpi_gpio_add_to_initial_sync_list(event); + if (((irqflags & IRQF_TRIGGER_RISING) && value == 1) || + ((irqflags & IRQF_TRIGGER_FALLING) && value == 0)) + handler(event->irq, event); return AE_OK; @@ -344,6 +335,7 @@ void acpi_gpiochip_request_interrupts(struct gpio_chip *chip) struct acpi_gpio_chip *acpi_gpio; acpi_handle handle; acpi_status status; + bool defer; if (!chip->parent || !chip->to_irq) return; @@ -356,6 +348,16 @@ void acpi_gpiochip_request_interrupts(struct gpio_chip *chip) if (ACPI_FAILURE(status)) return; + mutex_lock(&acpi_gpio_deferred_req_irqs_lock); + defer = !acpi_gpio_deferred_req_irqs_done; + if (defer) + list_add(&acpi_gpio->deferred_req_irqs_list_entry, + &acpi_gpio_deferred_req_irqs_list); + mutex_unlock(&acpi_gpio_deferred_req_irqs_lock); + + if (defer) + return; + acpi_walk_resources(handle, "_AEI", acpi_gpiochip_request_interrupt, acpi_gpio); } @@ -386,11 +388,14 @@ void acpi_gpiochip_free_interrupts(struct gpio_chip *chip) if (ACPI_FAILURE(status)) return; + mutex_lock(&acpi_gpio_deferred_req_irqs_lock); + if (!list_empty(&acpi_gpio->deferred_req_irqs_list_entry)) + list_del_init(&acpi_gpio->deferred_req_irqs_list_entry); + mutex_unlock(&acpi_gpio_deferred_req_irqs_lock); + list_for_each_entry_safe_reverse(event, ep, &acpi_gpio->events, node) { struct gpio_desc *desc; - acpi_gpio_del_from_initial_sync_list(event); - if (irqd_is_wakeup_set(irq_get_irq_data(event->irq))) disable_irq_wake(event->irq); @@ -1101,6 +1106,7 @@ void acpi_gpiochip_add(struct gpio_chip *chip) acpi_gpio->chip = chip; INIT_LIST_HEAD(&acpi_gpio->events); + INIT_LIST_HEAD(&acpi_gpio->deferred_req_irqs_list_entry); status = acpi_attach_data(handle, acpi_gpio_chip_dh, acpi_gpio); if (ACPI_FAILURE(status)) { @@ -1247,20 +1253,28 @@ bool acpi_can_fallback_to_crs(struct acpi_device *adev, const char *con_id) return con_id == NULL; } -/* Sync the initial state of handlers after all builtin drivers have probed */ -static int acpi_gpio_initial_sync(void) +/* Run deferred acpi_gpiochip_request_interrupts() */ +static int acpi_gpio_handle_deferred_request_interrupts(void) { - struct acpi_gpio_event *event, *ep; + struct acpi_gpio_chip *acpi_gpio, *tmp; + + mutex_lock(&acpi_gpio_deferred_req_irqs_lock); + list_for_each_entry_safe(acpi_gpio, tmp, + &acpi_gpio_deferred_req_irqs_list, + deferred_req_irqs_list_entry) { + acpi_handle handle; - mutex_lock(&acpi_gpio_initial_sync_list_lock); - list_for_each_entry_safe(event, ep, &acpi_gpio_initial_sync_list, - initial_sync_list) { - acpi_evaluate_object(event->handle, NULL, NULL, NULL); - list_del_init(&event->initial_sync_list); + handle = ACPI_HANDLE(acpi_gpio->chip->parent); + acpi_walk_resources(handle, "_AEI", + acpi_gpiochip_request_interrupt, acpi_gpio); + + list_del_init(&acpi_gpio->deferred_req_irqs_list_entry); } - mutex_unlock(&acpi_gpio_initial_sync_list_lock); + + acpi_gpio_deferred_req_irqs_done = true; + mutex_unlock(&acpi_gpio_deferred_req_irqs_lock); return 0; } /* We must use _sync so that this runs after the first deferred_probe run */ -late_initcall_sync(acpi_gpio_initial_sync); +late_initcall_sync(acpi_gpio_handle_deferred_request_interrupts); -- GitLab From 7fd11a1ad542ab719f18e6480e4de4f7aad711eb Mon Sep 17 00:00:00 2001 From: Peng Li Date: Mon, 27 Aug 2018 09:59:29 +0800 Subject: [PATCH 0732/2269] net: hns: add the code for cleaning pkt in chip [ Upstream commit 31fabbee8f5c658c3fa1603c66e9e4f51ea8c2c6 ] If there are packets in hardware when changing the speed or duplex, it may cause hardware hang up. This patch adds the code for waiting chip to clean the all pkts(TX & RX) in chip when the driver uses the function named "adjust link". This patch cleans the pkts as follows: 1) close rx of chip, close tx of protocol stack. 2) wait rcb, ppe, mac to clean. 3) adjust link 4) open rx of chip, open tx of protocol stack. Signed-off-by: Peng Li Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/hisilicon/hns/hnae.h | 2 + .../net/ethernet/hisilicon/hns/hns_ae_adapt.c | 67 ++++++++++++++++++- .../ethernet/hisilicon/hns/hns_dsaf_gmac.c | 36 ++++++++++ .../net/ethernet/hisilicon/hns/hns_dsaf_mac.c | 44 ++++++++++++ .../net/ethernet/hisilicon/hns/hns_dsaf_mac.h | 8 +++ .../ethernet/hisilicon/hns/hns_dsaf_main.c | 29 ++++++++ .../ethernet/hisilicon/hns/hns_dsaf_main.h | 3 + .../net/ethernet/hisilicon/hns/hns_dsaf_ppe.c | 23 +++++++ .../net/ethernet/hisilicon/hns/hns_dsaf_ppe.h | 1 + .../net/ethernet/hisilicon/hns/hns_dsaf_rcb.c | 23 +++++++ .../net/ethernet/hisilicon/hns/hns_dsaf_rcb.h | 1 + .../net/ethernet/hisilicon/hns/hns_dsaf_reg.h | 1 + drivers/net/ethernet/hisilicon/hns/hns_enet.c | 21 +++++- 13 files changed, 255 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns/hnae.h b/drivers/net/ethernet/hisilicon/hns/hnae.h index cad52bd331f7b..08a750fb60c49 100644 --- a/drivers/net/ethernet/hisilicon/hns/hnae.h +++ b/drivers/net/ethernet/hisilicon/hns/hnae.h @@ -486,6 +486,8 @@ struct hnae_ae_ops { u8 *auto_neg, u16 *speed, u8 *duplex); void (*toggle_ring_irq)(struct hnae_ring *ring, u32 val); void (*adjust_link)(struct hnae_handle *handle, int speed, int duplex); + bool (*need_adjust_link)(struct hnae_handle *handle, + int speed, int duplex); int (*set_loopback)(struct hnae_handle *handle, enum hnae_loop loop_mode, int en); void (*get_ring_bdnum_limit)(struct hnae_queue *queue, diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c b/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c index bd68379d2beab..bf930ab3c2bde 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c @@ -155,6 +155,41 @@ static void hns_ae_put_handle(struct hnae_handle *handle) hns_ae_get_ring_pair(handle->qs[i])->used_by_vf = 0; } +static int hns_ae_wait_flow_down(struct hnae_handle *handle) +{ + struct dsaf_device *dsaf_dev; + struct hns_ppe_cb *ppe_cb; + struct hnae_vf_cb *vf_cb; + int ret; + int i; + + for (i = 0; i < handle->q_num; i++) { + ret = hns_rcb_wait_tx_ring_clean(handle->qs[i]); + if (ret) + return ret; + } + + ppe_cb = hns_get_ppe_cb(handle); + ret = hns_ppe_wait_tx_fifo_clean(ppe_cb); + if (ret) + return ret; + + dsaf_dev = hns_ae_get_dsaf_dev(handle->dev); + if (!dsaf_dev) + return -EINVAL; + ret = hns_dsaf_wait_pkt_clean(dsaf_dev, handle->dport_id); + if (ret) + return ret; + + vf_cb = hns_ae_get_vf_cb(handle); + ret = hns_mac_wait_fifo_clean(vf_cb->mac_cb); + if (ret) + return ret; + + mdelay(10); + return 0; +} + static void hns_ae_ring_enable_all(struct hnae_handle *handle, int val) { int q_num = handle->q_num; @@ -399,12 +434,41 @@ static int hns_ae_get_mac_info(struct hnae_handle *handle, return hns_mac_get_port_info(mac_cb, auto_neg, speed, duplex); } +static bool hns_ae_need_adjust_link(struct hnae_handle *handle, int speed, + int duplex) +{ + struct hns_mac_cb *mac_cb = hns_get_mac_cb(handle); + + return hns_mac_need_adjust_link(mac_cb, speed, duplex); +} + static void hns_ae_adjust_link(struct hnae_handle *handle, int speed, int duplex) { struct hns_mac_cb *mac_cb = hns_get_mac_cb(handle); - hns_mac_adjust_link(mac_cb, speed, duplex); + switch (mac_cb->dsaf_dev->dsaf_ver) { + case AE_VERSION_1: + hns_mac_adjust_link(mac_cb, speed, duplex); + break; + + case AE_VERSION_2: + /* chip need to clear all pkt inside */ + hns_mac_disable(mac_cb, MAC_COMM_MODE_RX); + if (hns_ae_wait_flow_down(handle)) { + hns_mac_enable(mac_cb, MAC_COMM_MODE_RX); + break; + } + + hns_mac_adjust_link(mac_cb, speed, duplex); + hns_mac_enable(mac_cb, MAC_COMM_MODE_RX); + break; + + default: + break; + } + + return; } static void hns_ae_get_ring_bdnum_limit(struct hnae_queue *queue, @@ -902,6 +966,7 @@ static struct hnae_ae_ops hns_dsaf_ops = { .get_status = hns_ae_get_link_status, .get_info = hns_ae_get_mac_info, .adjust_link = hns_ae_adjust_link, + .need_adjust_link = hns_ae_need_adjust_link, .set_loopback = hns_ae_config_loopback, .get_ring_bdnum_limit = hns_ae_get_ring_bdnum_limit, .get_pauseparam = hns_ae_get_pauseparam, diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c index 74bd260ca02a8..8c7bc5cf193c3 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c @@ -257,6 +257,16 @@ static void hns_gmac_get_pausefrm_cfg(void *mac_drv, u32 *rx_pause_en, *tx_pause_en = dsaf_get_bit(pause_en, GMAC_PAUSE_EN_TX_FDFC_B); } +static bool hns_gmac_need_adjust_link(void *mac_drv, enum mac_speed speed, + int duplex) +{ + struct mac_driver *drv = (struct mac_driver *)mac_drv; + struct hns_mac_cb *mac_cb = drv->mac_cb; + + return (mac_cb->speed != speed) || + (mac_cb->half_duplex == duplex); +} + static int hns_gmac_adjust_link(void *mac_drv, enum mac_speed speed, u32 full_duplex) { @@ -309,6 +319,30 @@ static void hns_gmac_set_promisc(void *mac_drv, u8 en) hns_gmac_set_uc_match(mac_drv, en); } +int hns_gmac_wait_fifo_clean(void *mac_drv) +{ + struct mac_driver *drv = (struct mac_driver *)mac_drv; + int wait_cnt; + u32 val; + + wait_cnt = 0; + while (wait_cnt++ < HNS_MAX_WAIT_CNT) { + val = dsaf_read_dev(drv, GMAC_FIFO_STATE_REG); + /* bit5~bit0 is not send complete pkts */ + if ((val & 0x3f) == 0) + break; + usleep_range(100, 200); + } + + if (wait_cnt >= HNS_MAX_WAIT_CNT) { + dev_err(drv->dev, + "hns ge %d fifo was not idle.\n", drv->mac_id); + return -EBUSY; + } + + return 0; +} + static void hns_gmac_init(void *mac_drv) { u32 port; @@ -690,6 +724,7 @@ void *hns_gmac_config(struct hns_mac_cb *mac_cb, struct mac_params *mac_param) mac_drv->mac_disable = hns_gmac_disable; mac_drv->mac_free = hns_gmac_free; mac_drv->adjust_link = hns_gmac_adjust_link; + mac_drv->need_adjust_link = hns_gmac_need_adjust_link; mac_drv->set_tx_auto_pause_frames = hns_gmac_set_tx_auto_pause_frames; mac_drv->config_max_frame_length = hns_gmac_config_max_frame_length; mac_drv->mac_pausefrm_cfg = hns_gmac_pause_frm_cfg; @@ -717,6 +752,7 @@ void *hns_gmac_config(struct hns_mac_cb *mac_cb, struct mac_params *mac_param) mac_drv->get_strings = hns_gmac_get_strings; mac_drv->update_stats = hns_gmac_update_stats; mac_drv->set_promiscuous = hns_gmac_set_promisc; + mac_drv->wait_fifo_clean = hns_gmac_wait_fifo_clean; return (void *)mac_drv; } diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c index 8b5cdf4908503..5a8dbd72fe45c 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c @@ -114,6 +114,26 @@ int hns_mac_get_port_info(struct hns_mac_cb *mac_cb, return 0; } +/** + *hns_mac_is_adjust_link - check is need change mac speed and duplex register + *@mac_cb: mac device + *@speed: phy device speed + *@duplex:phy device duplex + * + */ +bool hns_mac_need_adjust_link(struct hns_mac_cb *mac_cb, int speed, int duplex) +{ + struct mac_driver *mac_ctrl_drv; + + mac_ctrl_drv = (struct mac_driver *)(mac_cb->priv.mac); + + if (mac_ctrl_drv->need_adjust_link) + return mac_ctrl_drv->need_adjust_link(mac_ctrl_drv, + (enum mac_speed)speed, duplex); + else + return true; +} + void hns_mac_adjust_link(struct hns_mac_cb *mac_cb, int speed, int duplex) { int ret; @@ -432,6 +452,16 @@ int hns_mac_vm_config_bc_en(struct hns_mac_cb *mac_cb, u32 vmid, bool enable) return 0; } +int hns_mac_wait_fifo_clean(struct hns_mac_cb *mac_cb) +{ + struct mac_driver *drv = hns_mac_get_drv(mac_cb); + + if (drv->wait_fifo_clean) + return drv->wait_fifo_clean(drv); + + return 0; +} + void hns_mac_reset(struct hns_mac_cb *mac_cb) { struct mac_driver *drv = hns_mac_get_drv(mac_cb); @@ -1001,6 +1031,20 @@ static int hns_mac_get_max_port_num(struct dsaf_device *dsaf_dev) return DSAF_MAX_PORT_NUM; } +void hns_mac_enable(struct hns_mac_cb *mac_cb, enum mac_commom_mode mode) +{ + struct mac_driver *mac_ctrl_drv = hns_mac_get_drv(mac_cb); + + mac_ctrl_drv->mac_enable(mac_cb->priv.mac, mode); +} + +void hns_mac_disable(struct hns_mac_cb *mac_cb, enum mac_commom_mode mode) +{ + struct mac_driver *mac_ctrl_drv = hns_mac_get_drv(mac_cb); + + mac_ctrl_drv->mac_disable(mac_cb->priv.mac, mode); +} + /** * hns_mac_init - init mac * @dsaf_dev: dsa fabric device struct pointer diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h index bbc0a98e7ca32..fbc75341bef76 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h @@ -356,6 +356,9 @@ struct mac_driver { /*adjust mac mode of port,include speed and duplex*/ int (*adjust_link)(void *mac_drv, enum mac_speed speed, u32 full_duplex); + /* need adjust link */ + bool (*need_adjust_link)(void *mac_drv, enum mac_speed speed, + int duplex); /* config autoegotaite mode of port*/ void (*set_an_mode)(void *mac_drv, u8 enable); /* config loopbank mode */ @@ -394,6 +397,7 @@ struct mac_driver { void (*get_info)(void *mac_drv, struct mac_info *mac_info); void (*update_stats)(void *mac_drv); + int (*wait_fifo_clean)(void *mac_drv); enum mac_mode mac_mode; u8 mac_id; @@ -427,6 +431,7 @@ void *hns_xgmac_config(struct hns_mac_cb *mac_cb, int hns_mac_init(struct dsaf_device *dsaf_dev); void mac_adjust_link(struct net_device *net_dev); +bool hns_mac_need_adjust_link(struct hns_mac_cb *mac_cb, int speed, int duplex); void hns_mac_get_link_status(struct hns_mac_cb *mac_cb, u32 *link_status); int hns_mac_change_vf_addr(struct hns_mac_cb *mac_cb, u32 vmid, char *addr); int hns_mac_set_multi(struct hns_mac_cb *mac_cb, @@ -463,5 +468,8 @@ int hns_mac_add_uc_addr(struct hns_mac_cb *mac_cb, u8 vf_id, int hns_mac_rm_uc_addr(struct hns_mac_cb *mac_cb, u8 vf_id, const unsigned char *addr); int hns_mac_clr_multicast(struct hns_mac_cb *mac_cb, int vfn); +void hns_mac_enable(struct hns_mac_cb *mac_cb, enum mac_commom_mode mode); +void hns_mac_disable(struct hns_mac_cb *mac_cb, enum mac_commom_mode mode); +int hns_mac_wait_fifo_clean(struct hns_mac_cb *mac_cb); #endif /* _HNS_DSAF_MAC_H */ diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c index e0bc79ea3d880..1f056a6b167e7 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c @@ -2720,6 +2720,35 @@ void hns_dsaf_set_promisc_tcam(struct dsaf_device *dsaf_dev, soft_mac_entry->index = enable ? entry_index : DSAF_INVALID_ENTRY_IDX; } +int hns_dsaf_wait_pkt_clean(struct dsaf_device *dsaf_dev, int port) +{ + u32 val, val_tmp; + int wait_cnt; + + if (port >= DSAF_SERVICE_NW_NUM) + return 0; + + wait_cnt = 0; + while (wait_cnt++ < HNS_MAX_WAIT_CNT) { + val = dsaf_read_dev(dsaf_dev, DSAF_VOQ_IN_PKT_NUM_0_REG + + (port + DSAF_XGE_NUM) * 0x40); + val_tmp = dsaf_read_dev(dsaf_dev, DSAF_VOQ_OUT_PKT_NUM_0_REG + + (port + DSAF_XGE_NUM) * 0x40); + if (val == val_tmp) + break; + + usleep_range(100, 200); + } + + if (wait_cnt >= HNS_MAX_WAIT_CNT) { + dev_err(dsaf_dev->dev, "hns dsaf clean wait timeout(%u - %u).\n", + val, val_tmp); + return -EBUSY; + } + + return 0; +} + /** * dsaf_probe - probo dsaf dev * @pdev: dasf platform device diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h index 4507e8222683c..0e1cd99831a60 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h @@ -44,6 +44,8 @@ struct hns_mac_cb; #define DSAF_ROCE_CREDIT_CHN 8 #define DSAF_ROCE_CHAN_MODE 3 +#define HNS_MAX_WAIT_CNT 10000 + enum dsaf_roce_port_mode { DSAF_ROCE_6PORT_MODE, DSAF_ROCE_4PORT_MODE, @@ -463,5 +465,6 @@ int hns_dsaf_rm_mac_addr( int hns_dsaf_clr_mac_mc_port(struct dsaf_device *dsaf_dev, u8 mac_id, u8 port_num); +int hns_dsaf_wait_pkt_clean(struct dsaf_device *dsaf_dev, int port); #endif /* __HNS_DSAF_MAIN_H__ */ diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c index 93e71e27401b4..a19932aeb9d7e 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c @@ -274,6 +274,29 @@ static void hns_ppe_exc_irq_en(struct hns_ppe_cb *ppe_cb, int en) dsaf_write_dev(ppe_cb, PPE_INTEN_REG, msk_vlue & vld_msk); } +int hns_ppe_wait_tx_fifo_clean(struct hns_ppe_cb *ppe_cb) +{ + int wait_cnt; + u32 val; + + wait_cnt = 0; + while (wait_cnt++ < HNS_MAX_WAIT_CNT) { + val = dsaf_read_dev(ppe_cb, PPE_CURR_TX_FIFO0_REG) & 0x3ffU; + if (!val) + break; + + usleep_range(100, 200); + } + + if (wait_cnt >= HNS_MAX_WAIT_CNT) { + dev_err(ppe_cb->dev, "hns ppe tx fifo clean wait timeout, still has %u pkt.\n", + val); + return -EBUSY; + } + + return 0; +} + /** * ppe_init_hw - init ppe * @ppe_cb: ppe device diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.h index 9d8e643e8aa6f..f670e63a5a018 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.h +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.h @@ -100,6 +100,7 @@ struct ppe_common_cb { }; +int hns_ppe_wait_tx_fifo_clean(struct hns_ppe_cb *ppe_cb); int hns_ppe_init(struct dsaf_device *dsaf_dev); void hns_ppe_uninit(struct dsaf_device *dsaf_dev); diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c index e2e28532e4dc2..1e43d7a3ca868 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c @@ -66,6 +66,29 @@ void hns_rcb_wait_fbd_clean(struct hnae_queue **qs, int q_num, u32 flag) "queue(%d) wait fbd(%d) clean fail!!\n", i, fbd_num); } +int hns_rcb_wait_tx_ring_clean(struct hnae_queue *qs) +{ + u32 head, tail; + int wait_cnt; + + tail = dsaf_read_dev(&qs->tx_ring, RCB_REG_TAIL); + wait_cnt = 0; + while (wait_cnt++ < HNS_MAX_WAIT_CNT) { + head = dsaf_read_dev(&qs->tx_ring, RCB_REG_HEAD); + if (tail == head) + break; + + usleep_range(100, 200); + } + + if (wait_cnt >= HNS_MAX_WAIT_CNT) { + dev_err(qs->dev->dev, "rcb wait timeout, head not equal to tail.\n"); + return -EBUSY; + } + + return 0; +} + /** *hns_rcb_reset_ring_hw - ring reset *@q: ring struct pointer diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.h index 602816498c8dd..2319b772a271e 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.h +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.h @@ -136,6 +136,7 @@ void hns_rcbv2_int_clr_hw(struct hnae_queue *q, u32 flag); void hns_rcb_init_hw(struct ring_pair_cb *ring); void hns_rcb_reset_ring_hw(struct hnae_queue *q); void hns_rcb_wait_fbd_clean(struct hnae_queue **qs, int q_num, u32 flag); +int hns_rcb_wait_tx_ring_clean(struct hnae_queue *qs); u32 hns_rcb_get_rx_coalesced_frames( struct rcb_common_cb *rcb_common, u32 port_idx); u32 hns_rcb_get_tx_coalesced_frames( diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h index 46a52d9bb1963..6d20e4eb7402c 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h @@ -464,6 +464,7 @@ #define RCB_RING_INTMSK_TX_OVERTIME_REG 0x000C4 #define RCB_RING_INTSTS_TX_OVERTIME_REG 0x000C8 +#define GMAC_FIFO_STATE_REG 0x0000UL #define GMAC_DUPLEX_TYPE_REG 0x0008UL #define GMAC_FD_FC_TYPE_REG 0x000CUL #define GMAC_TX_WATER_LINE_REG 0x0010UL diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c index 25a9732afc842..07d6a9cf2c556 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c @@ -1212,11 +1212,26 @@ static void hns_nic_adjust_link(struct net_device *ndev) struct hnae_handle *h = priv->ae_handle; int state = 1; + /* If there is no phy, do not need adjust link */ if (ndev->phydev) { - h->dev->ops->adjust_link(h, ndev->phydev->speed, - ndev->phydev->duplex); - state = ndev->phydev->link; + /* When phy link down, do nothing */ + if (ndev->phydev->link == 0) + return; + + if (h->dev->ops->need_adjust_link(h, ndev->phydev->speed, + ndev->phydev->duplex)) { + /* because Hi161X chip don't support to change gmac + * speed and duplex with traffic. Delay 200ms to + * make sure there is no more data in chip FIFO. + */ + netif_carrier_off(ndev); + msleep(200); + h->dev->ops->adjust_link(h, ndev->phydev->speed, + ndev->phydev->duplex); + netif_carrier_on(ndev); + } } + state = state && h->dev->ops->get_status(h); if (state != priv->link) { -- GitLab From e7577a1f1a65956fe6981b97f3002478e293dc8d Mon Sep 17 00:00:00 2001 From: Peng Li Date: Mon, 27 Aug 2018 09:59:30 +0800 Subject: [PATCH 0733/2269] net: hns: add netif_carrier_off before change speed and duplex [ Upstream commit 455c4401fe7a538facaffb35b906ce19f1ece474 ] If there are packets in hardware when changing the speed or duplex, it may cause hardware hang up. This patch adds netif_carrier_off before change speed and duplex in ethtool_ops.set_link_ksettings, and adds netif_carrier_on after complete the change. Signed-off-by: Peng Li Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/hisilicon/hns/hns_ethtool.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c index 2e14a3ae1d8be..c1e947bb852ff 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c @@ -243,7 +243,9 @@ static int hns_nic_set_link_ksettings(struct net_device *net_dev, } if (h->dev->ops->adjust_link) { + netif_carrier_off(net_dev); h->dev->ops->adjust_link(h, (int)speed, cmd->base.duplex); + netif_carrier_on(net_dev); return 0; } -- GitLab From b22a5d20aab1968d9fd896971ca1706e7c999af9 Mon Sep 17 00:00:00 2001 From: Arunk Khandavalli Date: Thu, 30 Aug 2018 00:40:16 +0300 Subject: [PATCH 0734/2269] cfg80211: nl80211_update_ft_ies() to validate NL80211_ATTR_IE [ Upstream commit 4f0223bfe9c3e62d8f45a85f1ef1b18a8a263ef9 ] nl80211_update_ft_ies() tried to validate NL80211_ATTR_IE with is_valid_ie_attr() before dereferencing it, but that helper function returns true in case of NULL pointer (i.e., attribute not included). This can result to dereferencing a NULL pointer. Fix that by explicitly checking that NL80211_ATTR_IE is included. Fixes: 355199e02b83 ("cfg80211: Extend support for IEEE 802.11r Fast BSS Transition") Signed-off-by: Arunk Khandavalli Signed-off-by: Jouni Malinen Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/wireless/nl80211.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 753f3e73c4980..3de415bca3915 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -11679,6 +11679,7 @@ static int nl80211_update_ft_ies(struct sk_buff *skb, struct genl_info *info) return -EOPNOTSUPP; if (!info->attrs[NL80211_ATTR_MDID] || + !info->attrs[NL80211_ATTR_IE] || !is_valid_ie_attr(info->attrs[NL80211_ATTR_IE])) return -EINVAL; -- GitLab From 2592adfe326bc98674b53da76fdec7a02a51de07 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Wed, 29 Aug 2018 21:03:25 +0200 Subject: [PATCH 0735/2269] mac80211: do not convert to A-MSDU if frag/subframe limited [ Upstream commit 1eb507903665442360a959136dfa3234c43db085 ] Do not start to aggregate packets in a A-MSDU frame (converting the first subframe to A-MSDU, adding the header) if max_tx_fragments or max_amsdu_subframes limits are already exceeded by it. In particular, this happens when drivers set the limit to 1 to avoid A-MSDUs at all. Signed-off-by: Lorenzo Bianconi [reword commit message to be more precise] Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/mac80211/tx.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 2064234500cfd..fd8dafe73aeca 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -3157,9 +3157,6 @@ static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata, if (skb->len + head->len > max_amsdu_len) goto out; - if (!ieee80211_amsdu_prepare_head(sdata, fast_tx, head)) - goto out; - nfrags = 1 + skb_shinfo(skb)->nr_frags; nfrags += 1 + skb_shinfo(head)->nr_frags; frag_tail = &skb_shinfo(head)->frag_list; @@ -3175,6 +3172,9 @@ static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata, if (max_frags && nfrags > max_frags) goto out; + if (!ieee80211_amsdu_prepare_head(sdata, fast_tx, head)) + goto out; + /* * Pad out the previous subframe to a multiple of 4 by adding the * padding to the next one, that's being added. Note that head->len -- GitLab From 0d66ce6878690b600bf03ad1bfa9ebf5c043b778 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 30 Aug 2018 10:55:49 +0200 Subject: [PATCH 0736/2269] mac80211: always account for A-MSDU header changes [ Upstream commit aa58acf325b4aadeecae2bfc90658273b47dbace ] In the error path of changing the SKB headroom of the second A-MSDU subframe, we would not account for the already-changed length of the first frame that just got converted to be in A-MSDU format and thus is a bit longer now. Fix this by doing the necessary accounting. It would be possible to reorder the operations, but that would make the code more complex (to calculate the necessary pad), and the headroom expansion should not fail frequently enough to make that worthwhile. Fixes: 6e0456b54545 ("mac80211: add A-MSDU tx support") Signed-off-by: Johannes Berg Acked-by: Lorenzo Bianconi Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/mac80211/tx.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index fd8dafe73aeca..8320282ec3b30 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -3188,7 +3188,7 @@ static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata, if (!ieee80211_amsdu_realloc_pad(local, skb, sizeof(rfc1042_header) + 2 + pad)) - goto out; + goto out_recalc; ret = true; data = skb_push(skb, ETH_ALEN + 2); @@ -3205,11 +3205,13 @@ static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata, head->data_len += skb->len; *frag_tail = skb; - flow->backlog += head->len - orig_len; - tin->backlog_bytes += head->len - orig_len; - - fq_recalc_backlog(fq, tin, flow); +out_recalc: + if (head->len != orig_len) { + flow->backlog += head->len - orig_len; + tin->backlog_bytes += head->len - orig_len; + fq_recalc_backlog(fq, tin, flow); + } out: spin_unlock_bh(&fq->lock); -- GitLab From 52614f7bf1b59d0c93402444cd3e4011429f5371 Mon Sep 17 00:00:00 2001 From: Stefan Raspl Date: Fri, 24 Aug 2018 14:03:55 +0200 Subject: [PATCH 0737/2269] tools/kvm_stat: fix python3 issues MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 58f33cfe73076b6497bada4f7b5bda961ed68083 ] Python3 returns a float for a regular division - switch to a division operator that returns an integer. Furthermore, filters return a generator object instead of the actual list - wrap result in yet another list, which makes it still work in both, Python2 and 3. Signed-off-by: Stefan Raspl Signed-off-by: Radim Krčmář Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/kvm/kvm_stat/kvm_stat | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat index 32283d88701af..40cf64c7ac3ba 100755 --- a/tools/kvm/kvm_stat/kvm_stat +++ b/tools/kvm/kvm_stat/kvm_stat @@ -724,7 +724,7 @@ class DebugfsProvider(Provider): if len(vms) == 0: self.do_read = False - self.paths = filter(lambda x: "{}-".format(pid) in x, vms) + self.paths = list(filter(lambda x: "{}-".format(pid) in x, vms)) else: self.paths = [] @@ -1119,10 +1119,10 @@ class Tui(object): (x, term_width) = self.screen.getmaxyx() row = 2 for line in text: - start = (term_width - len(line)) / 2 + start = (term_width - len(line)) // 2 self.screen.addstr(row, start, line) row += 1 - self.screen.addstr(row + 1, (term_width - len(hint)) / 2, hint, + self.screen.addstr(row + 1, (term_width - len(hint)) // 2, hint, curses.A_STANDOUT) self.screen.getkey() -- GitLab From 3b83a52796cda8bbcfad6e0e4710ff17f0d09b8d Mon Sep 17 00:00:00 2001 From: Stefan Raspl Date: Fri, 24 Aug 2018 14:03:56 +0200 Subject: [PATCH 0738/2269] tools/kvm_stat: fix handling of invalid paths in debugfs provider MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 617c66b9f236d20f11cecbb3f45e6d5675b2fae1 ] When filtering by guest, kvm_stat displays garbage when the guest is destroyed - see sample output below. We add code to remove the invalid paths from the providers, so at least no more garbage is displayed. Here's a sample output to illustrate: kvm statistics - pid 13986 (foo) Event Total %Total CurAvg/s diagnose_258 -2 0.0 0 deliver_program_interruption -3 0.0 0 diagnose_308 -4 0.0 0 halt_poll_invalid -91 0.0 -6 deliver_service_signal -244 0.0 -16 halt_successful_poll -250 0.1 -17 exit_pei -285 0.1 -19 exit_external_request -312 0.1 -21 diagnose_9c -328 0.1 -22 userspace_handled -713 0.1 -47 halt_attempted_poll -939 0.2 -62 deliver_emergency_signal -3126 0.6 -208 halt_wakeup -7199 1.5 -481 exit_wait_state -7379 1.5 -493 diagnose_500 -56499 11.5 -3757 exit_null -85491 17.4 -5685 diagnose_44 -133300 27.1 -8874 exit_instruction -195898 39.8 -13037 Total -492063 Signed-off-by: Stefan Raspl Signed-off-by: Radim Krčmář Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/kvm/kvm_stat/kvm_stat | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat index 40cf64c7ac3ba..c0d653d36c0f7 100755 --- a/tools/kvm/kvm_stat/kvm_stat +++ b/tools/kvm/kvm_stat/kvm_stat @@ -731,6 +731,13 @@ class DebugfsProvider(Provider): self.do_read = True self.reset() + def _verify_paths(self): + """Remove invalid paths""" + for path in self.paths: + if not os.path.exists(os.path.join(PATH_DEBUGFS_KVM, path)): + self.paths.remove(path) + continue + def read(self, reset=0, by_guest=0): """Returns a dict with format:'file name / field -> current value'. @@ -745,6 +752,7 @@ class DebugfsProvider(Provider): # If no debugfs filtering support is available, then don't read. if not self.do_read: return results + self._verify_paths() paths = self.paths if self._pid == 0: -- GitLab From 7e259a0537be13317661f96e472e24a70e7d760c Mon Sep 17 00:00:00 2001 From: Vincent Whitchurch Date: Fri, 31 Aug 2018 09:04:18 +0200 Subject: [PATCH 0739/2269] gpio: Fix crash due to registration race [ Upstream commit d49b48f088c323dbacae44dfbe56d9c985c8a2a1 ] gpiochip_add_data_with_key() adds the gpiochip to the gpio_devices list before of_gpiochip_add() is called, but it's only the latter which sets the ->of_xlate function pointer. gpiochip_find() can be called by someone else between these two actions, and it can find the chip and call of_gpiochip_match_node_and_xlate() which leads to the following crash due to a NULL ->of_xlate(). Unhandled prefetch abort: page domain fault (0x01b) at 0x00000000 Modules linked in: leds_gpio(+) gpio_generic(+) CPU: 0 PID: 830 Comm: insmod Not tainted 4.18.0+ #43 Hardware name: ARM-Versatile Express PC is at (null) LR is at of_gpiochip_match_node_and_xlate+0x2c/0x38 Process insmod (pid: 830, stack limit = 0x(ptrval)) (of_gpiochip_match_node_and_xlate) from (gpiochip_find+0x48/0x84) (gpiochip_find) from (of_get_named_gpiod_flags+0xa8/0x238) (of_get_named_gpiod_flags) from (gpiod_get_from_of_node+0x2c/0xc8) (gpiod_get_from_of_node) from (devm_fwnode_get_index_gpiod_from_child+0xb8/0x144) (devm_fwnode_get_index_gpiod_from_child) from (gpio_led_probe+0x208/0x3c4 [leds_gpio]) (gpio_led_probe [leds_gpio]) from (platform_drv_probe+0x48/0x9c) (platform_drv_probe) from (really_probe+0x1d0/0x3d4) (really_probe) from (driver_probe_device+0x78/0x1c0) (driver_probe_device) from (__driver_attach+0x120/0x13c) (__driver_attach) from (bus_for_each_dev+0x68/0xb4) (bus_for_each_dev) from (bus_add_driver+0x1a8/0x268) (bus_add_driver) from (driver_register+0x78/0x10c) (driver_register) from (do_one_initcall+0x54/0x1fc) (do_one_initcall) from (do_init_module+0x64/0x1f4) (do_init_module) from (load_module+0x2198/0x26ac) (load_module) from (sys_finit_module+0xe0/0x110) (sys_finit_module) from (ret_fast_syscall+0x0/0x54) One way to fix this would be to rework the hairy registration sequence in gpiochip_add_data_with_key(), but since I'd probably introduce a couple of new bugs if I attempted that, simply add a check for a non-NULL of_xlate function pointer in of_gpiochip_match_node_and_xlate(). This works since the driver looking for the gpio will simply fail to find the gpio and defer its probe and be reprobed when the driver which is registering the gpiochip has fully completed its probe. Signed-off-by: Vincent Whitchurch Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpio/gpiolib-of.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c index ba38f530e4037..ee8c046cab62e 100644 --- a/drivers/gpio/gpiolib-of.c +++ b/drivers/gpio/gpiolib-of.c @@ -31,6 +31,7 @@ static int of_gpiochip_match_node_and_xlate(struct gpio_chip *chip, void *data) struct of_phandle_args *gpiospec = data; return chip->gpiodev->dev.of_node == gpiospec->np && + chip->of_xlate && chip->of_xlate(chip, gpiospec, NULL) >= 0; } -- GitLab From dc492842b7001b299299ffafe078ba5777096edd Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 30 Aug 2018 13:52:38 -0700 Subject: [PATCH 0740/2269] ARC: atomics: unbork atomic_fetch_##op() [ Upstream commit 3fcbb8260a87efb691d837e8cd24e81f65b3eb70 ] In 4.19-rc1, Eugeniy reported weird boot and IO errors on ARC HSDK | INFO: task syslogd:77 blocked for more than 10 seconds. | Not tainted 4.19.0-rc1-00007-gf213acea4e88 #40 | "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this | message. | syslogd D 0 77 76 0x00000000 | | Stack Trace: | __switch_to+0x0/0xac | __schedule+0x1b2/0x730 | io_schedule+0x5c/0xc0 | __lock_page+0x98/0xdc | find_lock_entry+0x38/0x100 | shmem_getpage_gfp.isra.3+0x82/0xbfc | shmem_fault+0x46/0x138 | handle_mm_fault+0x5bc/0x924 | do_page_fault+0x100/0x2b8 | ret_from_exception+0x0/0x8 He bisected to 84c6591103db ("locking/atomics, asm-generic/bitops/lock.h: Rewrite using atomic_fetch_*()") This commit however only unmasked the real issue introduced by commit 4aef66c8ae9 ("locking/atomic, arch/arc: Fix build") which missed the retry-if-scond-failed branch in atomic_fetch_##op() macros. The bisected commit started using atomic_fetch_##op() macros for building the rest of atomics. Fixes: 4aef66c8ae9 ("locking/atomic, arch/arc: Fix build") Reported-by: Eugeniy Paltsev Acked-by: Peter Zijlstra (Intel) Signed-off-by: Will Deacon Signed-off-by: Vineet Gupta [vgupta: wrote changelog] Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arc/include/asm/atomic.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h index 11859287c52af..c98b59ac06123 100644 --- a/arch/arc/include/asm/atomic.h +++ b/arch/arc/include/asm/atomic.h @@ -84,7 +84,7 @@ static inline int atomic_fetch_##op(int i, atomic_t *v) \ "1: llock %[orig], [%[ctr]] \n" \ " " #asm_op " %[val], %[orig], %[i] \n" \ " scond %[val], [%[ctr]] \n" \ - " \n" \ + " bnz 1b \n" \ : [val] "=&r" (val), \ [orig] "=&r" (orig) \ : [ctr] "r" (&v->counter), \ -- GitLab From 36fadeb87be802a609f03564e950a9f50b9e3855 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Wed, 29 Aug 2018 11:05:42 -0700 Subject: [PATCH 0741/2269] md/raid5-cache: disable reshape completely [ Upstream commit e254de6bcf3f5b6e78a92ac95fb91acef8adfe1a ] We don't support reshape yet if an array supports log device. Previously we determine the fact by checking ->log. However, ->log could be NULL after a log device is removed, but the array is still marked to support log device. Don't allow reshape in this case too. User can disable log device support by setting 'consistency_policy' to 'resync' then do reshape. Reported-by: Xiao Ni Tested-by: Xiao Ni Signed-off-by: Shaohua Li Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/md/raid5-log.h | 5 +++++ drivers/md/raid5.c | 6 +++--- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/md/raid5-log.h b/drivers/md/raid5-log.h index 284578b0a349c..5c908c510c774 100644 --- a/drivers/md/raid5-log.h +++ b/drivers/md/raid5-log.h @@ -43,6 +43,11 @@ extern void ppl_write_stripe_run(struct r5conf *conf); extern void ppl_stripe_write_finished(struct stripe_head *sh); extern int ppl_modify_log(struct r5conf *conf, struct md_rdev *rdev, bool add); +static inline bool raid5_has_log(struct r5conf *conf) +{ + return test_bit(MD_HAS_JOURNAL, &conf->mddev->flags); +} + static inline bool raid5_has_ppl(struct r5conf *conf) { return test_bit(MD_HAS_PPL, &conf->mddev->flags); diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 5018fb2352c27..dbf51b4c21b32 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -736,7 +736,7 @@ static bool stripe_can_batch(struct stripe_head *sh) { struct r5conf *conf = sh->raid_conf; - if (conf->log || raid5_has_ppl(conf)) + if (raid5_has_log(conf) || raid5_has_ppl(conf)) return false; return test_bit(STRIPE_BATCH_READY, &sh->state) && !test_bit(STRIPE_BITMAP_PENDING, &sh->state) && @@ -7717,7 +7717,7 @@ static int raid5_resize(struct mddev *mddev, sector_t sectors) sector_t newsize; struct r5conf *conf = mddev->private; - if (conf->log || raid5_has_ppl(conf)) + if (raid5_has_log(conf) || raid5_has_ppl(conf)) return -EINVAL; sectors &= ~((sector_t)conf->chunk_sectors - 1); newsize = raid5_size(mddev, sectors, mddev->raid_disks); @@ -7768,7 +7768,7 @@ static int check_reshape(struct mddev *mddev) { struct r5conf *conf = mddev->private; - if (conf->log || raid5_has_ppl(conf)) + if (raid5_has_log(conf) || raid5_has_ppl(conf)) return -EINVAL; if (mddev->delta_disks == 0 && mddev->new_layout == mddev->layout && -- GitLab From da26e5729c04de59b41da0e245cae3742bfaaefb Mon Sep 17 00:00:00 2001 From: Xiao Ni Date: Thu, 30 Aug 2018 15:57:09 +0800 Subject: [PATCH 0742/2269] RAID10 BUG_ON in raise_barrier when force is true and conf->barrier is 0 [ Upstream commit 1d0ffd264204eba1861865560f1f7f7a92919384 ] In raid10 reshape_request it gets max_sectors in read_balance. If the underlayer disks have bad blocks, the max_sectors is less than last. It will call goto read_more many times. It calls raise_barrier(conf, sectors_done != 0) every time. In this condition sectors_done is not 0. So the value passed to the argument force of raise_barrier is true. In raise_barrier it checks conf->barrier when force is true. If force is true and conf->barrier is 0, it panic. In this case reshape_request submits bio to under layer disks. And in the callback function of the bio it calls lower_barrier. If the bio finishes before calling raise_barrier again, it can trigger the BUG_ON. Add one pair of raise_barrier/lower_barrier to fix this bug. Signed-off-by: Xiao Ni Suggested-by: Neil Brown Signed-off-by: Shaohua Li Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/md/raid10.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 262a0f0f8fd5c..927b60e9d3ca2 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -4394,11 +4394,12 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, allow_barrier(conf); } + raise_barrier(conf, 0); read_more: /* Now schedule reads for blocks from sector_nr to last */ r10_bio = raid10_alloc_init_r10buf(conf); r10_bio->state = 0; - raise_barrier(conf, sectors_done != 0); + raise_barrier(conf, 1); atomic_set(&r10_bio->remaining, 0); r10_bio->mddev = mddev; r10_bio->sector = sector_nr; @@ -4494,6 +4495,8 @@ read_more: if (sector_nr <= last) goto read_more; + lower_barrier(conf); + /* Now that we have done the whole section we can * update reshape_progress */ -- GitLab From 82fc9c6b7b9a355b3e794386c7630a076d762072 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Fri, 31 Aug 2018 23:30:47 +0900 Subject: [PATCH 0743/2269] i2c: uniphier: issue STOP only for last message or I2C_M_STOP [ Upstream commit 38f5d8d8cbb2ffa2b54315118185332329ec891c ] This driver currently emits a STOP if the next message is not I2C_MD_RD. It should not do it because it disturbs the I2C_RDWR ioctl, where read/write transactions are combined without STOP between. Issue STOP only when the message is the last one _or_ flagged with I2C_M_STOP. Fixes: dd6fd4a32793 ("i2c: uniphier: add UniPhier FIFO-less I2C driver") Signed-off-by: Masahiro Yamada Signed-off-by: Wolfram Sang Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/busses/i2c-uniphier.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/i2c/busses/i2c-uniphier.c b/drivers/i2c/busses/i2c-uniphier.c index bb181b0882919..454f914ae66db 100644 --- a/drivers/i2c/busses/i2c-uniphier.c +++ b/drivers/i2c/busses/i2c-uniphier.c @@ -248,11 +248,8 @@ static int uniphier_i2c_master_xfer(struct i2c_adapter *adap, return ret; for (msg = msgs; msg < emsg; msg++) { - /* If next message is read, skip the stop condition */ - bool stop = !(msg + 1 < emsg && msg[1].flags & I2C_M_RD); - /* but, force it if I2C_M_STOP is set */ - if (msg->flags & I2C_M_STOP) - stop = true; + /* Emit STOP if it is the last message or I2C_M_STOP is set. */ + bool stop = (msg + 1 == emsg) || (msg->flags & I2C_M_STOP); ret = uniphier_i2c_master_xfer_one(adap, msg, stop); if (ret) -- GitLab From b08d15cc921f9bdd472dc4b264998e88cd7d0071 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Fri, 31 Aug 2018 23:30:48 +0900 Subject: [PATCH 0744/2269] i2c: uniphier-f: issue STOP only for last message or I2C_M_STOP [ Upstream commit 4c85609b08c4761eca0a40fd7beb06bc650f252d ] This driver currently emits a STOP if the next message is not I2C_MD_RD. It should not do it because it disturbs the I2C_RDWR ioctl, where read/write transactions are combined without STOP between. Issue STOP only when the message is the last one _or_ flagged with I2C_M_STOP. Fixes: 6a62974b667f ("i2c: uniphier_f: add UniPhier FIFO-builtin I2C driver") Signed-off-by: Masahiro Yamada Signed-off-by: Wolfram Sang Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/busses/i2c-uniphier-f.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/i2c/busses/i2c-uniphier-f.c b/drivers/i2c/busses/i2c-uniphier-f.c index 9918bdd816196..a403e8579b652 100644 --- a/drivers/i2c/busses/i2c-uniphier-f.c +++ b/drivers/i2c/busses/i2c-uniphier-f.c @@ -401,11 +401,8 @@ static int uniphier_fi2c_master_xfer(struct i2c_adapter *adap, return ret; for (msg = msgs; msg < emsg; msg++) { - /* If next message is read, skip the stop condition */ - bool stop = !(msg + 1 < emsg && msg[1].flags & I2C_M_RD); - /* but, force it if I2C_M_STOP is set */ - if (msg->flags & I2C_M_STOP) - stop = true; + /* Emit STOP if it is the last message or I2C_M_STOP is set. */ + bool stop = (msg + 1 == emsg) || (msg->flags & I2C_M_STOP); ret = uniphier_fi2c_master_xfer_one(adap, msg, stop); if (ret) -- GitLab From 8590e6fecb5e672e522daa840f01e33272c13b88 Mon Sep 17 00:00:00 2001 From: Jia-Ju Bai Date: Sat, 1 Sep 2018 20:11:05 +0800 Subject: [PATCH 0745/2269] net: cadence: Fix a sleep-in-atomic-context bug in macb_halt_tx() [ Upstream commit 16fe10cf92783ed9ceb182d6ea2b8adf5e8ec1b8 ] The kernel module may sleep with holding a spinlock. The function call paths (from bottom to top) in Linux-4.16 are: [FUNC] usleep_range drivers/net/ethernet/cadence/macb_main.c, 648: usleep_range in macb_halt_tx drivers/net/ethernet/cadence/macb_main.c, 730: macb_halt_tx in macb_tx_error_task drivers/net/ethernet/cadence/macb_main.c, 721: _raw_spin_lock_irqsave in macb_tx_error_task To fix this bug, usleep_range() is replaced with udelay(). This bug is found by my static analysis tool DSAC. Signed-off-by: Jia-Ju Bai Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/cadence/macb_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index dfef4ec167c18..c1787be6a2582 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -642,7 +642,7 @@ static int macb_halt_tx(struct macb *bp) if (!(status & MACB_BIT(TGO))) return 0; - usleep_range(10, 250); + udelay(250); } while (time_before(halt_time, timeout)); return -ETIMEDOUT; -- GitLab From 25cb8544342a23784f89a5e856c27509f30d99be Mon Sep 17 00:00:00 2001 From: Jon Kuhn Date: Mon, 9 Jul 2018 14:33:14 +0000 Subject: [PATCH 0746/2269] fs/cifs: don't translate SFM_SLASH (U+F026) to backslash [ Upstream commit c15e3f19a6d5c89b1209dc94b40e568177cb0921 ] When a Mac client saves an item containing a backslash to a file server the backslash is represented in the CIFS/SMB protocol as as U+F026. Before this change, listing a directory containing an item with a backslash in its name will return that item with the backslash represented with a true backslash character (U+005C) because convert_sfm_character mapped U+F026 to U+005C when interpretting the CIFS/SMB protocol response. However, attempting to open or stat the path using a true backslash will result in an error because convert_to_sfm_char does not map U+005C back to U+F026 causing the CIFS/SMB request to be made with the backslash represented as U+005C. This change simply prevents the U+F026 to U+005C conversion from happenning. This is analogous to how the code does not do any translation of UNI_SLASH (U+F000). Signed-off-by: Jon Kuhn Signed-off-by: Steve French Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/cifs/cifs_unicode.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c index b380e0871372d..a2b2355e7f019 100644 --- a/fs/cifs/cifs_unicode.c +++ b/fs/cifs/cifs_unicode.c @@ -105,9 +105,6 @@ convert_sfm_char(const __u16 src_char, char *target) case SFM_LESSTHAN: *target = '<'; break; - case SFM_SLASH: - *target = '\\'; - break; case SFM_SPACE: *target = ' '; break; -- GitLab From 43a01409ef4c401c955d4a3f55523a6a77fdee5d Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Fri, 31 Aug 2018 01:04:13 +0200 Subject: [PATCH 0747/2269] mac80211: fix an off-by-one issue in A-MSDU max_subframe computation [ Upstream commit 66eb02d839e8495ae6b612e2d09ff599374b80e2 ] Initialize 'n' to 2 in order to take into account also the first packet in the estimation of max_subframe limit for a given A-MSDU since frag_tail pointer is NULL when ieee80211_amsdu_aggregate routine analyzes the second frame. Fixes: 6e0456b54545 ("mac80211: add A-MSDU tx support") Signed-off-by: Lorenzo Bianconi Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/mac80211/tx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 8320282ec3b30..d8fddd88bf468 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -3123,7 +3123,7 @@ static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata, void *data; bool ret = false; unsigned int orig_len; - int n = 1, nfrags, pad = 0; + int n = 2, nfrags, pad = 0; u16 hdrlen; if (!ieee80211_hw_check(&local->hw, TX_AMSDU)) -- GitLab From 4fa55f6d29fd1bc3fa366751cc7ac90975b07ca2 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 31 Aug 2018 11:10:55 +0300 Subject: [PATCH 0748/2269] cfg80211: fix a type issue in ieee80211_chandef_to_operating_class() [ Upstream commit 8442938c3a2177ba16043b3a935f2c78266ad399 ] The "chandef->center_freq1" variable is a u32 but "freq" is a u16 so we are truncating away the high bits. I noticed this bug because in commit 9cf0a0b4b64a ("cfg80211: Add support for 60GHz band channels 5 and 6") we made "freq <= 56160 + 2160 * 6" a valid requency when before it was only "freq <= 56160 + 2160 * 4" that was valid. It introduces a static checker warning: net/wireless/util.c:1571 ieee80211_chandef_to_operating_class() warn: always true condition '(freq <= 56160 + 2160 * 6) => (0-u16max <= 69120)' But really we probably shouldn't have been truncating the high bits away to begin with. Signed-off-by: Dan Carpenter Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/wireless/util.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/wireless/util.c b/net/wireless/util.c index c1238d582fd19..ca3361a3e750e 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -1449,7 +1449,7 @@ bool ieee80211_chandef_to_operating_class(struct cfg80211_chan_def *chandef, u8 *op_class) { u8 vht_opclass; - u16 freq = chandef->center_freq1; + u32 freq = chandef->center_freq1; if (freq >= 2412 && freq <= 2472) { if (chandef->width > NL80211_CHAN_WIDTH_40) -- GitLab From 37cdc7e35ae4b4499826216ee54499f572bc6eb4 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Fri, 31 Aug 2018 11:31:06 +0300 Subject: [PATCH 0749/2269] mac80211: fix a race between restart and CSA flows [ Upstream commit f3ffb6c3a28963657eb8b02a795d75f2ebbd5ef4 ] We hit a problem with iwlwifi that was caused by a bug in mac80211. A bug in iwlwifi caused the firwmare to crash in certain cases in channel switch. Because of that bug, drv_pre_channel_switch would fail and trigger the restart flow. Now we had the hw restart worker which runs on the system's workqueue and the csa_connection_drop_work worker that runs on mac80211's workqueue that can run together. This is obviously problematic since the restart work wants to reconfigure the connection, while the csa_connection_drop_work worker does the exact opposite: it tries to disconnect. Fix this by cancelling the csa_connection_drop_work worker in the restart worker. Note that this can sound racy: we could have: driver iface_work CSA_work restart_work +++++++++++++++++++++++++++++++++++++++++++++ | <--drv_cs ---| -CS FAILED--> | | | cancel_work(CSA) schedule | CSA work | | | Race between those 2 But this is not possible because we flush the workqueue in the restart worker before we cancel the CSA worker. That would be bullet proof if we could guarantee that we schedule the CSA worker only from the iface_work which runs on the workqueue (and not on the system's workqueue), but unfortunately we do have an instance in which we schedule the CSA work outside the context of the workqueue (ieee80211_chswitch_done). Note also that we should probably cancel other workers like beacon_connection_loss_work and possibly others for different types of interfaces, at the very least, IBSS should suffer from the exact same problem, but for now, do the minimum to fix the actual bug that was actually experienced and reproduced. Signed-off-by: Emmanuel Grumbach Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/mac80211/main.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 9a3dd1467e24e..8a51f94ec1ce4 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -255,8 +255,27 @@ static void ieee80211_restart_work(struct work_struct *work) flush_work(&local->radar_detected_work); rtnl_lock(); - list_for_each_entry(sdata, &local->interfaces, list) + list_for_each_entry(sdata, &local->interfaces, list) { + /* + * XXX: there may be more work for other vif types and even + * for station mode: a good thing would be to run most of + * the iface type's dependent _stop (ieee80211_mg_stop, + * ieee80211_ibss_stop) etc... + * For now, fix only the specific bug that was seen: race + * between csa_connection_drop_work and us. + */ + if (sdata->vif.type == NL80211_IFTYPE_STATION) { + /* + * This worker is scheduled from the iface worker that + * runs on mac80211's workqueue, so we can't be + * scheduling this worker after the cancel right here. + * The exception is ieee80211_chswitch_done. + * Then we can have a race... + */ + cancel_work_sync(&sdata->u.mgd.csa_connection_drop_work); + } flush_delayed_work(&sdata->dec_tailroom_needed_wk); + } ieee80211_scan_cancel(local); /* make sure any new ROC will consider local->in_reconfig */ -- GitLab From 8788737af389eeb82dea693abf726bca61971bfc Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Fri, 31 Aug 2018 11:31:10 +0300 Subject: [PATCH 0750/2269] mac80211: Fix station bandwidth setting after channel switch [ Upstream commit 0007e94355fdb71a1cf5dba0754155cba08f0666 ] When performing a channel switch flow for a managed interface, the flow did not update the bandwidth of the AP station and the rate scale algorithm. In case of a channel width downgrade, this would result with the rate scale algorithm using a bandwidth that does not match the interface channel configuration. Fix this by updating the AP station bandwidth and rate scaling algorithm before the actual channel change in case of a bandwidth downgrade, or after the actual channel change in case of a bandwidth upgrade. Signed-off-by: Ilan Peer Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/mac80211/mlme.c | 53 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 052dbd4fa3664..a287b38916438 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -988,6 +988,10 @@ static void ieee80211_chswitch_work(struct work_struct *work) */ if (sdata->reserved_chanctx) { + struct ieee80211_supported_band *sband = NULL; + struct sta_info *mgd_sta = NULL; + enum ieee80211_sta_rx_bandwidth bw = IEEE80211_STA_RX_BW_20; + /* * with multi-vif csa driver may call ieee80211_csa_finish() * many times while waiting for other interfaces to use their @@ -996,6 +1000,48 @@ static void ieee80211_chswitch_work(struct work_struct *work) if (sdata->reserved_ready) goto out; + if (sdata->vif.bss_conf.chandef.width != + sdata->csa_chandef.width) { + /* + * For managed interface, we need to also update the AP + * station bandwidth and align the rate scale algorithm + * on the bandwidth change. Here we only consider the + * bandwidth of the new channel definition (as channel + * switch flow does not have the full HT/VHT/HE + * information), assuming that if additional changes are + * required they would be done as part of the processing + * of the next beacon from the AP. + */ + switch (sdata->csa_chandef.width) { + case NL80211_CHAN_WIDTH_20_NOHT: + case NL80211_CHAN_WIDTH_20: + default: + bw = IEEE80211_STA_RX_BW_20; + break; + case NL80211_CHAN_WIDTH_40: + bw = IEEE80211_STA_RX_BW_40; + break; + case NL80211_CHAN_WIDTH_80: + bw = IEEE80211_STA_RX_BW_80; + break; + case NL80211_CHAN_WIDTH_80P80: + case NL80211_CHAN_WIDTH_160: + bw = IEEE80211_STA_RX_BW_160; + break; + } + + mgd_sta = sta_info_get(sdata, ifmgd->bssid); + sband = + local->hw.wiphy->bands[sdata->csa_chandef.chan->band]; + } + + if (sdata->vif.bss_conf.chandef.width > + sdata->csa_chandef.width) { + mgd_sta->sta.bandwidth = bw; + rate_control_rate_update(local, sband, mgd_sta, + IEEE80211_RC_BW_CHANGED); + } + ret = ieee80211_vif_use_reserved_context(sdata); if (ret) { sdata_info(sdata, @@ -1006,6 +1052,13 @@ static void ieee80211_chswitch_work(struct work_struct *work) goto out; } + if (sdata->vif.bss_conf.chandef.width < + sdata->csa_chandef.width) { + mgd_sta->sta.bandwidth = bw; + rate_control_rate_update(local, sband, mgd_sta, + IEEE80211_RC_BW_CHANGED); + } + goto out; } -- GitLab From e132eb09fdd2b8a2db7af79e01b2d798604f63d9 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Fri, 31 Aug 2018 11:31:12 +0300 Subject: [PATCH 0751/2269] mac80211: don't Tx a deauth frame if the AP forbade Tx [ Upstream commit 6c18b27d6e5c6a7206364eae2b47bc8d8b2fa68f ] If the driver fails to properly prepare for the channel switch, mac80211 will disconnect. If the CSA IE had mode set to 1, it means that the clients are not allowed to send any Tx on the current channel, and that includes the deauthentication frame. Make sure that we don't send the deauthentication frame in this case. In iwlwifi, this caused a failure to flush queues since the firmware already closed the queues after having parsed the CSA IE. Then mac80211 would wait until the deauthentication frame would go out (drv_flush(drop=false)) and that would never happen. Signed-off-by: Emmanuel Grumbach Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/mac80211/mlme.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index a287b38916438..328ac10084e46 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1280,6 +1280,16 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, cbss->beacon_interval)); return; drop_connection: + /* + * This is just so that the disconnect flow will know that + * we were trying to switch channel and failed. In case the + * mode is 1 (we are not allowed to Tx), we will know not to + * send a deauthentication frame. Those two fields will be + * reset when the disconnection worker runs. + */ + sdata->vif.csa_active = true; + sdata->csa_block_tx = csa_ie.mode; + ieee80211_queue_work(&local->hw, &ifmgd->csa_connection_drop_work); mutex_unlock(&local->chanctx_mtx); mutex_unlock(&local->mtx); @@ -2450,6 +2460,7 @@ static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata) struct ieee80211_local *local = sdata->local; struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN]; + bool tx; sdata_lock(sdata); if (!ifmgd->associated) { @@ -2457,6 +2468,8 @@ static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata) return; } + tx = !sdata->csa_block_tx; + /* AP is probably out of range (or not reachable for another reason) so * remove the bss struct for that AP. */ @@ -2464,7 +2477,7 @@ static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata) ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DEAUTH, WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY, - true, frame_buf); + tx, frame_buf); mutex_lock(&local->mtx); sdata->vif.csa_active = false; ifmgd->csa_waiting_bcn = false; @@ -2475,7 +2488,7 @@ static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata) } mutex_unlock(&local->mtx); - ieee80211_report_disconnect(sdata, frame_buf, sizeof(frame_buf), true, + ieee80211_report_disconnect(sdata, frame_buf, sizeof(frame_buf), tx, WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY); sdata_unlock(sdata); -- GitLab From 27c4ad84fd015d1a4e9dae5f949e19507afb75c1 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Fri, 31 Aug 2018 11:31:13 +0300 Subject: [PATCH 0752/2269] mac80211: shorten the IBSS debug messages [ Upstream commit c6e57b3896fc76299913b8cfd82d853bee8a2c84 ] When tracing is enabled, all the debug messages are recorded and must not exceed MAX_MSG_LEN (100) columns. Longer debug messages grant the user with: WARNING: CPU: 3 PID: 32642 at /tmp/wifi-core-20180806094828/src/iwlwifi-stack-dev/net/mac80211/./trace_msg.h:32 trace_event_raw_event_mac80211_msg_event+0xab/0xc0 [mac80211] Workqueue: phy1 ieee80211_iface_work [mac80211] RIP: 0010:trace_event_raw_event_mac80211_msg_event+0xab/0xc0 [mac80211] Call Trace: __sdata_dbg+0xbd/0x120 [mac80211] ieee80211_ibss_rx_queued_mgmt+0x15f/0x510 [mac80211] ieee80211_iface_work+0x21d/0x320 [mac80211] Signed-off-by: Emmanuel Grumbach Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/mac80211/ibss.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index e9c6aa3ed05b8..3d0d12fbd8dd1 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -947,8 +947,8 @@ static void ieee80211_rx_mgmt_deauth_ibss(struct ieee80211_sub_if_data *sdata, if (len < IEEE80211_DEAUTH_FRAME_LEN) return; - ibss_dbg(sdata, "RX DeAuth SA=%pM DA=%pM BSSID=%pM (reason: %d)\n", - mgmt->sa, mgmt->da, mgmt->bssid, reason); + ibss_dbg(sdata, "RX DeAuth SA=%pM DA=%pM\n", mgmt->sa, mgmt->da); + ibss_dbg(sdata, "\tBSSID=%pM (reason: %d)\n", mgmt->bssid, reason); sta_info_destroy_addr(sdata, mgmt->sa); } @@ -966,9 +966,9 @@ static void ieee80211_rx_mgmt_auth_ibss(struct ieee80211_sub_if_data *sdata, auth_alg = le16_to_cpu(mgmt->u.auth.auth_alg); auth_transaction = le16_to_cpu(mgmt->u.auth.auth_transaction); - ibss_dbg(sdata, - "RX Auth SA=%pM DA=%pM BSSID=%pM (auth_transaction=%d)\n", - mgmt->sa, mgmt->da, mgmt->bssid, auth_transaction); + ibss_dbg(sdata, "RX Auth SA=%pM DA=%pM\n", mgmt->sa, mgmt->da); + ibss_dbg(sdata, "\tBSSID=%pM (auth_transaction=%d)\n", + mgmt->bssid, auth_transaction); if (auth_alg != WLAN_AUTH_OPEN || auth_transaction != 1) return; @@ -1175,10 +1175,10 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, rx_timestamp = drv_get_tsf(local, sdata); } - ibss_dbg(sdata, - "RX beacon SA=%pM BSSID=%pM TSF=0x%llx BCN=0x%llx diff=%lld @%lu\n", + ibss_dbg(sdata, "RX beacon SA=%pM BSSID=%pM TSF=0x%llx\n", mgmt->sa, mgmt->bssid, - (unsigned long long)rx_timestamp, + (unsigned long long)rx_timestamp); + ibss_dbg(sdata, "\tBCN=0x%llx diff=%lld @%lu\n", (unsigned long long)beacon_timestamp, (unsigned long long)(rx_timestamp - beacon_timestamp), jiffies); @@ -1537,9 +1537,9 @@ static void ieee80211_rx_mgmt_probe_req(struct ieee80211_sub_if_data *sdata, tx_last_beacon = drv_tx_last_beacon(local); - ibss_dbg(sdata, - "RX ProbeReq SA=%pM DA=%pM BSSID=%pM (tx_last_beacon=%d)\n", - mgmt->sa, mgmt->da, mgmt->bssid, tx_last_beacon); + ibss_dbg(sdata, "RX ProbeReq SA=%pM DA=%pM\n", mgmt->sa, mgmt->da); + ibss_dbg(sdata, "\tBSSID=%pM (tx_last_beacon=%d)\n", + mgmt->bssid, tx_last_beacon); if (!tx_last_beacon && is_multicast_ether_addr(mgmt->da)) return; -- GitLab From 5cbf015b971c8bf9bb9b6796dc92d78b51bc62b5 Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Tue, 4 Sep 2018 15:45:48 -0700 Subject: [PATCH 0753/2269] tools/vm/slabinfo.c: fix sign-compare warning [ Upstream commit 904506562e0856f2535d876407d087c9459d345b ] Currently we get the following compiler warning: slabinfo.c:854:22: warning: comparison between signed and unsigned integer expressions [-Wsign-compare] if (s->object_size < min_objsize) ^ due to the mismatch of signed/unsigned comparison. ->object_size and ->slab_size are never expected to be negative, so let's define them as unsigned int. [n-horiguchi@ah.jp.nec.com: convert everything - none of these can be negative] Link: http://lkml.kernel.org/r/20180826234947.GA9787@hori1.linux.bs1.fc.nec.co.jp Link: http://lkml.kernel.org/r/1535103134-20239-1-git-send-email-n-horiguchi@ah.jp.nec.com Signed-off-by: Naoya Horiguchi Reviewed-by: Andrew Morton Cc: Matthew Wilcox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/vm/slabinfo.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/vm/slabinfo.c b/tools/vm/slabinfo.c index b0b7ef6d0de1c..3fe0932543850 100644 --- a/tools/vm/slabinfo.c +++ b/tools/vm/slabinfo.c @@ -30,8 +30,8 @@ struct slabinfo { int alias; int refs; int aliases, align, cache_dma, cpu_slabs, destroy_by_rcu; - int hwcache_align, object_size, objs_per_slab; - int sanity_checks, slab_size, store_user, trace; + unsigned int hwcache_align, object_size, objs_per_slab; + unsigned int sanity_checks, slab_size, store_user, trace; int order, poison, reclaim_account, red_zone; unsigned long partial, objects, slabs, objects_partial, objects_total; unsigned long alloc_fastpath, alloc_slowpath; -- GitLab From ee0516c4a1fec5d52d2d7702d22787fca1ed8fc0 Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Tue, 4 Sep 2018 15:45:51 -0700 Subject: [PATCH 0754/2269] tools/vm/page-types.c: fix "defined but not used" warning [ Upstream commit 7ab660f8baecfe26c1c267fa8e64d2073feae2bb ] debugfs_known_mountpoints[] is not used any more, so let's remove it. Link: http://lkml.kernel.org/r/1535102651-19418-1-git-send-email-n-horiguchi@ah.jp.nec.com Signed-off-by: Naoya Horiguchi Reviewed-by: Andrew Morton Cc: Matthew Wilcox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/vm/page-types.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/tools/vm/page-types.c b/tools/vm/page-types.c index e92903fc71138..6d5bcbaf61935 100644 --- a/tools/vm/page-types.c +++ b/tools/vm/page-types.c @@ -155,12 +155,6 @@ static const char * const page_flag_names[] = { }; -static const char * const debugfs_known_mountpoints[] = { - "/sys/kernel/debug", - "/debug", - 0, -}; - /* * data structures */ -- GitLab From dcc89aaf5a8dbe431daa026ad41d87a139fd5542 Mon Sep 17 00:00:00 2001 From: Daniel Black Date: Fri, 5 Oct 2018 15:52:19 -0700 Subject: [PATCH 0755/2269] mm: madvise(MADV_DODUMP): allow hugetlbfs pages commit d41aa5252394c065d1f04d1ceea885b70d00c9c6 upstream. Reproducer, assuming 2M of hugetlbfs available: Hugetlbfs mounted, size=2M and option user=testuser # mount | grep ^hugetlbfs hugetlbfs on /dev/hugepages type hugetlbfs (rw,pagesize=2M,user=dan) # sysctl vm.nr_hugepages=1 vm.nr_hugepages = 1 # grep Huge /proc/meminfo AnonHugePages: 0 kB ShmemHugePages: 0 kB HugePages_Total: 1 HugePages_Free: 1 HugePages_Rsvd: 0 HugePages_Surp: 0 Hugepagesize: 2048 kB Hugetlb: 2048 kB Code: #include #include #define SIZE 2*1024*1024 int main() { void *ptr; ptr = mmap(NULL, SIZE, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_HUGETLB | MAP_ANONYMOUS, -1, 0); madvise(ptr, SIZE, MADV_DONTDUMP); madvise(ptr, SIZE, MADV_DODUMP); } Compile and strace: mmap(NULL, 2097152, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_HUGETLB, -1, 0) = 0x7ff7c9200000 madvise(0x7ff7c9200000, 2097152, MADV_DONTDUMP) = 0 madvise(0x7ff7c9200000, 2097152, MADV_DODUMP) = -1 EINVAL (Invalid argument) hugetlbfs pages have VM_DONTEXPAND in the VmFlags driver pages based on author testing with analysis from Florian Weimer[1]. The inclusion of VM_DONTEXPAND into the VM_SPECIAL defination was a consequence of the large useage of VM_DONTEXPAND in device drivers. A consequence of [2] is that VM_DONTEXPAND marked pages are unable to be marked DODUMP. A user could quite legitimately madvise(MADV_DONTDUMP) their hugetlbfs memory for a while and later request that madvise(MADV_DODUMP) on the same memory. We correct this omission by allowing madvice(MADV_DODUMP) on hugetlbfs pages. [1] https://stackoverflow.com/questions/52548260/madvisedodump-on-the-same-ptr-size-as-a-successful-madvisedontdump-fails-wit [2] commit 0103bd16fb90 ("mm: prepare VM_DONTDUMP for using in drivers") Link: http://lkml.kernel.org/r/20180930054629.29150-1-daniel@linux.ibm.com Link: https://lists.launchpad.net/maria-discuss/msg05245.html Fixes: 0103bd16fb90 ("mm: prepare VM_DONTDUMP for using in drivers") Reported-by: Kenneth Penza Signed-off-by: Daniel Black Reviewed-by: Mike Kravetz Cc: Konstantin Khlebnikov Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- mm/madvise.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/madvise.c b/mm/madvise.c index 751e97aa22106..576b753be4285 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -96,7 +96,7 @@ static long madvise_behavior(struct vm_area_struct *vma, new_flags |= VM_DONTDUMP; break; case MADV_DODUMP: - if (new_flags & VM_SPECIAL) { + if (!is_vm_hugetlb_page(vma) && new_flags & VM_SPECIAL) { error = -EINVAL; goto out; } -- GitLab From 10fdfea70d4667abf3724c31443e5d5922fecebd Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Fri, 5 Oct 2018 18:17:59 +0200 Subject: [PATCH 0756/2269] bpf: 32-bit RSH verification must truncate input before the ALU op commit b799207e1e1816b09e7a5920fbb2d5fcf6edd681 upstream. When I wrote commit 468f6eafa6c4 ("bpf: fix 32-bit ALU op verification"), I assumed that, in order to emulate 64-bit arithmetic with 32-bit logic, it is sufficient to just truncate the output to 32 bits; and so I just moved the register size coercion that used to be at the start of the function to the end of the function. That assumption is true for almost every op, but not for 32-bit right shifts, because those can propagate information towards the least significant bit. Fix it by always truncating inputs for 32-bit ops to 32 bits. Also get rid of the coerce_reg_to_size() after the ALU op, since that has no effect. Fixes: 468f6eafa6c4 ("bpf: fix 32-bit ALU op verification") Acked-by: Daniel Borkmann Signed-off-by: Jann Horn Signed-off-by: Daniel Borkmann Signed-off-by: Greg Kroah-Hartman --- kernel/bpf/verifier.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 450e2cd31ed60..a0ffc62e76774 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2076,6 +2076,15 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, u64 umin_val, umax_val; u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? 64 : 32; + if (insn_bitness == 32) { + /* Relevant for 32-bit RSH: Information can propagate towards + * LSB, so it isn't sufficient to only truncate the output to + * 32 bits. + */ + coerce_reg_to_size(dst_reg, 4); + coerce_reg_to_size(&src_reg, 4); + } + smin_val = src_reg.smin_value; smax_val = src_reg.smax_value; umin_val = src_reg.umin_value; @@ -2295,7 +2304,6 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, if (BPF_CLASS(insn->code) != BPF_ALU64) { /* 32-bit ALU ops are (32,32)->32 */ coerce_reg_to_size(dst_reg, 4); - coerce_reg_to_size(&src_reg, 4); } __reg_deduce_bounds(dst_reg); -- GitLab From b969656b46626a674232c0eadf92a394b89df07c Mon Sep 17 00:00:00 2001 From: Martin Willi Date: Wed, 22 Aug 2018 10:27:17 +0200 Subject: [PATCH 0757/2269] netfilter: xt_cluster: add dependency on conntrack module [ Upstream commit c1dc2912059901f97345d9e10c96b841215fdc0f ] The cluster match requires conntrack for matching packets. If the netns does not have conntrack hooks registered, the match does not work at all. Implicitly load the conntrack hook for the family, exactly as many other extensions do. This ensures that the match works even if the hooks have not been registered by other means. Signed-off-by: Martin Willi Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/netfilter/xt_cluster.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/net/netfilter/xt_cluster.c b/net/netfilter/xt_cluster.c index 57ef175dfbfaa..504d5f730f4e3 100644 --- a/net/netfilter/xt_cluster.c +++ b/net/netfilter/xt_cluster.c @@ -133,6 +133,7 @@ xt_cluster_mt(const struct sk_buff *skb, struct xt_action_param *par) static int xt_cluster_mt_checkentry(const struct xt_mtchk_param *par) { struct xt_cluster_match_info *info = par->matchinfo; + int ret; if (info->total_nodes > XT_CLUSTER_NODES_MAX) { pr_info("you have exceeded the maximum " @@ -145,7 +146,17 @@ static int xt_cluster_mt_checkentry(const struct xt_mtchk_param *par) "higher than the total number of nodes\n"); return -EDOM; } - return 0; + + ret = nf_ct_netns_get(par->net, par->family); + if (ret < 0) + pr_info_ratelimited("cannot load conntrack support for proto=%u\n", + par->family); + return ret; +} + +static void xt_cluster_mt_destroy(const struct xt_mtdtor_param *par) +{ + nf_ct_netns_put(par->net, par->family); } static struct xt_match xt_cluster_match __read_mostly = { @@ -154,6 +165,7 @@ static struct xt_match xt_cluster_match __read_mostly = { .match = xt_cluster_mt, .checkentry = xt_cluster_mt_checkentry, .matchsize = sizeof(struct xt_cluster_match_info), + .destroy = xt_cluster_mt_destroy, .me = THIS_MODULE, }; -- GitLab From b6cc0ba2cbf4b4e04f9531f256496a07e82b1177 Mon Sep 17 00:00:00 2001 From: Sean O'Brien Date: Mon, 27 Aug 2018 13:02:15 -0700 Subject: [PATCH 0758/2269] HID: add support for Apple Magic Keyboards [ Upstream commit ee345492437043a79db058a3d4f029ebcb52089a ] USB device Vendor 05ac (Apple) Device 026c (Magic Keyboard with Numeric Keypad) Bluetooth devices Vendor 004c (Apple) Device 0267 (Magic Keyboard) Device 026c (Magic Keyboard with Numeric Keypad) Support already exists for the Magic Keyboard over USB connection. Add support for the Magic Keyboard over Bluetooth connection, and for the Magic Keyboard with Numeric Keypad over Bluetooth and USB connection. Signed-off-by: Sean O'Brien Reviewed-by: Benjamin Tissoires Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/hid/hid-apple.c | 9 ++++++++- drivers/hid/hid-ids.h | 2 ++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/hid/hid-apple.c b/drivers/hid/hid-apple.c index 25b7bd56ae115..1cb41992aaa1f 100644 --- a/drivers/hid/hid-apple.c +++ b/drivers/hid/hid-apple.c @@ -335,7 +335,8 @@ static int apple_input_mapping(struct hid_device *hdev, struct hid_input *hi, struct hid_field *field, struct hid_usage *usage, unsigned long **bit, int *max) { - if (usage->hid == (HID_UP_CUSTOM | 0x0003)) { + if (usage->hid == (HID_UP_CUSTOM | 0x0003) || + usage->hid == (HID_UP_MSVENDOR | 0x0003)) { /* The fn key on Apple USB keyboards */ set_bit(EV_REP, hi->input->evbit); hid_map_usage_clear(hi, usage, bit, max, EV_KEY, KEY_FN); @@ -472,6 +473,12 @@ static const struct hid_device_id apple_devices[] = { .driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_ANSI), .driver_data = APPLE_HAS_FN }, + { HID_BLUETOOTH_DEVICE(BT_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_ANSI), + .driver_data = APPLE_HAS_FN }, + { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_NUMPAD_ANSI), + .driver_data = APPLE_HAS_FN }, + { HID_BLUETOOTH_DEVICE(BT_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_NUMPAD_ANSI), + .driver_data = APPLE_HAS_FN }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING_ANSI), .driver_data = APPLE_HAS_FN }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING_ISO), diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 81ee1d026648b..431e88960af26 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -85,6 +85,7 @@ #define USB_DEVICE_ID_ANTON_TOUCH_PAD 0x3101 #define USB_VENDOR_ID_APPLE 0x05ac +#define BT_VENDOR_ID_APPLE 0x004c #define USB_DEVICE_ID_APPLE_MIGHTYMOUSE 0x0304 #define USB_DEVICE_ID_APPLE_MAGICMOUSE 0x030d #define USB_DEVICE_ID_APPLE_MAGICTRACKPAD 0x030e @@ -154,6 +155,7 @@ #define USB_DEVICE_ID_APPLE_ALU_WIRELESS_2011_ISO 0x0256 #define USB_DEVICE_ID_APPLE_ALU_WIRELESS_2011_JIS 0x0257 #define USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_ANSI 0x0267 +#define USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_NUMPAD_ANSI 0x026c #define USB_DEVICE_ID_APPLE_WELLSPRING8_ANSI 0x0290 #define USB_DEVICE_ID_APPLE_WELLSPRING8_ISO 0x0291 #define USB_DEVICE_ID_APPLE_WELLSPRING8_JIS 0x0292 -- GitLab From 81c823c22355df0f4133637b6abd83621c7555a7 Mon Sep 17 00:00:00 2001 From: Anton Vasilyev Date: Tue, 7 Aug 2018 14:44:48 +0300 Subject: [PATCH 0759/2269] usb: gadget: fotg210-udc: Fix memory leak of fotg210->ep[i] [ Upstream commit c37bd52836296ecc9a0fc8060b819089aebdbcde ] There is no deallocation of fotg210->ep[i] elements, allocated at fotg210_udc_probe. The patch adds deallocation of fotg210->ep array elements and simplifies error path of fotg210_udc_probe(). Found by Linux Driver Verification project (linuxtesting.org). Signed-off-by: Anton Vasilyev Signed-off-by: Felipe Balbi Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/fotg210-udc.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/usb/gadget/udc/fotg210-udc.c b/drivers/usb/gadget/udc/fotg210-udc.c index 78d0204e3e20a..d17d7052605ba 100644 --- a/drivers/usb/gadget/udc/fotg210-udc.c +++ b/drivers/usb/gadget/udc/fotg210-udc.c @@ -1066,12 +1066,15 @@ static const struct usb_gadget_ops fotg210_gadget_ops = { static int fotg210_udc_remove(struct platform_device *pdev) { struct fotg210_udc *fotg210 = platform_get_drvdata(pdev); + int i; usb_del_gadget_udc(&fotg210->gadget); iounmap(fotg210->reg); free_irq(platform_get_irq(pdev, 0), fotg210); fotg210_ep_free_request(&fotg210->ep[0]->ep, fotg210->ep0_req); + for (i = 0; i < FOTG210_MAX_NUM_EP; i++) + kfree(fotg210->ep[i]); kfree(fotg210); return 0; @@ -1102,7 +1105,7 @@ static int fotg210_udc_probe(struct platform_device *pdev) /* initialize udc */ fotg210 = kzalloc(sizeof(struct fotg210_udc), GFP_KERNEL); if (fotg210 == NULL) - goto err_alloc; + goto err; for (i = 0; i < FOTG210_MAX_NUM_EP; i++) { _ep[i] = kzalloc(sizeof(struct fotg210_ep), GFP_KERNEL); @@ -1114,7 +1117,7 @@ static int fotg210_udc_probe(struct platform_device *pdev) fotg210->reg = ioremap(res->start, resource_size(res)); if (fotg210->reg == NULL) { pr_err("ioremap error.\n"); - goto err_map; + goto err_alloc; } spin_lock_init(&fotg210->lock); @@ -1162,7 +1165,7 @@ static int fotg210_udc_probe(struct platform_device *pdev) fotg210->ep0_req = fotg210_ep_alloc_request(&fotg210->ep[0]->ep, GFP_KERNEL); if (fotg210->ep0_req == NULL) - goto err_req; + goto err_map; fotg210_init(fotg210); @@ -1190,12 +1193,14 @@ err_req: fotg210_ep_free_request(&fotg210->ep[0]->ep, fotg210->ep0_req); err_map: - if (fotg210->reg) - iounmap(fotg210->reg); + iounmap(fotg210->reg); err_alloc: + for (i = 0; i < FOTG210_MAX_NUM_EP; i++) + kfree(fotg210->ep[i]); kfree(fotg210); +err: return ret; } -- GitLab From b6515e0f915b747593a863b60486e04d8af1e520 Mon Sep 17 00:00:00 2001 From: Harry Mallon Date: Tue, 28 Aug 2018 22:51:29 +0100 Subject: [PATCH 0760/2269] HID: hid-saitek: Add device ID for RAT 7 Contagion [ Upstream commit 43822c98f2ebb2cbd5e467ab72bbcdae7f0caa22 ] Signed-off-by: Harry Mallon Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/hid/hid-ids.h | 1 + drivers/hid/hid-saitek.c | 2 ++ 2 files changed, 3 insertions(+) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 431e88960af26..3fc8c0d675928 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -926,6 +926,7 @@ #define USB_DEVICE_ID_SAITEK_RUMBLEPAD 0xff17 #define USB_DEVICE_ID_SAITEK_PS1000 0x0621 #define USB_DEVICE_ID_SAITEK_RAT7_OLD 0x0ccb +#define USB_DEVICE_ID_SAITEK_RAT7_CONTAGION 0x0ccd #define USB_DEVICE_ID_SAITEK_RAT7 0x0cd7 #define USB_DEVICE_ID_SAITEK_RAT9 0x0cfa #define USB_DEVICE_ID_SAITEK_MMO7 0x0cd0 diff --git a/drivers/hid/hid-saitek.c b/drivers/hid/hid-saitek.c index 39e642686ff04..683861f324e3c 100644 --- a/drivers/hid/hid-saitek.c +++ b/drivers/hid/hid-saitek.c @@ -183,6 +183,8 @@ static const struct hid_device_id saitek_devices[] = { .driver_data = SAITEK_RELEASE_MODE_RAT7 }, { HID_USB_DEVICE(USB_VENDOR_ID_SAITEK, USB_DEVICE_ID_SAITEK_RAT7), .driver_data = SAITEK_RELEASE_MODE_RAT7 }, + { HID_USB_DEVICE(USB_VENDOR_ID_SAITEK, USB_DEVICE_ID_SAITEK_RAT7_CONTAGION), + .driver_data = SAITEK_RELEASE_MODE_RAT7 }, { HID_USB_DEVICE(USB_VENDOR_ID_SAITEK, USB_DEVICE_ID_SAITEK_RAT9), .driver_data = SAITEK_RELEASE_MODE_RAT7 }, { HID_USB_DEVICE(USB_VENDOR_ID_MADCATZ, USB_DEVICE_ID_MADCATZ_RAT9), -- GitLab From dd44c35cc16c4307409c180c35e29c64809289ac Mon Sep 17 00:00:00 2001 From: Vincent Pelletier Date: Mon, 27 Aug 2018 14:45:15 -0500 Subject: [PATCH 0761/2269] scsi: iscsi: target: Set conn->sess to NULL when iscsi_login_set_conn_values fails [ Upstream commit 7915919bb94e12460c58e27c708472e6f85f6699 ] Fixes a use-after-free reported by KASAN when later iscsi_target_login_sess_out gets called and it tries to access conn->sess->se_sess: Disabling lock debugging due to kernel taint iSCSI Login timeout on Network Portal [::]:3260 iSCSI Login negotiation failed. ================================================================== BUG: KASAN: use-after-free in iscsi_target_login_sess_out.cold.12+0x58/0xff [iscsi_target_mod] Read of size 8 at addr ffff880109d070c8 by task iscsi_np/980 CPU: 1 PID: 980 Comm: iscsi_np Tainted: G O 4.17.8kasan.sess.connops+ #4 Hardware name: To be filled by O.E.M. To be filled by O.E.M./Aptio CRB, BIOS 5.6.5 05/19/2014 Call Trace: dump_stack+0x71/0xac print_address_description+0x65/0x22e ? iscsi_target_login_sess_out.cold.12+0x58/0xff [iscsi_target_mod] kasan_report.cold.6+0x241/0x2fd iscsi_target_login_sess_out.cold.12+0x58/0xff [iscsi_target_mod] iscsi_target_login_thread+0x1086/0x1710 [iscsi_target_mod] ? __sched_text_start+0x8/0x8 ? iscsi_target_login_sess_out+0x250/0x250 [iscsi_target_mod] ? __kthread_parkme+0xcc/0x100 ? parse_args.cold.14+0xd3/0xd3 ? iscsi_target_login_sess_out+0x250/0x250 [iscsi_target_mod] kthread+0x1a0/0x1c0 ? kthread_bind+0x30/0x30 ret_from_fork+0x35/0x40 Allocated by task 980: kasan_kmalloc+0xbf/0xe0 kmem_cache_alloc_trace+0x112/0x210 iscsi_target_login_thread+0x816/0x1710 [iscsi_target_mod] kthread+0x1a0/0x1c0 ret_from_fork+0x35/0x40 Freed by task 980: __kasan_slab_free+0x125/0x170 kfree+0x90/0x1d0 iscsi_target_login_thread+0x1577/0x1710 [iscsi_target_mod] kthread+0x1a0/0x1c0 ret_from_fork+0x35/0x40 The buggy address belongs to the object at ffff880109d06f00 which belongs to the cache kmalloc-512 of size 512 The buggy address is located 456 bytes inside of 512-byte region [ffff880109d06f00, ffff880109d07100) The buggy address belongs to the page: page:ffffea0004274180 count:1 mapcount:0 mapping:0000000000000000 index:0x0 compound_mapcount: 0 flags: 0x17fffc000008100(slab|head) raw: 017fffc000008100 0000000000000000 0000000000000000 00000001000c000c raw: dead000000000100 dead000000000200 ffff88011b002e00 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff880109d06f80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff880109d07000: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb >ffff880109d07080: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff880109d07100: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffff880109d07180: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ================================================================== Signed-off-by: Vincent Pelletier [rebased against idr/ida changes and to handle ret review comments from Matthew] Signed-off-by: Mike Christie Cc: Matthew Wilcox Reviewed-by: Matthew Wilcox Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/target/iscsi/iscsi_target_login.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c index 98e27da34f3cb..27893d90c4efa 100644 --- a/drivers/target/iscsi/iscsi_target_login.c +++ b/drivers/target/iscsi/iscsi_target_login.c @@ -310,11 +310,9 @@ static int iscsi_login_zero_tsih_s1( return -ENOMEM; } - ret = iscsi_login_set_conn_values(sess, conn, pdu->cid); - if (unlikely(ret)) { - kfree(sess); - return ret; - } + if (iscsi_login_set_conn_values(sess, conn, pdu->cid)) + goto free_sess; + sess->init_task_tag = pdu->itt; memcpy(&sess->isid, pdu->isid, 6); sess->exp_cmd_sn = be32_to_cpu(pdu->cmdsn); -- GitLab From 8b98b7eeb45d92a3037dc4258b98b344c73f31b9 Mon Sep 17 00:00:00 2001 From: Nilesh Javali Date: Wed, 29 Aug 2018 23:55:53 -0700 Subject: [PATCH 0762/2269] scsi: qedi: Add the CRC size within iSCSI NVM image [ Upstream commit c77a2fa3ff8f73d1a485e67e6f81c64823739d59 ] The QED driver commit, 1ac4329a1cff ("qed: Add configuration information to register dump and debug data"), removes the CRC length validation causing nvm_get_image failure while loading qedi driver: [qed_mcp_get_nvm_image:2700(host_10-0)]Image [0] is too big - 00006008 bytes where only 00006004 are available [qedi_get_boot_info:2253]:10: Could not get NVM image. ret = -12 Hence add and adjust the CRC size to iSCSI NVM image to read boot info at qedi load time. Signed-off-by: Nilesh Javali Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/qedi/qedi.h | 7 ++++++- drivers/scsi/qedi/qedi_main.c | 28 +++++++++++++++------------- 2 files changed, 21 insertions(+), 14 deletions(-) diff --git a/drivers/scsi/qedi/qedi.h b/drivers/scsi/qedi/qedi.h index b8b22ce60ecc1..95141066c3fa8 100644 --- a/drivers/scsi/qedi/qedi.h +++ b/drivers/scsi/qedi/qedi.h @@ -77,6 +77,11 @@ enum qedi_nvm_tgts { QEDI_NVM_TGT_SEC, }; +struct qedi_nvm_iscsi_image { + struct nvm_iscsi_cfg iscsi_cfg; + u32 crc; +}; + struct qedi_uio_ctrl { /* meta data */ u32 uio_hsi_version; @@ -294,7 +299,7 @@ struct qedi_ctx { void *bdq_pbl_list; dma_addr_t bdq_pbl_list_dma; u8 bdq_pbl_list_num_entries; - struct nvm_iscsi_cfg *iscsi_cfg; + struct qedi_nvm_iscsi_image *iscsi_image; dma_addr_t nvm_buf_dma; void __iomem *bdq_primary_prod; void __iomem *bdq_secondary_prod; diff --git a/drivers/scsi/qedi/qedi_main.c b/drivers/scsi/qedi/qedi_main.c index e7daadc089fcb..24b945b555ba3 100644 --- a/drivers/scsi/qedi/qedi_main.c +++ b/drivers/scsi/qedi/qedi_main.c @@ -1147,23 +1147,26 @@ exit_setup_int: static void qedi_free_nvm_iscsi_cfg(struct qedi_ctx *qedi) { - if (qedi->iscsi_cfg) + if (qedi->iscsi_image) dma_free_coherent(&qedi->pdev->dev, - sizeof(struct nvm_iscsi_cfg), - qedi->iscsi_cfg, qedi->nvm_buf_dma); + sizeof(struct qedi_nvm_iscsi_image), + qedi->iscsi_image, qedi->nvm_buf_dma); } static int qedi_alloc_nvm_iscsi_cfg(struct qedi_ctx *qedi) { - qedi->iscsi_cfg = dma_zalloc_coherent(&qedi->pdev->dev, - sizeof(struct nvm_iscsi_cfg), - &qedi->nvm_buf_dma, GFP_KERNEL); - if (!qedi->iscsi_cfg) { + struct qedi_nvm_iscsi_image nvm_image; + + qedi->iscsi_image = dma_zalloc_coherent(&qedi->pdev->dev, + sizeof(nvm_image), + &qedi->nvm_buf_dma, + GFP_KERNEL); + if (!qedi->iscsi_image) { QEDI_ERR(&qedi->dbg_ctx, "Could not allocate NVM BUF.\n"); return -ENOMEM; } QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_INFO, - "NVM BUF addr=0x%p dma=0x%llx.\n", qedi->iscsi_cfg, + "NVM BUF addr=0x%p dma=0x%llx.\n", qedi->iscsi_image, qedi->nvm_buf_dma); return 0; @@ -1716,7 +1719,7 @@ qedi_get_nvram_block(struct qedi_ctx *qedi) struct nvm_iscsi_block *block; pf = qedi->dev_info.common.abs_pf_id; - block = &qedi->iscsi_cfg->block[0]; + block = &qedi->iscsi_image->iscsi_cfg.block[0]; for (i = 0; i < NUM_OF_ISCSI_PF_SUPPORTED; i++, block++) { flags = ((block->id) & NVM_ISCSI_CFG_BLK_CTRL_FLAG_MASK) >> NVM_ISCSI_CFG_BLK_CTRL_FLAG_OFFSET; @@ -2008,15 +2011,14 @@ static void qedi_boot_release(void *data) static int qedi_get_boot_info(struct qedi_ctx *qedi) { int ret = 1; - u16 len; - - len = sizeof(struct nvm_iscsi_cfg); + struct qedi_nvm_iscsi_image nvm_image; QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_INFO, "Get NVM iSCSI CFG image\n"); ret = qedi_ops->common->nvm_get_image(qedi->cdev, QED_NVM_IMAGE_ISCSI_CFG, - (char *)qedi->iscsi_cfg, len); + (char *)qedi->iscsi_image, + sizeof(nvm_image)); if (ret) QEDI_ERR(&qedi->dbg_ctx, "Could not get NVM image. ret = %d\n", ret); -- GitLab From 9d7bc329c123bbdc7db20a02e20e788a91c7c775 Mon Sep 17 00:00:00 2001 From: Hisao Tanabe Date: Sat, 25 Aug 2018 00:45:56 +0900 Subject: [PATCH 0763/2269] perf evsel: Fix potential null pointer dereference in perf_evsel__new_idx() [ Upstream commit fd8d2702791a970c751f8b526a17d8e725a05b46 ] If evsel is NULL, we should return NULL to avoid a NULL pointer dereference a bit later in the code. Signed-off-by: Hisao Tanabe Acked-by: Namhyung Kim Cc: Jiri Olsa Cc: Wang Nan Fixes: 03e0a7df3efd ("perf tools: Introduce bpf-output event") LPU-Reference: 20180824154556.23428-1-xtanabe@gmail.com Link: https://lkml.kernel.org/n/tip-e5plzjhx6595a5yjaf22jss3@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/evsel.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 2227ee92d8e21..44c2f62b47a31 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -259,8 +259,9 @@ struct perf_evsel *perf_evsel__new_idx(struct perf_event_attr *attr, int idx) { struct perf_evsel *evsel = zalloc(perf_evsel__object.size); - if (evsel != NULL) - perf_evsel__init(evsel, attr, idx); + if (!evsel) + return NULL; + perf_evsel__init(evsel, attr, idx); if (perf_evsel__is_bpf_output(evsel)) { evsel->attr.sample_type |= (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME | -- GitLab From 4095fd29fee754299221613d0646a93636c48df0 Mon Sep 17 00:00:00 2001 From: Chris Phlipot Date: Tue, 28 Aug 2018 23:19:54 -0700 Subject: [PATCH 0764/2269] perf util: Fix bad memory access in trace info. [ Upstream commit a72f64261359b7451f8478f2a2bf357b4e6c757f ] In the write to the output_fd in the error condition of record_saved_cmdline(), we are writing 8 bytes from a memory location on the stack that contains a primitive that is only 4 bytes in size. Change the primitive to 8 bytes in size to match the size of the write in order to avoid reading unknown memory from the stack. Signed-off-by: Chris Phlipot Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180829061954.18871-1-cphlipot0@gmail.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/trace-event-info.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/trace-event-info.c b/tools/perf/util/trace-event-info.c index e7d60d05596d2..8f3b7ef221f2f 100644 --- a/tools/perf/util/trace-event-info.c +++ b/tools/perf/util/trace-event-info.c @@ -379,7 +379,7 @@ out: static int record_saved_cmdline(void) { - unsigned int size; + unsigned long long size; char *path; struct stat st; int ret, err = 0; -- GitLab From c00f01c402115960bb9592ed842b42964cb3c069 Mon Sep 17 00:00:00 2001 From: Sandipan Das Date: Tue, 28 Aug 2018 14:38:48 +0530 Subject: [PATCH 0765/2269] perf probe powerpc: Ignore SyS symbols irrespective of endianness [ Upstream commit fa694160cca6dbba17c57dc7efec5f93feaf8795 ] This makes sure that the SyS symbols are ignored for any powerpc system, not just the big endian ones. Reported-by: Naveen N. Rao Signed-off-by: Sandipan Das Reviewed-by: Kamalesh Babulal Acked-by: Naveen N. Rao Cc: Jiri Olsa Cc: Ravi Bangoria Fixes: fb6d59423115 ("perf probe ppc: Use the right prefix when ignoring SyS symbols on ppc") Link: http://lkml.kernel.org/r/20180828090848.1914-1-sandipan@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/perf/arch/powerpc/util/sym-handling.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/perf/arch/powerpc/util/sym-handling.c b/tools/perf/arch/powerpc/util/sym-handling.c index 20e7d74d86cd1..10a44e946f773 100644 --- a/tools/perf/arch/powerpc/util/sym-handling.c +++ b/tools/perf/arch/powerpc/util/sym-handling.c @@ -22,15 +22,16 @@ bool elf__needs_adjust_symbols(GElf_Ehdr ehdr) #endif -#if !defined(_CALL_ELF) || _CALL_ELF != 2 int arch__choose_best_symbol(struct symbol *syma, struct symbol *symb __maybe_unused) { char *sym = syma->name; +#if !defined(_CALL_ELF) || _CALL_ELF != 2 /* Skip over any initial dot */ if (*sym == '.') sym++; +#endif /* Avoid "SyS" kernel syscall aliases */ if (strlen(sym) >= 3 && !strncmp(sym, "SyS", 3)) @@ -41,6 +42,7 @@ int arch__choose_best_symbol(struct symbol *syma, return SYMBOL_A; } +#if !defined(_CALL_ELF) || _CALL_ELF != 2 /* Allow matching against dot variants */ int arch__compare_symbol_names(const char *namea, const char *nameb) { -- GitLab From 1173678a4f4ad4f7fd5f2a88f522ef98418c9e87 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Sun, 26 Aug 2018 02:35:44 +0900 Subject: [PATCH 0766/2269] netfilter: nf_tables: release chain in flushing set [ Upstream commit 7acfda539c0b9636a58bfee56abfb3aeee806d96 ] When element of verdict map is deleted, the delete routine should release chain. however, flush element of verdict map routine doesn't release chain. test commands: %nft add table ip filter %nft add chain ip filter c1 %nft add map ip filter map1 { type ipv4_addr : verdict \; } %nft add element ip filter map1 { 1 : jump c1 } %nft flush map ip filter map1 %nft flush ruleset splat looks like: [ 4895.170899] kernel BUG at net/netfilter/nf_tables_api.c:1415! [ 4895.178114] invalid opcode: 0000 [#1] SMP DEBUG_PAGEALLOC KASAN PTI [ 4895.178880] CPU: 0 PID: 1670 Comm: nft Not tainted 4.18.0+ #55 [ 4895.178880] RIP: 0010:nf_tables_chain_destroy.isra.28+0x39/0x220 [nf_tables] [ 4895.178880] Code: fc ff df 53 48 89 fb 48 83 c7 50 48 89 fa 48 c1 ea 03 0f b6 04 02 84 c0 74 09 3c 03 7f 05 e8 3e 4c 25 e1 8b 43 50 85 c0 74 02 <0f> 0b 48 89 da 48 b8 00 00 00 00 00 fc ff df 48 c1 ea 03 80 3c 02 [ 4895.228342] RSP: 0018:ffff88010b98f4c0 EFLAGS: 00010202 [ 4895.234841] RAX: 0000000000000001 RBX: ffff8801131c6968 RCX: ffff8801146585b0 [ 4895.234841] RDX: 1ffff10022638d37 RSI: ffff8801191a9348 RDI: ffff8801131c69b8 [ 4895.234841] RBP: ffff8801146585a8 R08: 1ffff1002323526a R09: 0000000000000000 [ 4895.234841] R10: 0000000000000000 R11: 0000000000000000 R12: dead000000000200 [ 4895.234841] R13: dead000000000100 R14: ffffffffa3638af8 R15: dffffc0000000000 [ 4895.234841] FS: 00007f6d188e6700(0000) GS:ffff88011b600000(0000) knlGS:0000000000000000 [ 4895.234841] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 4895.234841] CR2: 00007ffe72b8df88 CR3: 000000010e2d4000 CR4: 00000000001006f0 [ 4895.234841] Call Trace: [ 4895.234841] nf_tables_commit+0x2704/0x2c70 [nf_tables] [ 4895.234841] ? nfnetlink_rcv_batch+0xa4f/0x11b0 [nfnetlink] [ 4895.234841] ? nf_tables_setelem_notify.constprop.48+0x1a0/0x1a0 [nf_tables] [ 4895.323824] ? __lock_is_held+0x9d/0x130 [ 4895.323824] ? kasan_unpoison_shadow+0x30/0x40 [ 4895.333299] ? kasan_kmalloc+0xa9/0xc0 [ 4895.333299] ? kmem_cache_alloc_trace+0x2c0/0x310 [ 4895.333299] ? nfnetlink_rcv_batch+0xa4f/0x11b0 [nfnetlink] [ 4895.333299] nfnetlink_rcv_batch+0xdb9/0x11b0 [nfnetlink] [ 4895.333299] ? debug_show_all_locks+0x290/0x290 [ 4895.333299] ? nfnetlink_net_init+0x150/0x150 [nfnetlink] [ 4895.333299] ? sched_clock_cpu+0xe5/0x170 [ 4895.333299] ? sched_clock_local+0xff/0x130 [ 4895.333299] ? sched_clock_cpu+0xe5/0x170 [ 4895.333299] ? find_held_lock+0x39/0x1b0 [ 4895.333299] ? sched_clock_local+0xff/0x130 [ 4895.333299] ? memset+0x1f/0x40 [ 4895.333299] ? nla_parse+0x33/0x260 [ 4895.333299] ? ns_capable_common+0x6e/0x110 [ 4895.333299] nfnetlink_rcv+0x2c0/0x310 [nfnetlink] [ ... ] Fixes: 591054469b3e ("netfilter: nf_tables: revisit chain/object refcounting from elements") Signed-off-by: Taehee Yoo Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nf_tables_api.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 742aacb317e50..3ae365f92bff5 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -4250,6 +4250,7 @@ static int nft_flush_set(const struct nft_ctx *ctx, } set->ndeact++; + nft_set_elem_deactivate(ctx->net, set, elem); nft_trans_elem_set(trans) = set; nft_trans_elem(trans) = *elem; list_add_tail(&trans->list, &ctx->net->nft.commit_list); -- GitLab From 60ea8815d6e82dd7b35944f0b563c70227304118 Mon Sep 17 00:00:00 2001 From: Matt Ranostay Date: Sat, 25 Aug 2018 02:00:48 -0700 Subject: [PATCH 0767/2269] Revert "iio: temperature: maxim_thermocouple: add MAX31856 part" [ Upstream commit 65099ea85e885c3ea1272eca8774b771419d8ce8 ] This reverts commit 535fba29b3e1afef4ba201b3c69a6992583ec0bd. Seems the submitter (er me, hang head in shame) didn't look at the datasheet enough to see that the registers are quite different. This needs to be reverted because a) would never work b) to open it be added to a Maxim RTDs (Resistance Temperature Detectors) under development by author Signed-off-by: Matt Ranostay Signed-off-by: Jonathan Cameron Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/iio/temperature/maxim_thermocouple.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/iio/temperature/maxim_thermocouple.c b/drivers/iio/temperature/maxim_thermocouple.c index d70e2e53d6a78..557214202eff3 100644 --- a/drivers/iio/temperature/maxim_thermocouple.c +++ b/drivers/iio/temperature/maxim_thermocouple.c @@ -267,7 +267,6 @@ static int maxim_thermocouple_remove(struct spi_device *spi) static const struct spi_device_id maxim_thermocouple_id[] = { {"max6675", MAX6675}, {"max31855", MAX31855}, - {"max31856", MAX31855}, {}, }; MODULE_DEVICE_TABLE(spi, maxim_thermocouple_id); -- GitLab From d4da7122031726329bd444ad51d9944c821c9af4 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Mon, 3 Sep 2018 18:54:14 +0200 Subject: [PATCH 0768/2269] RDMA/ucma: check fd type in ucma_migrate_id() [ Upstream commit 0d23ba6034b9cf48b8918404367506da3e4b3ee5 ] The current code grabs the private_data of whatever file descriptor userspace has supplied and implicitly casts it to a `struct ucma_file *`, potentially causing a type confusion. This is probably fine in practice because the pointer is only used for comparisons, it is never actually dereferenced; and even in the comparisons, it is unlikely that a file from another filesystem would have a ->private_data pointer that happens to also be valid in this context. But ->private_data is not always guaranteed to be a valid pointer to an object owned by the file's filesystem; for example, some filesystems just cram numbers in there. Check the type of the supplied file descriptor to be safe, analogous to how other places in the kernel do it. Fixes: 88314e4dda1e ("RDMA/cma: add support for rdma_migrate_id()") Signed-off-by: Jann Horn Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/core/ucma.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index a22b992cde38c..16423d7ab599a 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -124,6 +124,8 @@ static DEFINE_MUTEX(mut); static DEFINE_IDR(ctx_idr); static DEFINE_IDR(multicast_idr); +static const struct file_operations ucma_fops; + static inline struct ucma_context *_ucma_find_context(int id, struct ucma_file *file) { @@ -1564,6 +1566,10 @@ static ssize_t ucma_migrate_id(struct ucma_file *new_file, f = fdget(cmd.fd); if (!f.file) return -ENOENT; + if (f.file->f_op != &ucma_fops) { + ret = -EINVAL; + goto file_put; + } /* Validate current fd and prevent destruction of id. */ ctx = ucma_get_ctx(f.file->private_data, cmd.id); -- GitLab From 2c423318f07ccde11162e40f2bd4c47bbe9a00cf Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sat, 18 Aug 2018 10:12:08 +0200 Subject: [PATCH 0769/2269] HID: sensor-hub: Restore fixup for Lenovo ThinkPad Helix 2 sensor hub report [ Upstream commit ade573eb1e03d1ee5abcb3359b1259469ab6e8ed ] Commit b0f847e16c1e ("HID: hid-sensor-hub: Force logical minimum to 1 for power and report state") not only replaced the descriptor fixup done for devices with the HID_SENSOR_HUB_ENUM_QUIRK with a generic fix, but also accidentally removed the unrelated descriptor fixup for the Lenovo ThinkPad Helix 2 sensor hub. This commit restores this fixup. Restoring this fixup not only fixes the Lenovo ThinkPad Helix 2's sensors, but also the Lenovo ThinkPad 8's sensors. Fixes: b0f847e16c1e ("HID: hid-sensor-hub: Force logical minimum ...") Cc: Srinivas Pandruvada Cc: Fernando D S Lima Acked-by: Srinivas Pandruvada Signed-off-by: Hans de Goede Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/hid/hid-sensor-hub.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/drivers/hid/hid-sensor-hub.c b/drivers/hid/hid-sensor-hub.c index 25363fc571bcc..faba542d1b074 100644 --- a/drivers/hid/hid-sensor-hub.c +++ b/drivers/hid/hid-sensor-hub.c @@ -579,6 +579,28 @@ void sensor_hub_device_close(struct hid_sensor_hub_device *hsdev) } EXPORT_SYMBOL_GPL(sensor_hub_device_close); +static __u8 *sensor_hub_report_fixup(struct hid_device *hdev, __u8 *rdesc, + unsigned int *rsize) +{ + /* + * Checks if the report descriptor of Thinkpad Helix 2 has a logical + * minimum for magnetic flux axis greater than the maximum. + */ + if (hdev->product == USB_DEVICE_ID_TEXAS_INSTRUMENTS_LENOVO_YOGA && + *rsize == 2558 && rdesc[913] == 0x17 && rdesc[914] == 0x40 && + rdesc[915] == 0x81 && rdesc[916] == 0x08 && + rdesc[917] == 0x00 && rdesc[918] == 0x27 && + rdesc[921] == 0x07 && rdesc[922] == 0x00) { + /* Sets negative logical minimum for mag x, y and z */ + rdesc[914] = rdesc[935] = rdesc[956] = 0xc0; + rdesc[915] = rdesc[936] = rdesc[957] = 0x7e; + rdesc[916] = rdesc[937] = rdesc[958] = 0xf7; + rdesc[917] = rdesc[938] = rdesc[959] = 0xff; + } + + return rdesc; +} + static int sensor_hub_probe(struct hid_device *hdev, const struct hid_device_id *id) { @@ -742,6 +764,7 @@ static struct hid_driver sensor_hub_driver = { .probe = sensor_hub_probe, .remove = sensor_hub_remove, .raw_event = sensor_hub_raw_event, + .report_fixup = sensor_hub_report_fixup, #ifdef CONFIG_PM .suspend = sensor_hub_suspend, .resume = sensor_hub_resume, -- GitLab From a90a52c51ad46b8c1407b1113aabd8cf6e7c197d Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Wed, 15 Aug 2018 21:45:37 +0100 Subject: [PATCH 0770/2269] USB: yurex: Check for truncation in yurex_read() [ Upstream commit 14427b86837a4baf1c121934c6599bdb67dfa9fc ] snprintf() always returns the full length of the string it could have printed, even if it was truncated because the buffer was too small. So in case the counter value is truncated, we will over-read from in_buffer and over-write to the caller's buffer. I don't think it's actually possible for this to happen, but in case truncation occurs, WARN and return -EIO. Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/yurex.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/misc/yurex.c b/drivers/usb/misc/yurex.c index 0673f286afbd4..4f48f5730e126 100644 --- a/drivers/usb/misc/yurex.c +++ b/drivers/usb/misc/yurex.c @@ -417,6 +417,9 @@ static ssize_t yurex_read(struct file *file, char __user *buffer, size_t count, spin_unlock_irqrestore(&dev->lock, flags); mutex_unlock(&dev->io_mutex); + if (WARN_ON_ONCE(len >= sizeof(in_buffer))) + return -EIO; + return simple_read_from_buffer(buffer, count, ppos, in_buffer, len); } -- GitLab From f36f3ebdf1e1a211c3be521e187d69a94b30db77 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Mon, 3 Sep 2018 03:47:07 -0700 Subject: [PATCH 0771/2269] nvmet-rdma: fix possible bogus dereference under heavy load [ Upstream commit 8407879c4e0d7731f6e7e905893cecf61a7762c7 ] Currently we always repost the recv buffer before we send a response capsule back to the host. Since ordering is not guaranteed for send and recv completions, it is posible that we will receive a new request from the host before we got a send completion for the response capsule. Today, we pre-allocate 2x rsps the length of the queue, but in reality, under heavy load there is nothing that is really preventing the gap to expand until we exhaust all our rsps. To fix this, if we don't have any pre-allocated rsps left, we dynamically allocate a rsp and make sure to free it when we are done. If under memory pressure we fail to allocate a rsp, we silently drop the command and wait for the host to retry. Reported-by: Steve Wise Tested-by: Steve Wise Signed-off-by: Sagi Grimberg [hch: dropped a superflous assignment] Signed-off-by: Christoph Hellwig Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/nvme/target/rdma.c | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index 3333d417b248b..a70b3d24936d3 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -65,6 +65,7 @@ struct nvmet_rdma_rsp { struct nvmet_req req; + bool allocated; u8 n_rdma; u32 flags; u32 invalidate_rkey; @@ -167,11 +168,19 @@ nvmet_rdma_get_rsp(struct nvmet_rdma_queue *queue) unsigned long flags; spin_lock_irqsave(&queue->rsps_lock, flags); - rsp = list_first_entry(&queue->free_rsps, + rsp = list_first_entry_or_null(&queue->free_rsps, struct nvmet_rdma_rsp, free_list); - list_del(&rsp->free_list); + if (likely(rsp)) + list_del(&rsp->free_list); spin_unlock_irqrestore(&queue->rsps_lock, flags); + if (unlikely(!rsp)) { + rsp = kmalloc(sizeof(*rsp), GFP_KERNEL); + if (unlikely(!rsp)) + return NULL; + rsp->allocated = true; + } + return rsp; } @@ -180,6 +189,11 @@ nvmet_rdma_put_rsp(struct nvmet_rdma_rsp *rsp) { unsigned long flags; + if (rsp->allocated) { + kfree(rsp); + return; + } + spin_lock_irqsave(&rsp->queue->rsps_lock, flags); list_add_tail(&rsp->free_list, &rsp->queue->free_rsps); spin_unlock_irqrestore(&rsp->queue->rsps_lock, flags); @@ -756,6 +770,15 @@ static void nvmet_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc) cmd->queue = queue; rsp = nvmet_rdma_get_rsp(queue); + if (unlikely(!rsp)) { + /* + * we get here only under memory pressure, + * silently drop and have the host retry + * as we can't even fail it. + */ + nvmet_rdma_post_recv(queue->dev, cmd); + return; + } rsp->queue = queue; rsp->cmd = cmd; rsp->flags = 0; -- GitLab From 34d54566ae4ab95d9d4c199f0f9b47c8a0afd00d Mon Sep 17 00:00:00 2001 From: Daniel Jurgens Date: Mon, 27 Aug 2018 09:09:46 -0500 Subject: [PATCH 0772/2269] net/mlx5: Consider PCI domain in search for next dev [ Upstream commit df7ddb2396cd162e64aaff9401be05e31e438961 ] The PCI BDF is not unique. PCI domain must also be considered when searching for the next physical device during lag setup. Example below: mlx5_core 0000:01:00.0: MLX5E: StrdRq(1) RqSz(8) StrdSz(128) RxCqeCmprss(0) mlx5_core 0000:01:00.1: MLX5E: StrdRq(1) RqSz(8) StrdSz(128) RxCqeCmprss(0) mlx5_core 0001:01:00.0: MLX5E: StrdRq(1) RqSz(8) StrdSz(128) RxCqeCmprss(0) mlx5_core 0001:01:00.1: MLX5E: StrdRq(1) RqSz(8) StrdSz(128) RxCqeCmprss(0) Signed-off-by: Daniel Jurgens Reviewed-by: Aviv Heller Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx5/core/dev.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c index 9f9c9ff107354..07fda3984e102 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c @@ -388,16 +388,17 @@ void mlx5_remove_dev_by_protocol(struct mlx5_core_dev *dev, int protocol) } } -static u16 mlx5_gen_pci_id(struct mlx5_core_dev *dev) +static u32 mlx5_gen_pci_id(struct mlx5_core_dev *dev) { - return (u16)((dev->pdev->bus->number << 8) | + return (u32)((pci_domain_nr(dev->pdev->bus) << 16) | + (dev->pdev->bus->number << 8) | PCI_SLOT(dev->pdev->devfn)); } /* Must be called with intf_mutex held */ struct mlx5_core_dev *mlx5_get_next_phys_dev(struct mlx5_core_dev *dev) { - u16 pci_id = mlx5_gen_pci_id(dev); + u32 pci_id = mlx5_gen_pci_id(dev); struct mlx5_core_dev *res = NULL; struct mlx5_core_dev *tmp_dev; struct mlx5_priv *priv; -- GitLab From 1a255bf1e749d7f4556272365c35578e210e4bed Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Tue, 4 Sep 2018 15:56:57 +1000 Subject: [PATCH 0773/2269] drm/nouveau/TBDdevinit: don't fail when PMU/PRE_OS is missing from VBIOS [ Upstream commit 0a6986c6595e9afd20ff7280dab36431c1e467f8 ] This Falcon application doesn't appear to be present on some newer systems, so let's not fail init if we can't find it. TBD: is there a way to determine whether it *should* be there? Signed-off-by: Ben Skeggs Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gm200.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gm200.c b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gm200.c index 1730371933df7..be0dd6074b57d 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gm200.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gm200.c @@ -158,7 +158,8 @@ gm200_devinit_post(struct nvkm_devinit *base, bool post) } /* load and execute some other ucode image (bios therm?) */ - return pmu_load(init, 0x01, post, NULL, NULL); + pmu_load(init, 0x01, post, NULL, NULL); + return 0; } static const struct nvkm_devinit_func -- GitLab From 112d65a51f2bd14b0e090201def35a3a34643217 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Tue, 4 Sep 2018 15:57:09 +1000 Subject: [PATCH 0774/2269] drm/nouveau/disp: fix DP disable race [ Upstream commit e04cfdc9b7398c60dbc70212415ea63b6c6a93ae ] If a HPD pulse signalling the need to retrain the link occurs between the KMS driver releasing the output and the supervisor interrupt that finishes the teardown, it was possible get a NULL-ptr deref. Avoid this by marking the link as inactive earlier. Signed-off-by: Ben Skeggs Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.c | 17 ++++++++++++----- drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.c | 6 +++--- drivers/gpu/drm/nouveau/nvkm/engine/disp/outp.c | 2 ++ drivers/gpu/drm/nouveau/nvkm/engine/disp/outp.h | 3 ++- 4 files changed, 19 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.c index 7c5bed29ffef1..6160a6158cf26 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.c @@ -412,14 +412,10 @@ nvkm_dp_train(struct nvkm_dp *dp, u32 dataKBps) } static void -nvkm_dp_release(struct nvkm_outp *outp, struct nvkm_ior *ior) +nvkm_dp_disable(struct nvkm_outp *outp, struct nvkm_ior *ior) { struct nvkm_dp *dp = nvkm_dp(outp); - /* Prevent link from being retrained if sink sends an IRQ. */ - atomic_set(&dp->lt.done, 0); - ior->dp.nr = 0; - /* Execute DisableLT script from DP Info Table. */ nvbios_init(&ior->disp->engine.subdev, dp->info.script[4], init.outp = &dp->outp.info; @@ -428,6 +424,16 @@ nvkm_dp_release(struct nvkm_outp *outp, struct nvkm_ior *ior) ); } +static void +nvkm_dp_release(struct nvkm_outp *outp) +{ + struct nvkm_dp *dp = nvkm_dp(outp); + + /* Prevent link from being retrained if sink sends an IRQ. */ + atomic_set(&dp->lt.done, 0); + dp->outp.ior->dp.nr = 0; +} + static int nvkm_dp_acquire(struct nvkm_outp *outp) { @@ -576,6 +582,7 @@ nvkm_dp_func = { .fini = nvkm_dp_fini, .acquire = nvkm_dp_acquire, .release = nvkm_dp_release, + .disable = nvkm_dp_disable, }; static int diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.c index 0c570dbd3021b..bc18a96bc61ae 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.c @@ -436,11 +436,11 @@ nv50_disp_super_2_0(struct nv50_disp *disp, struct nvkm_head *head) nv50_disp_super_ied_off(head, ior, 2); /* If we're shutting down the OR's only active head, execute - * the output path's release function. + * the output path's disable function. */ if (ior->arm.head == (1 << head->id)) { - if ((outp = ior->arm.outp) && outp->func->release) - outp->func->release(outp, ior); + if ((outp = ior->arm.outp) && outp->func->disable) + outp->func->disable(outp, ior); } } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/outp.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/outp.c index be9e7f8c3b239..bbba77ff93857 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/outp.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/outp.c @@ -93,6 +93,8 @@ nvkm_outp_release(struct nvkm_outp *outp, u8 user) if (ior) { outp->acquired &= ~user; if (!outp->acquired) { + if (outp->func->release && outp->ior) + outp->func->release(outp); outp->ior->asy.outp = NULL; outp->ior = NULL; } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/outp.h b/drivers/gpu/drm/nouveau/nvkm/engine/disp/outp.h index ea84d7d5741ad..97196f8029243 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/outp.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/outp.h @@ -41,7 +41,8 @@ struct nvkm_outp_func { void (*init)(struct nvkm_outp *); void (*fini)(struct nvkm_outp *); int (*acquire)(struct nvkm_outp *); - void (*release)(struct nvkm_outp *, struct nvkm_ior *); + void (*release)(struct nvkm_outp *); + void (*disable)(struct nvkm_outp *, struct nvkm_ior *); }; #define OUTP_MSG(o,l,f,a...) do { \ -- GitLab From 3941dbe190ba7ba704a7c932aef20b87e5ac199c Mon Sep 17 00:00:00 2001 From: Heinz Mauelshagen Date: Thu, 6 Sep 2018 18:33:40 +0200 Subject: [PATCH 0775/2269] dm raid: fix rebuild of specific devices by updating superblock [ Upstream commit c44a5ee803d2b7ed8c2e6ce24a5c4dd60778886e ] Update superblock when particular devices are requested via rebuild (e.g. lvconvert --replace ...) to avoid spurious failure with the "New device injected into existing raid set without 'delta_disks' or 'rebuild' parameter specified" error message. Signed-off-by: Heinz Mauelshagen Signed-off-by: Mike Snitzer Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-raid.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 38a2ac24428e5..151211b4cb1ba 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -3061,6 +3061,11 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv) set_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags); rs_set_new(rs); } else if (rs_is_recovering(rs)) { + /* Rebuild particular devices */ + if (test_bit(__CTR_FLAG_REBUILD, &rs->ctr_flags)) { + set_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags); + rs_setup_recovery(rs, MaxSector); + } /* A recovering raid set may be resized */ ; /* skip setup rs */ } else if (rs_is_reshaping(rs)) { -- GitLab From a5bdc726e5ffa32d7fbf1a2b646c4e59ac075c09 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Mon, 3 Sep 2018 13:15:58 +1000 Subject: [PATCH 0776/2269] fs/cifs: suppress a string overflow warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit bcfb84a996f6fa90b5e6e2954b2accb7a4711097 ] A powerpc build of cifs with gcc v8.2.0 produces this warning: fs/cifs/cifssmb.c: In function ‘CIFSSMBNegotiate’: fs/cifs/cifssmb.c:605:3: warning: ‘strncpy’ writing 16 bytes into a region of size 1 overflows the destination [-Wstringop-overflow=] strncpy(pSMB->DialectsArray+count, protocols[i].name, 16); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Since we are already doing a strlen() on the source, change the strncpy to a memcpy(). Signed-off-by: Stephen Rothwell Signed-off-by: Steve French Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/cifs/cifssmb.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index b5a4365834693..2e936f94f1024 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -589,10 +589,15 @@ CIFSSMBNegotiate(const unsigned int xid, struct cifs_ses *ses) } count = 0; + /* + * We know that all the name entries in the protocols array + * are short (< 16 bytes anyway) and are NUL terminated. + */ for (i = 0; i < CIFS_NUM_PROT; i++) { - strncpy(pSMB->DialectsArray+count, protocols[i].name, 16); - count += strlen(protocols[i].name) + 1; - /* null at end of source and target buffers anyway */ + size_t len = strlen(protocols[i].name) + 1; + + memcpy(pSMB->DialectsArray+count, protocols[i].name, len); + count += len; } inc_rfc1001_len(pSMB, count); pSMB->ByteCount = cpu_to_le16(count); -- GitLab From 3e2cc5bd61fe7f105657611c3105c0f2bddc120c Mon Sep 17 00:00:00 2001 From: Netanel Belgazal Date: Sun, 9 Sep 2018 08:15:21 +0000 Subject: [PATCH 0777/2269] net: ena: fix driver when PAGE_SIZE == 64kB [ Upstream commit ef5b0771d247379c90c8bf1332ff32f7f74bff7f ] The buffer length field in the ena rx descriptor is 16 bit, and the current driver passes a full page in each ena rx descriptor. When PAGE_SIZE equals 64kB or more, the buffer length field becomes zero. To solve this issue, limit the ena Rx descriptor to use 16kB even when allocating 64kB kernel pages. This change would not impact ena device functionality, as 16kB is still larger than maximum MTU. Signed-off-by: Netanel Belgazal Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/amazon/ena/ena_netdev.c | 10 +++++----- drivers/net/ethernet/amazon/ena/ena_netdev.h | 11 +++++++++++ 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 67df5053dc30a..60b3ee29d82c8 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -456,7 +456,7 @@ static inline int ena_alloc_rx_page(struct ena_ring *rx_ring, return -ENOMEM; } - dma = dma_map_page(rx_ring->dev, page, 0, PAGE_SIZE, + dma = dma_map_page(rx_ring->dev, page, 0, ENA_PAGE_SIZE, DMA_FROM_DEVICE); if (unlikely(dma_mapping_error(rx_ring->dev, dma))) { u64_stats_update_begin(&rx_ring->syncp); @@ -473,7 +473,7 @@ static inline int ena_alloc_rx_page(struct ena_ring *rx_ring, rx_info->page_offset = 0; ena_buf = &rx_info->ena_buf; ena_buf->paddr = dma; - ena_buf->len = PAGE_SIZE; + ena_buf->len = ENA_PAGE_SIZE; return 0; } @@ -490,7 +490,7 @@ static void ena_free_rx_page(struct ena_ring *rx_ring, return; } - dma_unmap_page(rx_ring->dev, ena_buf->paddr, PAGE_SIZE, + dma_unmap_page(rx_ring->dev, ena_buf->paddr, ENA_PAGE_SIZE, DMA_FROM_DEVICE); __free_page(page); @@ -910,10 +910,10 @@ static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring, do { dma_unmap_page(rx_ring->dev, dma_unmap_addr(&rx_info->ena_buf, paddr), - PAGE_SIZE, DMA_FROM_DEVICE); + ENA_PAGE_SIZE, DMA_FROM_DEVICE); skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_info->page, - rx_info->page_offset, len, PAGE_SIZE); + rx_info->page_offset, len, ENA_PAGE_SIZE); netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev, "rx skb updated. len %d. data_len %d\n", diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.h b/drivers/net/ethernet/amazon/ena/ena_netdev.h index 29bb5704260bc..3404376c28ca3 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.h +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.h @@ -350,4 +350,15 @@ void ena_dump_stats_to_buf(struct ena_adapter *adapter, u8 *buf); int ena_get_sset_count(struct net_device *netdev, int sset); +/* The ENA buffer length fields is 16 bit long. So when PAGE_SIZE == 64kB the + * driver passas 0. + * Since the max packet size the ENA handles is ~9kB limit the buffer length to + * 16kB. + */ +#if PAGE_SIZE > SZ_16K +#define ENA_PAGE_SIZE SZ_16K +#else +#define ENA_PAGE_SIZE PAGE_SIZE +#endif + #endif /* !(ENA_H) */ -- GitLab From 36918e899e3cd1f920b45cbb3d8b4c90f9e952a1 Mon Sep 17 00:00:00 2001 From: Netanel Belgazal Date: Sun, 9 Sep 2018 08:15:25 +0000 Subject: [PATCH 0778/2269] net: ena: fix missing calls to READ_ONCE [ Upstream commit 28abf4e9c9201eda5c4d29ea609d07e877b464b8 ] Add READ_ONCE calls where necessary (for example when iterating over a memory field that gets updated by the hardware). Signed-off-by: Netanel Belgazal Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/amazon/ena/ena_com.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c index e3b7a71fcad94..1a4ffc5d3da4e 100644 --- a/drivers/net/ethernet/amazon/ena/ena_com.c +++ b/drivers/net/ethernet/amazon/ena/ena_com.c @@ -457,7 +457,7 @@ static void ena_com_handle_admin_completion(struct ena_com_admin_queue *admin_qu cqe = &admin_queue->cq.entries[head_masked]; /* Go over all the completions */ - while ((cqe->acq_common_descriptor.flags & + while ((READ_ONCE(cqe->acq_common_descriptor.flags) & ENA_ADMIN_ACQ_COMMON_DESC_PHASE_MASK) == phase) { /* Do not read the rest of the completion entry before the * phase bit was validated @@ -633,7 +633,7 @@ static u32 ena_com_reg_bar_read32(struct ena_com_dev *ena_dev, u16 offset) writel(mmio_read_reg, ena_dev->reg_bar + ENA_REGS_MMIO_REG_READ_OFF); for (i = 0; i < timeout; i++) { - if (read_resp->req_id == mmio_read->seq_num) + if (READ_ONCE(read_resp->req_id) == mmio_read->seq_num) break; udelay(1); @@ -1790,8 +1790,8 @@ void ena_com_aenq_intr_handler(struct ena_com_dev *dev, void *data) aenq_common = &aenq_e->aenq_common_desc; /* Go over all the events */ - while ((aenq_common->flags & ENA_ADMIN_AENQ_COMMON_DESC_PHASE_MASK) == - phase) { + while ((READ_ONCE(aenq_common->flags) & + ENA_ADMIN_AENQ_COMMON_DESC_PHASE_MASK) == phase) { pr_debug("AENQ! Group[%x] Syndrom[%x] timestamp: [%llus]\n", aenq_common->group, aenq_common->syndrom, (u64)aenq_common->timestamp_low + -- GitLab From 1e9054e75d22d9c3087d100397cc4eae9be53bf7 Mon Sep 17 00:00:00 2001 From: Jacek Tomaka Date: Thu, 2 Aug 2018 09:38:30 +0800 Subject: [PATCH 0779/2269] perf/x86/intel: Add support/quirk for the MISPREDICT bit on Knights Landing CPUs [ Upstream commit 16160c1946b702dcfa95ef63389a56deb2f1c7cb ] Problem: perf did not show branch predicted/mispredicted bit in brstack. Output of perf -F brstack for profile collected Before: 0x4fdbcd/0x4fdc03/-/-/-/0 0x45f4c1/0x4fdba0/-/-/-/0 0x45f544/0x45f4bb/-/-/-/0 0x45f555/0x45f53c/-/-/-/0 0x7f66901cc24b/0x45f555/-/-/-/0 0x7f66901cc22e/0x7f66901cc23d/-/-/-/0 0x7f66901cc1ff/0x7f66901cc20f/-/-/-/0 0x7f66901cc1e8/0x7f66901cc1fc/-/-/-/0 After: 0x4fdbcd/0x4fdc03/P/-/-/0 0x45f4c1/0x4fdba0/P/-/-/0 0x45f544/0x45f4bb/P/-/-/0 0x45f555/0x45f53c/P/-/-/0 0x7f66901cc24b/0x45f555/P/-/-/0 0x7f66901cc22e/0x7f66901cc23d/P/-/-/0 0x7f66901cc1ff/0x7f66901cc20f/P/-/-/0 0x7f66901cc1e8/0x7f66901cc1fc/P/-/-/0 Cause: As mentioned in Software Development Manual vol 3, 17.4.8.1, IA32_PERF_CAPABILITIES[5:0] indicates the format of the address that is stored in the LBR stack. Knights Landing reports 1 (LBR_FORMAT_LIP) as its format. Despite that, registers containing FROM address of the branch, do have MISPREDICT bit but because of the format indicated in IA32_PERF_CAPABILITIES[5:0], LBR did not read MISPREDICT bit. Solution: Teach LBR about above Knights Landing quirk and make it read MISPREDICT bit. Signed-off-by: Jacek Tomaka Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20180802013830.10600-1-jacekt@dugeo.com Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/x86/events/intel/lbr.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c index a4170048a30bc..17fbd07e4245b 100644 --- a/arch/x86/events/intel/lbr.c +++ b/arch/x86/events/intel/lbr.c @@ -1250,4 +1250,8 @@ void intel_pmu_lbr_init_knl(void) x86_pmu.lbr_sel_mask = LBR_SEL_MASK; x86_pmu.lbr_sel_map = snb_lbr_sel_map; + + /* Knights Landing does have MISPREDICT bit */ + if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_LIP) + x86_pmu.intel_cap.lbr_format = LBR_FORMAT_EIP_FLAGS; } -- GitLab From 1484d4ff277007f1a852e4c02f4edc5eb3e1e7d6 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Mon, 10 Sep 2018 16:50:09 +0100 Subject: [PATCH 0780/2269] dm thin metadata: try to avoid ever aborting transactions [ Upstream commit 3ab91828166895600efd9cdc3a0eb32001f7204a ] Committing a transaction can consume some metadata of it's own, we now reserve a small amount of metadata to cover this. Free metadata reported by the kernel will not include this reserve. If any of the reserve has been used after a commit we enter a new internal state PM_OUT_OF_METADATA_SPACE. This is reported as PM_READ_ONLY, so no userland changes are needed. If the metadata device is resized the pool will move back to PM_WRITE. These changes mean we never need to abort and rollback a transaction due to running out of metadata space. This is particularly important because there have been a handful of reports of data corruption against DM thin-provisioning that can all be attributed to the thin-pool having ran out of metadata space. Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-thin-metadata.c | 36 ++++++++++++++++- drivers/md/dm-thin.c | 73 +++++++++++++++++++++++++++++++---- 2 files changed, 100 insertions(+), 9 deletions(-) diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c index 36ef284ad086b..9f621b0c0e172 100644 --- a/drivers/md/dm-thin-metadata.c +++ b/drivers/md/dm-thin-metadata.c @@ -188,6 +188,12 @@ struct dm_pool_metadata { unsigned long flags; sector_t data_block_size; + /* + * We reserve a section of the metadata for commit overhead. + * All reported space does *not* include this. + */ + dm_block_t metadata_reserve; + /* * Set if a transaction has to be aborted but the attempt to roll back * to the previous (good) transaction failed. The only pool metadata @@ -825,6 +831,22 @@ static int __commit_transaction(struct dm_pool_metadata *pmd) return dm_tm_commit(pmd->tm, sblock); } +static void __set_metadata_reserve(struct dm_pool_metadata *pmd) +{ + int r; + dm_block_t total; + dm_block_t max_blocks = 4096; /* 16M */ + + r = dm_sm_get_nr_blocks(pmd->metadata_sm, &total); + if (r) { + DMERR("could not get size of metadata device"); + pmd->metadata_reserve = max_blocks; + } else { + sector_div(total, 10); + pmd->metadata_reserve = min(max_blocks, total); + } +} + struct dm_pool_metadata *dm_pool_metadata_open(struct block_device *bdev, sector_t data_block_size, bool format_device) @@ -858,6 +880,8 @@ struct dm_pool_metadata *dm_pool_metadata_open(struct block_device *bdev, return ERR_PTR(r); } + __set_metadata_reserve(pmd); + return pmd; } @@ -1829,6 +1853,13 @@ int dm_pool_get_free_metadata_block_count(struct dm_pool_metadata *pmd, down_read(&pmd->root_lock); if (!pmd->fail_io) r = dm_sm_get_nr_free(pmd->metadata_sm, result); + + if (!r) { + if (*result < pmd->metadata_reserve) + *result = 0; + else + *result -= pmd->metadata_reserve; + } up_read(&pmd->root_lock); return r; @@ -1941,8 +1972,11 @@ int dm_pool_resize_metadata_dev(struct dm_pool_metadata *pmd, dm_block_t new_cou int r = -EINVAL; down_write(&pmd->root_lock); - if (!pmd->fail_io) + if (!pmd->fail_io) { r = __resize_space_map(pmd->metadata_sm, new_count); + if (!r) + __set_metadata_reserve(pmd); + } up_write(&pmd->root_lock); return r; diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 6cf9ad4e4e164..699c40c7fe608 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -200,7 +200,13 @@ struct dm_thin_new_mapping; enum pool_mode { PM_WRITE, /* metadata may be changed */ PM_OUT_OF_DATA_SPACE, /* metadata may be changed, though data may not be allocated */ + + /* + * Like READ_ONLY, except may switch back to WRITE on metadata resize. Reported as READ_ONLY. + */ + PM_OUT_OF_METADATA_SPACE, PM_READ_ONLY, /* metadata may not be changed */ + PM_FAIL, /* all I/O fails */ }; @@ -1382,7 +1388,35 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode); static void requeue_bios(struct pool *pool); -static void check_for_space(struct pool *pool) +static bool is_read_only_pool_mode(enum pool_mode mode) +{ + return (mode == PM_OUT_OF_METADATA_SPACE || mode == PM_READ_ONLY); +} + +static bool is_read_only(struct pool *pool) +{ + return is_read_only_pool_mode(get_pool_mode(pool)); +} + +static void check_for_metadata_space(struct pool *pool) +{ + int r; + const char *ooms_reason = NULL; + dm_block_t nr_free; + + r = dm_pool_get_free_metadata_block_count(pool->pmd, &nr_free); + if (r) + ooms_reason = "Could not get free metadata blocks"; + else if (!nr_free) + ooms_reason = "No free metadata blocks"; + + if (ooms_reason && !is_read_only(pool)) { + DMERR("%s", ooms_reason); + set_pool_mode(pool, PM_OUT_OF_METADATA_SPACE); + } +} + +static void check_for_data_space(struct pool *pool) { int r; dm_block_t nr_free; @@ -1408,14 +1442,16 @@ static int commit(struct pool *pool) { int r; - if (get_pool_mode(pool) >= PM_READ_ONLY) + if (get_pool_mode(pool) >= PM_OUT_OF_METADATA_SPACE) return -EINVAL; r = dm_pool_commit_metadata(pool->pmd); if (r) metadata_operation_failed(pool, "dm_pool_commit_metadata", r); - else - check_for_space(pool); + else { + check_for_metadata_space(pool); + check_for_data_space(pool); + } return r; } @@ -1481,6 +1517,19 @@ static int alloc_data_block(struct thin_c *tc, dm_block_t *result) return r; } + r = dm_pool_get_free_metadata_block_count(pool->pmd, &free_blocks); + if (r) { + metadata_operation_failed(pool, "dm_pool_get_free_metadata_block_count", r); + return r; + } + + if (!free_blocks) { + /* Let's commit before we use up the metadata reserve. */ + r = commit(pool); + if (r) + return r; + } + return 0; } @@ -1512,6 +1561,7 @@ static blk_status_t should_error_unserviceable_bio(struct pool *pool) case PM_OUT_OF_DATA_SPACE: return pool->pf.error_if_no_space ? BLK_STS_NOSPC : 0; + case PM_OUT_OF_METADATA_SPACE: case PM_READ_ONLY: case PM_FAIL: return BLK_STS_IOERR; @@ -2475,8 +2525,9 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode) error_retry_list(pool); break; + case PM_OUT_OF_METADATA_SPACE: case PM_READ_ONLY: - if (old_mode != new_mode) + if (!is_read_only_pool_mode(old_mode)) notify_of_pool_mode_change(pool, "read-only"); dm_pool_metadata_read_only(pool->pmd); pool->process_bio = process_bio_read_only; @@ -3412,6 +3463,10 @@ static int maybe_resize_metadata_dev(struct dm_target *ti, bool *need_commit) DMINFO("%s: growing the metadata device from %llu to %llu blocks", dm_device_name(pool->pool_md), sb_metadata_dev_size, metadata_dev_size); + + if (get_pool_mode(pool) == PM_OUT_OF_METADATA_SPACE) + set_pool_mode(pool, PM_WRITE); + r = dm_pool_resize_metadata_dev(pool->pmd, metadata_dev_size); if (r) { metadata_operation_failed(pool, "dm_pool_resize_metadata_dev", r); @@ -3715,7 +3770,7 @@ static int pool_message(struct dm_target *ti, unsigned argc, char **argv) struct pool_c *pt = ti->private; struct pool *pool = pt->pool; - if (get_pool_mode(pool) >= PM_READ_ONLY) { + if (get_pool_mode(pool) >= PM_OUT_OF_METADATA_SPACE) { DMERR("%s: unable to service pool target messages in READ_ONLY or FAIL mode", dm_device_name(pool->pool_md)); return -EOPNOTSUPP; @@ -3789,6 +3844,7 @@ static void pool_status(struct dm_target *ti, status_type_t type, dm_block_t nr_blocks_data; dm_block_t nr_blocks_metadata; dm_block_t held_root; + enum pool_mode mode; char buf[BDEVNAME_SIZE]; char buf2[BDEVNAME_SIZE]; struct pool_c *pt = ti->private; @@ -3859,9 +3915,10 @@ static void pool_status(struct dm_target *ti, status_type_t type, else DMEMIT("- "); - if (pool->pf.mode == PM_OUT_OF_DATA_SPACE) + mode = get_pool_mode(pool); + if (mode == PM_OUT_OF_DATA_SPACE) DMEMIT("out_of_data_space "); - else if (pool->pf.mode == PM_READ_ONLY) + else if (is_read_only_pool_mode(mode)) DMEMIT("ro "); else DMEMIT("rw "); -- GitLab From 2eb3072b2785047aa7cbbd5bcbeb8da7db82ecf7 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 20 Jul 2018 20:17:35 -0700 Subject: [PATCH 0781/2269] arch/hexagon: fix kernel/dma.c build warning [ Upstream commit 200f351e27f014fcbf69b544b0b4b72aeaf45fd3 ] Fix build warning in arch/hexagon/kernel/dma.c by casting a void * to unsigned long to match the function parameter type. ../arch/hexagon/kernel/dma.c: In function 'arch_dma_alloc': ../arch/hexagon/kernel/dma.c:51:5: warning: passing argument 2 of 'gen_pool_add' makes integer from pointer without a cast [enabled by default] ../include/linux/genalloc.h:112:19: note: expected 'long unsigned int' but argument is of type 'void *' Signed-off-by: Randy Dunlap Cc: Yoshinori Sato Cc: Rich Felker Cc: linux-sh@vger.kernel.org Patch-mainline: linux-kernel @ 07/20/2018, 20:17 [rkuo@codeaurora.org: fixed architecture name] Signed-off-by: Richard Kuo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/hexagon/kernel/dma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/hexagon/kernel/dma.c b/arch/hexagon/kernel/dma.c index 546792d176a48..564651bded425 100644 --- a/arch/hexagon/kernel/dma.c +++ b/arch/hexagon/kernel/dma.c @@ -59,7 +59,7 @@ static void *hexagon_dma_alloc_coherent(struct device *dev, size_t size, panic("Can't create %s() memory pool!", __func__); else gen_pool_add(coherent_pool, - pfn_to_virt(max_low_pfn), + (unsigned long)pfn_to_virt(max_low_pfn), hexagon_coherent_pool_size, -1); } -- GitLab From 7a2df42a5371a09cca5a272f1edb29ced625221e Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 22 Jul 2018 16:03:58 -0700 Subject: [PATCH 0782/2269] hexagon: modify ffs() and fls() to return int [ Upstream commit 5c41aaad409c097cf1ef74f2c649fed994744ef5 ] Building drivers/mtd/nand/raw/nandsim.c on arch/hexagon/ produces a printk format build warning. This is due to hexagon's ffs() being coded as returning long instead of int. Fix the printk format warning by changing all of hexagon's ffs() and fls() functions to return int instead of long. The variables that they return are already int instead of long. This return type matches the return type in . ../drivers/mtd/nand/raw/nandsim.c: In function 'init_nandsim': ../drivers/mtd/nand/raw/nandsim.c:760:2: warning: format '%u' expects argument of type 'unsigned int', but argument 2 has type 'long int' [-Wformat] There are no ffs() or fls() allmodconfig build errors after making this change. Signed-off-by: Randy Dunlap Cc: Richard Kuo Cc: linux-hexagon@vger.kernel.org Cc: Geert Uytterhoeven Patch-mainline: linux-kernel @ 07/22/2018, 16:03 Signed-off-by: Richard Kuo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/hexagon/include/asm/bitops.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/hexagon/include/asm/bitops.h b/arch/hexagon/include/asm/bitops.h index 5e4a59b3ec1bb..2691a1857d203 100644 --- a/arch/hexagon/include/asm/bitops.h +++ b/arch/hexagon/include/asm/bitops.h @@ -211,7 +211,7 @@ static inline long ffz(int x) * This is defined the same way as ffs. * Note fls(0) = 0, fls(1) = 1, fls(0x80000000) = 32. */ -static inline long fls(int x) +static inline int fls(int x) { int r; @@ -232,7 +232,7 @@ static inline long fls(int x) * the libc and compiler builtin ffs routines, therefore * differs in spirit from the above ffz (man ffs). */ -static inline long ffs(int x) +static inline int ffs(int x) { int r; -- GitLab From f2c9d68ed3c2cda6df03afcc150e1c6ad1d072d5 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Sun, 9 Sep 2018 17:47:31 +0200 Subject: [PATCH 0783/2269] arm64: jump_label.h: use asm_volatile_goto macro instead of "asm goto" [ Upstream commit 13aceef06adfaf93d52e01e28a8bc8a0ad471d83 ] All other uses of "asm goto" go through asm_volatile_goto, which avoids a miscompile when using GCC < 4.8.2. Replace our open-coded "asm goto" statements with the asm_volatile_goto macro to avoid issues with older toolchains. Cc: Catalin Marinas Reviewed-by: Nick Desaulniers Signed-off-by: Miguel Ojeda Signed-off-by: Will Deacon Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/jump_label.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/jump_label.h b/arch/arm64/include/asm/jump_label.h index 1b5e0e843c3af..7e2b3e3600863 100644 --- a/arch/arm64/include/asm/jump_label.h +++ b/arch/arm64/include/asm/jump_label.h @@ -28,7 +28,7 @@ static __always_inline bool arch_static_branch(struct static_key *key, bool branch) { - asm goto("1: nop\n\t" + asm_volatile_goto("1: nop\n\t" ".pushsection __jump_table, \"aw\"\n\t" ".align 3\n\t" ".quad 1b, %l[l_yes], %c0\n\t" @@ -42,7 +42,7 @@ l_yes: static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch) { - asm goto("1: b %l[l_yes]\n\t" + asm_volatile_goto("1: b %l[l_yes]\n\t" ".pushsection __jump_table, \"aw\"\n\t" ".align 3\n\t" ".quad 1b, %l[l_yes], %c0\n\t" -- GitLab From f7b86faf0bd11ae44d009d7d68ec8af5491119fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Mon, 10 Sep 2018 15:52:55 +0200 Subject: [PATCH 0784/2269] drm/amdgpu: fix error handling in amdgpu_cs_user_fence_chunk MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 0165de983272d1fae0809ed9db47c46a412279bc ] Slowly leaking memory one page at a time :) Signed-off-by: Christian König Reviewed-by: Andrey Grodzovsky Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 5f892ad6476ed..44aa58ab55d0c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -37,6 +37,7 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p, { struct drm_gem_object *gobj; unsigned long size; + int r; gobj = drm_gem_object_lookup(p->filp, data->handle); if (gobj == NULL) @@ -48,20 +49,26 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p, p->uf_entry.tv.shared = true; p->uf_entry.user_pages = NULL; - size = amdgpu_bo_size(p->uf_entry.robj); - if (size != PAGE_SIZE || (data->offset + 8) > size) - return -EINVAL; - - *offset = data->offset; - drm_gem_object_put_unlocked(gobj); + size = amdgpu_bo_size(p->uf_entry.robj); + if (size != PAGE_SIZE || (data->offset + 8) > size) { + r = -EINVAL; + goto error_unref; + } + if (amdgpu_ttm_tt_get_usermm(p->uf_entry.robj->tbo.ttm)) { - amdgpu_bo_unref(&p->uf_entry.robj); - return -EINVAL; + r = -EINVAL; + goto error_unref; } + *offset = data->offset; + return 0; + +error_unref: + amdgpu_bo_unref(&p->uf_entry.robj); + return r; } static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) -- GitLab From ad297898159ff167a7ea0ebc6f2508a49ade6262 Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Tue, 11 Sep 2018 01:51:43 +0800 Subject: [PATCH 0785/2269] r8169: Clear RTL_FLAG_TASK_*_PENDING when clearing RTL_FLAG_TASK_ENABLED [ Upstream commit 6ad569019999300afd8e614d296fdc356550b77f ] After system suspend, sometimes the r8169 doesn't work when ethernet cable gets pluggued. This issue happens because rtl_reset_work() doesn't get called from rtl8169_runtime_resume(), after system suspend. In rtl_task(), RTL_FLAG_TASK_* only gets cleared if this condition is met: if (!netif_running(dev) || !test_bit(RTL_FLAG_TASK_ENABLED, tp->wk.flags)) ... If RTL_FLAG_TASK_ENABLED was cleared during system suspend while RTL_FLAG_TASK_RESET_PENDING was set, the next rtl_schedule_task() won't schedule task as the flag is still there. So in addition to clearing RTL_FLAG_TASK_ENABLED, also clears other flags. Cc: Heiner Kallweit Signed-off-by: Kai-Heng Feng Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/realtek/r8169.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index 3669005b9294a..f7e540eeb877e 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -760,7 +760,7 @@ struct rtl8169_tc_offsets { }; enum rtl_flag { - RTL_FLAG_TASK_ENABLED, + RTL_FLAG_TASK_ENABLED = 0, RTL_FLAG_TASK_SLOW_PENDING, RTL_FLAG_TASK_RESET_PENDING, RTL_FLAG_TASK_PHY_PENDING, @@ -7657,7 +7657,8 @@ static int rtl8169_close(struct net_device *dev) rtl8169_update_counters(dev); rtl_lock_work(tp); - clear_bit(RTL_FLAG_TASK_ENABLED, tp->wk.flags); + /* Clear all task flags */ + bitmap_zero(tp->wk.flags, RTL_FLAG_MAX); rtl8169_down(dev); rtl_unlock_work(tp); @@ -7838,7 +7839,9 @@ static void rtl8169_net_suspend(struct net_device *dev) rtl_lock_work(tp); napi_disable(&tp->napi); - clear_bit(RTL_FLAG_TASK_ENABLED, tp->wk.flags); + /* Clear all task flags */ + bitmap_zero(tp->wk.flags, RTL_FLAG_MAX); + rtl_unlock_work(tp); rtl_pll_power_down(tp); -- GitLab From d5afd6b6eae543467398e4c35c344a163deec37a Mon Sep 17 00:00:00 2001 From: Wenjia Zhang Date: Wed, 12 Sep 2018 15:31:34 +0200 Subject: [PATCH 0786/2269] s390/qeth: use vzalloc for QUERY OAT buffer [ Upstream commit aec45e857c5538664edb76a60dd452e3265f37d1 ] qeth_query_oat_command() currently allocates the kernel buffer for the SIOC_QETH_QUERY_OAT ioctl with kzalloc. So on systems with fragmented memory, large allocations may fail (eg. the qethqoat tool by default uses 132KB). Solve this issue by using vzalloc, backing the allocation with non-contiguous memory. Signed-off-by: Wenjia Zhang Reviewed-by: Julian Wiedmann Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/s390/net/qeth_core_main.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 0a6afd4b283d4..4f2747cd15a62 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -4728,7 +4729,7 @@ static int qeth_query_oat_command(struct qeth_card *card, char __user *udata) priv.buffer_len = oat_data.buffer_len; priv.response_len = 0; - priv.buffer = kzalloc(oat_data.buffer_len, GFP_KERNEL); + priv.buffer = vzalloc(oat_data.buffer_len); if (!priv.buffer) { rc = -ENOMEM; goto out; @@ -4769,7 +4770,7 @@ static int qeth_query_oat_command(struct qeth_card *card, char __user *udata) rc = -EFAULT; out_free: - kfree(priv.buffer); + vfree(priv.buffer); out: return rc; } -- GitLab From 7d60f98cde7a1ed5e4ebc508c9d2e0d415d4e71d Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Wed, 12 Sep 2018 15:31:35 +0200 Subject: [PATCH 0787/2269] s390/qeth: don't dump past end of unknown HW header [ Upstream commit 0ac1487c4b2de383b91ecad1be561b8f7a2c15f4 ] For inbound data with an unsupported HW header format, only dump the actual HW header. We have no idea how much payload follows it, and what it contains. Worst case, we dump past the end of the Inbound Buffer and access whatever is located next in memory. Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/s390/net/qeth_l2_main.c | 2 +- drivers/s390/net/qeth_l3_main.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index 521293b1f4fab..11ae67842edf0 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -484,7 +484,7 @@ static int qeth_l2_process_inbound_buffer(struct qeth_card *card, default: dev_kfree_skb_any(skb); QETH_CARD_TEXT(card, 3, "inbunkno"); - QETH_DBF_HEX(CTRL, 3, hdr, QETH_DBF_CTRL_LEN); + QETH_DBF_HEX(CTRL, 3, hdr, sizeof(*hdr)); continue; } work_done++; diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index 1c62cbbaa66f6..cd73172bff477 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -1793,7 +1793,7 @@ static int qeth_l3_process_inbound_buffer(struct qeth_card *card, default: dev_kfree_skb_any(skb); QETH_CARD_TEXT(card, 3, "inbunkno"); - QETH_DBF_HEX(CTRL, 3, hdr, QETH_DBF_CTRL_LEN); + QETH_DBF_HEX(CTRL, 3, hdr, sizeof(*hdr)); continue; } work_done++; -- GitLab From dfb29d69e4d85de6dbc95672892113cb225bf1f2 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 6 Sep 2018 12:47:01 +0300 Subject: [PATCH 0788/2269] cifs: read overflow in is_valid_oplock_break() [ Upstream commit 097f5863b1a0c9901f180bbd56ae7d630655faaa ] We need to verify that the "data_offset" is within bounds. Reported-by: Dr Silvio Cesare of InfoSect Signed-off-by: Dan Carpenter Signed-off-by: Steve French Reviewed-by: Aurelien Aptel Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/cifs/misc.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index 460084a8eac54..bcab30d4a6c74 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -398,9 +398,17 @@ is_valid_oplock_break(char *buffer, struct TCP_Server_Info *srv) (struct smb_com_transaction_change_notify_rsp *)buf; struct file_notify_information *pnotify; __u32 data_offset = 0; + size_t len = srv->total_read - sizeof(pSMBr->hdr.smb_buf_length); + if (get_bcc(buf) > sizeof(struct file_notify_information)) { data_offset = le32_to_cpu(pSMBr->DataOffset); + if (data_offset > + len - sizeof(struct file_notify_information)) { + cifs_dbg(FYI, "invalid data_offset %u\n", + data_offset); + return true; + } pnotify = (struct file_notify_information *) ((char *)&pSMBr->hdr.Protocol + data_offset); cifs_dbg(FYI, "dnotify on %s Action: 0x%x\n", -- GitLab From 4e1494794ebcbbace76c0da4e2d0d7a2857a13f0 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Thu, 6 Sep 2018 13:26:08 +0200 Subject: [PATCH 0789/2269] xen/manage: don't complain about an empty value in control/sysrq node [ Upstream commit 87dffe86d406bee8782cac2db035acb9a28620a7 ] When guest receives a sysrq request from the host it acknowledges it by writing '\0' to control/sysrq xenstore node. This, however, make xenstore watch fire again but xenbus_scanf() fails to parse empty value with "%c" format string: sysrq: SysRq : Emergency Sync Emergency Sync complete xen:manage: Error -34 reading sysrq code in control/sysrq Ignore -ERANGE the same way we already ignore -ENOENT, empty value in control/sysrq is totally legal. Signed-off-by: Vitaly Kuznetsov Reviewed-by: Wei Liu Signed-off-by: Boris Ostrovsky Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/xen/manage.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c index 587d12829925b..0444ebdda3f01 100644 --- a/drivers/xen/manage.c +++ b/drivers/xen/manage.c @@ -283,9 +283,11 @@ static void sysrq_handler(struct xenbus_watch *watch, const char *path, /* * The Xenstore watch fires directly after registering it and * after a suspend/resume cycle. So ENOENT is no error but - * might happen in those cases. + * might happen in those cases. ERANGE is observed when we get + * an empty value (''), this happens when we acknowledge the + * request by writing '\0' below. */ - if (err != -ENOENT) + if (err != -ENOENT && err != -ERANGE) pr_err("Error %d reading sysrq code in control/sysrq\n", err); xenbus_transaction_end(xbt, 1); -- GitLab From a502165dae0960615efb0b1dbc45c09f5dc1db3d Mon Sep 17 00:00:00 2001 From: Olaf Hering Date: Fri, 7 Sep 2018 16:31:35 +0200 Subject: [PATCH 0790/2269] xen: avoid crash in disable_hotplug_cpu [ Upstream commit 3366cdb6d350d95466ee430ac50f3c8415ca8f46 ] The command 'xl vcpu-set 0 0', issued in dom0, will crash dom0: BUG: unable to handle kernel NULL pointer dereference at 00000000000002d8 PGD 0 P4D 0 Oops: 0000 [#1] PREEMPT SMP NOPTI CPU: 7 PID: 65 Comm: xenwatch Not tainted 4.19.0-rc2-1.ga9462db-default #1 openSUSE Tumbleweed (unreleased) Hardware name: Intel Corporation S5520UR/S5520UR, BIOS S5500.86B.01.00.0050.050620101605 05/06/2010 RIP: e030:device_offline+0x9/0xb0 Code: 77 24 00 e9 ce fe ff ff 48 8b 13 e9 68 ff ff ff 48 8b 13 e9 29 ff ff ff 48 8b 13 e9 ea fe ff ff 90 66 66 66 66 90 41 54 55 53 87 d8 02 00 00 01 0f 85 88 00 00 00 48 c7 c2 20 09 60 81 31 f6 RSP: e02b:ffffc90040f27e80 EFLAGS: 00010203 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000 RDX: ffff8801f3800000 RSI: ffffc90040f27e70 RDI: 0000000000000000 RBP: 0000000000000000 R08: ffffffff820e47b3 R09: 0000000000000000 R10: 0000000000007ff0 R11: 0000000000000000 R12: ffffffff822e6d30 R13: dead000000000200 R14: dead000000000100 R15: ffffffff8158b4e0 FS: 00007ffa595158c0(0000) GS:ffff8801f39c0000(0000) knlGS:0000000000000000 CS: e033 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000000002d8 CR3: 00000001d9602000 CR4: 0000000000002660 Call Trace: handle_vcpu_hotplug_event+0xb5/0xc0 xenwatch_thread+0x80/0x140 ? wait_woken+0x80/0x80 kthread+0x112/0x130 ? kthread_create_worker_on_cpu+0x40/0x40 ret_from_fork+0x3a/0x50 This happens because handle_vcpu_hotplug_event is called twice. In the first iteration cpu_present is still true, in the second iteration cpu_present is false which causes get_cpu_device to return NULL. In case of cpu#0, cpu_online is apparently always true. Fix this crash by checking if the cpu can be hotplugged, which is false for a cpu that was just removed. Also check if the cpu was actually offlined by device_remove, otherwise leave the cpu_present state as it is. Rearrange to code to do all work with device_hotplug_lock held. Signed-off-by: Olaf Hering Reviewed-by: Juergen Gross Signed-off-by: Boris Ostrovsky Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/xen/cpu_hotplug.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/xen/cpu_hotplug.c b/drivers/xen/cpu_hotplug.c index d4265c8ebb22a..b1357aa4bc552 100644 --- a/drivers/xen/cpu_hotplug.c +++ b/drivers/xen/cpu_hotplug.c @@ -19,15 +19,16 @@ static void enable_hotplug_cpu(int cpu) static void disable_hotplug_cpu(int cpu) { - if (cpu_online(cpu)) { - lock_device_hotplug(); + if (!cpu_is_hotpluggable(cpu)) + return; + lock_device_hotplug(); + if (cpu_online(cpu)) device_offline(get_cpu_device(cpu)); - unlock_device_hotplug(); - } - if (cpu_present(cpu)) + if (!cpu_online(cpu) && cpu_present(cpu)) { xen_arch_unregister_cpu(cpu); - - set_cpu_present(cpu, false); + set_cpu_present(cpu, false); + } + unlock_device_hotplug(); } static int vcpu_online(unsigned int cpu) -- GitLab From aa41fb9593afc819483826aef6e4c7937bdf62e4 Mon Sep 17 00:00:00 2001 From: Josh Abraham Date: Wed, 12 Sep 2018 15:13:54 -1000 Subject: [PATCH 0791/2269] xen: fix GCC warning and remove duplicate EVTCHN_ROW/EVTCHN_COL usage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 4dca864b59dd150a221730775e2f21f49779c135 ] This patch removes duplicate macro useage in events_base.c. It also fixes gcc warning: variable ‘col’ set but not used [-Wunused-but-set-variable] Signed-off-by: Joshua Abraham Reviewed-by: Juergen Gross Signed-off-by: Boris Ostrovsky Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/xen/events/events_base.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index 08e4af04d6f2c..e6c1934734b7d 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -138,7 +138,7 @@ static int set_evtchn_to_irq(unsigned evtchn, unsigned irq) clear_evtchn_to_irq_row(row); } - evtchn_to_irq[EVTCHN_ROW(evtchn)][EVTCHN_COL(evtchn)] = irq; + evtchn_to_irq[row][col] = irq; return 0; } -- GitLab From be406434737b1bc806517a5163941292f1a84fa0 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Fri, 28 Sep 2018 21:00:48 +0300 Subject: [PATCH 0792/2269] ovl: fix access beyond unterminated strings commit 601350ff58d5415a001769532f6b8333820e5786 upstream. KASAN detected slab-out-of-bounds access in printk from overlayfs, because string format used %*s instead of %.*s. > BUG: KASAN: slab-out-of-bounds in string+0x298/0x2d0 lib/vsprintf.c:604 > Read of size 1 at addr ffff8801c36c66ba by task syz-executor2/27811 > > CPU: 0 PID: 27811 Comm: syz-executor2 Not tainted 4.19.0-rc5+ #36 ... > printk+0xa7/0xcf kernel/printk/printk.c:1996 > ovl_lookup_index.cold.15+0xe8/0x1f8 fs/overlayfs/namei.c:689 Reported-by: syzbot+376cea2b0ef340db3dd4@syzkaller.appspotmail.com Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi Fixes: 359f392ca53e ("ovl: lookup index entry for copy up origin") Cc: # v4.13 Signed-off-by: Greg Kroah-Hartman --- fs/overlayfs/namei.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c index 8a10506db993b..d9468de3c9510 100644 --- a/fs/overlayfs/namei.c +++ b/fs/overlayfs/namei.c @@ -519,7 +519,7 @@ static struct dentry *ovl_lookup_index(struct dentry *dentry, index = NULL; goto out; } - pr_warn_ratelimited("overlayfs: failed inode index lookup (ino=%lu, key=%*s, err=%i);\n" + pr_warn_ratelimited("overlayfs: failed inode index lookup (ino=%lu, key=%.*s, err=%i);\n" "overlayfs: mount with '-o index=off' to disable inodes index.\n", d_inode(origin)->i_ino, name.len, name.name, err); -- GitLab From 8d75ecc13fdc58f39451f99f62d4d6940fd5397f Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Tue, 18 Sep 2018 16:34:31 +0300 Subject: [PATCH 0793/2269] ovl: fix memory leak on unlink of indexed file commit 63e132528032ce937126aba591a7b37ec593a6bb upstream. The memory leak was detected by kmemleak when running xfstests overlay/051,053 Fixes: caf70cb2ba5d ("ovl: cleanup orphan index entries") Cc: # v4.13 Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi Signed-off-by: Greg Kroah-Hartman --- fs/overlayfs/util.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c index f60ce2e04df0e..afdc2533ce74d 100644 --- a/fs/overlayfs/util.c +++ b/fs/overlayfs/util.c @@ -438,7 +438,7 @@ static void ovl_cleanup_index(struct dentry *dentry) struct dentry *upperdentry = ovl_dentry_upper(dentry); struct dentry *index = NULL; struct inode *inode; - struct qstr name; + struct qstr name = { }; int err; err = ovl_get_index_name(lowerdentry, &name); @@ -477,6 +477,7 @@ static void ovl_cleanup_index(struct dentry *dentry) goto fail; out: + kfree(name.name); dput(index); return; -- GitLab From fa7d75f64b8011b33d50296e0d3805fa91dfb5e7 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 4 Oct 2018 14:49:10 +0200 Subject: [PATCH 0794/2269] ovl: fix format of setxattr debug commit 1a8f8d2a443ef9ad9a3065ba8c8119df714240fa upstream. Format has a typo: it was meant to be "%.*s", not "%*s". But at some point callers grew nonprintable values as well, so use "%*pE" instead with a maximized length. Reported-by: Amir Goldstein Signed-off-by: Miklos Szeredi Fixes: 3a1e819b4e80 ("ovl: store file handle of lower inode on copy up") Cc: # v4.12 Signed-off-by: Greg Kroah-Hartman --- fs/overlayfs/overlayfs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index d9a0edd4e57e4..e1e7430055833 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -136,8 +136,8 @@ static inline int ovl_do_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags) { int err = vfs_setxattr(dentry, name, value, size, flags); - pr_debug("setxattr(%pd2, \"%s\", \"%*s\", 0x%x) = %i\n", - dentry, name, (int) size, (char *) value, flags, err); + pr_debug("setxattr(%pd2, \"%s\", \"%*pE\", %zu, 0x%x) = %i\n", + dentry, name, min((int)size, 48), value, size, flags, err); return err; } -- GitLab From b420b7b7923b9fd1a955eb176b43d88175065262 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 18 Sep 2018 00:36:36 -0400 Subject: [PATCH 0795/2269] sysfs: Do not return POSIX ACL xattrs via listxattr MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit ffc4c92227db5699493e43eb140b4cb5904c30ff upstream. Commit 786534b92f3c introduced a regression that caused listxattr to return the POSIX ACL attribute names even though sysfs doesn't support POSIX ACLs. This happens because simple_xattr_list checks for NULL i_acl / i_default_acl, but inode_init_always initializes those fields to ACL_NOT_CACHED ((void *)-1). For example: $ getfattr -m- -d /sys /sys: system.posix_acl_access: Operation not supported /sys: system.posix_acl_default: Operation not supported Fix this in simple_xattr_list by checking if the filesystem supports POSIX ACLs. Fixes: 786534b92f3c ("tmpfs: listxattr should include POSIX ACL xattrs") Reported-by: Marc Aurèle La France Tested-by: Marc Aurèle La France Signed-off-by: Andreas Gruenbacher Cc: stable@vger.kernel.org # v4.5+ Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- fs/xattr.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/fs/xattr.c b/fs/xattr.c index be2ce57cd6ad0..50029811fbe38 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -951,17 +951,19 @@ ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs, int err = 0; #ifdef CONFIG_FS_POSIX_ACL - if (inode->i_acl) { - err = xattr_list_one(&buffer, &remaining_size, - XATTR_NAME_POSIX_ACL_ACCESS); - if (err) - return err; - } - if (inode->i_default_acl) { - err = xattr_list_one(&buffer, &remaining_size, - XATTR_NAME_POSIX_ACL_DEFAULT); - if (err) - return err; + if (IS_POSIXACL(inode)) { + if (inode->i_acl) { + err = xattr_list_one(&buffer, &remaining_size, + XATTR_NAME_POSIX_ACL_ACCESS); + if (err) + return err; + } + if (inode->i_default_acl) { + err = xattr_list_one(&buffer, &remaining_size, + XATTR_NAME_POSIX_ACL_DEFAULT); + if (err) + return err; + } } #endif -- GitLab From dcdb2262d3892c60d171208cd63071ce69281585 Mon Sep 17 00:00:00 2001 From: Aurelien Aptel Date: Thu, 17 May 2018 16:35:07 +0200 Subject: [PATCH 0796/2269] smb2: fix missing files in root share directory listing commit 0595751f267994c3c7027377058e4185b3a28e75 upstream. When mounting a Windows share that is the root of a drive (eg. C$) the server does not return . and .. directory entries. This results in the smb2 code path erroneously skipping the 2 first entries. Pseudo-code of the readdir() code path: cifs_readdir(struct file, struct dir_context) initiate_cifs_search <-- if no reponse cached yet server->ops->query_dir_first dir_emit_dots dir_emit <-- adds "." and ".." if we're at pos=0 find_cifs_entry initiate_cifs_search <-- if pos < start of current response (restart search) server->ops->query_dir_next <-- if pos > end of current response (fetch next search res) for(...) <-- loops over cur response entries starting at pos cifs_filldir <-- skip . and .., emit entry cifs_fill_dirent dir_emit pos++ A) dir_emit_dots() always adds . & .. and sets the current dir pos to 2 (0 and 1 are done). Therefore we always want the index_to_find to be 2 regardless of if the response has . and .. B) smb1 code initializes index_of_last_entry with a +2 offset in cifssmb.c CIFSFindFirst(): psrch_inf->index_of_last_entry = 2 /* skip . and .. */ + psrch_inf->entries_in_buffer; Later in find_cifs_entry() we want to find the next dir entry at pos=2 as a result of (A) first_entry_in_buffer = cfile->srch_inf.index_of_last_entry - cfile->srch_inf.entries_in_buffer; This var is the dir pos that the first entry in the buffer will have therefore it must be 2 in the first call. If we don't offset index_of_last_entry by 2 (like in (B)), first_entry_in_buffer=0 but we were instructed to get pos=2 so this code in find_cifs_entry() skips the 2 first which is ok for non-root shares, as it skips . and .. from the response but is not ok for root shares where the 2 first are actual files pos_in_buf = index_to_find - first_entry_in_buffer; // pos_in_buf=2 // we skip 2 first response entries :( for (i = 0; (i < (pos_in_buf)) && (cur_ent != NULL); i++) { /* go entry by entry figuring out which is first */ cur_ent = nxt_dir_entry(cur_ent, end_of_smb, cfile->srch_inf.info_level); } C) cifs_filldir() skips . and .. so we can safely ignore them for now. Sample program: int main(int argc, char **argv) { const char *path = argc >= 2 ? argv[1] : "."; DIR *dh; struct dirent *de; printf("listing path <%s>\n", path); dh = opendir(path); if (!dh) { printf("opendir error %d\n", errno); return 1; } while (1) { de = readdir(dh); if (!de) { if (errno) { printf("readdir error %d\n", errno); return 1; } printf("end of listing\n"); break; } printf("off=%lu <%s>\n", de->d_off, de->d_name); } return 0; } Before the fix with SMB1 on root shares: <.> off=1 <..> off=2 <$Recycle.Bin> off=3 off=4 and on non-root shares: <.> off=1 <..> off=4 <-- after adding .., the offsets jumps to +2 because <2536> off=5 we skipped . and .. from response buffer (C) <411> off=6 but still incremented pos off=7 off=8 Therefore the fix for smb2 is to mimic smb1 behaviour and offset the index_of_last_entry by 2. Test results comparing smb1 and smb2 before/after the fix on root share, non-root shares and on large directories (ie. multi-response dir listing): PRE FIX ======= pre-1-root VS pre-2-root: ERR pre-2-root is missing [bootmgr, $Recycle.Bin] pre-1-nonroot VS pre-2-nonroot: OK~ same files, same order, different offsets pre-1-nonroot-large VS pre-2-nonroot-large: OK~ same files, same order, different offsets POST FIX ======== post-1-root VS post-2-root: OK same files, same order, same offsets post-1-nonroot VS post-2-nonroot: OK same files, same order, same offsets post-1-nonroot-large VS post-2-nonroot-large: OK same files, same order, same offsets REGRESSION? =========== pre-1-root VS post-1-root: OK same files, same order, same offsets pre-1-nonroot VS post-1-nonroot: OK same files, same order, same offsets BugLink: https://bugzilla.samba.org/show_bug.cgi?id=13107 Signed-off-by: Aurelien Aptel Signed-off-by: Paulo Alcantara Reviewed-by: Ronnie Sahlberg Signed-off-by: Steve French CC: Stable Signed-off-by: Greg Kroah-Hartman --- fs/cifs/smb2ops.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 759cbbf7b1af1..4e5b05263e4a3 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -1239,7 +1239,7 @@ smb2_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon, } srch_inf->entries_in_buffer = 0; - srch_inf->index_of_last_entry = 0; + srch_inf->index_of_last_entry = 2; rc = SMB2_query_directory(xid, tcon, fid->persistent_fid, fid->volatile_fid, 0, srch_inf); -- GitLab From 06f93e40f939f4df9efa57467f1bf195ce50d138 Mon Sep 17 00:00:00 2001 From: "Singh, Brijesh" Date: Thu, 4 Oct 2018 21:40:23 +0000 Subject: [PATCH 0797/2269] iommu/amd: Clear memory encryption mask from physical address MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit b3e9b515b08e407ab3a026dc2e4d935c48d05f69 upstream. Boris Ostrovsky reported a memory leak with device passthrough when SME is active. The VFIO driver uses iommu_iova_to_phys() to get the physical address for an iova. This physical address is later passed into vfio_unmap_unpin() to unpin the memory. The vfio_unmap_unpin() uses pfn_valid() before unpinning the memory. The pfn_valid() check was failing because encryption mask was part of the physical address returned. This resulted in the memory not being unpinned and therefore leaked after the guest terminates. The memory encryption mask must be cleared from the physical address in iommu_iova_to_phys(). Fixes: 2543a786aa25 ("iommu/amd: Allow the AMD IOMMU to work with memory encryption") Reported-by: Boris Ostrovsky Cc: Tom Lendacky Cc: Joerg Roedel Cc: Cc: Borislav Petkov Cc: Paolo Bonzini Cc: Radim Krčmář Cc: kvm@vger.kernel.org Cc: Boris Ostrovsky Cc: # 4.14+ Signed-off-by: Brijesh Singh Signed-off-by: Joerg Roedel Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/amd_iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 01746e7b90de2..9137030423cdc 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -3071,7 +3071,7 @@ static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom, return 0; offset_mask = pte_pgsize - 1; - __pte = *pte & PM_ADDR_MASK; + __pte = __sme_clr(*pte & PM_ADDR_MASK); return (__pte & ~offset_mask) | (iova & offset_mask); } -- GitLab From a5bb359c078a6475caf267f78392137701f9b890 Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Thu, 4 Oct 2018 11:39:42 +0800 Subject: [PATCH 0798/2269] ALSA: hda/realtek - Cannot adjust speaker's volume on Dell XPS 27 7760 commit 709ae62e8e6d9ac4df7dadb3b8ae432675c45ef9 upstream. The issue is the same as commit dd9aa335c880 ("ALSA: hda/realtek - Can't adjust speaker's volume on a Dell AIO"), the output requires to connect to a node with Amp-out capability. Applying the same fixup ALC298_FIXUP_SPK_VOLUME can fix the issue. BugLink: https://bugs.launchpad.net/bugs/1775068 Signed-off-by: Kai-Heng Feng Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index dcc9e6551b51e..fe5c741fcc6a2 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6288,6 +6288,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1028, 0x0706, "Dell Inspiron 7559", ALC256_FIXUP_DELL_INSPIRON_7559_SUBWOOFER), SND_PCI_QUIRK(0x1028, 0x0725, "Dell Inspiron 3162", ALC255_FIXUP_DELL_SPK_NOISE), SND_PCI_QUIRK(0x1028, 0x075b, "Dell XPS 13 9360", ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE), + SND_PCI_QUIRK(0x1028, 0x075c, "Dell XPS 27 7760", ALC298_FIXUP_SPK_VOLUME), SND_PCI_QUIRK(0x1028, 0x075d, "Dell AIO", ALC298_FIXUP_SPK_VOLUME), SND_PCI_QUIRK(0x1028, 0x07b0, "Dell Precision 7520", ALC295_FIXUP_DISABLE_DAC3), SND_PCI_QUIRK(0x1028, 0x0798, "Dell Inspiron 17 7000 Gaming", ALC256_FIXUP_DELL_INSPIRON_7559_SUBWOOFER), -- GitLab From 90ecb700345ce38c940e6c8b78ee7369bcca82f9 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Sat, 22 Sep 2018 20:41:55 -0400 Subject: [PATCH 0799/2269] crypto: qat - Fix KASAN stack-out-of-bounds bug in adf_probe() commit ba439a6cbfa2936a6713f64cb499de7943673fe3 upstream. The following KASAN warning was printed when booting a 64-bit kernel on some systems with Intel CPUs: [ 44.512826] ================================================================== [ 44.520165] BUG: KASAN: stack-out-of-bounds in find_first_bit+0xb0/0xc0 [ 44.526786] Read of size 8 at addr ffff88041e02fc50 by task kworker/0:2/124 [ 44.535253] CPU: 0 PID: 124 Comm: kworker/0:2 Tainted: G X --------- --- 4.18.0-12.el8.x86_64+debug #1 [ 44.545858] Hardware name: Intel Corporation PURLEY/PURLEY, BIOS BKVDTRL1.86B.0005.D08.1712070559 12/07/2017 [ 44.555682] Workqueue: events work_for_cpu_fn [ 44.560043] Call Trace: [ 44.562502] dump_stack+0x9a/0xe9 [ 44.565832] print_address_description+0x65/0x22e [ 44.570683] ? find_first_bit+0xb0/0xc0 [ 44.570689] kasan_report.cold.6+0x92/0x19f [ 44.578726] find_first_bit+0xb0/0xc0 [ 44.578737] adf_probe+0x9eb/0x19a0 [qat_c62x] [ 44.578751] ? adf_remove+0x110/0x110 [qat_c62x] [ 44.591490] ? mark_held_locks+0xc8/0x140 [ 44.591498] ? _raw_spin_unlock+0x30/0x30 [ 44.591505] ? trace_hardirqs_on_caller+0x381/0x570 [ 44.604418] ? adf_remove+0x110/0x110 [qat_c62x] [ 44.604427] local_pci_probe+0xd4/0x180 [ 44.604432] ? pci_device_shutdown+0x110/0x110 [ 44.617386] work_for_cpu_fn+0x51/0xa0 [ 44.621145] process_one_work+0x8fe/0x16e0 [ 44.625263] ? pwq_dec_nr_in_flight+0x2d0/0x2d0 [ 44.629799] ? lock_acquire+0x14c/0x400 [ 44.633645] ? move_linked_works+0x12e/0x2a0 [ 44.637928] worker_thread+0x536/0xb50 [ 44.641690] ? __kthread_parkme+0xb6/0x180 [ 44.645796] ? process_one_work+0x16e0/0x16e0 [ 44.650160] kthread+0x30c/0x3d0 [ 44.653400] ? kthread_create_worker_on_cpu+0xc0/0xc0 [ 44.658457] ret_from_fork+0x3a/0x50 [ 44.663557] The buggy address belongs to the page: [ 44.668350] page:ffffea0010780bc0 count:0 mapcount:0 mapping:0000000000000000 index:0x0 [ 44.676356] flags: 0x17ffffc0000000() [ 44.680023] raw: 0017ffffc0000000 ffffea0010780bc8 ffffea0010780bc8 0000000000000000 [ 44.687769] raw: 0000000000000000 0000000000000000 00000000ffffffff 0000000000000000 [ 44.695510] page dumped because: kasan: bad access detected [ 44.702578] Memory state around the buggy address: [ 44.707372] ffff88041e02fb00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 44.714593] ffff88041e02fb80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 44.721810] >ffff88041e02fc00: 00 00 00 00 00 00 f1 f1 f1 f1 04 f2 f2 f2 f2 f2 [ 44.729028] ^ [ 44.734864] ffff88041e02fc80: f2 f2 00 00 00 00 f3 f3 f3 f3 00 00 00 00 00 00 [ 44.742082] ffff88041e02fd00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 44.749299] ================================================================== Looking into the code: int ret, bar_mask; : for_each_set_bit(bar_nr, (const unsigned long *)&bar_mask, It is casting a 32-bit integer pointer to a 64-bit unsigned long pointer. There are two problems here. First, the 32-bit pointer address may not be 64-bit aligned. Secondly, it is accessing an extra 4 bytes. This is fixed by changing the bar_mask type to unsigned long. Cc: Signed-off-by: Waiman Long Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/qat/qat_c3xxx/adf_drv.c | 6 +++--- drivers/crypto/qat/qat_c3xxxvf/adf_drv.c | 6 +++--- drivers/crypto/qat/qat_c62x/adf_drv.c | 6 +++--- drivers/crypto/qat/qat_c62xvf/adf_drv.c | 6 +++--- drivers/crypto/qat/qat_dh895xcc/adf_drv.c | 6 +++--- drivers/crypto/qat/qat_dh895xccvf/adf_drv.c | 6 +++--- 6 files changed, 18 insertions(+), 18 deletions(-) diff --git a/drivers/crypto/qat/qat_c3xxx/adf_drv.c b/drivers/crypto/qat/qat_c3xxx/adf_drv.c index f172171668ee9..7c470ae97f60a 100644 --- a/drivers/crypto/qat/qat_c3xxx/adf_drv.c +++ b/drivers/crypto/qat/qat_c3xxx/adf_drv.c @@ -123,7 +123,8 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) struct adf_hw_device_data *hw_data; char name[ADF_DEVICE_NAME_LENGTH]; unsigned int i, bar_nr; - int ret, bar_mask; + unsigned long bar_mask; + int ret; switch (ent->device) { case ADF_C3XXX_PCI_DEVICE_ID: @@ -235,8 +236,7 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) /* Find and map all the device's BARS */ i = 0; bar_mask = pci_select_bars(pdev, IORESOURCE_MEM); - for_each_set_bit(bar_nr, (const unsigned long *)&bar_mask, - ADF_PCI_MAX_BARS * 2) { + for_each_set_bit(bar_nr, &bar_mask, ADF_PCI_MAX_BARS * 2) { struct adf_bar *bar = &accel_pci_dev->pci_bars[i++]; bar->base_addr = pci_resource_start(pdev, bar_nr); diff --git a/drivers/crypto/qat/qat_c3xxxvf/adf_drv.c b/drivers/crypto/qat/qat_c3xxxvf/adf_drv.c index 24ec908eb26c2..613c7d5644ced 100644 --- a/drivers/crypto/qat/qat_c3xxxvf/adf_drv.c +++ b/drivers/crypto/qat/qat_c3xxxvf/adf_drv.c @@ -125,7 +125,8 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) struct adf_hw_device_data *hw_data; char name[ADF_DEVICE_NAME_LENGTH]; unsigned int i, bar_nr; - int ret, bar_mask; + unsigned long bar_mask; + int ret; switch (ent->device) { case ADF_C3XXXIOV_PCI_DEVICE_ID: @@ -215,8 +216,7 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) /* Find and map all the device's BARS */ i = 0; bar_mask = pci_select_bars(pdev, IORESOURCE_MEM); - for_each_set_bit(bar_nr, (const unsigned long *)&bar_mask, - ADF_PCI_MAX_BARS * 2) { + for_each_set_bit(bar_nr, &bar_mask, ADF_PCI_MAX_BARS * 2) { struct adf_bar *bar = &accel_pci_dev->pci_bars[i++]; bar->base_addr = pci_resource_start(pdev, bar_nr); diff --git a/drivers/crypto/qat/qat_c62x/adf_drv.c b/drivers/crypto/qat/qat_c62x/adf_drv.c index 58a984c9c3ec8..cb11d85d7bb3d 100644 --- a/drivers/crypto/qat/qat_c62x/adf_drv.c +++ b/drivers/crypto/qat/qat_c62x/adf_drv.c @@ -123,7 +123,8 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) struct adf_hw_device_data *hw_data; char name[ADF_DEVICE_NAME_LENGTH]; unsigned int i, bar_nr; - int ret, bar_mask; + unsigned long bar_mask; + int ret; switch (ent->device) { case ADF_C62X_PCI_DEVICE_ID: @@ -235,8 +236,7 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) /* Find and map all the device's BARS */ i = (hw_data->fuses & ADF_DEVICE_FUSECTL_MASK) ? 1 : 0; bar_mask = pci_select_bars(pdev, IORESOURCE_MEM); - for_each_set_bit(bar_nr, (const unsigned long *)&bar_mask, - ADF_PCI_MAX_BARS * 2) { + for_each_set_bit(bar_nr, &bar_mask, ADF_PCI_MAX_BARS * 2) { struct adf_bar *bar = &accel_pci_dev->pci_bars[i++]; bar->base_addr = pci_resource_start(pdev, bar_nr); diff --git a/drivers/crypto/qat/qat_c62xvf/adf_drv.c b/drivers/crypto/qat/qat_c62xvf/adf_drv.c index b9f3e0e4fde97..278452b8ef81c 100644 --- a/drivers/crypto/qat/qat_c62xvf/adf_drv.c +++ b/drivers/crypto/qat/qat_c62xvf/adf_drv.c @@ -125,7 +125,8 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) struct adf_hw_device_data *hw_data; char name[ADF_DEVICE_NAME_LENGTH]; unsigned int i, bar_nr; - int ret, bar_mask; + unsigned long bar_mask; + int ret; switch (ent->device) { case ADF_C62XIOV_PCI_DEVICE_ID: @@ -215,8 +216,7 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) /* Find and map all the device's BARS */ i = 0; bar_mask = pci_select_bars(pdev, IORESOURCE_MEM); - for_each_set_bit(bar_nr, (const unsigned long *)&bar_mask, - ADF_PCI_MAX_BARS * 2) { + for_each_set_bit(bar_nr, &bar_mask, ADF_PCI_MAX_BARS * 2) { struct adf_bar *bar = &accel_pci_dev->pci_bars[i++]; bar->base_addr = pci_resource_start(pdev, bar_nr); diff --git a/drivers/crypto/qat/qat_dh895xcc/adf_drv.c b/drivers/crypto/qat/qat_dh895xcc/adf_drv.c index 2ce01f010c743..07b741aed108a 100644 --- a/drivers/crypto/qat/qat_dh895xcc/adf_drv.c +++ b/drivers/crypto/qat/qat_dh895xcc/adf_drv.c @@ -123,7 +123,8 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) struct adf_hw_device_data *hw_data; char name[ADF_DEVICE_NAME_LENGTH]; unsigned int i, bar_nr; - int ret, bar_mask; + unsigned long bar_mask; + int ret; switch (ent->device) { case ADF_DH895XCC_PCI_DEVICE_ID: @@ -237,8 +238,7 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) /* Find and map all the device's BARS */ i = 0; bar_mask = pci_select_bars(pdev, IORESOURCE_MEM); - for_each_set_bit(bar_nr, (const unsigned long *)&bar_mask, - ADF_PCI_MAX_BARS * 2) { + for_each_set_bit(bar_nr, &bar_mask, ADF_PCI_MAX_BARS * 2) { struct adf_bar *bar = &accel_pci_dev->pci_bars[i++]; bar->base_addr = pci_resource_start(pdev, bar_nr); diff --git a/drivers/crypto/qat/qat_dh895xccvf/adf_drv.c b/drivers/crypto/qat/qat_dh895xccvf/adf_drv.c index 26ab17bfc6dab..3da0f951cb590 100644 --- a/drivers/crypto/qat/qat_dh895xccvf/adf_drv.c +++ b/drivers/crypto/qat/qat_dh895xccvf/adf_drv.c @@ -125,7 +125,8 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) struct adf_hw_device_data *hw_data; char name[ADF_DEVICE_NAME_LENGTH]; unsigned int i, bar_nr; - int ret, bar_mask; + unsigned long bar_mask; + int ret; switch (ent->device) { case ADF_DH895XCCIOV_PCI_DEVICE_ID: @@ -215,8 +216,7 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) /* Find and map all the device's BARS */ i = 0; bar_mask = pci_select_bars(pdev, IORESOURCE_MEM); - for_each_set_bit(bar_nr, (const unsigned long *)&bar_mask, - ADF_PCI_MAX_BARS * 2) { + for_each_set_bit(bar_nr, &bar_mask, ADF_PCI_MAX_BARS * 2) { struct adf_bar *bar = &accel_pci_dev->pci_bars[i++]; bar->base_addr = pci_resource_start(pdev, bar_nr); -- GitLab From 3b1a8535b8e13c7e3493f042d02226328bbceff0 Mon Sep 17 00:00:00 2001 From: Leonard Crestez Date: Fri, 21 Sep 2018 18:03:18 +0300 Subject: [PATCH 0800/2269] crypto: mxs-dcp - Fix wait logic on chan threads commit d80771c08363ad7fbf0f56f5301e7ca65065c582 upstream. When compiling with CONFIG_DEBUG_ATOMIC_SLEEP=y the mxs-dcp driver prints warnings such as: WARNING: CPU: 0 PID: 120 at kernel/sched/core.c:7736 __might_sleep+0x98/0x9c do not call blocking ops when !TASK_RUNNING; state=1 set at [<8081978c>] dcp_chan_thread_sha+0x3c/0x2ec The problem is that blocking ops will manipulate current->state themselves so it is not allowed to call them between set_current_state(TASK_INTERRUPTIBLE) and schedule(). Fix this by converting the per-chan mutex to a spinlock (it only protects tiny list ops anyway) and rearranging the wait logic so that callbacks are called current->state as TASK_RUNNING. Those callbacks will indeed call blocking ops themselves so this is required. Cc: Signed-off-by: Leonard Crestez Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/mxs-dcp.c | 53 +++++++++++++++++++++++----------------- 1 file changed, 30 insertions(+), 23 deletions(-) diff --git a/drivers/crypto/mxs-dcp.c b/drivers/crypto/mxs-dcp.c index 764be3e6933c8..a98a25733a222 100644 --- a/drivers/crypto/mxs-dcp.c +++ b/drivers/crypto/mxs-dcp.c @@ -63,7 +63,7 @@ struct dcp { struct dcp_coherent_block *coh; struct completion completion[DCP_MAX_CHANS]; - struct mutex mutex[DCP_MAX_CHANS]; + spinlock_t lock[DCP_MAX_CHANS]; struct task_struct *thread[DCP_MAX_CHANS]; struct crypto_queue queue[DCP_MAX_CHANS]; }; @@ -349,13 +349,20 @@ static int dcp_chan_thread_aes(void *data) int ret; - do { - __set_current_state(TASK_INTERRUPTIBLE); + while (!kthread_should_stop()) { + set_current_state(TASK_INTERRUPTIBLE); - mutex_lock(&sdcp->mutex[chan]); + spin_lock(&sdcp->lock[chan]); backlog = crypto_get_backlog(&sdcp->queue[chan]); arq = crypto_dequeue_request(&sdcp->queue[chan]); - mutex_unlock(&sdcp->mutex[chan]); + spin_unlock(&sdcp->lock[chan]); + + if (!backlog && !arq) { + schedule(); + continue; + } + + set_current_state(TASK_RUNNING); if (backlog) backlog->complete(backlog, -EINPROGRESS); @@ -363,11 +370,8 @@ static int dcp_chan_thread_aes(void *data) if (arq) { ret = mxs_dcp_aes_block_crypt(arq); arq->complete(arq, ret); - continue; } - - schedule(); - } while (!kthread_should_stop()); + } return 0; } @@ -409,9 +413,9 @@ static int mxs_dcp_aes_enqueue(struct ablkcipher_request *req, int enc, int ecb) rctx->ecb = ecb; actx->chan = DCP_CHAN_CRYPTO; - mutex_lock(&sdcp->mutex[actx->chan]); + spin_lock(&sdcp->lock[actx->chan]); ret = crypto_enqueue_request(&sdcp->queue[actx->chan], &req->base); - mutex_unlock(&sdcp->mutex[actx->chan]); + spin_unlock(&sdcp->lock[actx->chan]); wake_up_process(sdcp->thread[actx->chan]); @@ -640,13 +644,20 @@ static int dcp_chan_thread_sha(void *data) struct ahash_request *req; int ret, fini; - do { - __set_current_state(TASK_INTERRUPTIBLE); + while (!kthread_should_stop()) { + set_current_state(TASK_INTERRUPTIBLE); - mutex_lock(&sdcp->mutex[chan]); + spin_lock(&sdcp->lock[chan]); backlog = crypto_get_backlog(&sdcp->queue[chan]); arq = crypto_dequeue_request(&sdcp->queue[chan]); - mutex_unlock(&sdcp->mutex[chan]); + spin_unlock(&sdcp->lock[chan]); + + if (!backlog && !arq) { + schedule(); + continue; + } + + set_current_state(TASK_RUNNING); if (backlog) backlog->complete(backlog, -EINPROGRESS); @@ -658,12 +669,8 @@ static int dcp_chan_thread_sha(void *data) ret = dcp_sha_req_to_buf(arq); fini = rctx->fini; arq->complete(arq, ret); - if (!fini) - continue; } - - schedule(); - } while (!kthread_should_stop()); + } return 0; } @@ -721,9 +728,9 @@ static int dcp_sha_update_fx(struct ahash_request *req, int fini) rctx->init = 1; } - mutex_lock(&sdcp->mutex[actx->chan]); + spin_lock(&sdcp->lock[actx->chan]); ret = crypto_enqueue_request(&sdcp->queue[actx->chan], &req->base); - mutex_unlock(&sdcp->mutex[actx->chan]); + spin_unlock(&sdcp->lock[actx->chan]); wake_up_process(sdcp->thread[actx->chan]); mutex_unlock(&actx->mutex); @@ -983,7 +990,7 @@ static int mxs_dcp_probe(struct platform_device *pdev) platform_set_drvdata(pdev, sdcp); for (i = 0; i < DCP_MAX_CHANS; i++) { - mutex_init(&sdcp->mutex[i]); + spin_lock_init(&sdcp->lock[i]); init_completion(&sdcp->completion[i]); crypto_init_queue(&sdcp->queue[i], 50); } -- GitLab From 1df517a4cafde215d3e7f853eb2b6edb7cd72dab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Horia=20Geant=C4=83?= Date: Fri, 14 Sep 2018 18:34:28 +0300 Subject: [PATCH 0801/2269] crypto: caam/jr - fix ablkcipher_edesc pointer arithmetic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 13cc6f48c7434ce46ba6dbc90003a136a263d75a upstream. In some cases the zero-length hw_desc array at the end of ablkcipher_edesc struct requires for 4B of tail padding. Due to tail padding and the way pointers to S/G table and IV are computed: edesc->sec4_sg = (void *)edesc + sizeof(struct ablkcipher_edesc) + desc_bytes; iv = (u8 *)edesc->hw_desc + desc_bytes + sec4_sg_bytes; first 4 bytes of IV are overwritten by S/G table. Update computation of pointer to S/G table to rely on offset of hw_desc member and not on sizeof() operator. Cc: # 4.13+ Fixes: 115957bb3e59 ("crypto: caam - fix IV DMA mapping and updating") Signed-off-by: Horia Geantă Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/caam/caamalg.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/crypto/caam/caamalg.c b/drivers/crypto/caam/caamalg.c index a8a2a271b63d2..43fe195f6dca5 100644 --- a/drivers/crypto/caam/caamalg.c +++ b/drivers/crypto/caam/caamalg.c @@ -1511,8 +1511,8 @@ static struct ablkcipher_edesc *ablkcipher_edesc_alloc(struct ablkcipher_request edesc->src_nents = src_nents; edesc->dst_nents = dst_nents; edesc->sec4_sg_bytes = sec4_sg_bytes; - edesc->sec4_sg = (void *)edesc + sizeof(struct ablkcipher_edesc) + - desc_bytes; + edesc->sec4_sg = (struct sec4_sg_entry *)((u8 *)edesc->hw_desc + + desc_bytes); edesc->iv_dir = DMA_TO_DEVICE; /* Make sure IV is located in a DMAable area */ @@ -1715,8 +1715,8 @@ static struct ablkcipher_edesc *ablkcipher_giv_edesc_alloc( edesc->src_nents = src_nents; edesc->dst_nents = dst_nents; edesc->sec4_sg_bytes = sec4_sg_bytes; - edesc->sec4_sg = (void *)edesc + sizeof(struct ablkcipher_edesc) + - desc_bytes; + edesc->sec4_sg = (struct sec4_sg_entry *)((u8 *)edesc->hw_desc + + desc_bytes); edesc->iv_dir = DMA_FROM_DEVICE; /* Make sure IV is located in a DMAable area */ -- GitLab From 119bf9470be955060893fa1072b097053e8f48a1 Mon Sep 17 00:00:00 2001 From: Ricardo Ribalda Delgado Date: Thu, 13 Sep 2018 15:37:04 +0200 Subject: [PATCH 0802/2269] gpiolib: Free the last requested descriptor commit 19a4fbffc94e41abaa2a623a25ce2641d69eccf0 upstream. The current code only frees N-1 gpios if an error occurs during gpiod_set_transitory, gpiod_direction_output or gpiod_direction_input. Leading to gpios that cannot be used by userspace nor other drivers. Cc: Timur Tabi Cc: stable@vger.kernel.org Fixes: ab3dbcf78f60f46d ("gpioib: do not free unrequested descriptors) Reported-by: Jan Lorenzen Reported-by: Jim Paris Signed-off-by: Ricardo Ribalda Delgado Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/gpio/gpiolib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 7e0bfd7347f6c..7d5de4ef4f22b 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -489,7 +489,7 @@ static int linehandle_create(struct gpio_device *gdev, void __user *ip) if (ret) goto out_free_descs; lh->descs[i] = desc; - count = i; + count = i + 1; if (lflags & GPIOHANDLE_REQUEST_ACTIVE_LOW) set_bit(FLAG_ACTIVE_LOW, &desc->flags); -- GitLab From 1d24e2609002ccab8e00bb23dbd3abc94566091f Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Mon, 17 Sep 2018 04:14:54 +0000 Subject: [PATCH 0803/2269] Drivers: hv: vmbus: Use get/put_cpu() in vmbus_connect() commit 41e270f6898e7502be9fd6920ee0a108ca259d36 upstream. With CONFIG_DEBUG_PREEMPT=y, I always see this warning: BUG: using smp_processor_id() in preemptible [00000000] Fix the false warning by using get/put_cpu(). Here vmbus_connect() sends a message to the host and waits for the host's response. The host will deliver the response message and an interrupt on CPU msg->target_vcpu, and later the interrupt handler will wake up vmbus_connect(). vmbus_connect() doesn't really have to run on the same cpu as CPU msg->target_vcpu, so it's safe to call put_cpu() just here. Signed-off-by: Dexuan Cui Cc: stable@vger.kernel.org Cc: K. Y. Srinivasan Cc: Haiyang Zhang Cc: Stephen Hemminger Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- drivers/hv/connection.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c index f41901f80b645..5449fc59b7f5e 100644 --- a/drivers/hv/connection.c +++ b/drivers/hv/connection.c @@ -74,6 +74,7 @@ static int vmbus_negotiate_version(struct vmbus_channel_msginfo *msginfo, __u32 version) { int ret = 0; + unsigned int cur_cpu; struct vmbus_channel_initiate_contact *msg; unsigned long flags; @@ -96,9 +97,10 @@ static int vmbus_negotiate_version(struct vmbus_channel_msginfo *msginfo, * the CPU attempting to connect may not be CPU 0. */ if (version >= VERSION_WIN8_1) { - msg->target_vcpu = - hv_cpu_number_to_vp_number(smp_processor_id()); - vmbus_connection.connect_cpu = smp_processor_id(); + cur_cpu = get_cpu(); + msg->target_vcpu = hv_cpu_number_to_vp_number(cur_cpu); + vmbus_connection.connect_cpu = cur_cpu; + put_cpu(); } else { msg->target_vcpu = 0; vmbus_connection.connect_cpu = 0; -- GitLab From 4de0fb95a2875252ab023f63c44762c7219b6da6 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Mon, 17 Sep 2018 04:14:55 +0000 Subject: [PATCH 0804/2269] tools: hv: fcopy: set 'error' in case an unknown operation was requested commit c2d68afba86d1ff01e7300c68bc16a9234dcd8e9 upstream. 'error' variable is left uninitialized in case we see an unknown operation. As we don't immediately return and proceed to pwrite() we need to set it to something, HV_E_FAIL sounds good enough. Signed-off-by: Vitaly Kuznetsov Signed-off-by: K. Y. Srinivasan Cc: stable Signed-off-by: Greg Kroah-Hartman --- tools/hv/hv_fcopy_daemon.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/hv/hv_fcopy_daemon.c b/tools/hv/hv_fcopy_daemon.c index 785f4e95148cf..7a1039c15e7d9 100644 --- a/tools/hv/hv_fcopy_daemon.c +++ b/tools/hv/hv_fcopy_daemon.c @@ -233,6 +233,7 @@ int main(int argc, char *argv[]) break; default: + error = HV_E_FAIL; syslog(LOG_ERR, "Unknown operation: %d", buffer.hdr.operation); -- GitLab From f8566a92ab75d442a823453414c6158b0b3c5ce7 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Fri, 5 Oct 2018 15:51:58 -0700 Subject: [PATCH 0805/2269] proc: restrict kernel stack dumps to root commit f8a00cef17206ecd1b30d3d9f99e10d9fa707aa7 upstream. Currently, you can use /proc/self/task/*/stack to cause a stack walk on a task you control while it is running on another CPU. That means that the stack can change under the stack walker. The stack walker does have guards against going completely off the rails and into random kernel memory, but it can interpret random data from your kernel stack as instruction pointers and stack pointers. This can cause exposure of kernel stack contents to userspace. Restrict the ability to inspect kernel stacks of arbitrary tasks to root in order to prevent a local attacker from exploiting racy stack unwinding to leak kernel task stack contents. See the added comment for a longer rationale. There don't seem to be any users of this userspace API that can't gracefully bail out if reading from the file fails. Therefore, I believe that this change is unlikely to break things. In the case that this patch does end up needing a revert, the next-best solution might be to fake a single-entry stack based on wchan. Link: http://lkml.kernel.org/r/20180927153316.200286-1-jannh@google.com Fixes: 2ec220e27f50 ("proc: add /proc/*/stack") Signed-off-by: Jann Horn Acked-by: Kees Cook Cc: Alexey Dobriyan Cc: Ken Chen Cc: Will Deacon Cc: Laura Abbott Cc: Andy Lutomirski Cc: Catalin Marinas Cc: Josh Poimboeuf Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H . Peter Anvin" Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- fs/proc/base.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/fs/proc/base.c b/fs/proc/base.c index c5c42f3e33d1c..9063738ff1f03 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -431,6 +431,20 @@ static int proc_pid_stack(struct seq_file *m, struct pid_namespace *ns, int err; int i; + /* + * The ability to racily run the kernel stack unwinder on a running task + * and then observe the unwinder output is scary; while it is useful for + * debugging kernel issues, it can also allow an attacker to leak kernel + * stack contents. + * Doing this in a manner that is at least safe from races would require + * some work to ensure that the remote task can not be scheduled; and + * even then, this would still expose the unwinder as local attack + * surface. + * Therefore, this interface is restricted to root. + */ + if (!file_ns_capable(m->file, &init_user_ns, CAP_SYS_ADMIN)) + return -EACCES; + entries = kmalloc(MAX_STACK_TRACE_DEPTH * sizeof(*entries), GFP_KERNEL); if (!entries) return -ENOMEM; -- GitLab From de0e2a92ccc5460eb8dd115de891cde63ae552bf Mon Sep 17 00:00:00 2001 From: Ashish Samant Date: Fri, 5 Oct 2018 15:52:15 -0700 Subject: [PATCH 0806/2269] ocfs2: fix locking for res->tracking and dlm->tracking_list commit cbe355f57c8074bc4f452e5b6e35509044c6fa23 upstream. In dlm_init_lockres() we access and modify res->tracking and dlm->tracking_list without holding dlm->track_lock. This can cause list corruptions and can end up in kernel panic. Fix this by locking res->tracking and dlm->tracking_list with dlm->track_lock instead of dlm->spinlock. Link: http://lkml.kernel.org/r/1529951192-4686-1-git-send-email-ashish.samant@oracle.com Signed-off-by: Ashish Samant Reviewed-by: Changwei Ge Acked-by: Joseph Qi Acked-by: Jun Piao Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- fs/ocfs2/dlm/dlmmaster.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index 3e04279446e8d..44e7d180ebec9 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c @@ -589,9 +589,9 @@ static void dlm_init_lockres(struct dlm_ctxt *dlm, res->last_used = 0; - spin_lock(&dlm->spinlock); + spin_lock(&dlm->track_lock); list_add_tail(&res->tracking, &dlm->tracking_list); - spin_unlock(&dlm->spinlock); + spin_unlock(&dlm->track_lock); memset(res->lvb, 0, DLM_LVB_LEN); memset(res->refmap, 0, sizeof(res->refmap)); -- GitLab From 07f79b39d474bab2288ef68dc8e64683758fa0ec Mon Sep 17 00:00:00 2001 From: Song Liu Date: Wed, 3 Oct 2018 11:30:35 -0700 Subject: [PATCH 0807/2269] ixgbe: check return value of napi_complete_done() commit 4233cfe6ec4683497d7318f55ce7617e97f2e610 upstream. The NIC driver should only enable interrupts when napi_complete_done() returns true. This patch adds the check for ixgbe. Cc: stable@vger.kernel.org # 4.10+ Suggested-by: Eric Dumazet Signed-off-by: Song Liu Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index b68d94b49a8a6..42183a8b649c7 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -3108,11 +3108,13 @@ int ixgbe_poll(struct napi_struct *napi, int budget) return budget; /* all work done, exit the polling mode */ - napi_complete_done(napi, work_done); - if (adapter->rx_itr_setting & 1) - ixgbe_set_itr(q_vector); - if (!test_bit(__IXGBE_DOWN, &adapter->state)) - ixgbe_irq_enable_queues(adapter, BIT_ULL(q_vector->v_idx)); + if (likely(napi_complete_done(napi, work_done))) { + if (adapter->rx_itr_setting & 1) + ixgbe_set_itr(q_vector); + if (!test_bit(__IXGBE_DOWN, &adapter->state)) + ixgbe_irq_enable_queues(adapter, + BIT_ULL(q_vector->v_idx)); + } return min(work_done, budget - 1); } -- GitLab From 4e7ea65127acb4728b975e42764c32455a1aa9f5 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Thu, 13 Sep 2018 21:16:20 -0400 Subject: [PATCH 0808/2269] dm thin metadata: fix __udivdi3 undefined on 32-bit commit 013ad043906b2befd4a9bfb06219ed9fedd92716 upstream. sector_div() is only viable for use with sector_t. dm_block_t is typedef'd to uint64_t -- so use div_u64() instead. Fixes: 3ab918281 ("dm thin metadata: try to avoid ever aborting transactions") Signed-off-by: Mike Snitzer Cc: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-thin-metadata.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c index 9f621b0c0e172..45ff8fd00248d 100644 --- a/drivers/md/dm-thin-metadata.c +++ b/drivers/md/dm-thin-metadata.c @@ -841,10 +841,8 @@ static void __set_metadata_reserve(struct dm_pool_metadata *pmd) if (r) { DMERR("could not get size of metadata device"); pmd->metadata_reserve = max_blocks; - } else { - sector_div(total, 10); - pmd->metadata_reserve = min(max_blocks, total); - } + } else + pmd->metadata_reserve = min(max_blocks, div_u64(total, 10)); } struct dm_pool_metadata *dm_pool_metadata_open(struct block_device *bdev, -- GitLab From 8e6a9240b1918c31a90e5d0a02c467ca68b160c6 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 10 Oct 2018 08:54:28 +0200 Subject: [PATCH 0809/2269] Linux 4.14.75 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index cc0e65a8d7bf3..7fc373c011c0f 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 74 +SUBLEVEL = 75 EXTRAVERSION = NAME = Petit Gorille -- GitLab From ab18409cf05f8e3d20ae3e5347f431947ecc397c Mon Sep 17 00:00:00 2001 From: Reinette Chatre Date: Wed, 19 Sep 2018 10:29:06 -0700 Subject: [PATCH 0810/2269] perf/core: Add sanity check to deal with pinned event failure commit befb1b3c2703897c5b8ffb0044dc5d0e5f27c5d7 upstream. It is possible that a failure can occur during the scheduling of a pinned event. The initial portion of perf_event_read_local() contains the various error checks an event should pass before it can be considered valid. Ensure that the potential scheduling failure of a pinned event is checked for and have a credible error. Suggested-by: Peter Zijlstra Signed-off-by: Reinette Chatre Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Cc: fenghua.yu@intel.com Cc: tony.luck@intel.com Cc: acme@kernel.org Cc: gavin.hindman@intel.com Cc: jithu.joseph@intel.com Cc: dave.hansen@intel.com Cc: hpa@zytor.com Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/6486385d1f30336e9973b24c8c65f5079543d3d3.1537377064.git.reinette.chatre@intel.com Signed-off-by: Greg Kroah-Hartman --- kernel/events/core.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/kernel/events/core.c b/kernel/events/core.c index 812ebf1cbb876..4dbce29a9313d 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3757,6 +3757,12 @@ int perf_event_read_local(struct perf_event *event, u64 *value) goto out; } + /* If this is a pinned event it must be running on this CPU */ + if (event->attr.pinned && event->oncpu != smp_processor_id()) { + ret = -EBUSY; + goto out; + } + /* * If the event is currently on this CPU, its either a per-task event, * or local to this CPU. Furthermore it means its ACTIVE (otherwise -- GitLab From 5f4f5b1f4491627f83d6b1bd2ac7b7c5b87e5f0e Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Fri, 5 Oct 2018 15:51:29 -0700 Subject: [PATCH 0811/2269] mm: migration: fix migration of huge PMD shared pages commit 017b1660df89f5fb4bfe66c34e35f7d2031100c7 upstream. The page migration code employs try_to_unmap() to try and unmap the source page. This is accomplished by using rmap_walk to find all vmas where the page is mapped. This search stops when page mapcount is zero. For shared PMD huge pages, the page map count is always 1 no matter the number of mappings. Shared mappings are tracked via the reference count of the PMD page. Therefore, try_to_unmap stops prematurely and does not completely unmap all mappings of the source page. This problem can result is data corruption as writes to the original source page can happen after contents of the page are copied to the target page. Hence, data is lost. This problem was originally seen as DB corruption of shared global areas after a huge page was soft offlined due to ECC memory errors. DB developers noticed they could reproduce the issue by (hotplug) offlining memory used to back huge pages. A simple testcase can reproduce the problem by creating a shared PMD mapping (note that this must be at least PUD_SIZE in size and PUD_SIZE aligned (1GB on x86)), and using migrate_pages() to migrate process pages between nodes while continually writing to the huge pages being migrated. To fix, have the try_to_unmap_one routine check for huge PMD sharing by calling huge_pmd_unshare for hugetlbfs huge pages. If it is a shared mapping it will be 'unshared' which removes the page table entry and drops the reference on the PMD page. After this, flush caches and TLB. mmu notifiers are called before locking page tables, but we can not be sure of PMD sharing until page tables are locked. Therefore, check for the possibility of PMD sharing before locking so that notifiers can prepare for the worst possible case. Link: http://lkml.kernel.org/r/20180823205917.16297-2-mike.kravetz@oracle.com [mike.kravetz@oracle.com: make _range_in_vma() a static inline] Link: http://lkml.kernel.org/r/6063f215-a5c8-2f0c-465a-2c515ddc952d@oracle.com Fixes: 39dde65c9940 ("shared page table for hugetlb page") Signed-off-by: Mike Kravetz Acked-by: Kirill A. Shutemov Reviewed-by: Naoya Horiguchi Acked-by: Michal Hocko Cc: Vlastimil Babka Cc: Davidlohr Bueso Cc: Jerome Glisse Cc: Mike Kravetz Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- include/linux/hugetlb.h | 14 ++++++++++++++ include/linux/mm.h | 6 ++++++ mm/hugetlb.c | 37 ++++++++++++++++++++++++++++++++++-- mm/rmap.c | 42 ++++++++++++++++++++++++++++++++++++++--- 4 files changed, 94 insertions(+), 5 deletions(-) diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 82a25880714ac..7aa2de25c09cf 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -140,6 +140,8 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, unsigned long sz); int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep); +void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma, + unsigned long *start, unsigned long *end); struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address, int write); struct page *follow_huge_pd(struct vm_area_struct *vma, @@ -169,6 +171,18 @@ static inline unsigned long hugetlb_total_pages(void) return 0; } +static inline int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, + pte_t *ptep) +{ + return 0; +} + +static inline void adjust_range_if_pmd_sharing_possible( + struct vm_area_struct *vma, + unsigned long *start, unsigned long *end) +{ +} + #define follow_hugetlb_page(m,v,p,vs,a,b,i,w,n) ({ BUG(); 0; }) #define follow_huge_addr(mm, addr, write) ERR_PTR(-EINVAL) #define copy_hugetlb_page_range(src, dst, vma) ({ BUG(); 0; }) diff --git a/include/linux/mm.h b/include/linux/mm.h index a26cf767407e1..58f2263de4de5 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2322,6 +2322,12 @@ static inline struct vm_area_struct *find_exact_vma(struct mm_struct *mm, return vma; } +static inline bool range_in_vma(struct vm_area_struct *vma, + unsigned long start, unsigned long end) +{ + return (vma && vma->vm_start <= start && end <= vma->vm_end); +} + #ifdef CONFIG_MMU pgprot_t vm_get_page_prot(unsigned long vm_flags); void vma_set_page_prot(struct vm_area_struct *vma); diff --git a/mm/hugetlb.c b/mm/hugetlb.c index dfd2947e046e7..9801dc0250e20 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -4517,12 +4517,40 @@ static bool vma_shareable(struct vm_area_struct *vma, unsigned long addr) /* * check on proper vm_flags and page table alignment */ - if (vma->vm_flags & VM_MAYSHARE && - vma->vm_start <= base && end <= vma->vm_end) + if (vma->vm_flags & VM_MAYSHARE && range_in_vma(vma, base, end)) return true; return false; } +/* + * Determine if start,end range within vma could be mapped by shared pmd. + * If yes, adjust start and end to cover range associated with possible + * shared pmd mappings. + */ +void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma, + unsigned long *start, unsigned long *end) +{ + unsigned long check_addr = *start; + + if (!(vma->vm_flags & VM_MAYSHARE)) + return; + + for (check_addr = *start; check_addr < *end; check_addr += PUD_SIZE) { + unsigned long a_start = check_addr & PUD_MASK; + unsigned long a_end = a_start + PUD_SIZE; + + /* + * If sharing is possible, adjust start/end if necessary. + */ + if (range_in_vma(vma, a_start, a_end)) { + if (a_start < *start) + *start = a_start; + if (a_end > *end) + *end = a_end; + } + } +} + /* * Search for a shareable pmd page for hugetlb. In any case calls pmd_alloc() * and returns the corresponding pte. While this is not necessary for the @@ -4620,6 +4648,11 @@ int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) { return 0; } + +void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma, + unsigned long *start, unsigned long *end) +{ +} #define want_pmd_share() (0) #endif /* CONFIG_ARCH_WANT_HUGE_PMD_SHARE */ diff --git a/mm/rmap.c b/mm/rmap.c index 97edcf44d88c3..8bd2ddd8febd5 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -1358,11 +1358,21 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, } /* - * We have to assume the worse case ie pmd for invalidation. Note that - * the page can not be free in this function as call of try_to_unmap() - * must hold a reference on the page. + * For THP, we have to assume the worse case ie pmd for invalidation. + * For hugetlb, it could be much worse if we need to do pud + * invalidation in the case of pmd sharing. + * + * Note that the page can not be free in this function as call of + * try_to_unmap() must hold a reference on the page. */ end = min(vma->vm_end, start + (PAGE_SIZE << compound_order(page))); + if (PageHuge(page)) { + /* + * If sharing is possible, start and end will be adjusted + * accordingly. + */ + adjust_range_if_pmd_sharing_possible(vma, &start, &end); + } mmu_notifier_invalidate_range_start(vma->vm_mm, start, end); while (page_vma_mapped_walk(&pvmw)) { @@ -1408,6 +1418,32 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, subpage = page - page_to_pfn(page) + pte_pfn(*pvmw.pte); address = pvmw.address; + if (PageHuge(page)) { + if (huge_pmd_unshare(mm, &address, pvmw.pte)) { + /* + * huge_pmd_unshare unmapped an entire PMD + * page. There is no way of knowing exactly + * which PMDs may be cached for this mm, so + * we must flush them all. start/end were + * already adjusted above to cover this range. + */ + flush_cache_range(vma, start, end); + flush_tlb_range(vma, start, end); + mmu_notifier_invalidate_range(mm, start, end); + + /* + * The ref count of the PMD page was dropped + * which is part of the way map counting + * is done for shared PMDs. Return 'true' + * here. When there is no other sharing, + * huge_pmd_unshare returns false and we will + * unmap the actual page and drop map count + * to zero. + */ + page_vma_mapped_walk_done(&pvmw); + break; + } + } if (IS_ENABLED(CONFIG_MIGRATION) && (flags & TTU_MIGRATION) && -- GitLab From a2e0493f99e6a65753ec32c39b17c0d1b370f9de Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Fri, 5 Oct 2018 15:51:41 -0700 Subject: [PATCH 0812/2269] mm, thp: fix mlocking THP page with migration enabled commit e125fe405abedc1dc8a5b2229b80ee91c1434015 upstream. A transparent huge page is represented by a single entry on an LRU list. Therefore, we can only make unevictable an entire compound page, not individual subpages. If a user tries to mlock() part of a huge page, we want the rest of the page to be reclaimable. We handle this by keeping PTE-mapped huge pages on normal LRU lists: the PMD on border of VM_LOCKED VMA will be split into PTE table. Introduction of THP migration breaks[1] the rules around mlocking THP pages. If we had a single PMD mapping of the page in mlocked VMA, the page will get mlocked, regardless of PTE mappings of the page. For tmpfs/shmem it's easy to fix by checking PageDoubleMap() in remove_migration_pmd(). Anon THP pages can only be shared between processes via fork(). Mlocked page can only be shared if parent mlocked it before forking, otherwise CoW will be triggered on mlock(). For Anon-THP, we can fix the issue by munlocking the page on removing PTE migration entry for the page. PTEs for the page will always come after mlocked PMD: rmap walks VMAs from oldest to newest. Test-case: #include #include #include #include #include int main(void) { unsigned long nodemask = 4; void *addr; addr = mmap((void *)0x20000000UL, 2UL << 20, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_LOCKED, -1, 0); if (fork()) { wait(NULL); return 0; } mlock(addr, 4UL << 10); mbind(addr, 2UL << 20, MPOL_PREFERRED | MPOL_F_RELATIVE_NODES, &nodemask, 4, MPOL_MF_MOVE); return 0; } [1] https://lkml.kernel.org/r/CAOMGZ=G52R-30rZvhGxEbkTw7rLLwBGadVYeo--iizcD3upL3A@mail.gmail.com Link: http://lkml.kernel.org/r/20180917133816.43995-1-kirill.shutemov@linux.intel.com Fixes: 616b8371539a ("mm: thp: enable thp migration in generic path") Signed-off-by: Kirill A. Shutemov Reported-by: Vegard Nossum Reviewed-by: Zi Yan Cc: Naoya Horiguchi Cc: Vlastimil Babka Cc: Andrea Arcangeli Cc: [4.14+] Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- mm/huge_memory.c | 2 +- mm/migrate.c | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 255469f78217b..174612f8339cf 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2886,7 +2886,7 @@ void remove_migration_pmd(struct page_vma_mapped_walk *pvmw, struct page *new) flush_cache_range(vma, mmun_start, mmun_start + HPAGE_PMD_SIZE); page_add_anon_rmap(new, vma, mmun_start, true); set_pmd_at(mm, mmun_start, pvmw->pmd, pmde); - if (vma->vm_flags & VM_LOCKED) + if ((vma->vm_flags & VM_LOCKED) && !PageDoubleMap(new)) mlock_vma_page(new); update_mmu_cache_pmd(vma, address, pvmw->pmd); } diff --git a/mm/migrate.c b/mm/migrate.c index 1236449b4777b..cbb025239071c 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -274,6 +274,9 @@ static bool remove_migration_pte(struct page *page, struct vm_area_struct *vma, if (vma->vm_flags & VM_LOCKED && !PageTransCompound(new)) mlock_vma_page(new); + if (PageTransHuge(page) && PageMlocked(page)) + clear_page_mlock(page); + /* No need to invalidate - it was non-present before */ update_mmu_cache(vma, pvmw.address, pvmw.pte); } -- GitLab From 5178716b55c4d209b3fb624633086371e482a185 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Fri, 5 Oct 2018 15:52:07 -0700 Subject: [PATCH 0813/2269] mm/vmstat.c: skip NR_TLB_REMOTE_FLUSH* properly commit 58bc4c34d249bf1bc50730a9a209139347cfacfe upstream. 5dd0b16cdaff ("mm/vmstat: Make NR_TLB_REMOTE_FLUSH_RECEIVED available even on UP") made the availability of the NR_TLB_REMOTE_FLUSH* counters inside the kernel unconditional to reduce #ifdef soup, but (either to avoid showing dummy zero counters to userspace, or because that code was missed) didn't update the vmstat_array, meaning that all following counters would be shown with incorrect values. This only affects kernel builds with CONFIG_VM_EVENT_COUNTERS=y && CONFIG_DEBUG_TLBFLUSH=y && CONFIG_SMP=n. Link: http://lkml.kernel.org/r/20181001143138.95119-2-jannh@google.com Fixes: 5dd0b16cdaff ("mm/vmstat: Make NR_TLB_REMOTE_FLUSH_RECEIVED available even on UP") Signed-off-by: Jann Horn Reviewed-by: Kees Cook Reviewed-by: Andrew Morton Acked-by: Michal Hocko Acked-by: Roman Gushchin Cc: Davidlohr Bueso Cc: Oleg Nesterov Cc: Christoph Lameter Cc: Kemi Wang Cc: Andy Lutomirski Cc: Ingo Molnar Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- mm/vmstat.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mm/vmstat.c b/mm/vmstat.c index 4bb13e72ac97c..2bdc962b2dfe9 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1203,6 +1203,9 @@ const char * const vmstat_text[] = { #ifdef CONFIG_SMP "nr_tlb_remote_flush", "nr_tlb_remote_flush_received", +#else + "", /* nr_tlb_remote_flush */ + "", /* nr_tlb_remote_flush_received */ #endif /* CONFIG_SMP */ "nr_tlb_local_flush_all", "nr_tlb_local_flush_one", -- GitLab From 87a9d1cc2e8f0a442ed1f219d36446398c5e24df Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 25 Sep 2018 13:20:00 -0700 Subject: [PATCH 0814/2269] KVM: x86: fix L1TF's MMIO GFN calculation commit daa07cbc9ae3da2d61b7ce900c0b9107d134f2c1 upstream. One defense against L1TF in KVM is to always set the upper five bits of the *legal* physical address in the SPTEs for non-present and reserved SPTEs, e.g. MMIO SPTEs. In the MMIO case, the GFN of the MMIO SPTE may overlap with the upper five bits that are being usurped to defend against L1TF. To preserve the GFN, the bits of the GFN that overlap with the repurposed bits are shifted left into the reserved bits, i.e. the GFN in the SPTE will be split into high and low parts. When retrieving the GFN from the MMIO SPTE, e.g. to check for an MMIO access, get_mmio_spte_gfn() unshifts the affected bits and restores the original GFN for comparison. Unfortunately, get_mmio_spte_gfn() neglects to mask off the reserved bits in the SPTE that were used to store the upper chunk of the GFN. As a result, KVM fails to detect MMIO accesses whose GPA overlaps the repurprosed bits, which in turn causes guest panics and hangs. Fix the bug by generating a mask that covers the lower chunk of the GFN, i.e. the bits that aren't shifted by the L1TF mitigation. The alternative approach would be to explicitly zero the five reserved bits that are used to store the upper chunk of the GFN, but that requires additional run-time computation and makes an already-ugly bit of code even more inscrutable. I considered adding a WARN_ON_ONCE(low_phys_bits-1 <= PAGE_SHIFT) to warn if GENMASK_ULL() generated a nonsensical value, but that seemed silly since that would mean a system that supports VMX has less than 18 bits of physical address space... Reported-by: Sakari Ailus Fixes: d9b47449c1a1 ("kvm: x86: Set highest physical address bits in non-present/reserved SPTEs") Cc: Junaid Shahid Cc: Jim Mattson Cc: stable@vger.kernel.org Reviewed-by: Junaid Shahid Tested-by: Sakari Ailus Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/mmu.c | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 1dfb808abd23f..d755e0d44ac1c 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -231,6 +231,17 @@ static u64 __read_mostly shadow_nonpresent_or_rsvd_mask; */ static const u64 shadow_nonpresent_or_rsvd_mask_len = 5; +/* + * In some cases, we need to preserve the GFN of a non-present or reserved + * SPTE when we usurp the upper five bits of the physical address space to + * defend against L1TF, e.g. for MMIO SPTEs. To preserve the GFN, we'll + * shift bits of the GFN that overlap with shadow_nonpresent_or_rsvd_mask + * left into the reserved bits, i.e. the GFN in the SPTE will be split into + * high and low parts. This mask covers the lower bits of the GFN. + */ +static u64 __read_mostly shadow_nonpresent_or_rsvd_lower_gfn_mask; + + static void mmu_spte_set(u64 *sptep, u64 spte); static void mmu_free_roots(struct kvm_vcpu *vcpu); @@ -338,9 +349,7 @@ static bool is_mmio_spte(u64 spte) static gfn_t get_mmio_spte_gfn(u64 spte) { - u64 mask = generation_mmio_spte_mask(MMIO_GEN_MASK) | shadow_mmio_mask | - shadow_nonpresent_or_rsvd_mask; - u64 gpa = spte & ~mask; + u64 gpa = spte & shadow_nonpresent_or_rsvd_lower_gfn_mask; gpa |= (spte >> shadow_nonpresent_or_rsvd_mask_len) & shadow_nonpresent_or_rsvd_mask; @@ -404,6 +413,8 @@ EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes); static void kvm_mmu_reset_all_pte_masks(void) { + u8 low_phys_bits; + shadow_user_mask = 0; shadow_accessed_mask = 0; shadow_dirty_mask = 0; @@ -418,12 +429,17 @@ static void kvm_mmu_reset_all_pte_masks(void) * appropriate mask to guard against L1TF attacks. Otherwise, it is * assumed that the CPU is not vulnerable to L1TF. */ + low_phys_bits = boot_cpu_data.x86_phys_bits; if (boot_cpu_data.x86_phys_bits < - 52 - shadow_nonpresent_or_rsvd_mask_len) + 52 - shadow_nonpresent_or_rsvd_mask_len) { shadow_nonpresent_or_rsvd_mask = rsvd_bits(boot_cpu_data.x86_phys_bits - shadow_nonpresent_or_rsvd_mask_len, boot_cpu_data.x86_phys_bits - 1); + low_phys_bits -= shadow_nonpresent_or_rsvd_mask_len; + } + shadow_nonpresent_or_rsvd_lower_gfn_mask = + GENMASK_ULL(low_phys_bits - 1, PAGE_SHIFT); } static int is_cpuid_PSE36(void) -- GitLab From 8e2e2192eb35eb24eb99f120b78eed0b104092b2 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Wed, 26 Sep 2018 14:35:50 +0200 Subject: [PATCH 0815/2269] blk-mq: I/O and timer unplugs are inverted in blktrace commit 587562d0c7cd6861f4f90a2eb811cccb1a376f5f upstream. trace_block_unplug() takes true for explicit unplugs and false for implicit unplugs. schedule() unplugs are implicit and should be reported as timer unplugs. While correct in the legacy code, this has been inverted in blk-mq since 4.11. Cc: stable@vger.kernel.org Fixes: bd166ef183c2 ("blk-mq-sched: add framework for MQ capable IO schedulers") Reviewed-by: Omar Sandoval Signed-off-by: Ilya Dryomov Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- block/blk-mq.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index 49979c095f31c..eac4448047366 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1512,7 +1512,7 @@ void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule) BUG_ON(!rq->q); if (rq->mq_ctx != this_ctx) { if (this_ctx) { - trace_block_unplug(this_q, depth, from_schedule); + trace_block_unplug(this_q, depth, !from_schedule); blk_mq_sched_insert_requests(this_q, this_ctx, &ctx_list, from_schedule); @@ -1532,7 +1532,7 @@ void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule) * on 'ctx_list'. Do those. */ if (this_ctx) { - trace_block_unplug(this_q, depth, from_schedule); + trace_block_unplug(this_q, depth, !from_schedule); blk_mq_sched_insert_requests(this_q, this_ctx, &ctx_list, from_schedule); } -- GitLab From 887361696fb9e2c5b99e39c8d0dbacbe46ff92f9 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Wed, 25 Apr 2018 12:14:39 +0200 Subject: [PATCH 0816/2269] clocksource/drivers/timer-atmel-pit: Properly handle error cases commit 52bf4a900d9cede3eb14982d0f2c5e6db6d97cc3 upstream. The smatch utility reports a possible leak: smatch warnings: drivers/clocksource/timer-atmel-pit.c:183 at91sam926x_pit_dt_init() warn: possible memory leak of 'data' Ensure data is freed before exiting with an error. Reported-by: Dan Carpenter Signed-off-by: Alexandre Belloni Cc: stable@vger.kernel.org Signed-off-by: Daniel Lezcano Signed-off-by: Greg Kroah-Hartman --- drivers/clocksource/timer-atmel-pit.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/drivers/clocksource/timer-atmel-pit.c b/drivers/clocksource/timer-atmel-pit.c index ec8a4376f74fb..2fab18fae4fcb 100644 --- a/drivers/clocksource/timer-atmel-pit.c +++ b/drivers/clocksource/timer-atmel-pit.c @@ -180,26 +180,29 @@ static int __init at91sam926x_pit_dt_init(struct device_node *node) data->base = of_iomap(node, 0); if (!data->base) { pr_err("Could not map PIT address\n"); - return -ENXIO; + ret = -ENXIO; + goto exit; } data->mck = of_clk_get(node, 0); if (IS_ERR(data->mck)) { pr_err("Unable to get mck clk\n"); - return PTR_ERR(data->mck); + ret = PTR_ERR(data->mck); + goto exit; } ret = clk_prepare_enable(data->mck); if (ret) { pr_err("Unable to enable mck\n"); - return ret; + goto exit; } /* Get the interrupts property */ data->irq = irq_of_parse_and_map(node, 0); if (!data->irq) { pr_err("Unable to get IRQ from DT\n"); - return -EINVAL; + ret = -EINVAL; + goto exit; } /* @@ -227,7 +230,7 @@ static int __init at91sam926x_pit_dt_init(struct device_node *node) ret = clocksource_register_hz(&data->clksrc, pit_rate); if (ret) { pr_err("Failed to register clocksource\n"); - return ret; + goto exit; } /* Set up irq handler */ @@ -236,7 +239,8 @@ static int __init at91sam926x_pit_dt_init(struct device_node *node) "at91_tick", data); if (ret) { pr_err("Unable to setup IRQ\n"); - return ret; + clocksource_unregister(&data->clksrc); + goto exit; } /* Set up and register clockevents */ @@ -254,6 +258,10 @@ static int __init at91sam926x_pit_dt_init(struct device_node *node) clockevents_register_device(&data->clkevt); return 0; + +exit: + kfree(data); + return ret; } TIMER_OF_DECLARE(at91sam926x_pit, "atmel,at91sam9260-pit", at91sam926x_pit_dt_init); -- GitLab From f66d89483bb301bd7a73ccc96204f1026b15151f Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Wed, 26 Sep 2018 18:11:22 +0200 Subject: [PATCH 0817/2269] fbdev/omapfb: fix omapfb_memory_read infoleak commit 1bafcbf59fed92af58955024452f45430d3898c5 upstream. OMAPFB_MEMORY_READ ioctl reads pixels from the LCD's memory and copies them to a userspace buffer. The code has two issues: - The user provided width and height could be large enough to overflow the calculations - The copy_to_user() can copy uninitialized memory to the userspace, which might contain sensitive kernel information. Fix these by limiting the width & height parameters, and only copying the amount of data that we actually received from the LCD. Signed-off-by: Tomi Valkeinen Reported-by: Jann Horn Cc: stable@vger.kernel.org Cc: security@kernel.org Cc: Will Deacon Cc: Jann Horn Cc: Tony Lindgren Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Greg Kroah-Hartman --- drivers/video/fbdev/omap2/omapfb/omapfb-ioctl.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/video/fbdev/omap2/omapfb/omapfb-ioctl.c b/drivers/video/fbdev/omap2/omapfb/omapfb-ioctl.c index ef69273074ba7..a3edb20ea4c36 100644 --- a/drivers/video/fbdev/omap2/omapfb/omapfb-ioctl.c +++ b/drivers/video/fbdev/omap2/omapfb/omapfb-ioctl.c @@ -496,6 +496,9 @@ static int omapfb_memory_read(struct fb_info *fbi, if (!access_ok(VERIFY_WRITE, mr->buffer, mr->buffer_size)) return -EFAULT; + if (mr->w > 4096 || mr->h > 4096) + return -EINVAL; + if (mr->w * mr->h * 3 > mr->buffer_size) return -EINVAL; @@ -509,7 +512,7 @@ static int omapfb_memory_read(struct fb_info *fbi, mr->x, mr->y, mr->w, mr->h); if (r > 0) { - if (copy_to_user(mr->buffer, buf, mr->buffer_size)) + if (copy_to_user(mr->buffer, buf, r)) r = -EFAULT; } -- GitLab From 309a1c5cfc598c162dfc951fac040554164056e4 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Tue, 25 Sep 2018 02:12:30 -0600 Subject: [PATCH 0818/2269] xen-netback: fix input validation in xenvif_set_hash_mapping() commit 780e83c259fc33e8959fed8dfdad17e378d72b62 upstream. Both len and off are frontend specified values, so we need to make sure there's no overflow when adding the two for the bounds check. We also want to avoid undefined behavior and hence use off to index into ->hash.mapping[] only after bounds checking. This at the same time allows to take care of not applying off twice for the bounds checking against vif->num_queues. It is also insufficient to bounds check copy_op.len, as this is len truncated to 16 bits. This is XSA-270 / CVE-2018-15471. Reported-by: Felix Wilhelm Signed-off-by: Jan Beulich Reviewed-by: Paul Durrant Tested-by: Paul Durrant Cc: stable@vger.kernel.org [4.7 onwards] Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/xen-netback/hash.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/net/xen-netback/hash.c b/drivers/net/xen-netback/hash.c index 3c4c58b9fe76e..3b6fb5b3bdb23 100644 --- a/drivers/net/xen-netback/hash.c +++ b/drivers/net/xen-netback/hash.c @@ -332,20 +332,22 @@ u32 xenvif_set_hash_mapping_size(struct xenvif *vif, u32 size) u32 xenvif_set_hash_mapping(struct xenvif *vif, u32 gref, u32 len, u32 off) { - u32 *mapping = &vif->hash.mapping[off]; + u32 *mapping = vif->hash.mapping; struct gnttab_copy copy_op = { .source.u.ref = gref, .source.domid = vif->domid, - .dest.u.gmfn = virt_to_gfn(mapping), .dest.domid = DOMID_SELF, - .dest.offset = xen_offset_in_page(mapping), - .len = len * sizeof(u32), + .len = len * sizeof(*mapping), .flags = GNTCOPY_source_gref }; - if ((off + len > vif->hash.size) || copy_op.len > XEN_PAGE_SIZE) + if ((off + len < off) || (off + len > vif->hash.size) || + len > XEN_PAGE_SIZE / sizeof(*mapping)) return XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER; + copy_op.dest.u.gmfn = virt_to_gfn(mapping + off); + copy_op.dest.offset = xen_offset_in_page(mapping + off); + while (len-- != 0) if (mapping[off++] >= vif->num_queues) return XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER; -- GitLab From 0c0dd182adae698ae108566e1300c498305d5aeb Mon Sep 17 00:00:00 2001 From: Rex Zhu Date: Thu, 27 Sep 2018 20:48:39 +0800 Subject: [PATCH 0819/2269] drm/amdgpu: Fix vce work queue was not cancelled when suspend MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 61ea6f5831974ebd1a57baffd7cc30600a2e26fc upstream. The vce cancel_delayed_work_sync never be called. driver call the function in error path. This caused the A+A suspend hang when runtime pm enebled. As we will visit the smu in the idle queue. this will cause smu hang because the dgpu has been suspend, and the dgpu also will be waked up. As the smu has been hang, so the dgpu resume will failed. Reviewed-by: Christian König Reviewed-by: Feifei Xu Signed-off-by: Rex Zhu Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c | 3 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index 9fc3d387eae3e..fb36425e21ffa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -231,6 +231,8 @@ int amdgpu_vce_suspend(struct amdgpu_device *adev) { int i; + cancel_delayed_work_sync(&adev->vce.idle_work); + if (adev->vce.vcpu_bo == NULL) return 0; @@ -241,7 +243,6 @@ int amdgpu_vce_suspend(struct amdgpu_device *adev) if (i == AMDGPU_MAX_VCE_HANDLES) return 0; - cancel_delayed_work_sync(&adev->vce.idle_work); /* TODO: suspending running encoding sessions isn't supported */ return -EINVAL; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index 1612d8aa6ad60..fca1b10628a67 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -155,11 +155,11 @@ int amdgpu_vcn_suspend(struct amdgpu_device *adev) unsigned size; void *ptr; + cancel_delayed_work_sync(&adev->vcn.idle_work); + if (adev->vcn.vcpu_bo == NULL) return 0; - cancel_delayed_work_sync(&adev->vcn.idle_work); - size = amdgpu_bo_size(adev->vcn.vcpu_bo); ptr = adev->vcn.cpu_addr; -- GitLab From 71a0556255de125b7e3fc0dc6171fb31fab2b9ad Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 26 Sep 2018 02:17:03 -0500 Subject: [PATCH 0820/2269] drm/syncobj: Don't leak fences when WAIT_FOR_SUBMIT is set commit 337fe9f5c1e7de1f391c6a692531379d2aa2ee11 upstream. We attempt to get fences earlier in the hopes that everything will already have fences and no callbacks will be needed. If we do succeed in getting a fence, getting one a second time will result in a duplicate ref with no unref. This is causing memory leaks in Vulkan applications that create a lot of fences; playing for a few hours can, apparently, bring down the system. Cc: stable@vger.kernel.org Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=107899 Reviewed-by: Chris Wilson Signed-off-by: Jason Ekstrand Signed-off-by: Sean Paul Link: https://patchwork.freedesktop.org/patch/msgid/20180926071703.15257-1-jason.ekstrand@intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_syncobj.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c index 7bcf5702c91ce..889c95d4feecc 100644 --- a/drivers/gpu/drm/drm_syncobj.c +++ b/drivers/gpu/drm/drm_syncobj.c @@ -96,6 +96,8 @@ static int drm_syncobj_fence_get_or_add_callback(struct drm_syncobj *syncobj, { int ret; + WARN_ON(*fence); + *fence = drm_syncobj_fence_get(syncobj); if (*fence) return 1; @@ -656,6 +658,9 @@ static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs, if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT) { for (i = 0; i < count; ++i) { + if (entries[i].fence) + continue; + drm_syncobj_fence_get_or_add_callback(syncobjs[i], &entries[i].fence, &entries[i].syncobj_cb, -- GitLab From 30500cc74a365c2fc90dc9b6d9611bbbc1304af9 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Mon, 1 Oct 2018 12:52:15 -0700 Subject: [PATCH 0821/2269] x86/vdso: Fix asm constraints on vDSO syscall fallbacks commit 715bd9d12f84d8f5cc8ad21d888f9bc304a8eb0b upstream. The syscall fallbacks in the vDSO have incorrect asm constraints. They are not marked as writing to their outputs -- instead, they are marked as clobbering "memory", which is useless. In particular, gcc is smart enough to know that the timespec parameter hasn't escaped, so a memory clobber doesn't clobber it. And passing a pointer as an asm *input* does not tell gcc that the pointed-to value is changed. Add in the fact that the asm instructions weren't volatile, and gcc was free to omit them entirely unless their sole output (the return value) is used. Which it is (phew!), but that stops happening with some upcoming patches. As a trivial example, the following code: void test_fallback(struct timespec *ts) { vdso_fallback_gettime(CLOCK_MONOTONIC, ts); } compiles to: 00000000000000c0 : c0: c3 retq To add insult to injury, the RCX and R11 clobbers on 64-bit builds were missing. The "memory" clobber is also unnecessary -- no ordering with respect to other memory operations is needed, but that's going to be fixed in a separate not-for-stable patch. Fixes: 2aae950b21e4 ("x86_64: Add vDSO for x86-64 with gettimeofday/clock_gettime/getcpu") Signed-off-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/2c0231690551989d2fafa60ed0e7b5cc8b403908.1538422295.git.luto@kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/vdso/vclock_gettime.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c index fa8dbfcf7ed37..94a301c412fcf 100644 --- a/arch/x86/entry/vdso/vclock_gettime.c +++ b/arch/x86/entry/vdso/vclock_gettime.c @@ -43,8 +43,9 @@ extern u8 hvclock_page notrace static long vdso_fallback_gettime(long clock, struct timespec *ts) { long ret; - asm("syscall" : "=a" (ret) : - "0" (__NR_clock_gettime), "D" (clock), "S" (ts) : "memory"); + asm ("syscall" : "=a" (ret), "=m" (*ts) : + "0" (__NR_clock_gettime), "D" (clock), "S" (ts) : + "memory", "rcx", "r11"); return ret; } @@ -52,8 +53,9 @@ notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz) { long ret; - asm("syscall" : "=a" (ret) : - "0" (__NR_gettimeofday), "D" (tv), "S" (tz) : "memory"); + asm ("syscall" : "=a" (ret), "=m" (*tv), "=m" (*tz) : + "0" (__NR_gettimeofday), "D" (tv), "S" (tz) : + "memory", "rcx", "r11"); return ret; } @@ -64,12 +66,12 @@ notrace static long vdso_fallback_gettime(long clock, struct timespec *ts) { long ret; - asm( + asm ( "mov %%ebx, %%edx \n" "mov %2, %%ebx \n" "call __kernel_vsyscall \n" "mov %%edx, %%ebx \n" - : "=a" (ret) + : "=a" (ret), "=m" (*ts) : "0" (__NR_clock_gettime), "g" (clock), "c" (ts) : "memory", "edx"); return ret; @@ -79,12 +81,12 @@ notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz) { long ret; - asm( + asm ( "mov %%ebx, %%edx \n" "mov %2, %%ebx \n" "call __kernel_vsyscall \n" "mov %%edx, %%ebx \n" - : "=a" (ret) + : "=a" (ret), "=m" (*tv), "=m" (*tz) : "0" (__NR_gettimeofday), "g" (tv), "c" (tz) : "memory", "edx"); return ret; -- GitLab From 64ff5747e2af415348ca3dd9221ef542ad07fdb9 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Mon, 1 Oct 2018 12:52:16 -0700 Subject: [PATCH 0822/2269] selftests/x86: Add clock_gettime() tests to test_vdso commit 7c03e7035ac1cf2a6165754e4f3a49c2f1977838 upstream. Now that the vDSO implementation of clock_gettime() is getting reworked, add a selftest for it. This tests that its output is consistent with the syscall version. This is marked for stable to serve as a test for commit 715bd9d12f84 ("x86/vdso: Fix asm constraints on vDSO syscall fallbacks") Signed-off-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/082399674de2619b2befd8c0dde49b260605b126.1538422295.git.luto@kernel.org Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/x86/test_vdso.c | 99 +++++++++++++++++++++++++ 1 file changed, 99 insertions(+) diff --git a/tools/testing/selftests/x86/test_vdso.c b/tools/testing/selftests/x86/test_vdso.c index 2352590117042..49f7294fb382c 100644 --- a/tools/testing/selftests/x86/test_vdso.c +++ b/tools/testing/selftests/x86/test_vdso.c @@ -17,6 +17,7 @@ #include #include #include +#include #ifndef SYS_getcpu # ifdef __x86_64__ @@ -31,6 +32,10 @@ int nerrs = 0; +typedef int (*vgettime_t)(clockid_t, struct timespec *); + +vgettime_t vdso_clock_gettime; + typedef long (*getcpu_t)(unsigned *, unsigned *, void *); getcpu_t vgetcpu; @@ -95,6 +100,10 @@ static void fill_function_pointers() printf("Warning: failed to find getcpu in vDSO\n"); vgetcpu = (getcpu_t) vsyscall_getcpu(); + + vdso_clock_gettime = (vgettime_t)dlsym(vdso, "__vdso_clock_gettime"); + if (!vdso_clock_gettime) + printf("Warning: failed to find clock_gettime in vDSO\n"); } static long sys_getcpu(unsigned * cpu, unsigned * node, @@ -103,6 +112,11 @@ static long sys_getcpu(unsigned * cpu, unsigned * node, return syscall(__NR_getcpu, cpu, node, cache); } +static inline int sys_clock_gettime(clockid_t id, struct timespec *ts) +{ + return syscall(__NR_clock_gettime, id, ts); +} + static void test_getcpu(void) { printf("[RUN]\tTesting getcpu...\n"); @@ -155,10 +169,95 @@ static void test_getcpu(void) } } +static bool ts_leq(const struct timespec *a, const struct timespec *b) +{ + if (a->tv_sec != b->tv_sec) + return a->tv_sec < b->tv_sec; + else + return a->tv_nsec <= b->tv_nsec; +} + +static char const * const clocknames[] = { + [0] = "CLOCK_REALTIME", + [1] = "CLOCK_MONOTONIC", + [2] = "CLOCK_PROCESS_CPUTIME_ID", + [3] = "CLOCK_THREAD_CPUTIME_ID", + [4] = "CLOCK_MONOTONIC_RAW", + [5] = "CLOCK_REALTIME_COARSE", + [6] = "CLOCK_MONOTONIC_COARSE", + [7] = "CLOCK_BOOTTIME", + [8] = "CLOCK_REALTIME_ALARM", + [9] = "CLOCK_BOOTTIME_ALARM", + [10] = "CLOCK_SGI_CYCLE", + [11] = "CLOCK_TAI", +}; + +static void test_one_clock_gettime(int clock, const char *name) +{ + struct timespec start, vdso, end; + int vdso_ret, end_ret; + + printf("[RUN]\tTesting clock_gettime for clock %s (%d)...\n", name, clock); + + if (sys_clock_gettime(clock, &start) < 0) { + if (errno == EINVAL) { + vdso_ret = vdso_clock_gettime(clock, &vdso); + if (vdso_ret == -EINVAL) { + printf("[OK]\tNo such clock.\n"); + } else { + printf("[FAIL]\tNo such clock, but __vdso_clock_gettime returned %d\n", vdso_ret); + nerrs++; + } + } else { + printf("[WARN]\t clock_gettime(%d) syscall returned error %d\n", clock, errno); + } + return; + } + + vdso_ret = vdso_clock_gettime(clock, &vdso); + end_ret = sys_clock_gettime(clock, &end); + + if (vdso_ret != 0 || end_ret != 0) { + printf("[FAIL]\tvDSO returned %d, syscall errno=%d\n", + vdso_ret, errno); + nerrs++; + return; + } + + printf("\t%llu.%09ld %llu.%09ld %llu.%09ld\n", + (unsigned long long)start.tv_sec, start.tv_nsec, + (unsigned long long)vdso.tv_sec, vdso.tv_nsec, + (unsigned long long)end.tv_sec, end.tv_nsec); + + if (!ts_leq(&start, &vdso) || !ts_leq(&vdso, &end)) { + printf("[FAIL]\tTimes are out of sequence\n"); + nerrs++; + } +} + +static void test_clock_gettime(void) +{ + for (int clock = 0; clock < sizeof(clocknames) / sizeof(clocknames[0]); + clock++) { + test_one_clock_gettime(clock, clocknames[clock]); + } + + /* Also test some invalid clock ids */ + test_one_clock_gettime(-1, "invalid"); + test_one_clock_gettime(INT_MIN, "invalid"); + test_one_clock_gettime(INT_MAX, "invalid"); +} + int main(int argc, char **argv) { fill_function_pointers(); + test_clock_gettime(); + + /* + * Test getcpu() last so that, if something goes wrong setting affinity, + * we still run the other tests. + */ test_getcpu(); return nerrs ? 1 : 0; -- GitLab From 1194e838b87953e1e4923da585a0e0226d8aecea Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Tue, 2 Oct 2018 21:26:50 -0700 Subject: [PATCH 0823/2269] x86/vdso: Only enable vDSO retpolines when enabled and supported commit 4f166564014aba65ad6f15b612f6711fd0f117ee upstream. When I fixed the vDSO build to use inline retpolines, I messed up the Makefile logic and made it unconditional. It should have depended on CONFIG_RETPOLINE and on the availability of compiler support. This broke the build on some older compilers. Reported-by: nikola.ciprich@linuxbox.cz Signed-off-by: Andy Lutomirski Cc: Borislav Petkov Cc: David Woodhouse Cc: Linus Torvalds Cc: Matt Rickard Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: jason.vas.dias@gmail.com Cc: stable@vger.kernel.org Fixes: 2e549b2ee0e3 ("x86/vdso: Fix vDSO build if a retpoline is emitted") Link: http://lkml.kernel.org/r/08a1f29f2c238dd1f493945e702a521f8a5aa3ae.1538540801.git.luto@kernel.org Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/vdso/Makefile | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile index b545bf9d23283..0a550dc5c5251 100644 --- a/arch/x86/entry/vdso/Makefile +++ b/arch/x86/entry/vdso/Makefile @@ -74,7 +74,13 @@ $(obj)/vdso-image-%.c: $(obj)/vdso%.so.dbg $(obj)/vdso%.so $(obj)/vdso2c FORCE CFL := $(PROFILING) -mcmodel=small -fPIC -O2 -fasynchronous-unwind-tables -m64 \ $(filter -g%,$(KBUILD_CFLAGS)) $(call cc-option, -fno-stack-protector) \ -fno-omit-frame-pointer -foptimize-sibling-calls \ - -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO $(RETPOLINE_VDSO_CFLAGS) + -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO + +ifdef CONFIG_RETPOLINE +ifneq ($(RETPOLINE_VDSO_CFLAGS),) + CFL += $(RETPOLINE_VDSO_CFLAGS) +endif +endif $(vobjs): KBUILD_CFLAGS := $(filter-out $(GCC_PLUGINS_CFLAGS) $(RETPOLINE_CFLAGS),$(KBUILD_CFLAGS)) $(CFL) @@ -153,7 +159,13 @@ KBUILD_CFLAGS_32 += $(call cc-option, -fno-stack-protector) KBUILD_CFLAGS_32 += $(call cc-option, -foptimize-sibling-calls) KBUILD_CFLAGS_32 += -fno-omit-frame-pointer KBUILD_CFLAGS_32 += -DDISABLE_BRANCH_PROFILING -KBUILD_CFLAGS_32 += $(RETPOLINE_VDSO_CFLAGS) + +ifdef CONFIG_RETPOLINE +ifneq ($(RETPOLINE_VDSO_CFLAGS),) + KBUILD_CFLAGS_32 += $(RETPOLINE_VDSO_CFLAGS) +endif +endif + $(obj)/vdso32.so.dbg: KBUILD_CFLAGS = $(KBUILD_CFLAGS_32) $(obj)/vdso32.so.dbg: FORCE \ -- GitLab From 25bc6e80f9d6434511a09cb4a41c655b154486d9 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Wed, 3 Oct 2018 16:23:49 -0700 Subject: [PATCH 0824/2269] x86/vdso: Fix vDSO syscall fallback asm constraint regression commit 02e425668f5c9deb42787d10001a3b605993ad15 upstream. When I added the missing memory outputs, I failed to update the index of the first argument (ebx) on 32-bit builds, which broke the fallbacks. Somehow I must have screwed up my testing or gotten lucky. Add another test to cover gettimeofday() as well. Signed-off-by: Andy Lutomirski Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: stable@vger.kernel.org Fixes: 715bd9d12f84 ("x86/vdso: Fix asm constraints on vDSO syscall fallbacks") Link: http://lkml.kernel.org/r/21bd45ab04b6d838278fa5bebfa9163eceffa13c.1538608971.git.luto@kernel.org Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/vdso/vclock_gettime.c | 8 +-- tools/testing/selftests/x86/test_vdso.c | 73 +++++++++++++++++++++++++ 2 files changed, 77 insertions(+), 4 deletions(-) diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c index 94a301c412fcf..9c35dc0a9d644 100644 --- a/arch/x86/entry/vdso/vclock_gettime.c +++ b/arch/x86/entry/vdso/vclock_gettime.c @@ -68,11 +68,11 @@ notrace static long vdso_fallback_gettime(long clock, struct timespec *ts) asm ( "mov %%ebx, %%edx \n" - "mov %2, %%ebx \n" + "mov %[clock], %%ebx \n" "call __kernel_vsyscall \n" "mov %%edx, %%ebx \n" : "=a" (ret), "=m" (*ts) - : "0" (__NR_clock_gettime), "g" (clock), "c" (ts) + : "0" (__NR_clock_gettime), [clock] "g" (clock), "c" (ts) : "memory", "edx"); return ret; } @@ -83,11 +83,11 @@ notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz) asm ( "mov %%ebx, %%edx \n" - "mov %2, %%ebx \n" + "mov %[tv], %%ebx \n" "call __kernel_vsyscall \n" "mov %%edx, %%ebx \n" : "=a" (ret), "=m" (*tv), "=m" (*tz) - : "0" (__NR_gettimeofday), "g" (tv), "c" (tz) + : "0" (__NR_gettimeofday), [tv] "g" (tv), "c" (tz) : "memory", "edx"); return ret; } diff --git a/tools/testing/selftests/x86/test_vdso.c b/tools/testing/selftests/x86/test_vdso.c index 49f7294fb382c..35edd61d1663e 100644 --- a/tools/testing/selftests/x86/test_vdso.c +++ b/tools/testing/selftests/x86/test_vdso.c @@ -36,6 +36,10 @@ typedef int (*vgettime_t)(clockid_t, struct timespec *); vgettime_t vdso_clock_gettime; +typedef long (*vgtod_t)(struct timeval *tv, struct timezone *tz); + +vgtod_t vdso_gettimeofday; + typedef long (*getcpu_t)(unsigned *, unsigned *, void *); getcpu_t vgetcpu; @@ -104,6 +108,11 @@ static void fill_function_pointers() vdso_clock_gettime = (vgettime_t)dlsym(vdso, "__vdso_clock_gettime"); if (!vdso_clock_gettime) printf("Warning: failed to find clock_gettime in vDSO\n"); + + vdso_gettimeofday = (vgtod_t)dlsym(vdso, "__vdso_gettimeofday"); + if (!vdso_gettimeofday) + printf("Warning: failed to find gettimeofday in vDSO\n"); + } static long sys_getcpu(unsigned * cpu, unsigned * node, @@ -117,6 +126,11 @@ static inline int sys_clock_gettime(clockid_t id, struct timespec *ts) return syscall(__NR_clock_gettime, id, ts); } +static inline int sys_gettimeofday(struct timeval *tv, struct timezone *tz) +{ + return syscall(__NR_gettimeofday, tv, tz); +} + static void test_getcpu(void) { printf("[RUN]\tTesting getcpu...\n"); @@ -177,6 +191,14 @@ static bool ts_leq(const struct timespec *a, const struct timespec *b) return a->tv_nsec <= b->tv_nsec; } +static bool tv_leq(const struct timeval *a, const struct timeval *b) +{ + if (a->tv_sec != b->tv_sec) + return a->tv_sec < b->tv_sec; + else + return a->tv_usec <= b->tv_usec; +} + static char const * const clocknames[] = { [0] = "CLOCK_REALTIME", [1] = "CLOCK_MONOTONIC", @@ -248,11 +270,62 @@ static void test_clock_gettime(void) test_one_clock_gettime(INT_MAX, "invalid"); } +static void test_gettimeofday(void) +{ + struct timeval start, vdso, end; + struct timezone sys_tz, vdso_tz; + int vdso_ret, end_ret; + + if (!vdso_gettimeofday) + return; + + printf("[RUN]\tTesting gettimeofday...\n"); + + if (sys_gettimeofday(&start, &sys_tz) < 0) { + printf("[FAIL]\tsys_gettimeofday failed (%d)\n", errno); + nerrs++; + return; + } + + vdso_ret = vdso_gettimeofday(&vdso, &vdso_tz); + end_ret = sys_gettimeofday(&end, NULL); + + if (vdso_ret != 0 || end_ret != 0) { + printf("[FAIL]\tvDSO returned %d, syscall errno=%d\n", + vdso_ret, errno); + nerrs++; + return; + } + + printf("\t%llu.%06ld %llu.%06ld %llu.%06ld\n", + (unsigned long long)start.tv_sec, start.tv_usec, + (unsigned long long)vdso.tv_sec, vdso.tv_usec, + (unsigned long long)end.tv_sec, end.tv_usec); + + if (!tv_leq(&start, &vdso) || !tv_leq(&vdso, &end)) { + printf("[FAIL]\tTimes are out of sequence\n"); + nerrs++; + } + + if (sys_tz.tz_minuteswest == vdso_tz.tz_minuteswest && + sys_tz.tz_dsttime == vdso_tz.tz_dsttime) { + printf("[OK]\ttimezones match: minuteswest=%d, dsttime=%d\n", + sys_tz.tz_minuteswest, sys_tz.tz_dsttime); + } else { + printf("[FAIL]\ttimezones do not match\n"); + nerrs++; + } + + /* And make sure that passing NULL for tz doesn't crash. */ + vdso_gettimeofday(&vdso, NULL); +} + int main(int argc, char **argv) { fill_function_pointers(); test_clock_gettime(); + test_gettimeofday(); /* * Test getcpu() last so that, if something goes wrong setting affinity, -- GitLab From 8ebd65583375726670cbecfb47913a41f0a1d0a1 Mon Sep 17 00:00:00 2001 From: Daniel Drake Date: Thu, 27 Sep 2018 15:47:33 -0500 Subject: [PATCH 0825/2269] PCI: Reprogram bridge prefetch registers on resume commit 083874549fdfefa629dfa752785e20427dde1511 upstream. On 38+ Intel-based ASUS products, the NVIDIA GPU becomes unusable after S3 suspend/resume. The affected products include multiple generations of NVIDIA GPUs and Intel SoCs. After resume, nouveau logs many errors such as: fifo: fault 00 [READ] at 0000005555555000 engine 00 [GR] client 04 [HUB/FE] reason 4a [] on channel -1 [007fa91000 unknown] DRM: failed to idle channel 0 [DRM] Similarly, the NVIDIA proprietary driver also fails after resume (black screen, 100% CPU usage in Xorg process). We shipped a sample to NVIDIA for diagnosis, and their response indicated that it's a problem with the parent PCI bridge (on the Intel SoC), not the GPU. Runtime suspend/resume works fine, only S3 suspend is affected. We found a workaround: on resume, rewrite the Intel PCI bridge 'Prefetchable Base Upper 32 Bits' register (PCI_PREF_BASE_UPPER32). In the cases that I checked, this register has value 0 and we just have to rewrite that value. Linux already saves and restores PCI config space during suspend/resume, but this register was being skipped because upon resume, it already has value 0 (the correct, pre-suspend value). Intel appear to have previously acknowledged this behaviour and the requirement to rewrite this register: https://bugzilla.kernel.org/show_bug.cgi?id=116851#c23 Based on that, rewrite the prefetch register values even when that appears unnecessary. We have confirmed this solution on all the affected models we have in-hands (X542UQ, UX533FD, X530UN, V272UN). Additionally, this solves an issue where r8169 MSI-X interrupts were broken after S3 suspend/resume on ASUS X441UAR. This issue was recently worked around in commit 7bb05b85bc2d ("r8169: don't use MSI-X on RTL8106e"). It also fixes the same issue on RTL6186evl/8111evl on an Aimfor-tech laptop that we had not yet patched. I suspect it will also fix the issue that was worked around in commit 7c53a722459c ("r8169: don't use MSI-X on RTL8168g"). Thomas Martitz reports that this change also solves an issue where the AMD Radeon Polaris 10 GPU on the HP Zbook 14u G5 is unresponsive after S3 suspend/resume. Link: https://bugzilla.kernel.org/show_bug.cgi?id=201069 Signed-off-by: Daniel Drake Signed-off-by: Bjorn Helgaas Reviewed-by: Rafael J. Wysocki Reviewed-By: Peter Wu CC: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/pci/pci.c | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 22924629e64a8..1af30c8815665 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -1112,12 +1112,12 @@ int pci_save_state(struct pci_dev *dev) EXPORT_SYMBOL(pci_save_state); static void pci_restore_config_dword(struct pci_dev *pdev, int offset, - u32 saved_val, int retry) + u32 saved_val, int retry, bool force) { u32 val; pci_read_config_dword(pdev, offset, &val); - if (val == saved_val) + if (!force && val == saved_val) return; for (;;) { @@ -1136,25 +1136,36 @@ static void pci_restore_config_dword(struct pci_dev *pdev, int offset, } static void pci_restore_config_space_range(struct pci_dev *pdev, - int start, int end, int retry) + int start, int end, int retry, + bool force) { int index; for (index = end; index >= start; index--) pci_restore_config_dword(pdev, 4 * index, pdev->saved_config_space[index], - retry); + retry, force); } static void pci_restore_config_space(struct pci_dev *pdev) { if (pdev->hdr_type == PCI_HEADER_TYPE_NORMAL) { - pci_restore_config_space_range(pdev, 10, 15, 0); + pci_restore_config_space_range(pdev, 10, 15, 0, false); /* Restore BARs before the command register. */ - pci_restore_config_space_range(pdev, 4, 9, 10); - pci_restore_config_space_range(pdev, 0, 3, 0); + pci_restore_config_space_range(pdev, 4, 9, 10, false); + pci_restore_config_space_range(pdev, 0, 3, 0, false); + } else if (pdev->hdr_type == PCI_HEADER_TYPE_BRIDGE) { + pci_restore_config_space_range(pdev, 12, 15, 0, false); + + /* + * Force rewriting of prefetch registers to avoid S3 resume + * issues on Intel PCI bridges that occur when these + * registers are not explicitly written. + */ + pci_restore_config_space_range(pdev, 9, 11, 0, true); + pci_restore_config_space_range(pdev, 0, 8, 0, false); } else { - pci_restore_config_space_range(pdev, 0, 15, 0); + pci_restore_config_space_range(pdev, 0, 15, 0, false); } } -- GitLab From 9047696cb3f8d7cbe80592d231841699fd0246f2 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sat, 29 Sep 2018 16:01:58 +0200 Subject: [PATCH 0826/2269] mac80211: fix setting IEEE80211_KEY_FLAG_RX_MGMT for AP mode keys commit 211710ca74adf790b46ab3867fcce8047b573cd1 upstream. key->sta is only valid after ieee80211_key_link, which is called later in this function. Because of that, the IEEE80211_KEY_FLAG_RX_MGMT is never set when management frame protection is enabled. Fixes: e548c49e6dc6b ("mac80211: add key flag for management keys") Cc: stable@vger.kernel.org Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/mac80211/cfg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index b456b882a6ea5..63558335e41ee 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -426,7 +426,7 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev, case NL80211_IFTYPE_AP: case NL80211_IFTYPE_AP_VLAN: /* Keys without a station are used for TX only */ - if (key->sta && test_sta_flag(key->sta, WLAN_STA_MFP)) + if (sta && test_sta_flag(sta, WLAN_STA_MFP)) key->conf.flags |= IEEE80211_KEY_FLAG_RX_MGMT; break; case NL80211_IFTYPE_ADHOC: -- GitLab From 1364055c96c5359bfe74637e41ba673956ac9c93 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 4 Oct 2018 11:08:12 +0200 Subject: [PATCH 0827/2269] PM / core: Clear the direct_complete flag on errors commit 69e445ab8b66a9f30519842ef18be555d3ee9b51 upstream. If __device_suspend() runs asynchronously (in which case the device passed to it is in dpm_suspended_list at that point) and it returns early on an error or pending wakeup, and the power.direct_complete flag has been set for the device already, the subsequent device_resume() will be confused by that and it will call pm_runtime_enable() incorrectly, as runtime PM has not been disabled for the device by __device_suspend(). To avoid that, clear power.direct_complete if __device_suspend() is not going to disable runtime PM for the device before returning. Fixes: aae4518b3124 (PM / sleep: Mechanism to avoid resuming runtime-suspended devices unnecessarily) Reported-by: Al Cooper Tested-by: Al Cooper Reviewed-by: Ulf Hansson Cc: 3.16+ # 3.16+ Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/base/power/main.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index 770b1539a083d..d16b40cd26cc9 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -1462,8 +1462,10 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async) dpm_wait_for_subordinate(dev, async); - if (async_error) + if (async_error) { + dev->power.direct_complete = false; goto Complete; + } /* * If a device configured to wake up the system from sleep states @@ -1475,6 +1477,7 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async) pm_wakeup_event(dev, 0); if (pm_wakeup_pending()) { + dev->power.direct_complete = false; async_error = -EBUSY; goto Complete; } -- GitLab From f11a6abfdb4181fb713758c13ac2a1605c16db40 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Mon, 24 Sep 2018 16:19:30 -0400 Subject: [PATCH 0828/2269] dm cache metadata: ignore hints array being too small during resize commit 4561ffca88c546f96367f94b8f1e4715a9c62314 upstream. Commit fd2fa9541 ("dm cache metadata: save in-core policy_hint_size to on-disk superblock") enabled previously written policy hints to be used after a cache is reactivated. But in doing so the cache metadata's hint array was left exposed to out of bounds access because on resize the metadata's on-disk hint array wasn't ever extended. Fix this by ignoring that there are no on-disk hints associated with the newly added cache blocks. An expanded on-disk hint array is later rewritten upon the next clean shutdown of the cache. Fixes: fd2fa9541 ("dm cache metadata: save in-core policy_hint_size to on-disk superblock") Cc: stable@vger.kernel.org Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-cache-metadata.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c index 0a5a45f3ec5fc..7f1c64c4ad24c 100644 --- a/drivers/md/dm-cache-metadata.c +++ b/drivers/md/dm-cache-metadata.c @@ -1454,8 +1454,8 @@ static int __load_mappings(struct dm_cache_metadata *cmd, if (hints_valid) { r = dm_array_cursor_next(&cmd->hint_cursor); if (r) { - DMERR("dm_array_cursor_next for hint failed"); - goto out; + dm_array_cursor_end(&cmd->hint_cursor); + hints_valid = false; } } -- GitLab From ec6ae632e04bd35f436051917cf17db9ec2ebe49 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Tue, 25 Sep 2018 20:56:02 -0400 Subject: [PATCH 0829/2269] dm cache: fix resize crash if user doesn't reload cache table commit 5d07384a666d4b2f781dc056bfeec2c27fbdf383 upstream. A reload of the cache's DM table is needed during resize because otherwise a crash will occur when attempting to access smq policy entries associated with the portion of the cache that was recently extended. The reason is cache-size based data structures in the policy will not be resized, the only way to safely extend the cache is to allow for a proper cache policy initialization that occurs when the cache table is loaded. For example the smq policy's space_init(), init_allocator(), calc_hotspot_params() must be sized based on the extended cache size. The fix for this is to disallow cache resizes of this pattern: 1) suspend "cache" target's device 2) resize the fast device used for the cache 3) resume "cache" target's device Instead, the last step must be a full reload of the cache's DM table. Fixes: 66a636356 ("dm cache: add stochastic-multi-queue (smq) policy") Cc: stable@vger.kernel.org Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-cache-target.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index a4b7c26980966..e2ea57d5376e8 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -3097,8 +3097,13 @@ static dm_cblock_t get_cache_dev_size(struct cache *cache) static bool can_resize(struct cache *cache, dm_cblock_t new_size) { - if (from_cblock(new_size) > from_cblock(cache->cache_size)) - return true; + if (from_cblock(new_size) > from_cblock(cache->cache_size)) { + if (cache->sized) { + DMERR("%s: unable to extend cache due to missing cache table reload", + cache_device_name(cache)); + return false; + } + } /* * We can't drop a dirty block when shrinking the cache. -- GitLab From c096f5c4a8bc80efe6c45ed364fd1ac81e2c363a Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Mon, 1 Oct 2018 18:36:07 +0300 Subject: [PATCH 0830/2269] xhci: Add missing CAS workaround for Intel Sunrise Point xHCI commit ffe84e01bb1b38c7eb9c6b6da127a6c136d251df upstream. The workaround for missing CAS bit is also needed for xHC on Intel sunrisepoint PCH. For more details see: Intel 100/c230 series PCH specification update Doc #332692-006 Errata #8 Cc: Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-pci.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index 838d37e79fa22..9218f506f8e33 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -196,6 +196,8 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) } if (pdev->vendor == PCI_VENDOR_ID_INTEL && (pdev->device == PCI_DEVICE_ID_INTEL_CHERRYVIEW_XHCI || + pdev->device == PCI_DEVICE_ID_INTEL_SUNRISEPOINT_LP_XHCI || + pdev->device == PCI_DEVICE_ID_INTEL_SUNRISEPOINT_H_XHCI || pdev->device == PCI_DEVICE_ID_INTEL_APL_XHCI || pdev->device == PCI_DEVICE_ID_INTEL_DNV_XHCI)) xhci->quirks |= XHCI_MISSING_CAS; -- GitLab From 35123e64a1682cf3e938458aa786d0b8736474b8 Mon Sep 17 00:00:00 2001 From: Chunfeng Yun Date: Mon, 1 Oct 2018 18:36:08 +0300 Subject: [PATCH 0831/2269] usb: xhci-mtk: resume USB3 roothub first commit 555df5820e733cded7eb8d0bf78b2a791be51d75 upstream. Give USB3 devices a better chance to enumerate at USB3 speeds if they are connected to a suspended host. Porting from "671ffdff5b13 xhci: resume USB 3 roothub first" Cc: Signed-off-by: Chunfeng Yun Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-mtk.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/host/xhci-mtk.c b/drivers/usb/host/xhci-mtk.c index 8fb60657ed4fd..510d28a9d1901 100644 --- a/drivers/usb/host/xhci-mtk.c +++ b/drivers/usb/host/xhci-mtk.c @@ -780,10 +780,10 @@ static int __maybe_unused xhci_mtk_resume(struct device *dev) xhci_mtk_host_enable(mtk); xhci_dbg(xhci, "%s: restart port polling\n", __func__); - set_bit(HCD_FLAG_POLL_RH, &hcd->flags); - usb_hcd_poll_rh_status(hcd); set_bit(HCD_FLAG_POLL_RH, &xhci->shared_hcd->flags); usb_hcd_poll_rh_status(xhci->shared_hcd); + set_bit(HCD_FLAG_POLL_RH, &hcd->flags); + usb_hcd_poll_rh_status(hcd); return 0; } -- GitLab From 821c42e7d5ea3efc332ca1b466052a7c89f72a3b Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 24 Sep 2018 15:28:10 +0200 Subject: [PATCH 0832/2269] USB: serial: simple: add Motorola Tetra MTP6550 id commit f5fad711c06e652f90f581fc7c2caee327c33d31 upstream. Add device-id for the Motorola Tetra radio MTP6550. Bus 001 Device 004: ID 0cad:9012 Motorola CGISS Device Descriptor: bLength 18 bDescriptorType 1 bcdUSB 2.00 bDeviceClass 0 (Defined at Interface level) bDeviceSubClass 0 bDeviceProtocol 0 bMaxPacketSize0 64 idVendor 0x0cad Motorola CGISS idProduct 0x9012 bcdDevice 24.16 iManufacturer 1 Motorola Solutions, Inc. iProduct 2 TETRA PEI interface iSerial 0 bNumConfigurations 1 Configuration Descriptor: bLength 9 bDescriptorType 2 wTotalLength 55 bNumInterfaces 2 bConfigurationValue 1 iConfiguration 3 Generic Serial config bmAttributes 0x80 (Bus Powered) MaxPower 500mA Interface Descriptor: bLength 9 bDescriptorType 4 bInterfaceNumber 0 bAlternateSetting 0 bNumEndpoints 2 bInterfaceClass 255 Vendor Specific Class bInterfaceSubClass 0 bInterfaceProtocol 0 iInterface 0 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x81 EP 1 IN bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0200 1x 512 bytes bInterval 0 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x01 EP 1 OUT bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0200 1x 512 bytes Interface Descriptor: bLength 9 bDescriptorType 4 bInterfaceNumber 1 bAlternateSetting 0 bNumEndpoints 2 bInterfaceClass 255 Vendor Specific Class bInterfaceSubClass 0 bInterfaceProtocol 0 iInterface 0 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x82 EP 2 IN bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0200 1x 512 bytes bInterval 0 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x02 EP 2 OUT bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0200 1x 512 bytes bInterval 0 Device Qualifier (for other device speed): bLength 10 bDescriptorType 6 bcdUSB 2.00 bDeviceClass 0 (Defined at Interface level) bDeviceSubClass 0 bDeviceProtocol 0 bMaxPacketSize0 64 bNumConfigurations 1 Device Status: 0x0000 (Bus Powered) Reported-by: Hans Hult Cc: stable Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/usb-serial-simple.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/serial/usb-serial-simple.c b/drivers/usb/serial/usb-serial-simple.c index 2674da40d9cd7..6d6acf2c07c36 100644 --- a/drivers/usb/serial/usb-serial-simple.c +++ b/drivers/usb/serial/usb-serial-simple.c @@ -87,7 +87,8 @@ DEVICE(moto_modem, MOTO_IDS); /* Motorola Tetra driver */ #define MOTOROLA_TETRA_IDS() \ - { USB_DEVICE(0x0cad, 0x9011) } /* Motorola Solutions TETRA PEI */ + { USB_DEVICE(0x0cad, 0x9011) }, /* Motorola Solutions TETRA PEI */ \ + { USB_DEVICE(0x0cad, 0x9012) } /* MTP6550 */ DEVICE(motorola_tetra, MOTOROLA_TETRA_IDS); /* Novatel Wireless GPS driver */ -- GitLab From c92e73b11ed11baf38942fbbdbeb1f5adab81e94 Mon Sep 17 00:00:00 2001 From: Romain Izard Date: Thu, 20 Sep 2018 16:49:04 +0200 Subject: [PATCH 0833/2269] usb: cdc_acm: Do not leak URB buffers commit f2924d4b16ae138c2de6a0e73f526fb638330858 upstream. When the ACM TTY port is disconnected, the URBs it uses must be killed, and then the buffers must be freed. Unfortunately a previous refactor removed the code freeing the buffers because it looked extremely similar to the code killing the URBs. As a result, there were many new leaks for each plug/unplug cycle of a CDC-ACM device, that were detected by kmemleak. Restore the missing code, and the memory leak is removed. Fixes: ba8c931ded8d ("cdc-acm: refactor killing urbs") Signed-off-by: Romain Izard Acked-by: Oliver Neukum Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/cdc-acm.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index feaa0d8f830ac..9f6f402470ac1 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -1527,6 +1527,7 @@ static void acm_disconnect(struct usb_interface *intf) { struct acm *acm = usb_get_intfdata(intf); struct tty_struct *tty; + int i; /* sibling interface is already cleaning up */ if (!acm) @@ -1557,6 +1558,11 @@ static void acm_disconnect(struct usb_interface *intf) tty_unregister_device(acm_tty_driver, acm->minor); + usb_free_urb(acm->ctrlurb); + for (i = 0; i < ACM_NW; i++) + usb_free_urb(acm->wb[i].urb); + for (i = 0; i < acm->rx_buflimit; i++) + usb_free_urb(acm->read_urbs[i]); acm_write_buffers_free(acm); usb_free_coherent(acm->dev, acm->ctrlsize, acm->ctrl_buffer, acm->ctrl_dma); acm_read_buffers_free(acm); -- GitLab From 171f90d4ae841e2205fb706b49f266d40207d0bb Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Tue, 18 Sep 2018 00:52:52 +0100 Subject: [PATCH 0834/2269] tty: Drop tty->count on tty_reopen() failure commit fe32416790093b31364c08395727de17ec96ace1 upstream. In case of tty_ldisc_reinit() failure, tty->count should be decremented back, otherwise we will never release_tty(). Tetsuo reported that it fixes noisy warnings on tty release like: pts pts4033: tty_release: tty->count(10529) != (#fd's(7) + #kopen's(0)) Fixes: commit 892d1fa7eaae ("tty: Destroy ldisc instance on hangup") Cc: stable@vger.kernel.org # v4.6+ Cc: Greg Kroah-Hartman Cc: Jiri Slaby Reviewed-by: Jiri Slaby Tested-by: Jiri Slaby Tested-by: Mark Rutland Tested-by: Tetsuo Handa Signed-off-by: Dmitry Safonov Signed-off-by: Greg Kroah-Hartman --- drivers/tty/tty_io.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index 562d31073f9a2..8d65b2f9ee806 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -1254,6 +1254,7 @@ static void tty_driver_remove_tty(struct tty_driver *driver, struct tty_struct * static int tty_reopen(struct tty_struct *tty) { struct tty_driver *driver = tty->driver; + int retval; if (driver->type == TTY_DRIVER_TYPE_PTY && driver->subtype == PTY_TYPE_MASTER) @@ -1267,10 +1268,14 @@ static int tty_reopen(struct tty_struct *tty) tty->count++; - if (!tty->ldisc) - return tty_ldisc_reinit(tty, tty->termios.c_line); + if (tty->ldisc) + return 0; - return 0; + retval = tty_ldisc_reinit(tty, tty->termios.c_line); + if (retval) + tty->count--; + + return retval; } /** -- GitLab From 79f87e09bcb282f08301bccb2edbc2981bb819d5 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Tue, 25 Sep 2018 21:06:24 -0700 Subject: [PATCH 0835/2269] of: unittest: Disable interrupt node tests for old world MAC systems commit 8894891446c9380709451b99ab45c5c53adfd2fc upstream. On systems with OF_IMAP_OLDWORLD_MAC set in of_irq_workarounds, the devicetree interrupt parsing code is different, causing unit tests of devicetree interrupt nodes to fail. Due to a bug in unittest code, which tries to dereference an uninitialized pointer, this results in a crash. OF: /testcase-data/phandle-tests/consumer-a: arguments longer than property Unable to handle kernel paging request for data at address 0x00bc616e Faulting instruction address: 0xc08e9468 Oops: Kernel access of bad area, sig: 11 [#1] BE PREEMPT PowerMac Modules linked in: CPU: 0 PID: 1 Comm: swapper Not tainted 4.14.72-rc1-yocto-standard+ #1 task: cf8e0000 task.stack: cf8da000 NIP: c08e9468 LR: c08ea5bc CTR: c08ea5ac REGS: cf8dbb50 TRAP: 0300 Not tainted (4.14.72-rc1-yocto-standard+) MSR: 00001032 CR: 82004044 XER: 00000000 DAR: 00bc616e DSISR: 40000000 GPR00: c08ea5bc cf8dbc00 cf8e0000 c13ca517 c13ca517 c13ca8a0 00000066 00000002 GPR08: 00000063 00bc614e c0b05865 000affff 82004048 00000000 c00047f0 00000000 GPR16: c0a80000 c0a9cc34 c13ca517 c0ad1134 05ffffff 000affff c0b05860 c0abeef8 GPR24: cecec278 cecec278 c0a8c4d0 c0a885e0 c13ca8a0 05ffffff c13ca8a0 c13ca517 NIP [c08e9468] device_node_gen_full_name+0x30/0x15c LR [c08ea5bc] device_node_string+0x190/0x3c8 Call Trace: [cf8dbc00] [c007f670] trace_hardirqs_on_caller+0x118/0x1fc (unreliable) [cf8dbc40] [c08ea5bc] device_node_string+0x190/0x3c8 [cf8dbcb0] [c08eb794] pointer+0x25c/0x4d0 [cf8dbd00] [c08ebcbc] vsnprintf+0x2b4/0x5ec [cf8dbd60] [c08ec00c] vscnprintf+0x18/0x48 [cf8dbd70] [c008e268] vprintk_store+0x4c/0x22c [cf8dbda0] [c008ecac] vprintk_emit+0x94/0x130 [cf8dbdd0] [c008ff54] printk+0x5c/0x6c [cf8dbe10] [c0b8ddd4] of_unittest+0x2220/0x26f8 [cf8dbea0] [c0004434] do_one_initcall+0x4c/0x184 [cf8dbf00] [c0b4534c] kernel_init_freeable+0x13c/0x1d8 [cf8dbf30] [c0004814] kernel_init+0x24/0x118 [cf8dbf40] [c0013398] ret_from_kernel_thread+0x5c/0x64 The problem was observed when running a qemu test for the g3beige machine with devicetree unittests enabled. Disable interrupt node tests on affected systems to avoid both false unittest failures and the crash. With this patch in place, unittest on the affected system passes with the following message. dt-test ### end of unittest - 144 passed, 0 failed Fixes: 53a42093d96ef ("of: Add device tree selftests") Signed-off-by: Guenter Roeck Reviewed-by: Frank Rowand Signed-off-by: Rob Herring Signed-off-by: Greg Kroah-Hartman --- drivers/of/unittest.c | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/drivers/of/unittest.c b/drivers/of/unittest.c index 985a85f281a82..7c6aff7618009 100644 --- a/drivers/of/unittest.c +++ b/drivers/of/unittest.c @@ -614,6 +614,9 @@ static void __init of_unittest_parse_interrupts(void) struct of_phandle_args args; int i, rc; + if (of_irq_workarounds & OF_IMAP_OLDWORLD_MAC) + return; + np = of_find_node_by_path("/testcase-data/interrupts/interrupts0"); if (!np) { pr_err("missing testcase data\n"); @@ -688,6 +691,9 @@ static void __init of_unittest_parse_interrupts_extended(void) struct of_phandle_args args; int i, rc; + if (of_irq_workarounds & OF_IMAP_OLDWORLD_MAC) + return; + np = of_find_node_by_path("/testcase-data/interrupts/interrupts-extended0"); if (!np) { pr_err("missing testcase data\n"); @@ -844,15 +850,19 @@ static void __init of_unittest_platform_populate(void) pdev = of_find_device_by_node(np); unittest(pdev, "device 1 creation failed\n"); - irq = platform_get_irq(pdev, 0); - unittest(irq == -EPROBE_DEFER, "device deferred probe failed - %d\n", irq); + if (!(of_irq_workarounds & OF_IMAP_OLDWORLD_MAC)) { + irq = platform_get_irq(pdev, 0); + unittest(irq == -EPROBE_DEFER, + "device deferred probe failed - %d\n", irq); - /* Test that a parsing failure does not return -EPROBE_DEFER */ - np = of_find_node_by_path("/testcase-data/testcase-device2"); - pdev = of_find_device_by_node(np); - unittest(pdev, "device 2 creation failed\n"); - irq = platform_get_irq(pdev, 0); - unittest(irq < 0 && irq != -EPROBE_DEFER, "device parsing error failed - %d\n", irq); + /* Test that a parsing failure does not return -EPROBE_DEFER */ + np = of_find_node_by_path("/testcase-data/testcase-device2"); + pdev = of_find_device_by_node(np); + unittest(pdev, "device 2 creation failed\n"); + irq = platform_get_irq(pdev, 0); + unittest(irq < 0 && irq != -EPROBE_DEFER, + "device parsing error failed - %d\n", irq); + } np = of_find_node_by_path("/testcase-data/platform-tests"); unittest(np, "No testcase data in device tree\n"); -- GitLab From ec727693a9ef20da5829c8c68ae0fa4520a3fba6 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 14 Mar 2018 10:34:11 -0300 Subject: [PATCH 0836/2269] perf annotate: Use asprintf when formatting objdump command line commit 6810158d526e483868e519befff407b91e76b3db upstream. We were using a local buffer with an arbitrary size, that would have to get increased to avoid truncation as warned by gcc 8: util/annotate.c: In function 'symbol__disassemble': util/annotate.c:1488:4: error: '%s' directive output may be truncated writing up to 4095 bytes into a region of size between 3966 and 8086 [-Werror=format-truncation=] "%s %s%s --start-address=0x%016" PRIx64 ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ util/annotate.c:1498:20: symfs_filename, symfs_filename); ~~~~~~~~~~~~~~ util/annotate.c:1490:50: note: format string is defined here " -l -d %s %s -C \"%s\" 2>/dev/null|grep -v \"%s:\"|expand", ^~ In file included from /usr/include/stdio.h:861, from util/color.h:5, from util/sort.h:8, from util/annotate.c:14: /usr/include/bits/stdio2.h:67:10: note: '__builtin___snprintf_chk' output 116 or more bytes (assuming 8331) into a destination of size 8192 return __builtin___snprintf_chk (__s, __n, __USE_FORTIFY_LEVEL - 1, ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ __bos (__s), __fmt, __va_arg_pack ()); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ So switch to asprintf, that will make sure enough space is available. Cc: Adrian Hunter Cc: David Ahern Cc: Jin Yao Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-qagoy2dmbjpc9gdnaj0r3mml@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo Cc: Ignat Korchagin Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/annotate.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index dac76ac117c11..398d4cc2f0e44 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1432,7 +1432,7 @@ int symbol__disassemble(struct symbol *sym, struct map *map, struct arch **parch, char *cpuid) { struct dso *dso = map->dso; - char command[PATH_MAX * 2]; + char *command; struct arch *arch = NULL; FILE *file; char symfs_filename[PATH_MAX]; @@ -1496,7 +1496,7 @@ int symbol__disassemble(struct symbol *sym, struct map *map, strcpy(symfs_filename, tmp); } - snprintf(command, sizeof(command), + err = asprintf(&command, "%s %s%s --start-address=0x%016" PRIx64 " --stop-address=0x%016" PRIx64 " -l -d %s %s -C \"%s\" 2>/dev/null|grep -v \"%s:\"|expand", @@ -1509,12 +1509,17 @@ int symbol__disassemble(struct symbol *sym, struct map *map, symbol_conf.annotate_src ? "-S" : "", symfs_filename, symfs_filename); + if (err < 0) { + pr_err("Failure allocating memory for the command to run\n"); + goto out_remove_tmp; + } + pr_debug("Executing: %s\n", command); err = -1; if (pipe(stdout_fd) < 0) { pr_err("Failure creating the pipe to run %s\n", command); - goto out_remove_tmp; + goto out_free_command; } pid = fork(); @@ -1541,7 +1546,7 @@ int symbol__disassemble(struct symbol *sym, struct map *map, * If we were using debug info should retry with * original binary. */ - goto out_remove_tmp; + goto out_free_command; } nline = 0; @@ -1570,6 +1575,8 @@ int symbol__disassemble(struct symbol *sym, struct map *map, fclose(file); err = 0; +out_free_command: + free(command); out_remove_tmp: close(stdout_fd[0]); @@ -1583,7 +1590,7 @@ out: out_close_stdout: close(stdout_fd[1]); - goto out_remove_tmp; + goto out_free_command; } static void insert_source_line(struct rb_root *root, struct source_line *src_line) -- GitLab From 327400b3a7082eea72890cc6d008dd4b48ad0355 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 19 Mar 2018 09:29:02 +0100 Subject: [PATCH 0837/2269] perf tools: Fix python extension build for gcc 8 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit b7a313d84e853049062011d78cb04b6decd12f5c upstream. The gcc 8 compiler won't compile the python extension code with the following errors (one example): python.c:830:15: error: cast between incompatible function types from \ ‘PyObject * (*)(struct pyrf_evsel *, PyObject *, PyObject *)’ \ uct _object * (*)(struct pyrf_evsel *, struct _object *, struct _object *)’} to \ ‘PyObject * (*)(PyObject *, PyObject *)’ {aka ‘struct _object * (*)(struct _objeuct \ _object *)’} [-Werror=cast-function-type] .ml_meth = (PyCFunction)pyrf_evsel__open, The problem with the PyMethodDef::ml_meth callback is that its type is determined based on the PyMethodDef::ml_flags value, which we set as METH_VARARGS | METH_KEYWORDS. That indicates that the callback is expecting an extra PyObject* arg, and is actually PyCFunctionWithKeywords type, but the base PyMethodDef::ml_meth type stays PyCFunction. Previous gccs did not find this, gcc8 now does. Fixing this by silencing this warning for python.c build. Commiter notes: Do not do that for CC=clang, as it breaks the build in some clang versions, like the ones in fedora up to fedora27: fedora:25:error: unknown warning option '-Wno-cast-function-type'; did you mean '-Wno-bad-function-cast'? [-Werror,-Wunknown-warning-option] fedora:26:error: unknown warning option '-Wno-cast-function-type'; did you mean '-Wno-bad-function-cast'? [-Werror,-Wunknown-warning-option] fedora:27:error: unknown warning option '-Wno-cast-function-type'; did you mean '-Wno-bad-function-cast'? [-Werror,-Wunknown-warning-option] # those have: clang version 3.9.1 (tags/RELEASE_391/final) The one in rawhide accepts that: clang version 6.0.0 (tags/RELEASE_600/final) Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: David Ahern Cc: Josh Poimboeuf Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Sergey Senozhatsky Link: http://lkml.kernel.org/r/20180319082902.4518-2-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo Cc: Ignat Korchagin Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/setup.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py index af415febbc46e..da4df7fd43a2f 100644 --- a/tools/perf/util/setup.py +++ b/tools/perf/util/setup.py @@ -28,6 +28,8 @@ class install_lib(_install_lib): cflags = getenv('CFLAGS', '').split() # switch off several checks (need to be at the end of cflags list) cflags += ['-fno-strict-aliasing', '-Wno-write-strings', '-Wno-unused-parameter' ] +if cc != "clang": + cflags += ['-Wno-cast-function-type' ] src_perf = getenv('srctree') + '/tools/perf' build_lib = getenv('PYTHON_EXTBUILD_LIB') -- GitLab From 8146256b7dcdb1e98a1f6435501ca76396c42f20 Mon Sep 17 00:00:00 2001 From: Carl Huang Date: Mon, 5 Mar 2018 14:44:02 +0800 Subject: [PATCH 0838/2269] ath10k: fix use-after-free in ath10k_wmi_cmd_send_nowait commit 9ef0f58ed7b4a55da4a64641d538e0d9e46579ac upstream. The skb may be freed in tx completion context before trace_ath10k_wmi_cmd is called. This can be easily captured when KASAN(Kernel Address Sanitizer) is enabled. The fix is to move trace_ath10k_wmi_cmd before the send operation. As the ret has no meaning in trace_ath10k_wmi_cmd then, so remove this parameter too. Signed-off-by: Carl Huang Tested-by: Brian Norris Reviewed-by: Brian Norris Signed-off-by: Kalle Valo Signed-off-by: Amit Pundir Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/ath10k/trace.h | 12 ++++-------- drivers/net/wireless/ath/ath10k/wmi.c | 2 +- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/trace.h b/drivers/net/wireless/ath/ath10k/trace.h index e0d00cef0bd8d..5b974bb76e6cc 100644 --- a/drivers/net/wireless/ath/ath10k/trace.h +++ b/drivers/net/wireless/ath/ath10k/trace.h @@ -152,10 +152,9 @@ TRACE_EVENT(ath10k_log_dbg_dump, ); TRACE_EVENT(ath10k_wmi_cmd, - TP_PROTO(struct ath10k *ar, int id, const void *buf, size_t buf_len, - int ret), + TP_PROTO(struct ath10k *ar, int id, const void *buf, size_t buf_len), - TP_ARGS(ar, id, buf, buf_len, ret), + TP_ARGS(ar, id, buf, buf_len), TP_STRUCT__entry( __string(device, dev_name(ar->dev)) @@ -163,7 +162,6 @@ TRACE_EVENT(ath10k_wmi_cmd, __field(unsigned int, id) __field(size_t, buf_len) __dynamic_array(u8, buf, buf_len) - __field(int, ret) ), TP_fast_assign( @@ -171,17 +169,15 @@ TRACE_EVENT(ath10k_wmi_cmd, __assign_str(driver, dev_driver_string(ar->dev)); __entry->id = id; __entry->buf_len = buf_len; - __entry->ret = ret; memcpy(__get_dynamic_array(buf), buf, buf_len); ), TP_printk( - "%s %s id %d len %zu ret %d", + "%s %s id %d len %zu", __get_str(driver), __get_str(device), __entry->id, - __entry->buf_len, - __entry->ret + __entry->buf_len ) ); diff --git a/drivers/net/wireless/ath/ath10k/wmi.c b/drivers/net/wireless/ath/ath10k/wmi.c index 38a97086708b5..2ab5311659ea9 100644 --- a/drivers/net/wireless/ath/ath10k/wmi.c +++ b/drivers/net/wireless/ath/ath10k/wmi.c @@ -1741,8 +1741,8 @@ int ath10k_wmi_cmd_send_nowait(struct ath10k *ar, struct sk_buff *skb, cmd_hdr->cmd_id = __cpu_to_le32(cmd); memset(skb_cb, 0, sizeof(*skb_cb)); + trace_ath10k_wmi_cmd(ar, cmd_id, skb->data, skb->len); ret = ath10k_htc_send(&ar->htc, ar->wmi.eid, skb); - trace_ath10k_wmi_cmd(ar, cmd_id, skb->data, skb->len, ret); if (ret) goto err_pull; -- GitLab From 1b2ad48a85c4011f2cb620fa96fd50645bd11263 Mon Sep 17 00:00:00 2001 From: Yu Wang Date: Tue, 30 Jan 2018 14:06:08 +0200 Subject: [PATCH 0839/2269] ath10k: fix kernel panic issue during pci probe commit 50e79e25250bf928369996277e85b00536b380c7 upstream. If device gone during chip reset, ar->normal_mode_fw.board is not initialized, but ath10k_debug_print_hwfw_info() will try to access its member, which will cause 'kernel NULL pointer' issue. This was found using a faulty device (pci link went down sometimes) in a random insmod/rmmod/other-op test. To fix it, check ar->normal_mode_fw.board before accessing the member. pci 0000:02:00.0: BAR 0: assigned [mem 0xf7400000-0xf75fffff 64bit] ath10k_pci 0000:02:00.0: enabling device (0000 -> 0002) ath10k_pci 0000:02:00.0: pci irq msi oper_irq_mode 2 irq_mode 0 reset_mode 0 ath10k_pci 0000:02:00.0: failed to read device register, device is gone ath10k_pci 0000:02:00.0: failed to wait for target init: -5 ath10k_pci 0000:02:00.0: failed to warm reset: -5 ath10k_pci 0000:02:00.0: firmware crashed during chip reset ath10k_pci 0000:02:00.0: firmware crashed! (uuid 5d018951-b8e1-404a-8fde-923078b4423a) ath10k_pci 0000:02:00.0: (null) target 0x00000000 chip_id 0x00340aff sub 0000:0000 ath10k_pci 0000:02:00.0: kconfig debug 1 debugfs 1 tracing 1 dfs 1 testmode 1 ath10k_pci 0000:02:00.0: firmware ver api 0 features crc32 00000000 ... BUG: unable to handle kernel NULL pointer dereference at 00000004 ... Call Trace: [] ath10k_print_driver_info+0x12/0x20 [ath10k_core] [] ath10k_pci_fw_crashed_dump+0x6d/0x4d0 [ath10k_pci] [] ? ath10k_pci_sleep.part.19+0x57/0xc0 [ath10k_pci] [] ath10k_pci_hif_power_up+0x14e/0x1b0 [ath10k_pci] [] ? do_page_fault+0xb/0x10 [] ath10k_core_register_work+0x24/0x840 [ath10k_core] [] ? netlbl_unlhsh_remove+0x178/0x410 [] ? __do_page_fault+0x480/0x480 [] process_one_work+0x114/0x3e0 [] worker_thread+0x37/0x4a0 [] kthread+0xa4/0xc0 [] ? create_worker+0x180/0x180 [] ? kthread_park+0x50/0x50 [] ret_from_fork+0x1b/0x28 Code: 78 80 b8 50 09 00 00 00 75 5d 8d 75 94 c7 44 24 08 aa d7 52 fb c7 44 24 04 64 00 00 00 89 34 24 e8 82 52 e2 c5 8b 83 dc 08 00 00 <8b> 50 04 8b 08 31 c0 e8 20 57 e3 c5 89 44 24 10 8b 83 58 09 00 EIP: []- ath10k_debug_print_board_info+0x34/0xb0 [ath10k_core] SS:ESP 0068:f4921d90 CR2: 0000000000000004 Signed-off-by: Yu Wang Signed-off-by: Kalle Valo [AmitP: Minor rebasing for 4.14.y and 4.9.y] Signed-off-by: Amit Pundir Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/ath10k/debug.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/debug.c b/drivers/net/wireless/ath/ath10k/debug.c index df514507d3f12..22003895f8548 100644 --- a/drivers/net/wireless/ath/ath10k/debug.c +++ b/drivers/net/wireless/ath/ath10k/debug.c @@ -1,6 +1,7 @@ /* * Copyright (c) 2005-2011 Atheros Communications Inc. * Copyright (c) 2011-2013 Qualcomm Atheros, Inc. + * Copyright (c) 2018, The Linux Foundation. All rights reserved. * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -163,6 +164,8 @@ void ath10k_debug_print_hwfw_info(struct ath10k *ar) void ath10k_debug_print_board_info(struct ath10k *ar) { char boardinfo[100]; + const struct firmware *board; + u32 crc; if (ar->id.bmi_ids_valid) scnprintf(boardinfo, sizeof(boardinfo), "%d:%d", @@ -170,11 +173,16 @@ void ath10k_debug_print_board_info(struct ath10k *ar) else scnprintf(boardinfo, sizeof(boardinfo), "N/A"); + board = ar->normal_mode_fw.board; + if (!IS_ERR_OR_NULL(board)) + crc = crc32_le(0, board->data, board->size); + else + crc = 0; + ath10k_info(ar, "board_file api %d bmi_id %s crc32 %08x", ar->bd_api, boardinfo, - crc32_le(0, ar->normal_mode_fw.board->data, - ar->normal_mode_fw.board->size)); + crc); } void ath10k_debug_print_boot_info(struct ath10k *ar) -- GitLab From 0f6e2f4e06be4da35c1b9e52d638218bafa91e25 Mon Sep 17 00:00:00 2001 From: James Smart Date: Tue, 13 Mar 2018 09:48:07 -0700 Subject: [PATCH 0840/2269] nvme_fc: fix ctrl create failures racing with workq items commit cf25809bec2c7df4b45df5b2196845d9a4a3c89b upstream. If there are errors during initial controller create, the transport will teardown the partially initialized controller struct and free the ctlr memory. Trouble is - most of those errors can occur due to asynchronous events happening such io timeouts and subsystem connectivity failures. Those failures invoke async workq items to reset the controller and attempt reconnect. Those may be in progress as the main thread frees the ctrl memory, resulting in NULL ptr oops. Prevent this from happening by having the main ctrl failure thread changing state to DELETING followed by synchronously cancelling any pending queued work item. The change of state will prevent the scheduling of resets or reconnect events. Signed-off-by: James Smart Signed-off-by: Keith Busch Signed-off-by: Jens Axboe Signed-off-by: Amit Pundir Signed-off-by: Greg Kroah-Hartman --- drivers/nvme/host/fc.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 7deb7b5d86837..058d542647dd5 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -2868,6 +2868,10 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, } if (ret) { + nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING); + cancel_work_sync(&ctrl->ctrl.reset_work); + cancel_delayed_work_sync(&ctrl->connect_work); + /* couldn't schedule retry - fail out */ dev_err(ctrl->ctrl.device, "NVME-FC{%d}: Connect retry failed\n", ctrl->cnum); -- GitLab From 4e43fbc8ef2598cfea7704f6e496db70fe86e6d1 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 24 Nov 2017 08:31:07 +0100 Subject: [PATCH 0841/2269] powerpc/lib/code-patching: refactor patch_instruction() commit 8cf4c05712f04a405f0dacebcca8f042b391694a upstream. patch_instruction() uses almost the same sequence as __patch_instruction() This patch refactor it so that patch_instruction() uses __patch_instruction() instead of duplicating code. Signed-off-by: Christophe Leroy Acked-by: Balbir Singh Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/lib/code-patching.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c index 096d4e4d31e63..e1c58937281fd 100644 --- a/arch/powerpc/lib/code-patching.c +++ b/arch/powerpc/lib/code-patching.c @@ -23,19 +23,26 @@ #include #include -static int __patch_instruction(unsigned int *addr, unsigned int instr) +static int __patch_instruction(unsigned int *exec_addr, unsigned int instr, + unsigned int *patch_addr) { int err; - __put_user_size(instr, addr, 4, err); + __put_user_size(instr, patch_addr, 4, err); if (err) return err; - asm ("dcbst 0, %0; sync; icbi 0,%0; sync; isync" :: "r" (addr)); + asm ("dcbst 0, %0; sync; icbi 0,%1; sync; isync" :: "r" (patch_addr), + "r" (exec_addr)); return 0; } +static int raw_patch_instruction(unsigned int *addr, unsigned int instr) +{ + return __patch_instruction(addr, instr, addr); +} + #ifdef CONFIG_STRICT_KERNEL_RWX static DEFINE_PER_CPU(struct vm_struct *, text_poke_area); @@ -138,7 +145,7 @@ static inline int unmap_patch_area(unsigned long addr) int patch_instruction(unsigned int *addr, unsigned int instr) { int err; - unsigned int *dest = NULL; + unsigned int *patch_addr = NULL; unsigned long flags; unsigned long text_poke_addr; unsigned long kaddr = (unsigned long)addr; @@ -149,7 +156,7 @@ int patch_instruction(unsigned int *addr, unsigned int instr) * to allow patching. We just do the plain old patching */ if (!this_cpu_read(*PTRRELOC(&text_poke_area))) - return __patch_instruction(addr, instr); + return raw_patch_instruction(addr, instr); local_irq_save(flags); @@ -159,17 +166,10 @@ int patch_instruction(unsigned int *addr, unsigned int instr) goto out; } - dest = (unsigned int *)(text_poke_addr) + + patch_addr = (unsigned int *)(text_poke_addr) + ((kaddr & ~PAGE_MASK) / sizeof(unsigned int)); - /* - * We use __put_user_size so that we can handle faults while - * writing to dest and return err to handle faults gracefully - */ - __put_user_size(instr, dest, 4, err); - if (!err) - asm ("dcbst 0, %0; sync; icbi 0,%0; icbi 0,%1; sync; isync" - ::"r" (dest), "r"(addr)); + __patch_instruction(addr, instr, patch_addr); err = unmap_patch_area(text_poke_addr); if (err) @@ -184,7 +184,7 @@ out: int patch_instruction(unsigned int *addr, unsigned int instr) { - return __patch_instruction(addr, instr); + return raw_patch_instruction(addr, instr); } #endif /* CONFIG_STRICT_KERNEL_RWX */ -- GitLab From 609fbeddb24c4035d24fc32d82dc08b30ae3dfc0 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Fri, 14 Sep 2018 11:14:11 +1000 Subject: [PATCH 0842/2269] powerpc: Avoid code patching freed init sections commit 51c3c62b58b357e8d35e4cc32f7b4ec907426fe3 upstream. This stops us from doing code patching in init sections after they've been freed. In this chain: kvm_guest_init() -> kvm_use_magic_page() -> fault_in_pages_readable() -> __get_user() -> __get_user_nocheck() -> barrier_nospec(); We have a code patching location at barrier_nospec() and kvm_guest_init() is an init function. This whole chain gets inlined, so when we free the init section (hence kvm_guest_init()), this code goes away and hence should no longer be patched. We seen this as userspace memory corruption when using a memory checker while doing partition migration testing on powervm (this starts the code patching post migration via /sys/kernel/mobility/migration). In theory, it could also happen when using /sys/kernel/debug/powerpc/barrier_nospec. Cc: stable@vger.kernel.org # 4.13+ Signed-off-by: Michael Neuling Reviewed-by: Nicholas Piggin Reviewed-by: Christophe Leroy Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/include/asm/setup.h | 1 + arch/powerpc/lib/code-patching.c | 7 +++++++ arch/powerpc/mm/mem.c | 2 ++ 3 files changed, 10 insertions(+) diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h index bbcdf929be544..a5e919e34c421 100644 --- a/arch/powerpc/include/asm/setup.h +++ b/arch/powerpc/include/asm/setup.h @@ -9,6 +9,7 @@ extern void ppc_printk_progress(char *s, unsigned short hex); extern unsigned int rtas_data; extern unsigned long long memory_limit; +extern bool init_mem_is_free; extern unsigned long klimit; extern void *zalloc_maybe_bootmem(size_t size, gfp_t mask); diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c index e1c58937281fd..0054b0be89853 100644 --- a/arch/powerpc/lib/code-patching.c +++ b/arch/powerpc/lib/code-patching.c @@ -22,12 +22,19 @@ #include #include #include +#include static int __patch_instruction(unsigned int *exec_addr, unsigned int instr, unsigned int *patch_addr) { int err; + /* Make sure we aren't patching a freed init section */ + if (init_mem_is_free && init_section_contains(exec_addr, 4)) { + pr_debug("Skipping init section patching addr: 0x%px\n", exec_addr); + return 0; + } + __put_user_size(instr, patch_addr, 4, err); if (err) return err; diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 9c2f83331e5b7..30bf13b72e5e5 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -63,6 +63,7 @@ #endif unsigned long long memory_limit; +bool init_mem_is_free; #ifdef CONFIG_HIGHMEM pte_t *kmap_pte; @@ -405,6 +406,7 @@ void free_initmem(void) { ppc_md.progress = ppc_printk_progress; mark_initmem_nx(); + init_mem_is_free = true; free_initmem_default(POISON_FREE_INITMEM); } -- GitLab From af1a8101794dfea897290e057f61086dabfe6c91 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 1 Oct 2018 12:21:10 +0000 Subject: [PATCH 0843/2269] powerpc/lib: fix book3s/32 boot failure due to code patching commit b45ba4a51cde29b2939365ef0c07ad34c8321789 upstream. Commit 51c3c62b58b3 ("powerpc: Avoid code patching freed init sections") accesses 'init_mem_is_free' flag too early, before the kernel is relocated. This provokes early boot failure (before the console is active). As it is not necessary to do this verification that early, this patch moves the test into patch_instruction() instead of __patch_instruction(). This modification also has the advantage of avoiding unnecessary remappings. Fixes: 51c3c62b58b3 ("powerpc: Avoid code patching freed init sections") Cc: stable@vger.kernel.org # 4.13+ Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/lib/code-patching.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c index 0054b0be89853..882c750dc519e 100644 --- a/arch/powerpc/lib/code-patching.c +++ b/arch/powerpc/lib/code-patching.c @@ -29,12 +29,6 @@ static int __patch_instruction(unsigned int *exec_addr, unsigned int instr, { int err; - /* Make sure we aren't patching a freed init section */ - if (init_mem_is_free && init_section_contains(exec_addr, 4)) { - pr_debug("Skipping init section patching addr: 0x%px\n", exec_addr); - return 0; - } - __put_user_size(instr, patch_addr, 4, err); if (err) return err; @@ -149,7 +143,7 @@ static inline int unmap_patch_area(unsigned long addr) return 0; } -int patch_instruction(unsigned int *addr, unsigned int instr) +static int do_patch_instruction(unsigned int *addr, unsigned int instr) { int err; unsigned int *patch_addr = NULL; @@ -189,12 +183,22 @@ out: } #else /* !CONFIG_STRICT_KERNEL_RWX */ -int patch_instruction(unsigned int *addr, unsigned int instr) +static int do_patch_instruction(unsigned int *addr, unsigned int instr) { return raw_patch_instruction(addr, instr); } #endif /* CONFIG_STRICT_KERNEL_RWX */ + +int patch_instruction(unsigned int *addr, unsigned int instr) +{ + /* Make sure we aren't patching a freed init section */ + if (init_mem_is_free && init_section_contains(addr, 4)) { + pr_debug("Skipping init section patching addr: 0x%px\n", addr); + return 0; + } + return do_patch_instruction(addr, instr); +} NOKPROBE_SYMBOL(patch_instruction); int patch_branch(unsigned int *addr, unsigned long target, int flags) -- GitLab From b5dcd4ab8e6c1368048002b8f648a434c4bacae6 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Fri, 5 Oct 2018 12:48:48 -0700 Subject: [PATCH 0844/2269] ARC: clone syscall to setp r25 as thread pointer commit c58a584f05e35d1d4342923cd7aac07d9c3d3d16 upstream. Per ARC TLS ABI, r25 is designated TP (thread pointer register). However so far kernel didn't do any special treatment, like setting up usermode r25, even for CLONE_SETTLS. We instead relied on libc runtime to do this, in say clone libc wrapper [1]. This was deliberate to keep kernel ABI agnostic (userspace could potentially change TP, specially for different ARC ISA say ARCompact vs. ARCv2 with different spare registers etc) However userspace setting up r25, after clone syscall opens a race, if child is not scheduled and gets a signal instead. It starts off in userspace not in clone but in a signal handler and anything TP sepcific there such as pthread_self() fails which showed up with uClibc testsuite nptl/tst-kill6 [2] Fix this by having kernel populate r25 to TP value. So this locks in ABI, but it was not going to change anyways, and fwiw is same for both ARCompact (arc700 core) and ARCvs (HS3x cores) [1] https://cgit.uclibc-ng.org/cgi/cgit/uclibc-ng.git/tree/libc/sysdeps/linux/arc/clone.S [2] https://github.com/wbx-github/uclibc-ng-test/blob/master/test/nptl/tst-kill6.c Fixes: ARC STAR 9001378481 Cc: stable@vger.kernel.org Reported-by: Nikita Sobolev Signed-off-by: Vineet Gupta Signed-off-by: Greg Kroah-Hartman --- arch/arc/kernel/process.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/arch/arc/kernel/process.c b/arch/arc/kernel/process.c index 4674541eba3fd..8ce6e72359155 100644 --- a/arch/arc/kernel/process.c +++ b/arch/arc/kernel/process.c @@ -241,6 +241,26 @@ int copy_thread(unsigned long clone_flags, task_thread_info(current)->thr_ptr; } + + /* + * setup usermode thread pointer #1: + * when child is picked by scheduler, __switch_to() uses @c_callee to + * populate usermode callee regs: this works (despite being in a kernel + * function) since special return path for child @ret_from_fork() + * ensures those regs are not clobbered all the way to RTIE to usermode + */ + c_callee->r25 = task_thread_info(p)->thr_ptr; + +#ifdef CONFIG_ARC_CURR_IN_REG + /* + * setup usermode thread pointer #2: + * however for this special use of r25 in kernel, __switch_to() sets + * r25 for kernel needs and only in the final return path is usermode + * r25 setup, from pt_regs->user_r25. So set that up as well + */ + c_regs->user_r25 = c_callee->r25; +#endif + return 0; } -- GitLab From 75fc05a20f5fa0b4e1ccfbf14130c3842adcdbe5 Mon Sep 17 00:00:00 2001 From: Harsh Jain Date: Wed, 19 Sep 2018 22:42:16 +0530 Subject: [PATCH 0845/2269] crypto: chelsio - Fix memory corruption in DMA Mapped buffers. commit add92a817e60e308a419693413a38d9d1e663aff upstream. Update PCI Id in "cpl_rx_phys_dsgl" header. In case pci_chan_id and tx_chan_id are not derived from same queue, H/W can send request completion indication before completing DMA Transfer. Herbert, It would be good if fix can be merge to stable tree. For 4.14 kernel, It requires some update to avoid mege conficts. Cc: Signed-off-by: Harsh Jain Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/chelsio/chcr_algo.c | 41 ++++++++++++++++++---------- drivers/crypto/chelsio/chcr_crypto.h | 2 ++ 2 files changed, 29 insertions(+), 14 deletions(-) diff --git a/drivers/crypto/chelsio/chcr_algo.c b/drivers/crypto/chelsio/chcr_algo.c index 0e81607018331..bb7b59fc5c08b 100644 --- a/drivers/crypto/chelsio/chcr_algo.c +++ b/drivers/crypto/chelsio/chcr_algo.c @@ -384,7 +384,8 @@ static inline int is_hmac(struct crypto_tfm *tfm) static void write_phys_cpl(struct cpl_rx_phys_dsgl *phys_cpl, struct scatterlist *sg, - struct phys_sge_parm *sg_param) + struct phys_sge_parm *sg_param, + int pci_chan_id) { struct phys_sge_pairs *to; unsigned int len = 0, left_size = sg_param->obsize; @@ -402,6 +403,7 @@ static void write_phys_cpl(struct cpl_rx_phys_dsgl *phys_cpl, phys_cpl->rss_hdr_int.opcode = CPL_RX_PHYS_ADDR; phys_cpl->rss_hdr_int.qid = htons(sg_param->qid); phys_cpl->rss_hdr_int.hash_val = 0; + phys_cpl->rss_hdr_int.channel = pci_chan_id; to = (struct phys_sge_pairs *)((unsigned char *)phys_cpl + sizeof(struct cpl_rx_phys_dsgl)); for (i = 0; nents && left_size; to++) { @@ -418,7 +420,8 @@ static void write_phys_cpl(struct cpl_rx_phys_dsgl *phys_cpl, static inline int map_writesg_phys_cpl(struct device *dev, struct cpl_rx_phys_dsgl *phys_cpl, struct scatterlist *sg, - struct phys_sge_parm *sg_param) + struct phys_sge_parm *sg_param, + int pci_chan_id) { if (!sg || !sg_param->nents) return -EINVAL; @@ -428,7 +431,7 @@ static inline int map_writesg_phys_cpl(struct device *dev, pr_err("CHCR : DMA mapping failed\n"); return -EINVAL; } - write_phys_cpl(phys_cpl, sg, sg_param); + write_phys_cpl(phys_cpl, sg, sg_param, pci_chan_id); return 0; } @@ -608,7 +611,7 @@ static inline void create_wreq(struct chcr_context *ctx, is_iv ? iv_loc : IV_NOP, !!lcb, ctx->tx_qidx); - chcr_req->ulptx.cmd_dest = FILL_ULPTX_CMD_DEST(ctx->dev->tx_channel_id, + chcr_req->ulptx.cmd_dest = FILL_ULPTX_CMD_DEST(ctx->tx_chan_id, qid); chcr_req->ulptx.len = htonl((DIV_ROUND_UP((calc_tx_flits_ofld(skb) * 8), 16) - ((sizeof(chcr_req->wreq)) >> 4))); @@ -698,7 +701,8 @@ static struct sk_buff *create_cipher_wr(struct cipher_wr_param *wrparam) sg_param.obsize = wrparam->bytes; sg_param.qid = wrparam->qid; error = map_writesg_phys_cpl(&u_ctx->lldi.pdev->dev, phys_cpl, - reqctx->dst, &sg_param); + reqctx->dst, &sg_param, + ctx->pci_chan_id); if (error) goto map_fail1; @@ -1228,16 +1232,23 @@ static int chcr_device_init(struct chcr_context *ctx) adap->vres.ncrypto_fc); rxq_perchan = u_ctx->lldi.nrxq / u_ctx->lldi.nchan; txq_perchan = ntxq / u_ctx->lldi.nchan; - rxq_idx = ctx->dev->tx_channel_id * rxq_perchan; - rxq_idx += id % rxq_perchan; - txq_idx = ctx->dev->tx_channel_id * txq_perchan; - txq_idx += id % txq_perchan; spin_lock(&ctx->dev->lock_chcr_dev); - ctx->rx_qidx = rxq_idx; - ctx->tx_qidx = txq_idx; + ctx->tx_chan_id = ctx->dev->tx_channel_id; ctx->dev->tx_channel_id = !ctx->dev->tx_channel_id; ctx->dev->rx_channel_id = 0; spin_unlock(&ctx->dev->lock_chcr_dev); + rxq_idx = ctx->tx_chan_id * rxq_perchan; + rxq_idx += id % rxq_perchan; + txq_idx = ctx->tx_chan_id * txq_perchan; + txq_idx += id % txq_perchan; + ctx->rx_qidx = rxq_idx; + ctx->tx_qidx = txq_idx; + /* Channel Id used by SGE to forward packet to Host. + * Same value should be used in cpl_fw6_pld RSS_CH field + * by FW. Driver programs PCI channel ID to be used in fw + * at the time of queue allocation with value "pi->tx_chan" + */ + ctx->pci_chan_id = txq_idx / txq_perchan; } out: return err; @@ -2066,7 +2077,8 @@ static struct sk_buff *create_authenc_wr(struct aead_request *req, sg_param.obsize = req->cryptlen + (op_type ? -authsize : authsize); sg_param.qid = qid; error = map_writesg_phys_cpl(&u_ctx->lldi.pdev->dev, phys_cpl, - reqctx->dst, &sg_param); + reqctx->dst, &sg_param, + ctx->pci_chan_id); if (error) goto dstmap_fail; @@ -2389,7 +2401,7 @@ static struct sk_buff *create_aead_ccm_wr(struct aead_request *req, sg_param.obsize = req->cryptlen + (op_type ? -authsize : authsize); sg_param.qid = qid; error = map_writesg_phys_cpl(&u_ctx->lldi.pdev->dev, phys_cpl, - reqctx->dst, &sg_param); + reqctx->dst, &sg_param, ctx->pci_chan_id); if (error) goto dstmap_fail; @@ -2545,7 +2557,8 @@ static struct sk_buff *create_gcm_wr(struct aead_request *req, sg_param.obsize = req->cryptlen + (op_type ? -authsize : authsize); sg_param.qid = qid; error = map_writesg_phys_cpl(&u_ctx->lldi.pdev->dev, phys_cpl, - reqctx->dst, &sg_param); + reqctx->dst, &sg_param, + ctx->pci_chan_id); if (error) goto dstmap_fail; diff --git a/drivers/crypto/chelsio/chcr_crypto.h b/drivers/crypto/chelsio/chcr_crypto.h index 30af1ee17b876..e039d9aeb6512 100644 --- a/drivers/crypto/chelsio/chcr_crypto.h +++ b/drivers/crypto/chelsio/chcr_crypto.h @@ -222,6 +222,8 @@ struct chcr_context { struct chcr_dev *dev; unsigned char tx_qidx; unsigned char rx_qidx; + unsigned char tx_chan_id; + unsigned char pci_chan_id; struct __crypto_ctx crypto_ctx[0]; }; -- GitLab From dfe96e30b5a5cbd4b6a4806ea7f0f779cc4ee015 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 6 Dec 2017 18:45:35 +0100 Subject: [PATCH 0846/2269] perf utils: Move is_directory() to path.h commit 06c3f2aa9fc68e7f3fe3d83e7569d2a2801d9f99 upstream. So that it can be used more widely, like in the next patch, when it will be used to fix a bug in 'perf test' handling of dirent.d_type == DT_UNKNOWN. Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171206174535.25380-1-jolsa@kernel.org [ Split from a larger patch, removed needless includes in path.h ] Signed-off-by: Arnaldo Carvalho de Melo Cc: Ignat Korchagin Signed-off-by: Greg Kroah-Hartman --- tools/perf/builtin-script.c | 14 +------------- tools/perf/util/path.c | 14 ++++++++++++++ tools/perf/util/path.h | 3 +++ 3 files changed, 18 insertions(+), 13 deletions(-) diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 615fdc63452ee..e37653b0f2d0c 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -25,6 +25,7 @@ #include "util/string2.h" #include "util/thread-stack.h" #include "util/time-utils.h" +#include "util/path.h" #include "print_binary.h" #include #include @@ -2129,19 +2130,6 @@ out: return rc; } -/* Helper function for filesystems that return a dent->d_type DT_UNKNOWN */ -static int is_directory(const char *base_path, const struct dirent *dent) -{ - char path[PATH_MAX]; - struct stat st; - - sprintf(path, "%s/%s", base_path, dent->d_name); - if (stat(path, &st)) - return 0; - - return S_ISDIR(st.st_mode); -} - #define for_each_lang(scripts_path, scripts_dir, lang_dirent) \ while ((lang_dirent = readdir(scripts_dir)) != NULL) \ if ((lang_dirent->d_type == DT_DIR || \ diff --git a/tools/perf/util/path.c b/tools/perf/util/path.c index 933f5c6bffb45..ca56ba2dd3da6 100644 --- a/tools/perf/util/path.c +++ b/tools/perf/util/path.c @@ -18,6 +18,7 @@ #include #include #include +#include #include static char bad_path[] = "/bad-path/"; @@ -77,3 +78,16 @@ bool is_regular_file(const char *file) return S_ISREG(st.st_mode); } + +/* Helper function for filesystems that return a dent->d_type DT_UNKNOWN */ +bool is_directory(const char *base_path, const struct dirent *dent) +{ + char path[PATH_MAX]; + struct stat st; + + sprintf(path, "%s/%s", base_path, dent->d_name); + if (stat(path, &st)) + return false; + + return S_ISDIR(st.st_mode); +} diff --git a/tools/perf/util/path.h b/tools/perf/util/path.h index 14a254ada7eb4..f014f905df509 100644 --- a/tools/perf/util/path.h +++ b/tools/perf/util/path.h @@ -2,9 +2,12 @@ #ifndef _PERF_PATH_H #define _PERF_PATH_H +struct dirent; + int path__join(char *bf, size_t size, const char *path1, const char *path2); int path__join3(char *bf, size_t size, const char *path1, const char *path2, const char *path3); bool is_regular_file(const char *file); +bool is_directory(const char *base_path, const struct dirent *dent); #endif /* _PERF_PATH_H */ -- GitLab From 3a7a9fb68c97ed1891fdd3843947e1d68e0252e8 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 2 Aug 2018 22:59:12 +0800 Subject: [PATCH 0847/2269] f2fs: fix invalid memory access commit d3f07c049dab1a3f1740f476afd3d5e5b738c21c upstream. syzbot found the following crash on: HEAD commit: d9bd94c0bcaa Add linux-next specific files for 20180801 git tree: linux-next console output: https://syzkaller.appspot.com/x/log.txt?x=1001189c400000 kernel config: https://syzkaller.appspot.com/x/.config?x=cc8964ea4d04518c dashboard link: https://syzkaller.appspot.com/bug?extid=c966a82db0b14aa37e81 compiler: gcc (GCC) 8.0.1 20180413 (experimental) Unfortunately, I don't have any reproducer for this crash yet. IMPORTANT: if you fix the bug, please add the following tag to the commit: Reported-by: syzbot+c966a82db0b14aa37e81@syzkaller.appspotmail.com loop7: rw=12288, want=8200, limit=20 netlink: 65342 bytes leftover after parsing attributes in process `syz-executor4'. openvswitch: netlink: Message has 8 unknown bytes. kasan: CONFIG_KASAN_INLINE enabled kasan: GPF could be caused by NULL-ptr deref or user memory access general protection fault: 0000 [#1] SMP KASAN CPU: 1 PID: 7615 Comm: syz-executor7 Not tainted 4.18.0-rc7-next-20180801+ #29 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:__read_once_size include/linux/compiler.h:188 [inline] RIP: 0010:compound_head include/linux/page-flags.h:142 [inline] RIP: 0010:PageLocked include/linux/page-flags.h:272 [inline] RIP: 0010:f2fs_put_page fs/f2fs/f2fs.h:2011 [inline] RIP: 0010:validate_checkpoint+0x66d/0xec0 fs/f2fs/checkpoint.c:835 Code: e8 58 05 7f fe 4c 8d 6b 80 4d 8d 74 24 08 48 b8 00 00 00 00 00 fc ff df 4c 89 ea 48 c1 ea 03 c6 04 02 00 4c 89 f2 48 c1 ea 03 <80> 3c 02 00 0f 85 f4 06 00 00 4c 89 ea 4d 8b 7c 24 08 48 b8 00 00 RSP: 0018:ffff8801937cebe8 EFLAGS: 00010246 RAX: dffffc0000000000 RBX: ffff8801937cef30 RCX: ffffc90006035000 RDX: 0000000000000000 RSI: ffffffff82fd9658 RDI: 0000000000000005 RBP: ffff8801937cef58 R08: ffff8801ab254700 R09: fffff94000d9e026 R10: fffff94000d9e026 R11: ffffea0006cf0137 R12: fffffffffffffffb R13: ffff8801937ceeb0 R14: 0000000000000003 R15: ffff880193419b40 FS: 00007f36a61d5700(0000) GS:ffff8801db100000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fc04ff93000 CR3: 00000001d0562000 CR4: 00000000001426e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: f2fs_get_valid_checkpoint+0x436/0x1ec0 fs/f2fs/checkpoint.c:860 f2fs_fill_super+0x2d42/0x8110 fs/f2fs/super.c:2883 mount_bdev+0x314/0x3e0 fs/super.c:1344 f2fs_mount+0x3c/0x50 fs/f2fs/super.c:3133 legacy_get_tree+0x131/0x460 fs/fs_context.c:729 vfs_get_tree+0x1cb/0x5c0 fs/super.c:1743 do_new_mount fs/namespace.c:2603 [inline] do_mount+0x6f2/0x1e20 fs/namespace.c:2927 ksys_mount+0x12d/0x140 fs/namespace.c:3143 __do_sys_mount fs/namespace.c:3157 [inline] __se_sys_mount fs/namespace.c:3154 [inline] __x64_sys_mount+0xbe/0x150 fs/namespace.c:3154 do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x45943a Code: b8 a6 00 00 00 0f 05 48 3d 01 f0 ff ff 0f 83 bd 8a fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 49 89 ca b8 a5 00 00 00 0f 05 <48> 3d 01 f0 ff ff 0f 83 9a 8a fb ff c3 66 0f 1f 84 00 00 00 00 00 RSP: 002b:00007f36a61d4a88 EFLAGS: 00000206 ORIG_RAX: 00000000000000a5 RAX: ffffffffffffffda RBX: 00007f36a61d4b30 RCX: 000000000045943a RDX: 00007f36a61d4ad0 RSI: 0000000020000100 RDI: 00007f36a61d4af0 RBP: 0000000020000100 R08: 00007f36a61d4b30 R09: 00007f36a61d4ad0 R10: 0000000000000000 R11: 0000000000000206 R12: 0000000000000013 R13: 0000000000000000 R14: 00000000004c8ea0 R15: 0000000000000000 Modules linked in: Dumping ftrace buffer: (ftrace buffer empty) ---[ end trace bd8550c129352286 ]--- RIP: 0010:__read_once_size include/linux/compiler.h:188 [inline] RIP: 0010:compound_head include/linux/page-flags.h:142 [inline] RIP: 0010:PageLocked include/linux/page-flags.h:272 [inline] RIP: 0010:f2fs_put_page fs/f2fs/f2fs.h:2011 [inline] RIP: 0010:validate_checkpoint+0x66d/0xec0 fs/f2fs/checkpoint.c:835 Code: e8 58 05 7f fe 4c 8d 6b 80 4d 8d 74 24 08 48 b8 00 00 00 00 00 fc ff df 4c 89 ea 48 c1 ea 03 c6 04 02 00 4c 89 f2 48 c1 ea 03 <80> 3c 02 00 0f 85 f4 06 00 00 4c 89 ea 4d 8b 7c 24 08 48 b8 00 00 RSP: 0018:ffff8801937cebe8 EFLAGS: 00010246 RAX: dffffc0000000000 RBX: ffff8801937cef30 RCX: ffffc90006035000 RDX: 0000000000000000 RSI: ffffffff82fd9658 RDI: 0000000000000005 netlink: 65342 bytes leftover after parsing attributes in process `syz-executor4'. RBP: ffff8801937cef58 R08: ffff8801ab254700 R09: fffff94000d9e026 openvswitch: netlink: Message has 8 unknown bytes. R10: fffff94000d9e026 R11: ffffea0006cf0137 R12: fffffffffffffffb R13: ffff8801937ceeb0 R14: 0000000000000003 R15: ffff880193419b40 FS: 00007f36a61d5700(0000) GS:ffff8801db100000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fc04ff93000 CR3: 00000001d0562000 CR4: 00000000001426e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 In validate_checkpoint(), if we failed to call get_checkpoint_version(), we will pass returned invalid page pointer into f2fs_put_page, cause accessing invalid memory, this patch tries to handle error path correctly to fix this issue. Signed-off-by: Chao Yu Signed-off-by: Greg Kroah-Hartman Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index c282e21f5b5e2..41fce930f44ce 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -708,6 +708,7 @@ static int get_checkpoint_version(struct f2fs_sb_info *sbi, block_t cp_addr, crc_offset = le32_to_cpu((*cp_block)->checksum_offset); if (crc_offset > (blk_size - sizeof(__le32))) { + f2fs_put_page(*cp_page, 1); f2fs_msg(sbi->sb, KERN_WARNING, "invalid crc_offset: %zu", crc_offset); return -EINVAL; @@ -715,6 +716,7 @@ static int get_checkpoint_version(struct f2fs_sb_info *sbi, block_t cp_addr, crc = cur_cp_crc(*cp_block); if (!f2fs_crc_valid(sbi, crc, *cp_block, crc_offset)) { + f2fs_put_page(*cp_page, 1); f2fs_msg(sbi->sb, KERN_WARNING, "invalid crc value"); return -EINVAL; } @@ -734,14 +736,14 @@ static struct page *validate_checkpoint(struct f2fs_sb_info *sbi, err = get_checkpoint_version(sbi, cp_addr, &cp_block, &cp_page_1, version); if (err) - goto invalid_cp1; + return NULL; pre_version = *version; cp_addr += le32_to_cpu(cp_block->cp_pack_total_block_count) - 1; err = get_checkpoint_version(sbi, cp_addr, &cp_block, &cp_page_2, version); if (err) - goto invalid_cp2; + goto invalid_cp; cur_version = *version; if (cur_version == pre_version) { @@ -749,9 +751,8 @@ static struct page *validate_checkpoint(struct f2fs_sb_info *sbi, f2fs_put_page(cp_page_2, 1); return cp_page_1; } -invalid_cp2: f2fs_put_page(cp_page_2, 1); -invalid_cp1: +invalid_cp: f2fs_put_page(cp_page_1, 1); return NULL; } -- GitLab From 5656b735418363abdeda50027c627fb9542336f6 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Wed, 12 Sep 2018 16:27:44 -0700 Subject: [PATCH 0848/2269] ucma: fix a use-after-free in ucma_resolve_ip() commit 5fe23f262e0548ca7f19fb79f89059a60d087d22 upstream. There is a race condition between ucma_close() and ucma_resolve_ip(): CPU0 CPU1 ucma_resolve_ip(): ucma_close(): ctx = ucma_get_ctx(file, cmd.id); list_for_each_entry_safe(ctx, tmp, &file->ctx_list, list) { mutex_lock(&mut); idr_remove(&ctx_idr, ctx->id); mutex_unlock(&mut); ... mutex_lock(&mut); if (!ctx->closing) { mutex_unlock(&mut); rdma_destroy_id(ctx->cm_id); ... ucma_free_ctx(ctx); ret = rdma_resolve_addr(); ucma_put_ctx(ctx); Before idr_remove(), ucma_get_ctx() could still find the ctx and after rdma_destroy_id(), rdma_resolve_addr() may still access id_priv pointer. Also, ucma_put_ctx() may use ctx after ucma_free_ctx() too. ucma_close() should call ucma_put_ctx() too which tests the refcnt and waits for the last one releasing it. The similar pattern is already used by ucma_destroy_id(). Reported-and-tested-by: syzbot+da2591e115d57a9cbb8b@syzkaller.appspotmail.com Reported-by: syzbot+cfe3c1e8ef634ba8964b@syzkaller.appspotmail.com Cc: Jason Gunthorpe Cc: Doug Ledford Cc: Leon Romanovsky Signed-off-by: Cong Wang Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/core/ucma.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index 16423d7ab599a..17144a781aebc 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -1742,6 +1742,8 @@ static int ucma_close(struct inode *inode, struct file *filp) mutex_lock(&mut); if (!ctx->closing) { mutex_unlock(&mut); + ucma_put_ctx(ctx); + wait_for_completion(&ctx->comp); /* rdma_destroy_id ensures that no event handlers are * inflight for that id before releasing it. */ -- GitLab From 4c925efc22301608ffef444dc45cc07adc931bd3 Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Mon, 3 Sep 2018 23:06:23 +0200 Subject: [PATCH 0849/2269] ubifs: Check for name being NULL while mounting commit 37f31b6ca4311b94d985fb398a72e5399ad57925 upstream. The requested device name can be NULL or an empty string. Check for that and refuse to continue. UBIFS has to do this manually since we cannot use mount_bdev(), which checks for this condition. Fixes: 1e51764a3c2ac ("UBIFS: add new flash file system") Reported-by: syzbot+38bd0f7865e5c6379280@syzkaller.appspotmail.com Signed-off-by: Richard Weinberger Signed-off-by: Greg Kroah-Hartman --- fs/ubifs/super.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index e1cd3dcf5a031..ad827cf642fe5 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -1930,6 +1930,9 @@ static struct ubi_volume_desc *open_ubi(const char *name, int mode) int dev, vol; char *endptr; + if (!name || !*name) + return ERR_PTR(-EINVAL); + /* First, try to open using the device node path method */ ubi = ubi_open_volume_path(name, mode); if (!IS_ERR(ubi)) -- GitLab From 251bc1f44c33a8707d9bd0a1eef459224ab5da99 Mon Sep 17 00:00:00 2001 From: Ka-Cheong Poon Date: Mon, 30 Jul 2018 22:48:41 -0700 Subject: [PATCH 0850/2269] rds: rds_ib_recv_alloc_cache() should call alloc_percpu_gfp() instead MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit f394ad28feffbeebab77c8bf9a203bd49b957c9a upstream. Currently, rds_ib_conn_alloc() calls rds_ib_recv_alloc_caches() without passing along the gfp_t flag. But rds_ib_recv_alloc_caches() and rds_ib_recv_alloc_cache() should take a gfp_t parameter so that rds_ib_recv_alloc_cache() can call alloc_percpu_gfp() using the correct flag instead of calling alloc_percpu(). Signed-off-by: Ka-Cheong Poon Acked-by: Santosh Shilimkar Signed-off-by: David S. Miller Cc: Håkon Bugge Signed-off-by: Greg Kroah-Hartman --- net/rds/ib.h | 2 +- net/rds/ib_cm.c | 2 +- net/rds/ib_recv.c | 10 +++++----- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/net/rds/ib.h b/net/rds/ib.h index 86a8578d95b86..7db93f7f5c616 100644 --- a/net/rds/ib.h +++ b/net/rds/ib.h @@ -373,7 +373,7 @@ void rds_ib_mr_cqe_handler(struct rds_ib_connection *ic, struct ib_wc *wc); int rds_ib_recv_init(void); void rds_ib_recv_exit(void); int rds_ib_recv_path(struct rds_conn_path *conn); -int rds_ib_recv_alloc_caches(struct rds_ib_connection *ic); +int rds_ib_recv_alloc_caches(struct rds_ib_connection *ic, gfp_t gfp); void rds_ib_recv_free_caches(struct rds_ib_connection *ic); void rds_ib_recv_refill(struct rds_connection *conn, int prefill, gfp_t gfp); void rds_ib_inc_free(struct rds_incoming *inc); diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c index 6e721c449c4bf..e086395a23551 100644 --- a/net/rds/ib_cm.c +++ b/net/rds/ib_cm.c @@ -946,7 +946,7 @@ int rds_ib_conn_alloc(struct rds_connection *conn, gfp_t gfp) if (!ic) return -ENOMEM; - ret = rds_ib_recv_alloc_caches(ic); + ret = rds_ib_recv_alloc_caches(ic, gfp); if (ret) { kfree(ic); return ret; diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c index b4e421aa97279..918d2e676b9b9 100644 --- a/net/rds/ib_recv.c +++ b/net/rds/ib_recv.c @@ -98,12 +98,12 @@ static void rds_ib_cache_xfer_to_ready(struct rds_ib_refill_cache *cache) } } -static int rds_ib_recv_alloc_cache(struct rds_ib_refill_cache *cache) +static int rds_ib_recv_alloc_cache(struct rds_ib_refill_cache *cache, gfp_t gfp) { struct rds_ib_cache_head *head; int cpu; - cache->percpu = alloc_percpu(struct rds_ib_cache_head); + cache->percpu = alloc_percpu_gfp(struct rds_ib_cache_head, gfp); if (!cache->percpu) return -ENOMEM; @@ -118,13 +118,13 @@ static int rds_ib_recv_alloc_cache(struct rds_ib_refill_cache *cache) return 0; } -int rds_ib_recv_alloc_caches(struct rds_ib_connection *ic) +int rds_ib_recv_alloc_caches(struct rds_ib_connection *ic, gfp_t gfp) { int ret; - ret = rds_ib_recv_alloc_cache(&ic->i_cache_incs); + ret = rds_ib_recv_alloc_cache(&ic->i_cache_incs, gfp); if (!ret) { - ret = rds_ib_recv_alloc_cache(&ic->i_cache_frags); + ret = rds_ib_recv_alloc_cache(&ic->i_cache_frags, gfp); if (ret) free_percpu(ic->i_cache_incs.percpu); } -- GitLab From 7f42eada5e3fd87ed15efba117e1c792a9fbdd0a Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 13 Oct 2017 16:11:48 +0300 Subject: [PATCH 0851/2269] virtio_balloon: fix deadlock on OOM commit c7cdff0e864713a089d7cb3a2b1136ba9a54881a upstream. fill_balloon doing memory allocations under balloon_lock can cause a deadlock when leak_balloon is called from virtballoon_oom_notify and tries to take same lock. To fix, split page allocation and enqueue and do allocations outside the lock. Here's a detailed analysis of the deadlock by Tetsuo Handa: In leak_balloon(), mutex_lock(&vb->balloon_lock) is called in order to serialize against fill_balloon(). But in fill_balloon(), alloc_page(GFP_HIGHUSER[_MOVABLE] | __GFP_NOMEMALLOC | __GFP_NORETRY) is called with vb->balloon_lock mutex held. Since GFP_HIGHUSER[_MOVABLE] implies __GFP_DIRECT_RECLAIM | __GFP_IO | __GFP_FS, despite __GFP_NORETRY is specified, this allocation attempt might indirectly depend on somebody else's __GFP_DIRECT_RECLAIM memory allocation. And such indirect __GFP_DIRECT_RECLAIM memory allocation might call leak_balloon() via virtballoon_oom_notify() via blocking_notifier_call_chain() callback via out_of_memory() when it reached __alloc_pages_may_oom() and held oom_lock mutex. Since vb->balloon_lock mutex is already held by fill_balloon(), it will cause OOM lockup. Thread1 Thread2 fill_balloon() takes a balloon_lock balloon_page_enqueue() alloc_page(GFP_HIGHUSER_MOVABLE) direct reclaim (__GFP_FS context) takes a fs lock waits for that fs lock alloc_page(GFP_NOFS) __alloc_pages_may_oom() takes the oom_lock out_of_memory() blocking_notifier_call_chain() leak_balloon() tries to take that balloon_lock and deadlocks Reported-by: Tetsuo Handa Cc: Michal Hocko Cc: Wei Wang Signed-off-by: Michael S. Tsirkin Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- drivers/virtio/virtio_balloon.c | 24 +++++++++++++++----- include/linux/balloon_compaction.h | 35 +++++++++++++++++++++++++++++- mm/balloon_compaction.c | 28 ++++++++++++++++++------ 3 files changed, 74 insertions(+), 13 deletions(-) diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 36c9fbf70d44b..43fcd17a738ec 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -143,16 +143,17 @@ static void set_page_pfns(struct virtio_balloon *vb, static unsigned fill_balloon(struct virtio_balloon *vb, size_t num) { - struct balloon_dev_info *vb_dev_info = &vb->vb_dev_info; unsigned num_allocated_pages; + unsigned num_pfns; + struct page *page; + LIST_HEAD(pages); /* We can only do one array worth at a time. */ num = min(num, ARRAY_SIZE(vb->pfns)); - mutex_lock(&vb->balloon_lock); - for (vb->num_pfns = 0; vb->num_pfns < num; - vb->num_pfns += VIRTIO_BALLOON_PAGES_PER_PAGE) { - struct page *page = balloon_page_enqueue(vb_dev_info); + for (num_pfns = 0; num_pfns < num; + num_pfns += VIRTIO_BALLOON_PAGES_PER_PAGE) { + struct page *page = balloon_page_alloc(); if (!page) { dev_info_ratelimited(&vb->vdev->dev, @@ -162,6 +163,19 @@ static unsigned fill_balloon(struct virtio_balloon *vb, size_t num) msleep(200); break; } + + balloon_page_push(&pages, page); + } + + mutex_lock(&vb->balloon_lock); + + vb->num_pfns = 0; + + while ((page = balloon_page_pop(&pages))) { + balloon_page_enqueue(&vb->vb_dev_info, page); + + vb->num_pfns += VIRTIO_BALLOON_PAGES_PER_PAGE; + set_page_pfns(vb, vb->pfns + vb->num_pfns, page); vb->num_pages += VIRTIO_BALLOON_PAGES_PER_PAGE; if (!virtio_has_feature(vb->vdev, diff --git a/include/linux/balloon_compaction.h b/include/linux/balloon_compaction.h index fbbe6da40feda..53051f3d8f256 100644 --- a/include/linux/balloon_compaction.h +++ b/include/linux/balloon_compaction.h @@ -50,6 +50,7 @@ #include #include #include +#include /* * Balloon device information descriptor. @@ -67,7 +68,9 @@ struct balloon_dev_info { struct inode *inode; }; -extern struct page *balloon_page_enqueue(struct balloon_dev_info *b_dev_info); +extern struct page *balloon_page_alloc(void); +extern void balloon_page_enqueue(struct balloon_dev_info *b_dev_info, + struct page *page); extern struct page *balloon_page_dequeue(struct balloon_dev_info *b_dev_info); static inline void balloon_devinfo_init(struct balloon_dev_info *balloon) @@ -193,4 +196,34 @@ static inline gfp_t balloon_mapping_gfp_mask(void) } #endif /* CONFIG_BALLOON_COMPACTION */ + +/* + * balloon_page_push - insert a page into a page list. + * @head : pointer to list + * @page : page to be added + * + * Caller must ensure the page is private and protect the list. + */ +static inline void balloon_page_push(struct list_head *pages, struct page *page) +{ + list_add(&page->lru, pages); +} + +/* + * balloon_page_pop - remove a page from a page list. + * @head : pointer to list + * @page : page to be added + * + * Caller must ensure the page is private and protect the list. + */ +static inline struct page *balloon_page_pop(struct list_head *pages) +{ + struct page *page = list_first_entry_or_null(pages, struct page, lru); + + if (!page) + return NULL; + + list_del(&page->lru); + return page; +} #endif /* _LINUX_BALLOON_COMPACTION_H */ diff --git a/mm/balloon_compaction.c b/mm/balloon_compaction.c index 68d28924ba79d..ef858d547e2d7 100644 --- a/mm/balloon_compaction.c +++ b/mm/balloon_compaction.c @@ -10,23 +10,38 @@ #include #include +/* + * balloon_page_alloc - allocates a new page for insertion into the balloon + * page list. + * + * Driver must call it to properly allocate a new enlisted balloon page. + * Driver must call balloon_page_enqueue before definitively removing it from + * the guest system. This function returns the page address for the recently + * allocated page or NULL in the case we fail to allocate a new page this turn. + */ +struct page *balloon_page_alloc(void) +{ + struct page *page = alloc_page(balloon_mapping_gfp_mask() | + __GFP_NOMEMALLOC | __GFP_NORETRY); + return page; +} +EXPORT_SYMBOL_GPL(balloon_page_alloc); + /* * balloon_page_enqueue - allocates a new page and inserts it into the balloon * page list. * @b_dev_info: balloon device descriptor where we will insert a new page to + * @page: new page to enqueue - allocated using balloon_page_alloc. * - * Driver must call it to properly allocate a new enlisted balloon page + * Driver must call it to properly enqueue a new allocated balloon page * before definitively removing it from the guest system. * This function returns the page address for the recently enqueued page or * NULL in the case we fail to allocate a new page this turn. */ -struct page *balloon_page_enqueue(struct balloon_dev_info *b_dev_info) +void balloon_page_enqueue(struct balloon_dev_info *b_dev_info, + struct page *page) { unsigned long flags; - struct page *page = alloc_page(balloon_mapping_gfp_mask() | - __GFP_NOMEMALLOC | __GFP_NORETRY); - if (!page) - return NULL; /* * Block others from accessing the 'page' when we get around to @@ -39,7 +54,6 @@ struct page *balloon_page_enqueue(struct balloon_dev_info *b_dev_info) __count_vm_event(BALLOON_INFLATE); spin_unlock_irqrestore(&b_dev_info->pages_lock, flags); unlock_page(page); - return page; } EXPORT_SYMBOL_GPL(balloon_page_enqueue); -- GitLab From 711b942ae3beb9c39912271eb12e685ac5d1b7aa Mon Sep 17 00:00:00 2001 From: Jan Stancek Date: Fri, 1 Dec 2017 10:50:28 +0100 Subject: [PATCH 0852/2269] virtio_balloon: fix increment of vb->num_pfns in fill_balloon() commit d9e427f6ab8142d6868eb719e6a7851aafea56b6 upstream. commit c7cdff0e8647 ("virtio_balloon: fix deadlock on OOM") changed code to increment vb->num_pfns before call to set_page_pfns(), which used to happen only after. This patch fixes boot hang for me on ppc64le KVM guests. Fixes: c7cdff0e8647 ("virtio_balloon: fix deadlock on OOM") Cc: Michael S. Tsirkin Cc: Tetsuo Handa Cc: Michal Hocko Cc: Wei Wang Cc: stable@vger.kernel.org Signed-off-by: Jan Stancek Signed-off-by: Michael S. Tsirkin Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- drivers/virtio/virtio_balloon.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 43fcd17a738ec..d9873aa014a6a 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -174,13 +174,12 @@ static unsigned fill_balloon(struct virtio_balloon *vb, size_t num) while ((page = balloon_page_pop(&pages))) { balloon_page_enqueue(&vb->vb_dev_info, page); - vb->num_pfns += VIRTIO_BALLOON_PAGES_PER_PAGE; - set_page_pfns(vb, vb->pfns + vb->num_pfns, page); vb->num_pages += VIRTIO_BALLOON_PAGES_PER_PAGE; if (!virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM)) adjust_managed_page_count(page, -1); + vb->num_pfns += VIRTIO_BALLOON_PAGES_PER_PAGE; } num_allocated_pages = vb->num_pfns; -- GitLab From c03f0ab15f3bbe5806e30616a16e016ca624468d Mon Sep 17 00:00:00 2001 From: Zhi Chen Date: Mon, 18 Jun 2018 17:00:39 +0300 Subject: [PATCH 0853/2269] ath10k: fix scan crash due to incorrect length calculation commit c8291988806407e02a01b4b15b4504eafbcc04e0 upstream. Length of WMI scan message was not calculated correctly. The allocated buffer was smaller than what we expected. So WMI message corrupted skb_info, which is at the end of skb->data. This fix takes TLV header into account even if the element is zero-length. Crash log: [49.629986] Unhandled kernel unaligned access[#1]: [49.634932] CPU: 0 PID: 1176 Comm: logd Not tainted 4.4.60 #180 [49.641040] task: 83051460 ti: 8329c000 task.ti: 8329c000 [49.646608] $ 0 : 00000000 00000001 80984a80 00000000 [49.652038] $ 4 : 45259e89 8046d484 8046df30 8024ba70 [49.657468] $ 8 : 00000000 804cc4c0 00000001 20306320 [49.662898] $12 : 33322037 000110f2 00000000 31203930 [49.668327] $16 : 82792b40 80984a80 00000001 804207fc [49.673757] $20 : 00000000 0000012c 00000040 80470000 [49.679186] $24 : 00000000 8024af7c [49.684617] $28 : 8329c000 8329db88 00000001 802c58d0 [49.690046] Hi : 00000000 [49.693022] Lo : 453c0000 [49.696013] epc : 800efae4 put_page+0x0/0x58 [49.700615] ra : 802c58d0 skb_release_data+0x148/0x1d4 [49.706184] Status: 1000fc03 KERNEL EXL IE [49.710531] Cause : 00800010 (ExcCode 04) [49.714669] BadVA : 45259e89 [49.717644] PrId : 00019374 (MIPS 24Kc) Signed-off-by: Zhi Chen Signed-off-by: Kalle Valo Cc: Brian Norris Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/ath10k/wmi-tlv.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/wmi-tlv.c b/drivers/net/wireless/ath/ath10k/wmi-tlv.c index baec856af90ff..b54001e97ced0 100644 --- a/drivers/net/wireless/ath/ath10k/wmi-tlv.c +++ b/drivers/net/wireless/ath/ath10k/wmi-tlv.c @@ -1486,10 +1486,10 @@ ath10k_wmi_tlv_op_gen_start_scan(struct ath10k *ar, bssid_len = arg->n_bssids * sizeof(struct wmi_mac_addr); ie_len = roundup(arg->ie_len, 4); len = (sizeof(*tlv) + sizeof(*cmd)) + - (arg->n_channels ? sizeof(*tlv) + chan_len : 0) + - (arg->n_ssids ? sizeof(*tlv) + ssid_len : 0) + - (arg->n_bssids ? sizeof(*tlv) + bssid_len : 0) + - (arg->ie_len ? sizeof(*tlv) + ie_len : 0); + sizeof(*tlv) + chan_len + + sizeof(*tlv) + ssid_len + + sizeof(*tlv) + bssid_len + + sizeof(*tlv) + ie_len; skb = ath10k_wmi_alloc_skb(ar, len); if (!skb) -- GitLab From 0b46ce3e3423aee80d28d296e1806176cdcec7ad Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 13 Oct 2018 09:27:30 +0200 Subject: [PATCH 0854/2269] Linux 4.14.76 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 7fc373c011c0f..332dd011b3b93 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 75 +SUBLEVEL = 76 EXTRAVERSION = NAME = Petit Gorille -- GitLab From 6d46bcc5a74730c0a90682289d56f3fc60e8a798 Mon Sep 17 00:00:00 2001 From: Steve Muckle Date: Tue, 16 Oct 2018 11:23:05 -0700 Subject: [PATCH 0855/2269] ANDROID: x86_64_cuttlefish_defconfig: disable CONFIG_MEMORY_STATE_TIME Bug: 117847156 Change-Id: Idfbac9c1f0dc2617642c30ddb65400083da44b49 Signed-off-by: Steve Muckle --- arch/x86/configs/x86_64_cuttlefish_defconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/configs/x86_64_cuttlefish_defconfig b/arch/x86/configs/x86_64_cuttlefish_defconfig index db63c91b57b75..6425593f7aec9 100644 --- a/arch/x86/configs/x86_64_cuttlefish_defconfig +++ b/arch/x86/configs/x86_64_cuttlefish_defconfig @@ -205,7 +205,6 @@ CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=8192 CONFIG_VIRTIO_BLK=y CONFIG_UID_SYS_STATS=y -CONFIG_MEMORY_STATE_TIME=y CONFIG_SCSI=y CONFIG_BLK_DEV_SD=y CONFIG_BLK_DEV_SR=y -- GitLab From 67d1ee6c7b76f1b1de9c6e217df8ee5894902aaf Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Wed, 26 Sep 2018 00:41:04 -0400 Subject: [PATCH 0856/2269] bnxt_en: Fix TX timeout during netpoll. [ Upstream commit 73f21c653f930f438d53eed29b5e4c65c8a0f906 ] The current netpoll implementation in the bnxt_en driver has problems that may miss TX completion events. bnxt_poll_work() in effect is only handling at most 1 TX packet before exiting. In addition, there may be in flight TX completions that ->poll() may miss even after we fix bnxt_poll_work() to handle all visible TX completions. netpoll may not call ->poll() again and HW may not generate IRQ because the driver does not ARM the IRQ when the budget (0 for netpoll) is reached. We fix it by handling all TX completions and to always ARM the IRQ when we exit ->poll() with 0 budget. Also, the logic to ACK the completion ring in case it is almost filled with TX completions need to be adjusted to take care of the 0 budget case, as discussed with Eric Dumazet Reported-by: Song Liu Reviewed-by: Song Liu Tested-by: Song Liu Signed-off-by: Michael Chan Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 937db80192892..a741c0648b258 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -1864,8 +1864,11 @@ static int bnxt_poll_work(struct bnxt *bp, struct bnxt_napi *bnapi, int budget) if (TX_CMP_TYPE(txcmp) == CMP_TYPE_TX_L2_CMP) { tx_pkts++; /* return full budget so NAPI will complete. */ - if (unlikely(tx_pkts > bp->tx_wake_thresh)) + if (unlikely(tx_pkts > bp->tx_wake_thresh)) { rx_pkts = budget; + raw_cons = NEXT_RAW_CMP(raw_cons); + break; + } } else if ((TX_CMP_TYPE(txcmp) & 0x30) == 0x10) { if (likely(budget)) rc = bnxt_rx_pkt(bp, bnapi, &raw_cons, &event); @@ -1893,7 +1896,7 @@ static int bnxt_poll_work(struct bnxt *bp, struct bnxt_napi *bnapi, int budget) } raw_cons = NEXT_RAW_CMP(raw_cons); - if (rx_pkts == budget) + if (rx_pkts && rx_pkts == budget) break; } @@ -2007,8 +2010,12 @@ static int bnxt_poll(struct napi_struct *napi, int budget) while (1) { work_done += bnxt_poll_work(bp, bnapi, budget - work_done); - if (work_done >= budget) + if (work_done >= budget) { + if (!budget) + BNXT_CP_DB_REARM(cpr->cp_doorbell, + cpr->cp_raw_cons); break; + } if (!bnxt_has_work(bp, cpr)) { if (napi_complete_done(napi, work_done)) -- GitLab From e73b51a995ac55aa207b693dc6830784efa6c41c Mon Sep 17 00:00:00 2001 From: Venkat Duvvuru Date: Fri, 5 Oct 2018 00:26:02 -0400 Subject: [PATCH 0857/2269] bnxt_en: free hwrm resources, if driver probe fails. [ Upstream commit a2bf74f4e1b82395dad2b08d2a911d9151db71c1 ] When the driver probe fails, all the resources that were allocated prior to the failure must be freed. However, hwrm dma response memory is not getting freed. This patch fixes the problem described above. Fixes: c0c050c58d84 ("bnxt_en: New Broadcom ethernet driver.") Signed-off-by: Venkat Duvvuru Signed-off-by: Michael Chan Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index a741c0648b258..da6c73868fa05 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -2964,10 +2964,11 @@ static void bnxt_free_hwrm_resources(struct bnxt *bp) { struct pci_dev *pdev = bp->pdev; - dma_free_coherent(&pdev->dev, PAGE_SIZE, bp->hwrm_cmd_resp_addr, - bp->hwrm_cmd_resp_dma_addr); - - bp->hwrm_cmd_resp_addr = NULL; + if (bp->hwrm_cmd_resp_addr) { + dma_free_coherent(&pdev->dev, PAGE_SIZE, bp->hwrm_cmd_resp_addr, + bp->hwrm_cmd_resp_dma_addr); + bp->hwrm_cmd_resp_addr = NULL; + } if (bp->hwrm_dbg_resp_addr) { dma_free_coherent(&pdev->dev, HWRM_DBG_REG_BUF_SIZE, bp->hwrm_dbg_resp_addr, @@ -8217,6 +8218,7 @@ init_err_cleanup_tc: bnxt_clear_int_mode(bp); init_err_pci_clean: + bnxt_free_hwrm_resources(bp); bnxt_cleanup_pci(bp); init_err_free: -- GitLab From 94402f23659fa9005776a1500f61953c825c3420 Mon Sep 17 00:00:00 2001 From: Mahesh Bandewar Date: Mon, 24 Sep 2018 14:40:11 -0700 Subject: [PATCH 0858/2269] bonding: avoid possible dead-lock [ Upstream commit d4859d749aa7090ffb743d15648adb962a1baeae ] Syzkaller reported this on a slightly older kernel but it's still applicable to the current kernel - ====================================================== WARNING: possible circular locking dependency detected 4.18.0-next-20180823+ #46 Not tainted ------------------------------------------------------ syz-executor4/26841 is trying to acquire lock: 00000000dd41ef48 ((wq_completion)bond_dev->name){+.+.}, at: flush_workqueue+0x2db/0x1e10 kernel/workqueue.c:2652 but task is already holding lock: 00000000768ab431 (rtnl_mutex){+.+.}, at: rtnl_lock net/core/rtnetlink.c:77 [inline] 00000000768ab431 (rtnl_mutex){+.+.}, at: rtnetlink_rcv_msg+0x412/0xc30 net/core/rtnetlink.c:4708 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #2 (rtnl_mutex){+.+.}: __mutex_lock_common kernel/locking/mutex.c:925 [inline] __mutex_lock+0x171/0x1700 kernel/locking/mutex.c:1073 mutex_lock_nested+0x16/0x20 kernel/locking/mutex.c:1088 rtnl_lock+0x17/0x20 net/core/rtnetlink.c:77 bond_netdev_notify drivers/net/bonding/bond_main.c:1310 [inline] bond_netdev_notify_work+0x44/0xd0 drivers/net/bonding/bond_main.c:1320 process_one_work+0xc73/0x1aa0 kernel/workqueue.c:2153 worker_thread+0x189/0x13c0 kernel/workqueue.c:2296 kthread+0x35a/0x420 kernel/kthread.c:246 ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:415 -> #1 ((work_completion)(&(&nnw->work)->work)){+.+.}: process_one_work+0xc0b/0x1aa0 kernel/workqueue.c:2129 worker_thread+0x189/0x13c0 kernel/workqueue.c:2296 kthread+0x35a/0x420 kernel/kthread.c:246 ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:415 -> #0 ((wq_completion)bond_dev->name){+.+.}: lock_acquire+0x1e4/0x4f0 kernel/locking/lockdep.c:3901 flush_workqueue+0x30a/0x1e10 kernel/workqueue.c:2655 drain_workqueue+0x2a9/0x640 kernel/workqueue.c:2820 destroy_workqueue+0xc6/0x9d0 kernel/workqueue.c:4155 __alloc_workqueue_key+0xef9/0x1190 kernel/workqueue.c:4138 bond_init+0x269/0x940 drivers/net/bonding/bond_main.c:4734 register_netdevice+0x337/0x1100 net/core/dev.c:8410 bond_newlink+0x49/0xa0 drivers/net/bonding/bond_netlink.c:453 rtnl_newlink+0xef4/0x1d50 net/core/rtnetlink.c:3099 rtnetlink_rcv_msg+0x46e/0xc30 net/core/rtnetlink.c:4711 netlink_rcv_skb+0x172/0x440 net/netlink/af_netlink.c:2454 rtnetlink_rcv+0x1c/0x20 net/core/rtnetlink.c:4729 netlink_unicast_kernel net/netlink/af_netlink.c:1317 [inline] netlink_unicast+0x5a0/0x760 net/netlink/af_netlink.c:1343 netlink_sendmsg+0xa18/0xfc0 net/netlink/af_netlink.c:1908 sock_sendmsg_nosec net/socket.c:622 [inline] sock_sendmsg+0xd5/0x120 net/socket.c:632 ___sys_sendmsg+0x7fd/0x930 net/socket.c:2115 __sys_sendmsg+0x11d/0x290 net/socket.c:2153 __do_sys_sendmsg net/socket.c:2162 [inline] __se_sys_sendmsg net/socket.c:2160 [inline] __x64_sys_sendmsg+0x78/0xb0 net/socket.c:2160 do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe other info that might help us debug this: Chain exists of: (wq_completion)bond_dev->name --> (work_completion)(&(&nnw->work)->work) --> rtnl_mutex Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(rtnl_mutex); lock((work_completion)(&(&nnw->work)->work)); lock(rtnl_mutex); lock((wq_completion)bond_dev->name); *** DEADLOCK *** 1 lock held by syz-executor4/26841: stack backtrace: CPU: 1 PID: 26841 Comm: syz-executor4 Not tainted 4.18.0-next-20180823+ #46 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x1c9/0x2b4 lib/dump_stack.c:113 print_circular_bug.isra.34.cold.55+0x1bd/0x27d kernel/locking/lockdep.c:1222 check_prev_add kernel/locking/lockdep.c:1862 [inline] check_prevs_add kernel/locking/lockdep.c:1975 [inline] validate_chain kernel/locking/lockdep.c:2416 [inline] __lock_acquire+0x3449/0x5020 kernel/locking/lockdep.c:3412 lock_acquire+0x1e4/0x4f0 kernel/locking/lockdep.c:3901 flush_workqueue+0x30a/0x1e10 kernel/workqueue.c:2655 drain_workqueue+0x2a9/0x640 kernel/workqueue.c:2820 destroy_workqueue+0xc6/0x9d0 kernel/workqueue.c:4155 __alloc_workqueue_key+0xef9/0x1190 kernel/workqueue.c:4138 bond_init+0x269/0x940 drivers/net/bonding/bond_main.c:4734 register_netdevice+0x337/0x1100 net/core/dev.c:8410 bond_newlink+0x49/0xa0 drivers/net/bonding/bond_netlink.c:453 rtnl_newlink+0xef4/0x1d50 net/core/rtnetlink.c:3099 rtnetlink_rcv_msg+0x46e/0xc30 net/core/rtnetlink.c:4711 netlink_rcv_skb+0x172/0x440 net/netlink/af_netlink.c:2454 rtnetlink_rcv+0x1c/0x20 net/core/rtnetlink.c:4729 netlink_unicast_kernel net/netlink/af_netlink.c:1317 [inline] netlink_unicast+0x5a0/0x760 net/netlink/af_netlink.c:1343 netlink_sendmsg+0xa18/0xfc0 net/netlink/af_netlink.c:1908 sock_sendmsg_nosec net/socket.c:622 [inline] sock_sendmsg+0xd5/0x120 net/socket.c:632 ___sys_sendmsg+0x7fd/0x930 net/socket.c:2115 __sys_sendmsg+0x11d/0x290 net/socket.c:2153 __do_sys_sendmsg net/socket.c:2162 [inline] __se_sys_sendmsg net/socket.c:2160 [inline] __x64_sys_sendmsg+0x78/0xb0 net/socket.c:2160 do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x457089 Code: fd b4 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 cb b4 fb ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007f2df20a5c78 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 00007f2df20a66d4 RCX: 0000000000457089 RDX: 0000000000000000 RSI: 0000000020000180 RDI: 0000000000000003 RBP: 0000000000930140 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00000000ffffffff R13: 00000000004d40b8 R14: 00000000004c8ad8 R15: 0000000000000001 Signed-off-by: Mahesh Bandewar Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/bonding/bond_main.c | 43 +++++++++++++-------------------- include/net/bonding.h | 7 +----- 2 files changed, 18 insertions(+), 32 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 15aedb64a02be..23c0b515d2627 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -210,6 +210,7 @@ static void bond_get_stats(struct net_device *bond_dev, static void bond_slave_arr_handler(struct work_struct *work); static bool bond_time_in_interval(struct bonding *bond, unsigned long last_act, int mod); +static void bond_netdev_notify_work(struct work_struct *work); /*---------------------------- General routines -----------------------------*/ @@ -1254,6 +1255,8 @@ static struct slave *bond_alloc_slave(struct bonding *bond) return NULL; } } + INIT_DELAYED_WORK(&slave->notify_work, bond_netdev_notify_work); + return slave; } @@ -1261,6 +1264,7 @@ static void bond_free_slave(struct slave *slave) { struct bonding *bond = bond_get_bond_by_slave(slave); + cancel_delayed_work_sync(&slave->notify_work); if (BOND_MODE(bond) == BOND_MODE_8023AD) kfree(SLAVE_AD_INFO(slave)); @@ -1282,39 +1286,26 @@ static void bond_fill_ifslave(struct slave *slave, struct ifslave *info) info->link_failure_count = slave->link_failure_count; } -static void bond_netdev_notify(struct net_device *dev, - struct netdev_bonding_info *info) -{ - rtnl_lock(); - netdev_bonding_info_change(dev, info); - rtnl_unlock(); -} - static void bond_netdev_notify_work(struct work_struct *_work) { - struct netdev_notify_work *w = - container_of(_work, struct netdev_notify_work, work.work); + struct slave *slave = container_of(_work, struct slave, + notify_work.work); + + if (rtnl_trylock()) { + struct netdev_bonding_info binfo; - bond_netdev_notify(w->dev, &w->bonding_info); - dev_put(w->dev); - kfree(w); + bond_fill_ifslave(slave, &binfo.slave); + bond_fill_ifbond(slave->bond, &binfo.master); + netdev_bonding_info_change(slave->dev, &binfo); + rtnl_unlock(); + } else { + queue_delayed_work(slave->bond->wq, &slave->notify_work, 1); + } } void bond_queue_slave_event(struct slave *slave) { - struct bonding *bond = slave->bond; - struct netdev_notify_work *nnw = kzalloc(sizeof(*nnw), GFP_ATOMIC); - - if (!nnw) - return; - - dev_hold(slave->dev); - nnw->dev = slave->dev; - bond_fill_ifslave(slave, &nnw->bonding_info.slave); - bond_fill_ifbond(bond, &nnw->bonding_info.master); - INIT_DELAYED_WORK(&nnw->work, bond_netdev_notify_work); - - queue_delayed_work(slave->bond->wq, &nnw->work, 0); + queue_delayed_work(slave->bond->wq, &slave->notify_work, 0); } void bond_lower_state_changed(struct slave *slave) diff --git a/include/net/bonding.h b/include/net/bonding.h index 73799da57400f..04008209506a1 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -139,12 +139,6 @@ struct bond_parm_tbl { int mode; }; -struct netdev_notify_work { - struct delayed_work work; - struct net_device *dev; - struct netdev_bonding_info bonding_info; -}; - struct slave { struct net_device *dev; /* first - useful for panic debug */ struct bonding *bond; /* our master */ @@ -172,6 +166,7 @@ struct slave { #ifdef CONFIG_NET_POLL_CONTROLLER struct netpoll *np; #endif + struct delayed_work notify_work; struct kobject kobj; struct rtnl_link_stats64 slave_stats; }; -- GitLab From 18bf9a724362a3feff649f529178f49b11e877e4 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 19 Sep 2018 15:02:07 +0200 Subject: [PATCH 0859/2269] ip6_tunnel: be careful when accessing the inner header [ Upstream commit 76c0ddd8c3a683f6e2c6e60e11dc1a1558caf4bc ] the ip6 tunnel xmit ndo assumes that the processed skb always contains an ip[v6] header, but syzbot has found a way to send frames that fall short of this assumption, leading to the following splat: BUG: KMSAN: uninit-value in ip6ip6_tnl_xmit net/ipv6/ip6_tunnel.c:1307 [inline] BUG: KMSAN: uninit-value in ip6_tnl_start_xmit+0x7d2/0x1ef0 net/ipv6/ip6_tunnel.c:1390 CPU: 0 PID: 4504 Comm: syz-executor558 Not tainted 4.16.0+ #87 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:17 [inline] dump_stack+0x185/0x1d0 lib/dump_stack.c:53 kmsan_report+0x142/0x240 mm/kmsan/kmsan.c:1067 __msan_warning_32+0x6c/0xb0 mm/kmsan/kmsan_instr.c:683 ip6ip6_tnl_xmit net/ipv6/ip6_tunnel.c:1307 [inline] ip6_tnl_start_xmit+0x7d2/0x1ef0 net/ipv6/ip6_tunnel.c:1390 __netdev_start_xmit include/linux/netdevice.h:4066 [inline] netdev_start_xmit include/linux/netdevice.h:4075 [inline] xmit_one net/core/dev.c:3026 [inline] dev_hard_start_xmit+0x5f1/0xc70 net/core/dev.c:3042 __dev_queue_xmit+0x27ee/0x3520 net/core/dev.c:3557 dev_queue_xmit+0x4b/0x60 net/core/dev.c:3590 packet_snd net/packet/af_packet.c:2944 [inline] packet_sendmsg+0x7c70/0x8a30 net/packet/af_packet.c:2969 sock_sendmsg_nosec net/socket.c:630 [inline] sock_sendmsg net/socket.c:640 [inline] ___sys_sendmsg+0xec0/0x1310 net/socket.c:2046 __sys_sendmmsg+0x42d/0x800 net/socket.c:2136 SYSC_sendmmsg+0xc4/0x110 net/socket.c:2167 SyS_sendmmsg+0x63/0x90 net/socket.c:2162 do_syscall_64+0x309/0x430 arch/x86/entry/common.c:287 entry_SYSCALL_64_after_hwframe+0x3d/0xa2 RIP: 0033:0x441819 RSP: 002b:00007ffe58ee8268 EFLAGS: 00000213 ORIG_RAX: 0000000000000133 RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 0000000000441819 RDX: 0000000000000002 RSI: 0000000020000100 RDI: 0000000000000003 RBP: 00000000006cd018 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000213 R12: 0000000000402510 R13: 00000000004025a0 R14: 0000000000000000 R15: 0000000000000000 Uninit was created at: kmsan_save_stack_with_flags mm/kmsan/kmsan.c:278 [inline] kmsan_internal_poison_shadow+0xb8/0x1b0 mm/kmsan/kmsan.c:188 kmsan_kmalloc+0x94/0x100 mm/kmsan/kmsan.c:314 kmsan_slab_alloc+0x11/0x20 mm/kmsan/kmsan.c:321 slab_post_alloc_hook mm/slab.h:445 [inline] slab_alloc_node mm/slub.c:2737 [inline] __kmalloc_node_track_caller+0xaed/0x11c0 mm/slub.c:4369 __kmalloc_reserve net/core/skbuff.c:138 [inline] __alloc_skb+0x2cf/0x9f0 net/core/skbuff.c:206 alloc_skb include/linux/skbuff.h:984 [inline] alloc_skb_with_frags+0x1d4/0xb20 net/core/skbuff.c:5234 sock_alloc_send_pskb+0xb56/0x1190 net/core/sock.c:2085 packet_alloc_skb net/packet/af_packet.c:2803 [inline] packet_snd net/packet/af_packet.c:2894 [inline] packet_sendmsg+0x6454/0x8a30 net/packet/af_packet.c:2969 sock_sendmsg_nosec net/socket.c:630 [inline] sock_sendmsg net/socket.c:640 [inline] ___sys_sendmsg+0xec0/0x1310 net/socket.c:2046 __sys_sendmmsg+0x42d/0x800 net/socket.c:2136 SYSC_sendmmsg+0xc4/0x110 net/socket.c:2167 SyS_sendmmsg+0x63/0x90 net/socket.c:2162 do_syscall_64+0x309/0x430 arch/x86/entry/common.c:287 entry_SYSCALL_64_after_hwframe+0x3d/0xa2 This change addresses the issue adding the needed check before accessing the inner header. The ipv4 side of the issue is apparently there since the ipv4 over ipv6 initial support, and the ipv6 side predates git history. Fixes: c4d3efafcc93 ("[IPV6] IP6TUNNEL: Add support to IPv4 over IPv6 tunnel.") Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: syzbot+3fde91d4d394747d6db4@syzkaller.appspotmail.com Tested-by: Alexander Potapenko Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ip6_tunnel.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index ee8dbd228fe2b..0e9296f44ee47 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1227,7 +1227,7 @@ static inline int ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) { struct ip6_tnl *t = netdev_priv(dev); - const struct iphdr *iph = ip_hdr(skb); + const struct iphdr *iph; int encap_limit = -1; struct flowi6 fl6; __u8 dsfield; @@ -1235,6 +1235,11 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) u8 tproto; int err; + /* ensure we can access the full inner ip header */ + if (!pskb_may_pull(skb, sizeof(struct iphdr))) + return -1; + + iph = ip_hdr(skb); memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); tproto = ACCESS_ONCE(t->parms.proto); @@ -1298,7 +1303,7 @@ static inline int ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) { struct ip6_tnl *t = netdev_priv(dev); - struct ipv6hdr *ipv6h = ipv6_hdr(skb); + struct ipv6hdr *ipv6h; int encap_limit = -1; __u16 offset; struct flowi6 fl6; @@ -1307,6 +1312,10 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) u8 tproto; int err; + if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h)))) + return -1; + + ipv6h = ipv6_hdr(skb); tproto = ACCESS_ONCE(t->parms.proto); if ((tproto != IPPROTO_IPV6 && tproto != 0) || ip6_tnl_addr_conflict(t, ipv6h)) -- GitLab From deb33b68f40e788c974acf61bc220dc077cb8032 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Mon, 24 Sep 2018 15:48:19 +0200 Subject: [PATCH 0860/2269] ip_tunnel: be careful when accessing the inner header [ Upstream commit ccfec9e5cb2d48df5a955b7bf47f7782157d3bc2] Cong noted that we need the same checks introduced by commit 76c0ddd8c3a6 ("ip6_tunnel: be careful when accessing the inner header") even for ipv4 tunnels. Fixes: c54419321455 ("GRE: Refactor GRE tunneling code.") Suggested-by: Cong Wang Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ip_tunnel.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 4784f3f36b7e0..72eee34092ae4 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -635,6 +635,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, const struct iphdr *tnl_params, u8 protocol) { struct ip_tunnel *tunnel = netdev_priv(dev); + unsigned int inner_nhdr_len = 0; const struct iphdr *inner_iph; struct flowi4 fl4; u8 tos, ttl; @@ -644,6 +645,14 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, __be32 dst; bool connected; + /* ensure we can access the inner net header, for several users below */ + if (skb->protocol == htons(ETH_P_IP)) + inner_nhdr_len = sizeof(struct iphdr); + else if (skb->protocol == htons(ETH_P_IPV6)) + inner_nhdr_len = sizeof(struct ipv6hdr); + if (unlikely(!pskb_may_pull(skb, inner_nhdr_len))) + goto tx_error; + inner_iph = (const struct iphdr *)skb_inner_network_header(skb); connected = (tunnel->parms.iph.daddr != 0); -- GitLab From 32b193216e185a3ba817a179f29a53a9973665a9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 30 Sep 2018 11:33:39 -0700 Subject: [PATCH 0861/2269] ipv4: fix use-after-free in ip_cmsg_recv_dstaddr() [ Upstream commit 64199fc0a46ba211362472f7f942f900af9492fd ] Caching ip_hdr(skb) before a call to pskb_may_pull() is buggy, do not do it. Fixes: 2efd4fca703a ("ip: in cmsg IP(V6)_ORIGDSTADDR call pskb_may_pull") Signed-off-by: Eric Dumazet Cc: Willem de Bruijn Reported-by: syzbot Acked-by: Willem de Bruijn Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ip_sockglue.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 048d5f6dd320e..4ef92ebc4f6d9 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -147,7 +147,6 @@ static void ip_cmsg_recv_security(struct msghdr *msg, struct sk_buff *skb) static void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb) { struct sockaddr_in sin; - const struct iphdr *iph = ip_hdr(skb); __be16 *ports; int end; @@ -162,7 +161,7 @@ static void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb) ports = (__be16 *)skb_transport_header(skb); sin.sin_family = AF_INET; - sin.sin_addr.s_addr = iph->daddr; + sin.sin_addr.s_addr = ip_hdr(skb)->daddr; sin.sin_port = ports[1]; memset(sin.sin_zero, 0, sizeof(sin.sin_zero)); -- GitLab From 48c14f2ea5c58268f4ea59da6467c74cdec9e6f2 Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Thu, 4 Oct 2018 10:12:37 -0700 Subject: [PATCH 0862/2269] ipv6: take rcu lock in rawv6_send_hdrinc() [ Upstream commit a688caa34beb2fd2a92f1b6d33e40cde433ba160 ] In rawv6_send_hdrinc(), in order to avoid an extra dst_hold(), we directly assign the dst to skb and set passed in dst to NULL to avoid double free. However, in error case, we free skb and then do stats update with the dst pointer passed in. This causes use-after-free on the dst. Fix it by taking rcu read lock right before dst could get released to make sure dst does not get freed until the stats update is done. Note: we don't have this issue in ipv4 cause dst is not used for stats update in v4. Syzkaller reported following crash: BUG: KASAN: use-after-free in rawv6_send_hdrinc net/ipv6/raw.c:692 [inline] BUG: KASAN: use-after-free in rawv6_sendmsg+0x4421/0x4630 net/ipv6/raw.c:921 Read of size 8 at addr ffff8801d95ba730 by task syz-executor0/32088 CPU: 1 PID: 32088 Comm: syz-executor0 Not tainted 4.19.0-rc2+ #93 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x1c4/0x2b4 lib/dump_stack.c:113 print_address_description.cold.8+0x9/0x1ff mm/kasan/report.c:256 kasan_report_error mm/kasan/report.c:354 [inline] kasan_report.cold.9+0x242/0x309 mm/kasan/report.c:412 __asan_report_load8_noabort+0x14/0x20 mm/kasan/report.c:433 rawv6_send_hdrinc net/ipv6/raw.c:692 [inline] rawv6_sendmsg+0x4421/0x4630 net/ipv6/raw.c:921 inet_sendmsg+0x1a1/0x690 net/ipv4/af_inet.c:798 sock_sendmsg_nosec net/socket.c:621 [inline] sock_sendmsg+0xd5/0x120 net/socket.c:631 ___sys_sendmsg+0x7fd/0x930 net/socket.c:2114 __sys_sendmsg+0x11d/0x280 net/socket.c:2152 __do_sys_sendmsg net/socket.c:2161 [inline] __se_sys_sendmsg net/socket.c:2159 [inline] __x64_sys_sendmsg+0x78/0xb0 net/socket.c:2159 do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x457099 Code: fd b4 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 cb b4 fb ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007f83756edc78 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 00007f83756ee6d4 RCX: 0000000000457099 RDX: 0000000000000000 RSI: 0000000020003840 RDI: 0000000000000004 RBP: 00000000009300a0 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00000000ffffffff R13: 00000000004d4b30 R14: 00000000004c90b1 R15: 0000000000000000 Allocated by task 32088: save_stack+0x43/0xd0 mm/kasan/kasan.c:448 set_track mm/kasan/kasan.c:460 [inline] kasan_kmalloc+0xc7/0xe0 mm/kasan/kasan.c:553 kasan_slab_alloc+0x12/0x20 mm/kasan/kasan.c:490 kmem_cache_alloc+0x12e/0x730 mm/slab.c:3554 dst_alloc+0xbb/0x1d0 net/core/dst.c:105 ip6_dst_alloc+0x35/0xa0 net/ipv6/route.c:353 ip6_rt_cache_alloc+0x247/0x7b0 net/ipv6/route.c:1186 ip6_pol_route+0x8f8/0xd90 net/ipv6/route.c:1895 ip6_pol_route_output+0x54/0x70 net/ipv6/route.c:2093 fib6_rule_lookup+0x277/0x860 net/ipv6/fib6_rules.c:122 ip6_route_output_flags+0x2c5/0x350 net/ipv6/route.c:2121 ip6_route_output include/net/ip6_route.h:88 [inline] ip6_dst_lookup_tail+0xe27/0x1d60 net/ipv6/ip6_output.c:951 ip6_dst_lookup_flow+0xc8/0x270 net/ipv6/ip6_output.c:1079 rawv6_sendmsg+0x12d9/0x4630 net/ipv6/raw.c:905 inet_sendmsg+0x1a1/0x690 net/ipv4/af_inet.c:798 sock_sendmsg_nosec net/socket.c:621 [inline] sock_sendmsg+0xd5/0x120 net/socket.c:631 ___sys_sendmsg+0x7fd/0x930 net/socket.c:2114 __sys_sendmsg+0x11d/0x280 net/socket.c:2152 __do_sys_sendmsg net/socket.c:2161 [inline] __se_sys_sendmsg net/socket.c:2159 [inline] __x64_sys_sendmsg+0x78/0xb0 net/socket.c:2159 do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe Freed by task 5356: save_stack+0x43/0xd0 mm/kasan/kasan.c:448 set_track mm/kasan/kasan.c:460 [inline] __kasan_slab_free+0x102/0x150 mm/kasan/kasan.c:521 kasan_slab_free+0xe/0x10 mm/kasan/kasan.c:528 __cache_free mm/slab.c:3498 [inline] kmem_cache_free+0x83/0x290 mm/slab.c:3756 dst_destroy+0x267/0x3c0 net/core/dst.c:141 dst_destroy_rcu+0x16/0x19 net/core/dst.c:154 __rcu_reclaim kernel/rcu/rcu.h:236 [inline] rcu_do_batch kernel/rcu/tree.c:2576 [inline] invoke_rcu_callbacks kernel/rcu/tree.c:2880 [inline] __rcu_process_callbacks kernel/rcu/tree.c:2847 [inline] rcu_process_callbacks+0xf23/0x2670 kernel/rcu/tree.c:2864 __do_softirq+0x30b/0xad8 kernel/softirq.c:292 Fixes: 1789a640f556 ("raw: avoid two atomics in xmit") Signed-off-by: Wei Wang Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/raw.c | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index e4462b0ff801b..f08cc6527339c 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -650,8 +650,6 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length, skb->protocol = htons(ETH_P_IPV6); skb->priority = sk->sk_priority; skb->mark = sk->sk_mark; - skb_dst_set(skb, &rt->dst); - *dstp = NULL; skb_put(skb, length); skb_reset_network_header(skb); @@ -664,8 +662,14 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length, skb->transport_header = skb->network_header; err = memcpy_from_msg(iph, msg, length); - if (err) - goto error_fault; + if (err) { + err = -EFAULT; + kfree_skb(skb); + goto error; + } + + skb_dst_set(skb, &rt->dst); + *dstp = NULL; /* if egress device is enslaved to an L3 master device pass the * skb to its handler for processing @@ -674,21 +678,28 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length, if (unlikely(!skb)) return 0; + /* Acquire rcu_read_lock() in case we need to use rt->rt6i_idev + * in the error path. Since skb has been freed, the dst could + * have been queued for deletion. + */ + rcu_read_lock(); IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len); err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb, NULL, rt->dst.dev, dst_output); if (err > 0) err = net_xmit_errno(err); - if (err) - goto error; + if (err) { + IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); + rcu_read_unlock(); + goto error_check; + } + rcu_read_unlock(); out: return 0; -error_fault: - err = -EFAULT; - kfree_skb(skb); error: IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); +error_check: if (err == -ENOBUFS && !np->recverr) err = 0; return err; -- GitLab From 8193b775247a2c38294fd9d1fee5084b5c1b3de8 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 9 Oct 2018 16:48:58 -0700 Subject: [PATCH 0863/2269] net: dsa: bcm_sf2: Call setup during switch resume [ Upstream commit 54baca096386d862d19c10f58f34bf787c6b3cbe ] There is no reason to open code what the switch setup function does, in fact, because we just issued a switch reset, we would make all the register get their default values, including for instance, having unused port be enabled again and wasting power and leading to an inappropriate switch core clock being selected. Fixes: 8cfa94984c9c ("net: dsa: bcm_sf2: add suspend/resume callbacks") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/dsa/bcm_sf2.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index 72d6ffbfd6387..48de089dd9c88 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -772,7 +772,6 @@ static int bcm_sf2_sw_suspend(struct dsa_switch *ds) static int bcm_sf2_sw_resume(struct dsa_switch *ds) { struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); - unsigned int port; int ret; ret = bcm_sf2_sw_rst(priv); @@ -784,12 +783,7 @@ static int bcm_sf2_sw_resume(struct dsa_switch *ds) if (priv->hw_params.num_gphy == 1) bcm_sf2_gphy_enable_set(ds, true); - for (port = 0; port < DSA_MAX_PORTS; port++) { - if ((1 << port) & ds->enabled_port_mask) - bcm_sf2_port_setup(ds, port, NULL); - else if (dsa_is_cpu_port(ds, port)) - bcm_sf2_imp_setup(ds, port); - } + ds->ops->setup(ds); return 0; } -- GitLab From 6c61dae979ae39850a3a9945f93e83e5b5baf31c Mon Sep 17 00:00:00 2001 From: Yunsheng Lin Date: Tue, 25 Sep 2018 10:21:55 +0100 Subject: [PATCH 0864/2269] net: hns: fix for unmapping problem when SMMU is on [ Upstream commit 2e9361efa707e186d91b938e44f9e326725259f7 ] If SMMU is on, there is more likely that skb_shinfo(skb)->frags[i] can not send by a single BD. when this happen, the hns_nic_net_xmit_hw function map the whole data in a frags using skb_frag_dma_map, but unmap each BD' data individually when tx is done, which causes problem when SMMU is on. This patch fixes this problem by ummapping the whole data in a frags when tx is done. Signed-off-by: Yunsheng Lin Signed-off-by: Peng Li Reviewed-by: Yisen Zhuang Signed-off-by: Salil Mehta Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/hisilicon/hns/hnae.c | 2 +- drivers/net/ethernet/hisilicon/hns/hns_enet.c | 30 ++++++++++++------- 2 files changed, 20 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns/hnae.c b/drivers/net/ethernet/hisilicon/hns/hnae.c index a051e582d541a..79d03f8ee7b18 100644 --- a/drivers/net/ethernet/hisilicon/hns/hnae.c +++ b/drivers/net/ethernet/hisilicon/hns/hnae.c @@ -84,7 +84,7 @@ static void hnae_unmap_buffer(struct hnae_ring *ring, struct hnae_desc_cb *cb) if (cb->type == DESC_TYPE_SKB) dma_unmap_single(ring_to_dev(ring), cb->dma, cb->length, ring_to_dma_dir(ring)); - else + else if (cb->length) dma_unmap_page(ring_to_dev(ring), cb->dma, cb->length, ring_to_dma_dir(ring)); } diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c index 07d6a9cf2c556..4faadc3ffe8c6 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c @@ -40,9 +40,9 @@ #define SKB_TMP_LEN(SKB) \ (((SKB)->transport_header - (SKB)->mac_header) + tcp_hdrlen(SKB)) -static void fill_v2_desc(struct hnae_ring *ring, void *priv, - int size, dma_addr_t dma, int frag_end, - int buf_num, enum hns_desc_type type, int mtu) +static void fill_v2_desc_hw(struct hnae_ring *ring, void *priv, int size, + int send_sz, dma_addr_t dma, int frag_end, + int buf_num, enum hns_desc_type type, int mtu) { struct hnae_desc *desc = &ring->desc[ring->next_to_use]; struct hnae_desc_cb *desc_cb = &ring->desc_cb[ring->next_to_use]; @@ -64,7 +64,7 @@ static void fill_v2_desc(struct hnae_ring *ring, void *priv, desc_cb->type = type; desc->addr = cpu_to_le64(dma); - desc->tx.send_size = cpu_to_le16((u16)size); + desc->tx.send_size = cpu_to_le16((u16)send_sz); /* config bd buffer end */ hnae_set_bit(rrcfv, HNSV2_TXD_VLD_B, 1); @@ -133,6 +133,14 @@ static void fill_v2_desc(struct hnae_ring *ring, void *priv, ring_ptr_move_fw(ring, next_to_use); } +static void fill_v2_desc(struct hnae_ring *ring, void *priv, + int size, dma_addr_t dma, int frag_end, + int buf_num, enum hns_desc_type type, int mtu) +{ + fill_v2_desc_hw(ring, priv, size, size, dma, frag_end, + buf_num, type, mtu); +} + static const struct acpi_device_id hns_enet_acpi_match[] = { { "HISI00C1", 0 }, { "HISI00C2", 0 }, @@ -289,15 +297,15 @@ static void fill_tso_desc(struct hnae_ring *ring, void *priv, /* when the frag size is bigger than hardware, split this frag */ for (k = 0; k < frag_buf_num; k++) - fill_v2_desc(ring, priv, - (k == frag_buf_num - 1) ? + fill_v2_desc_hw(ring, priv, k == 0 ? size : 0, + (k == frag_buf_num - 1) ? sizeoflast : BD_MAX_SEND_SIZE, - dma + BD_MAX_SEND_SIZE * k, - frag_end && (k == frag_buf_num - 1) ? 1 : 0, - buf_num, - (type == DESC_TYPE_SKB && !k) ? + dma + BD_MAX_SEND_SIZE * k, + frag_end && (k == frag_buf_num - 1) ? 1 : 0, + buf_num, + (type == DESC_TYPE_SKB && !k) ? DESC_TYPE_SKB : DESC_TYPE_PAGE, - mtu); + mtu); } netdev_tx_t hns_nic_net_xmit_hw(struct net_device *ndev, -- GitLab From 9b4869cf385aa16f89c0f019eed4ec4e36aa441c Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Tue, 9 Oct 2018 17:48:14 +0200 Subject: [PATCH 0865/2269] net: ipv4: update fnhe_pmtu when first hop's MTU changes [ Upstream commit af7d6cce53694a88d6a1bb60c9a239a6a5144459 ] Since commit 5aad1de5ea2c ("ipv4: use separate genid for next hop exceptions"), exceptions get deprecated separately from cached routes. In particular, administrative changes don't clear PMTU anymore. As Stefano described in commit e9fa1495d738 ("ipv6: Reflect MTU changes on PMTU of exceptions for MTU-less routes"), the PMTU discovered before the local MTU change can become stale: - if the local MTU is now lower than the PMTU, that PMTU is now incorrect - if the local MTU was the lowest value in the path, and is increased, we might discover a higher PMTU Similarly to what commit e9fa1495d738 did for IPv6, update PMTU in those cases. If the exception was locked, the discovered PMTU was smaller than the minimal accepted PMTU. In that case, if the new local MTU is smaller than the current PMTU, let PMTU discovery figure out if locking of the exception is still needed. To do this, we need to know the old link MTU in the NETDEV_CHANGEMTU notifier. By the time the notifier is called, dev->mtu has been changed. This patch adds the old MTU as additional information in the notifier structure, and a new call_netdevice_notifiers_u32() function. Fixes: 5aad1de5ea2c ("ipv4: use separate genid for next hop exceptions") Signed-off-by: Sabrina Dubroca Reviewed-by: Stefano Brivio Reviewed-by: David Ahern Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/linux/netdevice.h | 7 ++++++ include/net/ip_fib.h | 1 + net/core/dev.c | 28 ++++++++++++++++++++-- net/ipv4/fib_frontend.c | 12 ++++++---- net/ipv4/fib_semantics.c | 50 +++++++++++++++++++++++++++++++++++++++ 5 files changed, 92 insertions(+), 6 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 2ea7ee1fb4954..a516dbe5869fa 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2307,6 +2307,13 @@ struct netdev_notifier_info { struct net_device *dev; }; +struct netdev_notifier_info_ext { + struct netdev_notifier_info info; /* must be first */ + union { + u32 mtu; + } ext; +}; + struct netdev_notifier_change_info { struct netdev_notifier_info info; /* must be first */ unsigned int flags_changed; diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 5c5d344c06293..32df52869a14d 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -372,6 +372,7 @@ int ip_fib_check_default(__be32 gw, struct net_device *dev); int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force); int fib_sync_down_addr(struct net_device *dev, __be32 local); int fib_sync_up(struct net_device *dev, unsigned int nh_flags); +void fib_sync_mtu(struct net_device *dev, u32 orig_mtu); #ifdef CONFIG_IP_ROUTE_MULTIPATH int fib_multipath_hash(const struct fib_info *fi, const struct flowi4 *fl4, diff --git a/net/core/dev.c b/net/core/dev.c index 85f4a10477075..e8a66ad6d07c2 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1688,6 +1688,28 @@ int call_netdevice_notifiers(unsigned long val, struct net_device *dev) } EXPORT_SYMBOL(call_netdevice_notifiers); +/** + * call_netdevice_notifiers_mtu - call all network notifier blocks + * @val: value passed unmodified to notifier function + * @dev: net_device pointer passed unmodified to notifier function + * @arg: additional u32 argument passed to the notifier function + * + * Call all network notifier blocks. Parameters and return value + * are as for raw_notifier_call_chain(). + */ +static int call_netdevice_notifiers_mtu(unsigned long val, + struct net_device *dev, u32 arg) +{ + struct netdev_notifier_info_ext info = { + .info.dev = dev, + .ext.mtu = arg, + }; + + BUILD_BUG_ON(offsetof(struct netdev_notifier_info_ext, info) != 0); + + return call_netdevice_notifiers_info(val, dev, &info.info); +} + #ifdef CONFIG_NET_INGRESS static struct static_key ingress_needed __read_mostly; @@ -6891,14 +6913,16 @@ int dev_set_mtu(struct net_device *dev, int new_mtu) err = __dev_set_mtu(dev, new_mtu); if (!err) { - err = call_netdevice_notifiers(NETDEV_CHANGEMTU, dev); + err = call_netdevice_notifiers_mtu(NETDEV_CHANGEMTU, dev, + orig_mtu); err = notifier_to_errno(err); if (err) { /* setting mtu back and notifying everyone again, * so that they have a chance to revert changes. */ __dev_set_mtu(dev, orig_mtu); - call_netdevice_notifiers(NETDEV_CHANGEMTU, dev); + call_netdevice_notifiers_mtu(NETDEV_CHANGEMTU, dev, + new_mtu); } } return err; diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 5bbdd05d0cd30..1b3f860f7dcd2 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -1185,7 +1185,8 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); - struct netdev_notifier_changeupper_info *info; + struct netdev_notifier_changeupper_info *upper_info = ptr; + struct netdev_notifier_info_ext *info_ext = ptr; struct in_device *in_dev; struct net *net = dev_net(dev); unsigned int flags; @@ -1220,16 +1221,19 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo fib_sync_up(dev, RTNH_F_LINKDOWN); else fib_sync_down_dev(dev, event, false); - /* fall through */ + rt_cache_flush(net); + break; case NETDEV_CHANGEMTU: + fib_sync_mtu(dev, info_ext->ext.mtu); rt_cache_flush(net); break; case NETDEV_CHANGEUPPER: - info = ptr; + upper_info = ptr; /* flush all routes if dev is linked to or unlinked from * an L3 master device (e.g., VRF) */ - if (info->upper_dev && netif_is_l3_master(info->upper_dev)) + if (upper_info->upper_dev && + netif_is_l3_master(upper_info->upper_dev)) fib_disable_ip(dev, NETDEV_DOWN, true); break; } diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index b557af72cde96..e76b8a7bb8911 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -1520,6 +1520,56 @@ static int call_fib_nh_notifiers(struct fib_nh *fib_nh, return NOTIFY_DONE; } +/* Update the PMTU of exceptions when: + * - the new MTU of the first hop becomes smaller than the PMTU + * - the old MTU was the same as the PMTU, and it limited discovery of + * larger MTUs on the path. With that limit raised, we can now + * discover larger MTUs + * A special case is locked exceptions, for which the PMTU is smaller + * than the minimal accepted PMTU: + * - if the new MTU is greater than the PMTU, don't make any change + * - otherwise, unlock and set PMTU + */ +static void nh_update_mtu(struct fib_nh *nh, u32 new, u32 orig) +{ + struct fnhe_hash_bucket *bucket; + int i; + + bucket = rcu_dereference_protected(nh->nh_exceptions, 1); + if (!bucket) + return; + + for (i = 0; i < FNHE_HASH_SIZE; i++) { + struct fib_nh_exception *fnhe; + + for (fnhe = rcu_dereference_protected(bucket[i].chain, 1); + fnhe; + fnhe = rcu_dereference_protected(fnhe->fnhe_next, 1)) { + if (fnhe->fnhe_mtu_locked) { + if (new <= fnhe->fnhe_pmtu) { + fnhe->fnhe_pmtu = new; + fnhe->fnhe_mtu_locked = false; + } + } else if (new < fnhe->fnhe_pmtu || + orig == fnhe->fnhe_pmtu) { + fnhe->fnhe_pmtu = new; + } + } + } +} + +void fib_sync_mtu(struct net_device *dev, u32 orig_mtu) +{ + unsigned int hash = fib_devindex_hashfn(dev->ifindex); + struct hlist_head *head = &fib_info_devhash[hash]; + struct fib_nh *nh; + + hlist_for_each_entry(nh, head, nh_hash) { + if (nh->nh_dev == dev) + nh_update_mtu(nh, dev->mtu, orig_mtu); + } +} + /* Event force Flags Description * NETDEV_CHANGE 0 LINKDOWN Carrier OFF, not for scope host * NETDEV_DOWN 0 LINKDOWN|DEAD Link down, not for scope host -- GitLab From 335c37612f9fec84f8e361e2ce8148b7cd156d22 Mon Sep 17 00:00:00 2001 From: Jeff Barnhill <0xeffeff@gmail.com> Date: Fri, 21 Sep 2018 00:45:27 +0000 Subject: [PATCH 0866/2269] net/ipv6: Display all addresses in output of /proc/net/if_inet6 [ Upstream commit 86f9bd1ff61c413a2a251fa736463295e4e24733 ] The backend handling for /proc/net/if_inet6 in addrconf.c doesn't properly handle starting/stopping the iteration. The problem is that at some point during the iteration, an overflow is detected and the process is subsequently stopped. The item being shown via seq_printf() when the overflow occurs is not actually shown, though. When start() is subsequently called to resume iterating, it returns the next item, and thus the item that was being processed when the overflow occurred never gets printed. Alter the meaning of the private data member "offset". Currently, when it is not 0 (which only happens at the very beginning), "offset" represents the next hlist item to be printed. After this change, "offset" always represents the current item. This is also consistent with the private data member "bucket", which represents the current bucket, and also the use of "pos" as defined in seq_file.txt: The pos passed to start() will always be either zero, or the most recent pos used in the previous session. Signed-off-by: Jeff Barnhill <0xeffeff@gmail.com> Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/addrconf.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 6a76e41e6d516..569f7c3f6b956 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -4136,7 +4136,6 @@ static struct inet6_ifaddr *if6_get_first(struct seq_file *seq, loff_t pos) p++; continue; } - state->offset++; return ifa; } @@ -4160,13 +4159,12 @@ static struct inet6_ifaddr *if6_get_next(struct seq_file *seq, return ifa; } + state->offset = 0; while (++state->bucket < IN6_ADDR_HSIZE) { - state->offset = 0; hlist_for_each_entry_rcu_bh(ifa, &inet6_addr_lst[state->bucket], addr_lst) { if (!net_eq(dev_net(ifa->idev->dev), net)) continue; - state->offset++; return ifa; } } -- GitLab From 19c5e73c745cac1ff3c54526335f16c515a1e3a7 Mon Sep 17 00:00:00 2001 From: Sean Tranchetti Date: Thu, 20 Sep 2018 14:29:45 -0600 Subject: [PATCH 0867/2269] netlabel: check for IPV4MASK in addrinfo_get [ Upstream commit f88b4c01b97e09535505cf3c327fdbce55c27f00 ] netlbl_unlabel_addrinfo_get() assumes that if it finds the NLBL_UNLABEL_A_IPV4ADDR attribute, it must also have the NLBL_UNLABEL_A_IPV4MASK attribute as well. However, this is not necessarily the case as the current checks in netlbl_unlabel_staticadd() and friends are not sufficent to enforce this. If passed a netlink message with NLBL_UNLABEL_A_IPV4ADDR, NLBL_UNLABEL_A_IPV6ADDR, and NLBL_UNLABEL_A_IPV6MASK attributes, these functions will all call netlbl_unlabel_addrinfo_get() which will then attempt dereference NULL when fetching the non-existent NLBL_UNLABEL_A_IPV4MASK attribute: Unable to handle kernel NULL pointer dereference at virtual address 0 Process unlab (pid: 31762, stack limit = 0xffffff80502d8000) Call trace: netlbl_unlabel_addrinfo_get+0x44/0xd8 netlbl_unlabel_staticremovedef+0x98/0xe0 genl_rcv_msg+0x354/0x388 netlink_rcv_skb+0xac/0x118 genl_rcv+0x34/0x48 netlink_unicast+0x158/0x1f0 netlink_sendmsg+0x32c/0x338 sock_sendmsg+0x44/0x60 ___sys_sendmsg+0x1d0/0x2a8 __sys_sendmsg+0x64/0xb4 SyS_sendmsg+0x34/0x4c el0_svc_naked+0x34/0x38 Code: 51001149 7100113f 540000a0 f9401508 (79400108) ---[ end trace f6438a488e737143 ]--- Kernel panic - not syncing: Fatal exception Signed-off-by: Sean Tranchetti Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/netlabel/netlabel_unlabeled.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index c070dfc0190aa..c92894c3e40a3 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -781,7 +781,8 @@ static int netlbl_unlabel_addrinfo_get(struct genl_info *info, { u32 addr_len; - if (info->attrs[NLBL_UNLABEL_A_IPV4ADDR]) { + if (info->attrs[NLBL_UNLABEL_A_IPV4ADDR] && + info->attrs[NLBL_UNLABEL_A_IPV4MASK]) { addr_len = nla_len(info->attrs[NLBL_UNLABEL_A_IPV4ADDR]); if (addr_len != sizeof(struct in_addr) && addr_len != nla_len(info->attrs[NLBL_UNLABEL_A_IPV4MASK])) -- GitLab From d9bf6699aee863044cbb8c2b5d2de5e1db4fd7d8 Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Fri, 5 Oct 2018 09:04:40 +0200 Subject: [PATCH 0868/2269] net: mvpp2: Extract the correct ethtype from the skb for tx csum offload [ Upstream commit 35f3625c21852ad839f20c91c7d81c4c1101e207 ] When offloading the L3 and L4 csum computation on TX, we need to extract the l3_proto from the ethtype, independently of the presence of a vlan tag. The actual driver uses skb->protocol as-is, resulting in packets with the wrong L4 checksum being sent when there's a vlan tag in the packet header and checksum offloading is enabled. This commit makes use of vlan_protocol_get() to get the correct ethtype regardless the presence of a vlan tag. Fixes: 3f518509dedc ("ethernet: Add new driver for Marvell Armada 375 network unit") Signed-off-by: Maxime Chevallier Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/marvell/mvpp2.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c index 529be74f609d5..c5e9a897fbdf9 100644 --- a/drivers/net/ethernet/marvell/mvpp2.c +++ b/drivers/net/ethernet/marvell/mvpp2.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -5101,7 +5102,7 @@ static void mvpp2_txq_desc_put(struct mvpp2_tx_queue *txq) } /* Set Tx descriptors fields relevant for CSUM calculation */ -static u32 mvpp2_txq_desc_csum(int l3_offs, int l3_proto, +static u32 mvpp2_txq_desc_csum(int l3_offs, __be16 l3_proto, int ip_hdr_len, int l4_proto) { u32 command; @@ -6065,14 +6066,15 @@ static u32 mvpp2_skb_tx_csum(struct mvpp2_port *port, struct sk_buff *skb) if (skb->ip_summed == CHECKSUM_PARTIAL) { int ip_hdr_len = 0; u8 l4_proto; + __be16 l3_proto = vlan_get_protocol(skb); - if (skb->protocol == htons(ETH_P_IP)) { + if (l3_proto == htons(ETH_P_IP)) { struct iphdr *ip4h = ip_hdr(skb); /* Calculate IPv4 checksum and L4 checksum */ ip_hdr_len = ip4h->ihl; l4_proto = ip4h->protocol; - } else if (skb->protocol == htons(ETH_P_IPV6)) { + } else if (l3_proto == htons(ETH_P_IPV6)) { struct ipv6hdr *ip6h = ipv6_hdr(skb); /* Read l4_protocol from one of IPv6 extra headers */ @@ -6084,7 +6086,7 @@ static u32 mvpp2_skb_tx_csum(struct mvpp2_port *port, struct sk_buff *skb) } return mvpp2_txq_desc_csum(skb_network_offset(skb), - skb->protocol, ip_hdr_len, l4_proto); + l3_proto, ip_hdr_len, l4_proto); } return MVPP2_TXD_L4_CSUM_NOT | MVPP2_TXD_IP_CSUM_DISABLE; -- GitLab From 85ebbc5a2543f3ffcc535dbeb9842af542f3edb1 Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Tue, 18 Sep 2018 16:58:47 +0200 Subject: [PATCH 0869/2269] net: mvpp2: fix a txq_done race condition [ Upstream commit 774268f3e51b53ed432a1ec516574fd5ba469398 ] When no Tx IRQ is available, the txq_done() routine (called from tx_done()) shouldn't be called from the polling function, as in such case it is already called in the Tx path thanks to an hrtimer. This mostly occurred when using PPv2.1, as the engine then do not have Tx IRQs. Fixes: edc660fa09e2 ("net: mvpp2: replace TX coalescing interrupts with hrtimer") Reported-by: Stefan Chulski Signed-off-by: Antoine Tenart Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/marvell/mvpp2.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c index c5e9a897fbdf9..00e6f1d155a6f 100644 --- a/drivers/net/ethernet/marvell/mvpp2.c +++ b/drivers/net/ethernet/marvell/mvpp2.c @@ -6534,10 +6534,12 @@ static int mvpp2_poll(struct napi_struct *napi, int budget) cause_rx_tx & ~MVPP2_CAUSE_MISC_SUM_MASK); } - cause_tx = cause_rx_tx & MVPP2_CAUSE_TXQ_OCCUP_DESC_ALL_MASK; - if (cause_tx) { - cause_tx >>= MVPP2_CAUSE_TXQ_OCCUP_DESC_ALL_OFFSET; - mvpp2_tx_done(port, cause_tx, qv->sw_thread_id); + if (port->has_tx_irqs) { + cause_tx = cause_rx_tx & MVPP2_CAUSE_TXQ_OCCUP_DESC_ALL_MASK; + if (cause_tx) { + cause_tx >>= MVPP2_CAUSE_TXQ_OCCUP_DESC_ALL_OFFSET; + mvpp2_tx_done(port, cause_tx, qv->sw_thread_id); + } } /* Process RX packets */ -- GitLab From d9057423312e9d12f02290cb97f14300330a729e Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 3 Oct 2018 15:05:36 -0700 Subject: [PATCH 0870/2269] net: sched: Add policy validation for tc attributes [ Upstream commit 8b4c3cdd9dd8290343ce959a132d3b334062c5b9 ] A number of TC attributes are processed without proper validation (e.g., length checks). Add a tca policy for all input attributes and use when invoking nlmsg_parse. The 2 Fixes tags below cover the latest additions. The other attributes are a string (KIND), nested attribute (OPTIONS which does seem to have validation in most cases), for dumps only or a flag. Fixes: 5bc1701881e39 ("net: sched: introduce multichain support for filters") Fixes: d47a6b0e7c492 ("net: sched: introduce ingress/egress block index attributes for qdisc") Signed-off-by: David Ahern Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/sch_api.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 22bc6fc483111..cd69aa0675430 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1216,6 +1216,16 @@ check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w) * Delete/get qdisc. */ +const struct nla_policy rtm_tca_policy[TCA_MAX + 1] = { + [TCA_KIND] = { .type = NLA_STRING }, + [TCA_OPTIONS] = { .type = NLA_NESTED }, + [TCA_RATE] = { .type = NLA_BINARY, + .len = sizeof(struct tc_estimator) }, + [TCA_STAB] = { .type = NLA_NESTED }, + [TCA_DUMP_INVISIBLE] = { .type = NLA_FLAG }, + [TCA_CHAIN] = { .type = NLA_U32 }, +}; + static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, struct netlink_ext_ack *extack) { @@ -1232,7 +1242,8 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) return -EPERM; - err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL, extack); + err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, rtm_tca_policy, + extack); if (err < 0) return err; @@ -1302,7 +1313,8 @@ static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, replay: /* Reinit, just in case something touches this. */ - err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL, extack); + err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, rtm_tca_policy, + extack); if (err < 0) return err; @@ -1512,7 +1524,8 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb) idx = 0; ASSERT_RTNL(); - err = nlmsg_parse(nlh, sizeof(*tcm), tca, TCA_MAX, NULL, NULL); + err = nlmsg_parse(nlh, sizeof(*tcm), tca, TCA_MAX, + rtm_tca_policy, NULL); if (err < 0) return err; @@ -1729,7 +1742,8 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) return -EPERM; - err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL, extack); + err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, rtm_tca_policy, + extack); if (err < 0) return err; -- GitLab From 3e80ad8cbf228d74666a862fdf2c38f5b6b3c8aa Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 2 Oct 2018 16:52:03 -0700 Subject: [PATCH 0871/2269] net: systemport: Fix wake-up interrupt race during resume [ Upstream commit 45ec318578c0c22a11f5b9927d064418e1ab1905 ] The AON_PM_L2 is normally used to trigger and identify the source of a wake-up event. Since the RX_SYS clock is no longer turned off, we also have an interrupt being sent to the SYSTEMPORT INTRL_2_0 controller, and that interrupt remains active up until the magic packet detector is disabled which happens much later during the driver resumption. The race happens if we have a CPU that is entering the SYSTEMPORT INTRL2_0 handler during resume, and another CPU has managed to clear the wake-up interrupt during bcm_sysport_resume_from_wol(). In that case, we have the first CPU stuck in the interrupt handler with an interrupt cause that has been cleared under its feet, and so we keep returning IRQ_NONE and we never make any progress. This was not a problem before because we would always turn off the RX_SYS clock during WoL, so the SYSTEMPORT INTRL2_0 would also be turned off as well, thus not latching the interrupt. The fix is to make sure we do not enable either the MPD or BRCM_TAG_MATCH interrupts since those are redundant with what the AON_PM_L2 interrupt controller already processes and they would cause such a race to occur. Fixes: bb9051a2b230 ("net: systemport: Add support for WAKE_FILTER") Fixes: 83e82f4c706b ("net: systemport: add Wake-on-LAN support") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/broadcom/bcmsysport.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index 0fff2432ab4cd..6e7f9a470ea18 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -1001,14 +1001,22 @@ static void bcm_sysport_resume_from_wol(struct bcm_sysport_priv *priv) { u32 reg; - /* Stop monitoring MPD interrupt */ - intrl2_0_mask_set(priv, INTRL2_0_MPD); - /* Clear the MagicPacket detection logic */ reg = umac_readl(priv, UMAC_MPD_CTRL); reg &= ~MPD_EN; umac_writel(priv, reg, UMAC_MPD_CTRL); + reg = intrl2_0_readl(priv, INTRL2_CPU_STATUS); + if (reg & INTRL2_0_MPD) + netdev_info(priv->netdev, "Wake-on-LAN (MPD) interrupt!\n"); + + if (reg & INTRL2_0_BRCM_MATCH_TAG) { + reg = rxchk_readl(priv, RXCHK_BRCM_TAG_MATCH_STATUS) & + RXCHK_BRCM_TAG_MATCH_MASK; + netdev_info(priv->netdev, + "Wake-on-LAN (filters 0x%02x) interrupt!\n", reg); + } + netif_dbg(priv, wol, priv->netdev, "resumed from WOL\n"); } @@ -1043,11 +1051,6 @@ static irqreturn_t bcm_sysport_rx_isr(int irq, void *dev_id) if (priv->irq0_stat & INTRL2_0_TX_RING_FULL) bcm_sysport_tx_reclaim_all(priv); - if (priv->irq0_stat & INTRL2_0_MPD) { - netdev_info(priv->netdev, "Wake-on-LAN interrupt!\n"); - bcm_sysport_resume_from_wol(priv); - } - if (!priv->is_lite) goto out; @@ -2248,9 +2251,6 @@ static int bcm_sysport_suspend_to_wol(struct bcm_sysport_priv *priv) /* UniMAC receive needs to be turned on */ umac_enable_set(priv, CMD_RX_EN, 1); - /* Enable the interrupt wake-up source */ - intrl2_0_mask_clear(priv, INTRL2_0_MPD); - netif_dbg(priv, wol, ndev, "entered WOL mode\n"); return 0; -- GitLab From 133aea0f2108f5c3b68a621f20caf275f7e90e64 Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Fri, 28 Sep 2018 17:04:30 -0600 Subject: [PATCH 0872/2269] net/usb: cancel pending work when unbinding smsc75xx [ Upstream commit f7b2a56e1f3dcbdb4cf09b2b63e859ffe0e09df8 ] Cancel pending work before freeing smsc75xx private data structure during binding. This fixes the following crash in the driver: BUG: unable to handle kernel NULL pointer dereference at 0000000000000050 IP: mutex_lock+0x2b/0x3f Workqueue: events smsc75xx_deferred_multicast_write [smsc75xx] task: ffff8caa83e85700 task.stack: ffff948b80518000 RIP: 0010:mutex_lock+0x2b/0x3f Call Trace: smsc75xx_deferred_multicast_write+0x40/0x1af [smsc75xx] process_one_work+0x18d/0x2fc worker_thread+0x1a2/0x269 ? pr_cont_work+0x58/0x58 kthread+0xfa/0x10a ? pr_cont_work+0x58/0x58 ? rcu_read_unlock_sched_notrace+0x48/0x48 ret_from_fork+0x22/0x40 Signed-off-by: Yu Zhao Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/smsc75xx.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/smsc75xx.c b/drivers/net/usb/smsc75xx.c index 05553d2524469..b64b1ee56d2d9 100644 --- a/drivers/net/usb/smsc75xx.c +++ b/drivers/net/usb/smsc75xx.c @@ -1517,6 +1517,7 @@ static void smsc75xx_unbind(struct usbnet *dev, struct usb_interface *intf) { struct smsc75xx_priv *pdata = (struct smsc75xx_priv *)(dev->data[0]); if (pdata) { + cancel_work_sync(&pdata->set_multicast); netif_dbg(dev, ifdown, dev->net, "free pdata\n"); kfree(pdata); pdata = NULL; -- GitLab From 0d5b9edea99531145d4cade061ad6c7238654c48 Mon Sep 17 00:00:00 2001 From: Shahed Shaikh Date: Wed, 26 Sep 2018 12:41:10 -0700 Subject: [PATCH 0873/2269] qlcnic: fix Tx descriptor corruption on 82xx devices [ Upstream commit c333fa0c4f220f8f7ea5acd6b0ebf3bf13fd684d ] In regular NIC transmission flow, driver always configures MAC using Tx queue zero descriptor as a part of MAC learning flow. But with multi Tx queue supported NIC, regular transmission can occur on any non-zero Tx queue and from that context it uses Tx queue zero descriptor to configure MAC, at the same time TX queue zero could be used by another CPU for regular transmission which could lead to Tx queue zero descriptor corruption and cause FW abort. This patch fixes this in such a way that driver always configures learned MAC address from the same Tx queue which is used for regular transmission. Fixes: 7e2cf4feba05 ("qlcnic: change driver hardware interface mechanism") Signed-off-by: Shahed Shaikh Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/qlogic/qlcnic/qlcnic.h | 8 +++++--- drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c | 3 ++- drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.h | 3 ++- drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.h | 3 ++- drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c | 12 ++++++------ 5 files changed, 17 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h index 81312924df140..0c443ea98479a 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h @@ -1800,7 +1800,8 @@ struct qlcnic_hardware_ops { int (*config_loopback) (struct qlcnic_adapter *, u8); int (*clear_loopback) (struct qlcnic_adapter *, u8); int (*config_promisc_mode) (struct qlcnic_adapter *, u32); - void (*change_l2_filter) (struct qlcnic_adapter *, u64 *, u16); + void (*change_l2_filter)(struct qlcnic_adapter *adapter, u64 *addr, + u16 vlan, struct qlcnic_host_tx_ring *tx_ring); int (*get_board_info) (struct qlcnic_adapter *); void (*set_mac_filter_count) (struct qlcnic_adapter *); void (*free_mac_list) (struct qlcnic_adapter *); @@ -2064,9 +2065,10 @@ static inline int qlcnic_nic_set_promisc(struct qlcnic_adapter *adapter, } static inline void qlcnic_change_filter(struct qlcnic_adapter *adapter, - u64 *addr, u16 id) + u64 *addr, u16 vlan, + struct qlcnic_host_tx_ring *tx_ring) { - adapter->ahw->hw_ops->change_l2_filter(adapter, addr, id); + adapter->ahw->hw_ops->change_l2_filter(adapter, addr, vlan, tx_ring); } static inline int qlcnic_get_board_info(struct qlcnic_adapter *adapter) diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c index 46b0372dd0326..1fc84d8f891b3 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c @@ -2134,7 +2134,8 @@ out: } void qlcnic_83xx_change_l2_filter(struct qlcnic_adapter *adapter, u64 *addr, - u16 vlan_id) + u16 vlan_id, + struct qlcnic_host_tx_ring *tx_ring) { u8 mac[ETH_ALEN]; memcpy(&mac, addr, ETH_ALEN); diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.h index b75a812468569..73fe2f64491de 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.h +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.h @@ -550,7 +550,8 @@ int qlcnic_83xx_wrt_reg_indirect(struct qlcnic_adapter *, ulong, u32); int qlcnic_83xx_nic_set_promisc(struct qlcnic_adapter *, u32); int qlcnic_83xx_config_hw_lro(struct qlcnic_adapter *, int); int qlcnic_83xx_config_rss(struct qlcnic_adapter *, int); -void qlcnic_83xx_change_l2_filter(struct qlcnic_adapter *, u64 *, u16); +void qlcnic_83xx_change_l2_filter(struct qlcnic_adapter *adapter, u64 *addr, + u16 vlan, struct qlcnic_host_tx_ring *ring); int qlcnic_83xx_get_pci_info(struct qlcnic_adapter *, struct qlcnic_pci_info *); int qlcnic_83xx_set_nic_info(struct qlcnic_adapter *, struct qlcnic_info *); void qlcnic_83xx_initialize_nic(struct qlcnic_adapter *, int); diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.h index 4bb33af8e2b3a..56a3bd9e37dcd 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.h +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.h @@ -173,7 +173,8 @@ int qlcnic_82xx_napi_add(struct qlcnic_adapter *adapter, struct net_device *netdev); void qlcnic_82xx_get_beacon_state(struct qlcnic_adapter *); void qlcnic_82xx_change_filter(struct qlcnic_adapter *adapter, - u64 *uaddr, u16 vlan_id); + u64 *uaddr, u16 vlan_id, + struct qlcnic_host_tx_ring *tx_ring); int qlcnic_82xx_config_intr_coalesce(struct qlcnic_adapter *, struct ethtool_coalesce *); int qlcnic_82xx_set_rx_coalesce(struct qlcnic_adapter *); diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c index 84dd83031a1bf..9647578cbe6a8 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c @@ -268,13 +268,12 @@ static void qlcnic_add_lb_filter(struct qlcnic_adapter *adapter, } void qlcnic_82xx_change_filter(struct qlcnic_adapter *adapter, u64 *uaddr, - u16 vlan_id) + u16 vlan_id, struct qlcnic_host_tx_ring *tx_ring) { struct cmd_desc_type0 *hwdesc; struct qlcnic_nic_req *req; struct qlcnic_mac_req *mac_req; struct qlcnic_vlan_req *vlan_req; - struct qlcnic_host_tx_ring *tx_ring = adapter->tx_ring; u32 producer; u64 word; @@ -301,7 +300,8 @@ void qlcnic_82xx_change_filter(struct qlcnic_adapter *adapter, u64 *uaddr, static void qlcnic_send_filter(struct qlcnic_adapter *adapter, struct cmd_desc_type0 *first_desc, - struct sk_buff *skb) + struct sk_buff *skb, + struct qlcnic_host_tx_ring *tx_ring) { struct vlan_ethhdr *vh = (struct vlan_ethhdr *)(skb->data); struct ethhdr *phdr = (struct ethhdr *)(skb->data); @@ -335,7 +335,7 @@ static void qlcnic_send_filter(struct qlcnic_adapter *adapter, tmp_fil->vlan_id == vlan_id) { if (jiffies > (QLCNIC_READD_AGE * HZ + tmp_fil->ftime)) qlcnic_change_filter(adapter, &src_addr, - vlan_id); + vlan_id, tx_ring); tmp_fil->ftime = jiffies; return; } @@ -350,7 +350,7 @@ static void qlcnic_send_filter(struct qlcnic_adapter *adapter, if (!fil) return; - qlcnic_change_filter(adapter, &src_addr, vlan_id); + qlcnic_change_filter(adapter, &src_addr, vlan_id, tx_ring); fil->ftime = jiffies; fil->vlan_id = vlan_id; memcpy(fil->faddr, &src_addr, ETH_ALEN); @@ -766,7 +766,7 @@ netdev_tx_t qlcnic_xmit_frame(struct sk_buff *skb, struct net_device *netdev) } if (adapter->drv_mac_learn) - qlcnic_send_filter(adapter, first_desc, skb); + qlcnic_send_filter(adapter, first_desc, skb, tx_ring); tx_ring->tx_stats.tx_bytes += skb->len; tx_ring->tx_stats.xmit_called++; -- GitLab From 72675512fb1a823326d8c26ffa5a9a59ecb54bbb Mon Sep 17 00:00:00 2001 From: Giacinto Cifelli Date: Wed, 10 Oct 2018 20:05:53 +0200 Subject: [PATCH 0874/2269] qmi_wwan: Added support for Gemalto's Cinterion ALASxx WWAN interface MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 4f7617705bfff84d756fe4401a1f4f032f374984 ] Added support for Gemalto's Cinterion ALASxx WWAN interfaces by adding QMI_FIXED_INTF with Cinterion's VID and PID. Signed-off-by: Giacinto Cifelli Acked-by: Bjørn Mork Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/qmi_wwan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index c5d4b35bb72ae..11a25cef113f7 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1233,6 +1233,7 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x0b3c, 0xc00b, 4)}, /* Olivetti Olicard 500 */ {QMI_FIXED_INTF(0x1e2d, 0x0060, 4)}, /* Cinterion PLxx */ {QMI_FIXED_INTF(0x1e2d, 0x0053, 4)}, /* Cinterion PHxx,PXxx */ + {QMI_FIXED_INTF(0x1e2d, 0x0063, 10)}, /* Cinterion ALASxx (1 RmNet) */ {QMI_FIXED_INTF(0x1e2d, 0x0082, 4)}, /* Cinterion PHxx,PXxx (2 RmNet) */ {QMI_FIXED_INTF(0x1e2d, 0x0082, 5)}, /* Cinterion PHxx,PXxx (2 RmNet) */ {QMI_FIXED_INTF(0x1e2d, 0x0083, 4)}, /* Cinterion PHxx,PXxx (1 RmNet + USB Audio)*/ -- GitLab From 5f999abba33f6788f52cdadae3432f5e731c09bc Mon Sep 17 00:00:00 2001 From: Mauricio Faria de Oliveira Date: Mon, 1 Oct 2018 22:46:40 -0300 Subject: [PATCH 0875/2269] rtnetlink: fix rtnl_fdb_dump() for ndmsg header [ Upstream commit bd961c9bc66497f0c63f4ba1d02900bb85078366 ] Currently, rtnl_fdb_dump() assumes the family header is 'struct ifinfomsg', which is not always true -- 'struct ndmsg' is used by iproute2 ('ip neigh'). The problem is, the function bails out early if nlmsg_parse() fails, which does occur for iproute2 usage of 'struct ndmsg' because the payload length is shorter than the family header alone (as 'struct ifinfomsg' is assumed). This breaks backward compatibility with userspace -- nothing is sent back. Some examples with iproute2 and netlink library for go [1]: 1) $ bridge fdb show 33:33:00:00:00:01 dev ens3 self permanent 01:00:5e:00:00:01 dev ens3 self permanent 33:33:ff:15:98:30 dev ens3 self permanent This one works, as it uses 'struct ifinfomsg'. fdb_show() @ iproute2/bridge/fdb.c """ .n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)), ... if (rtnl_dump_request(&rth, RTM_GETNEIGH, [...] """ 2) $ ip --family bridge neigh RTNETLINK answers: Invalid argument Dump terminated This one fails, as it uses 'struct ndmsg'. do_show_or_flush() @ iproute2/ip/ipneigh.c """ .n.nlmsg_type = RTM_GETNEIGH, .n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg)), """ 3) $ ./neighlist < no output > This one fails, as it uses 'struct ndmsg'-based. neighList() @ netlink/neigh_linux.go """ req := h.newNetlinkRequest(unix.RTM_GETNEIGH, [...] msg := Ndmsg{ """ The actual breakage was introduced by commit 0ff50e83b512 ("net: rtnetlink: bail out from rtnl_fdb_dump() on parse error"), because nlmsg_parse() fails if the payload length (with the _actual_ family header) is less than the family header length alone (which is assumed, in parameter 'hdrlen'). This is true in the examples above with struct ndmsg, with size and payload length shorter than struct ifinfomsg. However, that commit just intends to fix something under the assumption the family header is indeed an 'struct ifinfomsg' - by preventing access to the payload as such (via 'ifm' pointer) if the payload length is not sufficient to actually contain it. The assumption was introduced by commit 5e6d24358799 ("bridge: netlink dump interface at par with brctl"), to support iproute2's 'bridge fdb' command (not 'ip neigh') which indeed uses 'struct ifinfomsg', thus is not broken. So, in order to unbreak the 'struct ndmsg' family headers and still allow 'struct ifinfomsg' to continue to work, check for the known message sizes used with 'struct ndmsg' in iproute2 (with zero or one attribute which is not used in this function anyway) then do not parse the data as ifinfomsg. Same examples with this patch applied (or revert/before the original fix): $ bridge fdb show 33:33:00:00:00:01 dev ens3 self permanent 01:00:5e:00:00:01 dev ens3 self permanent 33:33:ff:15:98:30 dev ens3 self permanent $ ip --family bridge neigh dev ens3 lladdr 33:33:00:00:00:01 PERMANENT dev ens3 lladdr 01:00:5e:00:00:01 PERMANENT dev ens3 lladdr 33:33:ff:15:98:30 PERMANENT $ ./neighlist netlink.Neigh{LinkIndex:2, Family:7, State:128, Type:0, Flags:2, IP:net.IP(nil), HardwareAddr:net.HardwareAddr{0x33, 0x33, 0x0, 0x0, 0x0, 0x1}, LLIPAddr:net.IP(nil), Vlan:0, VNI:0} netlink.Neigh{LinkIndex:2, Family:7, State:128, Type:0, Flags:2, IP:net.IP(nil), HardwareAddr:net.HardwareAddr{0x1, 0x0, 0x5e, 0x0, 0x0, 0x1}, LLIPAddr:net.IP(nil), Vlan:0, VNI:0} netlink.Neigh{LinkIndex:2, Family:7, State:128, Type:0, Flags:2, IP:net.IP(nil), HardwareAddr:net.HardwareAddr{0x33, 0x33, 0xff, 0x15, 0x98, 0x30}, LLIPAddr:net.IP(nil), Vlan:0, VNI:0} Tested on mainline (v4.19-rc6) and net-next (3bd09b05b068). References: [1] netlink library for go (test-case) https://github.com/vishvananda/netlink $ cat ~/go/src/neighlist/main.go package main import ("fmt"; "syscall"; "github.com/vishvananda/netlink") func main() { neighs, _ := netlink.NeighList(0, syscall.AF_BRIDGE) for _, neigh := range neighs { fmt.Printf("%#v\n", neigh) } } $ export GOPATH=~/go $ go get github.com/vishvananda/netlink $ go build neighlist $ ~/go/src/neighlist/neighlist Thanks to David Ahern for suggestions to improve this patch. Fixes: 0ff50e83b512 ("net: rtnetlink: bail out from rtnl_fdb_dump() on parse error") Fixes: 5e6d24358799 ("bridge: netlink dump interface at par with brctl") Reported-by: Aidan Obley Signed-off-by: Mauricio Faria de Oliveira Reviewed-by: David Ahern Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/rtnetlink.c | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index efe396cc77b5f..5272e2ae375b5 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -3292,16 +3292,27 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb) int err = 0; int fidx = 0; - err = nlmsg_parse(cb->nlh, sizeof(struct ifinfomsg), tb, - IFLA_MAX, ifla_policy, NULL); - if (err < 0) { - return -EINVAL; - } else if (err == 0) { - if (tb[IFLA_MASTER]) - br_idx = nla_get_u32(tb[IFLA_MASTER]); - } + /* A hack to preserve kernel<->userspace interface. + * Before Linux v4.12 this code accepted ndmsg since iproute2 v3.3.0. + * However, ndmsg is shorter than ifinfomsg thus nlmsg_parse() bails. + * So, check for ndmsg with an optional u32 attribute (not used here). + * Fortunately these sizes don't conflict with the size of ifinfomsg + * with an optional attribute. + */ + if (nlmsg_len(cb->nlh) != sizeof(struct ndmsg) && + (nlmsg_len(cb->nlh) != sizeof(struct ndmsg) + + nla_attr_size(sizeof(u32)))) { + err = nlmsg_parse(cb->nlh, sizeof(struct ifinfomsg), tb, + IFLA_MAX, ifla_policy, NULL); + if (err < 0) { + return -EINVAL; + } else if (err == 0) { + if (tb[IFLA_MASTER]) + br_idx = nla_get_u32(tb[IFLA_MASTER]); + } - brport_idx = ifm->ifi_index; + brport_idx = ifm->ifi_index; + } if (br_idx) { br_dev = __dev_get_by_index(net, br_idx); -- GitLab From a8b0f004eb9022d9150932d94126cab4911a5159 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 2 Oct 2018 15:47:35 -0700 Subject: [PATCH 0876/2269] rtnl: limit IFLA_NUM_TX_QUEUES and IFLA_NUM_RX_QUEUES to 4096 [ Upstream commit 0e1d6eca5113858ed2caea61a5adc03c595f6096 ] We have an impressive number of syzkaller bugs that are linked to the fact that syzbot was able to create a networking device with millions of TX (or RX) queues. Let's limit the number of RX/TX queues to 4096, this really should cover all known cases. A separate patch will add various cond_resched() in the loops handling sysfs entries at device creation and dismantle. Tested: lpaa6:~# ip link add gre-4097 numtxqueues 4097 numrxqueues 4097 type ip6gretap RTNETLINK answers: Invalid argument lpaa6:~# time ip link add gre-4096 numtxqueues 4096 numrxqueues 4096 type ip6gretap real 0m0.180s user 0m0.000s sys 0m0.107s Fixes: 76ff5cc91935 ("rtnl: allow to specify number of rx and tx queues on device creation") Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/rtnetlink.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 5272e2ae375b5..760364526dc1a 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2430,6 +2430,12 @@ struct net_device *rtnl_create_link(struct net *net, else if (ops->get_num_rx_queues) num_rx_queues = ops->get_num_rx_queues(); + if (num_tx_queues < 1 || num_tx_queues > 4096) + return ERR_PTR(-EINVAL); + + if (num_rx_queues < 1 || num_rx_queues > 4096) + return ERR_PTR(-EINVAL); + dev = alloc_netdev_mqs(ops->priv_size, ifname, name_assign_type, ops->setup, num_tx_queues, num_rx_queues); if (!dev) -- GitLab From d63d3995d7db668a07765fd92cbb66c06968a5b3 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 20 Sep 2018 17:27:28 +0800 Subject: [PATCH 0877/2269] sctp: update dst pmtu with the correct daddr [ Upstream commit d7ab5cdce54da631f0c8c11e506c974536a3581e ] When processing pmtu update from an icmp packet, it calls .update_pmtu with sk instead of skb in sctp_transport_update_pmtu. However for sctp, the daddr in the transport might be different from inet_sock->inet_daddr or sk->sk_v6_daddr, which is used to update or create the route cache. The incorrect daddr will cause a different route cache created for the path. So before calling .update_pmtu, inet_sock->inet_daddr/sk->sk_v6_daddr should be updated with the daddr in the transport, and update it back after it's done. The issue has existed since route exceptions introduction. Fixes: 4895c771c7f0 ("ipv4: Add FIB nexthop exceptions.") Reported-by: ian.periam@dialogic.com Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sctp/transport.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/net/sctp/transport.c b/net/sctp/transport.c index e0c2a4e230393..43105cf04bc45 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -254,6 +254,7 @@ void sctp_transport_pmtu(struct sctp_transport *transport, struct sock *sk) bool sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu) { struct dst_entry *dst = sctp_transport_dst_check(t); + struct sock *sk = t->asoc->base.sk; bool change = true; if (unlikely(pmtu < SCTP_DEFAULT_MINSEGMENT)) { @@ -265,12 +266,19 @@ bool sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu) pmtu = SCTP_TRUNC4(pmtu); if (dst) { - dst->ops->update_pmtu(dst, t->asoc->base.sk, NULL, pmtu); + struct sctp_pf *pf = sctp_get_pf_specific(dst->ops->family); + union sctp_addr addr; + + pf->af->from_sk(&addr, sk); + pf->to_sk_daddr(&t->ipaddr, sk); + dst->ops->update_pmtu(dst, sk, NULL, pmtu); + pf->to_sk_daddr(&addr, sk); + dst = sctp_transport_dst_check(t); } if (!dst) { - t->af_specific->get_dst(t, &t->saddr, &t->fl, t->asoc->base.sk); + t->af_specific->get_dst(t, &t->saddr, &t->fl, sk); dst = t->dst; } -- GitLab From 66c1b9cfa07d6f590c118ff8b548578b5f059187 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 1 Oct 2018 12:21:59 +0300 Subject: [PATCH 0878/2269] team: Forbid enslaving team device to itself [ Upstream commit 471b83bd8bbe4e89743683ef8ecb78f7029d8288 ] team's ndo_add_slave() acquires 'team->lock' and later tries to open the newly enslaved device via dev_open(). This emits a 'NETDEV_UP' event that causes the VLAN driver to add VLAN 0 on the team device. team's ndo_vlan_rx_add_vid() will also try to acquire 'team->lock' and deadlock. Fix this by checking early at the enslavement function that a team device is not being enslaved to itself. A similar check was added to the bond driver in commit 09a89c219baf ("bonding: disallow enslaving a bond to itself"). WARNING: possible recursive locking detected 4.18.0-rc7+ #176 Not tainted -------------------------------------------- syz-executor4/6391 is trying to acquire lock: (____ptrval____) (&team->lock){+.+.}, at: team_vlan_rx_add_vid+0x3b/0x1e0 drivers/net/team/team.c:1868 but task is already holding lock: (____ptrval____) (&team->lock){+.+.}, at: team_add_slave+0xdb/0x1c30 drivers/net/team/team.c:1947 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&team->lock); lock(&team->lock); *** DEADLOCK *** May be due to missing lock nesting notation 2 locks held by syz-executor4/6391: #0: (____ptrval____) (rtnl_mutex){+.+.}, at: rtnl_lock net/core/rtnetlink.c:77 [inline] #0: (____ptrval____) (rtnl_mutex){+.+.}, at: rtnetlink_rcv_msg+0x412/0xc30 net/core/rtnetlink.c:4662 #1: (____ptrval____) (&team->lock){+.+.}, at: team_add_slave+0xdb/0x1c30 drivers/net/team/team.c:1947 stack backtrace: CPU: 1 PID: 6391 Comm: syz-executor4 Not tainted 4.18.0-rc7+ #176 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x1c9/0x2b4 lib/dump_stack.c:113 print_deadlock_bug kernel/locking/lockdep.c:1765 [inline] check_deadlock kernel/locking/lockdep.c:1809 [inline] validate_chain kernel/locking/lockdep.c:2405 [inline] __lock_acquire.cold.64+0x1fb/0x486 kernel/locking/lockdep.c:3435 lock_acquire+0x1e4/0x540 kernel/locking/lockdep.c:3924 __mutex_lock_common kernel/locking/mutex.c:757 [inline] __mutex_lock+0x176/0x1820 kernel/locking/mutex.c:894 mutex_lock_nested+0x16/0x20 kernel/locking/mutex.c:909 team_vlan_rx_add_vid+0x3b/0x1e0 drivers/net/team/team.c:1868 vlan_add_rx_filter_info+0x14a/0x1d0 net/8021q/vlan_core.c:210 __vlan_vid_add net/8021q/vlan_core.c:278 [inline] vlan_vid_add+0x63e/0x9d0 net/8021q/vlan_core.c:308 vlan_device_event.cold.12+0x2a/0x2f net/8021q/vlan.c:381 notifier_call_chain+0x180/0x390 kernel/notifier.c:93 __raw_notifier_call_chain kernel/notifier.c:394 [inline] raw_notifier_call_chain+0x2d/0x40 kernel/notifier.c:401 call_netdevice_notifiers_info+0x3f/0x90 net/core/dev.c:1735 call_netdevice_notifiers net/core/dev.c:1753 [inline] dev_open+0x173/0x1b0 net/core/dev.c:1433 team_port_add drivers/net/team/team.c:1219 [inline] team_add_slave+0xa8b/0x1c30 drivers/net/team/team.c:1948 do_set_master+0x1c9/0x220 net/core/rtnetlink.c:2248 do_setlink+0xba4/0x3e10 net/core/rtnetlink.c:2382 rtnl_setlink+0x2a9/0x400 net/core/rtnetlink.c:2636 rtnetlink_rcv_msg+0x46e/0xc30 net/core/rtnetlink.c:4665 netlink_rcv_skb+0x172/0x440 net/netlink/af_netlink.c:2455 rtnetlink_rcv+0x1c/0x20 net/core/rtnetlink.c:4683 netlink_unicast_kernel net/netlink/af_netlink.c:1317 [inline] netlink_unicast+0x5a0/0x760 net/netlink/af_netlink.c:1343 netlink_sendmsg+0xa18/0xfd0 net/netlink/af_netlink.c:1908 sock_sendmsg_nosec net/socket.c:642 [inline] sock_sendmsg+0xd5/0x120 net/socket.c:652 ___sys_sendmsg+0x7fd/0x930 net/socket.c:2126 __sys_sendmsg+0x11d/0x290 net/socket.c:2164 __do_sys_sendmsg net/socket.c:2173 [inline] __se_sys_sendmsg net/socket.c:2171 [inline] __x64_sys_sendmsg+0x78/0xb0 net/socket.c:2171 do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x456b29 Code: fd b4 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 cb b4 fb ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007f9706bf8c78 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 00007f9706bf96d4 RCX: 0000000000456b29 RDX: 0000000000000000 RSI: 0000000020000240 RDI: 0000000000000004 RBP: 00000000009300a0 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00000000ffffffff R13: 00000000004d3548 R14: 00000000004c8227 R15: 0000000000000000 Fixes: 87002b03baab ("net: introduce vlan_vid_[add/del] and use them instead of direct [add/kill]_vid ndo calls") Signed-off-by: Ido Schimmel Reported-and-tested-by: syzbot+bd051aba086537515cdb@syzkaller.appspotmail.com Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/team/team.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index 83c5917138373..817451a1efd6d 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -1165,6 +1165,11 @@ static int team_port_add(struct team *team, struct net_device *port_dev) return -EBUSY; } + if (dev == port_dev) { + netdev_err(dev, "Cannot enslave team device to itself\n"); + return -EINVAL; + } + if (port_dev->features & NETIF_F_VLAN_CHALLENGED && vlan_uses_dev(dev)) { netdev_err(dev, "Device %s is VLAN challenged and team device has VLAN set up\n", -- GitLab From 49984ca4e60ef5a707fd73b9e4bf34c539bbe2bf Mon Sep 17 00:00:00 2001 From: Parthasarathy Bhuvaragan Date: Tue, 25 Sep 2018 18:21:58 +0200 Subject: [PATCH 0879/2269] tipc: fix flow control accounting for implicit connect [ Upstream commit 92ef12b32feab8f277b69e9fb89ede2796777f4d ] In the case of implicit connect message with data > 1K, the flow control accounting is incorrect. At this state, the socket does not know the peer nodes capability and falls back to legacy flow control by return 1, however the receiver of this message will perform the new block accounting. This leads to a slack and eventually traffic disturbance. In this commit, we perform tipc_node_get_capabilities() at implicit connect and perform accounting based on the peer's capability. Signed-off-by: Parthasarathy Bhuvaragan Signed-off-by: Jon Maloy Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/tipc/socket.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 0aebf0695ae02..4d2125d258fe1 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1063,8 +1063,10 @@ static int __tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dlen) /* Handle implicit connection setup */ if (unlikely(dest)) { rc = __tipc_sendmsg(sock, m, dlen); - if (dlen && (dlen == rc)) + if (dlen && dlen == rc) { + tsk->peer_caps = tipc_node_get_capabilities(net, dnode); tsk->snt_unacked = tsk_inc(tsk, dlen + msg_hdr_sz(hdr)); + } return rc; } -- GitLab From 7976e6b70ecf1c9d592d597a7e9292aa8ad9e3fc Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Thu, 4 Oct 2018 13:37:32 +0200 Subject: [PATCH 0880/2269] udp: Unbreak modules that rely on external __skb_recv_udp() availability [ Upstream commit 7e823644b60555f70f241274b8d0120dd919269a ] Commit 2276f58ac589 ("udp: use a separate rx queue for packet reception") turned static inline __skb_recv_udp() from being a trivial helper around __skb_recv_datagram() into a UDP specific implementaion, making it EXPORT_SYMBOL_GPL() at the same time. There are external modules that got broken by __skb_recv_udp() not being visible to them. Let's unbreak them by making __skb_recv_udp EXPORT_SYMBOL(). Rationale (one of those) why this is actually "technically correct" thing to do: __skb_recv_udp() used to be an inline wrapper around __skb_recv_datagram(), which itself (still, and correctly so, I believe) is EXPORT_SYMBOL(). Cc: Paolo Abeni Cc: Eric Dumazet Fixes: 2276f58ac589 ("udp: use a separate rx queue for packet reception") Signed-off-by: Jiri Kosina Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/udp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 3de413867991e..dc0ec227b9d21 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1565,7 +1565,7 @@ busy_check: *err = error; return NULL; } -EXPORT_SYMBOL_GPL(__skb_recv_udp); +EXPORT_SYMBOL(__skb_recv_udp); /* * This should be easy, if there is something there we -- GitLab From 5e7bb38dc696b672b75de0539a75e776f023fe9b Mon Sep 17 00:00:00 2001 From: Jose Abreu Date: Mon, 17 Sep 2018 09:22:57 +0100 Subject: [PATCH 0881/2269] net: stmmac: Fixup the tail addr setting in xmit path [ Upstream commit 0431100b3d82c509729ece1ab22ada2484e209c1 ] Currently we are always setting the tail address of descriptor list to the end of the pre-allocated list. According to databook this is not correct. Tail address should point to the last available descriptor + 1, which means we have to update the tail address everytime we call the xmit function. This should make no impact in older versions of MAC but in newer versions there are some DMA features which allows the IP to fetch descriptors in advance and in a non sequential order so its critical that we set the tail address correctly. Signed-off-by: Jose Abreu Fixes: f748be531d70 ("stmmac: support new GMAC4") Cc: David S. Miller Cc: Joao Pinto Cc: Giuseppe Cavallaro Cc: Alexandre Torgue Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 1a9a382bf1c4b..bafbebeb0e001 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -2190,8 +2190,7 @@ static int stmmac_init_dma_engine(struct stmmac_priv *priv) priv->plat->dma_cfg, tx_q->dma_tx_phy, chan); - tx_q->tx_tail_addr = tx_q->dma_tx_phy + - (DMA_TX_SIZE * sizeof(struct dma_desc)); + tx_q->tx_tail_addr = tx_q->dma_tx_phy; priv->hw->dma->set_tx_tail_ptr(priv->ioaddr, tx_q->tx_tail_addr, chan); @@ -2963,6 +2962,7 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev) netdev_tx_sent_queue(netdev_get_tx_queue(dev, queue), skb->len); + tx_q->tx_tail_addr = tx_q->dma_tx_phy + (tx_q->cur_tx * sizeof(*desc)); priv->hw->dma->set_tx_tail_ptr(priv->ioaddr, tx_q->tx_tail_addr, queue); @@ -3178,9 +3178,11 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) if (priv->synopsys_id < DWMAC_CORE_4_00) priv->hw->dma->enable_dma_transmission(priv->ioaddr); - else + else { + tx_q->tx_tail_addr = tx_q->dma_tx_phy + (tx_q->cur_tx * sizeof(*desc)); priv->hw->dma->set_tx_tail_ptr(priv->ioaddr, tx_q->tx_tail_addr, queue); + } return NETDEV_TX_OK; -- GitLab From 5150140b4ea7ac8401deaebec03e619988a3804b Mon Sep 17 00:00:00 2001 From: Jianfeng Tan Date: Sat, 29 Sep 2018 15:41:27 +0000 Subject: [PATCH 0882/2269] net/packet: fix packet drop as of virtio gso [ Upstream commit 9d2f67e43b73e8af7438be219b66a5de0cfa8bd9 ] When we use raw socket as the vhost backend, a packet from virito with gso offloading information, cannot be sent out in later validaton at xmit path, as we did not set correct skb->protocol which is further used for looking up the gso function. To fix this, we set this field according to virito hdr information. Fixes: e858fae2b0b8f4 ("virtio_net: use common code for virtio_net_hdr and skb GSO conversion") Signed-off-by: Jianfeng Tan Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/linux/virtio_net.h | 18 ++++++++++++++++++ net/packet/af_packet.c | 11 +++++++---- 2 files changed, 25 insertions(+), 4 deletions(-) diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index 9397628a19671..cb462f9ab7dd5 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -5,6 +5,24 @@ #include #include +static inline int virtio_net_hdr_set_proto(struct sk_buff *skb, + const struct virtio_net_hdr *hdr) +{ + switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { + case VIRTIO_NET_HDR_GSO_TCPV4: + case VIRTIO_NET_HDR_GSO_UDP: + skb->protocol = cpu_to_be16(ETH_P_IP); + break; + case VIRTIO_NET_HDR_GSO_TCPV6: + skb->protocol = cpu_to_be16(ETH_P_IPV6); + break; + default: + return -EINVAL; + } + + return 0; +} + static inline int virtio_net_hdr_to_skb(struct sk_buff *skb, const struct virtio_net_hdr *hdr, bool little_endian) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 8833a58ca3ee9..8d1a7c9003930 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2753,10 +2753,12 @@ tpacket_error: } } - if (po->has_vnet_hdr && virtio_net_hdr_to_skb(skb, vnet_hdr, - vio_le())) { - tp_len = -EINVAL; - goto tpacket_error; + if (po->has_vnet_hdr) { + if (virtio_net_hdr_to_skb(skb, vnet_hdr, vio_le())) { + tp_len = -EINVAL; + goto tpacket_error; + } + virtio_net_hdr_set_proto(skb, vnet_hdr); } skb->destructor = tpacket_destruct_skb; @@ -2952,6 +2954,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) if (err) goto out_free; len += sizeof(vnet_hdr); + virtio_net_hdr_set_proto(skb, &vnet_hdr); } skb_probe_transport_header(skb, reserve); -- GitLab From 431a4fee711414dbd14389d62f38adb03dc39e49 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 9 Oct 2018 16:48:57 -0700 Subject: [PATCH 0883/2269] net: dsa: bcm_sf2: Fix unbind ordering [ Upstream commit bf3b452b7af787b8bf27de6490dc4eedf6f97599 ] The order in which we release resources is unfortunately leading to bus errors while dismantling the port. This is because we set priv->wol_ports_mask to 0 to tell bcm_sf2_sw_suspend() that it is now permissible to clock gate the switch. Later on, when dsa_slave_destroy() comes in from dsa_unregister_switch() and calls dsa_switch_ops::port_disable, we perform the same dismantling again, and this time we hit registers that are clock gated. Make sure that dsa_unregister_switch() is the first thing that happens, which takes care of releasing all user visible resources, then proceed with clock gating hardware. We still need to set priv->wol_ports_mask to 0 to make sure that an enabled port properly gets disabled in case it was previously used as part of Wake-on-LAN. Fixes: d9338023fb8e ("net: dsa: bcm_sf2: Make it a real platform device driver") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/dsa/bcm_sf2.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index 48de089dd9c88..0132921f408ac 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -1264,10 +1264,10 @@ static int bcm_sf2_sw_remove(struct platform_device *pdev) { struct bcm_sf2_priv *priv = platform_get_drvdata(pdev); - /* Disable all ports and interrupts */ priv->wol_ports_mask = 0; - bcm_sf2_sw_suspend(priv->dev->ds); dsa_unregister_switch(priv->dev->ds); + /* Disable all ports and interrupts */ + bcm_sf2_sw_suspend(priv->dev->ds); bcm_sf2_mdio_unregister(priv); return 0; -- GitLab From 7ba8867fb3a7fb6ec0b919eb69e539b4bf03b712 Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Sat, 25 Aug 2018 03:29:58 +0000 Subject: [PATCH 0884/2269] net/mlx5e: Set vlan masks for all offloaded TC rules [ Upstream commit cee26487620bc9bc3c7db21b6984d91f7bae12ae ] In flow steering, if asked to, the hardware matches on the first ethertype which is not vlan. It's possible to set a rule as follows, which is meant to match on untagged packet, but will match on a vlan packet: tc filter add dev eth0 parent ffff: protocol ip flower ... To avoid this for packets with single tag, we set vlan masks to tell hardware to check the tags for every matched packet. Fixes: 095b6cfd69ce ('net/mlx5e: Add TC vlan match parsing') Signed-off-by: Jianbo Liu Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index e28f9dab9ceb3..9e0be077df9c1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -864,6 +864,9 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_prio, mask->vlan_priority); MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_prio, key->vlan_priority); } + } else { + MLX5_SET(fte_match_set_lyr_2_4, headers_c, svlan_tag, 1); + MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1); } if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { -- GitLab From 7aa339e9099496a01183daf9b74b77c7cc26ed03 Mon Sep 17 00:00:00 2001 From: Friedemann Gerold Date: Sat, 15 Sep 2018 18:03:39 +0300 Subject: [PATCH 0885/2269] net: aquantia: memory corruption on jumbo frames [ Upstream commit d26ed6b0e5e23190d43ab34bc69cbecdc464a2cf ] This patch fixes skb_shared area, which will be corrupted upon reception of 4K jumbo packets. Originally build_skb usage purpose was to reuse page for skb to eliminate needs of extra fragments. But that logic does not take into account that skb_shared_info should be reserved at the end of skb data area. In case packet data consumes all the page (4K), skb_shinfo location overflows the page. As a consequence, __build_skb zeroed shinfo data above the allocated page, corrupting next page. The issue is rarely seen in real life because jumbo are normally larger than 4K and that causes another code path to trigger. But it 100% reproducible with simple scapy packet, like: sendp(IP(dst="192.168.100.3") / TCP(dport=443) \ / Raw(RandString(size=(4096-40))), iface="enp1s0") Fixes: 018423e90bee ("net: ethernet: aquantia: Add ring support code") Reported-by: Friedemann Gerold Reported-by: Michael Rauch Signed-off-by: Friedemann Gerold Tested-by: Nikita Danilov Signed-off-by: Igor Russkikh Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- .../net/ethernet/aquantia/atlantic/aq_ring.c | 32 +++++++++++-------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c index 0654e0c76bc27..640babf752ea6 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c @@ -222,9 +222,10 @@ int aq_ring_rx_clean(struct aq_ring_s *self, } /* for single fragment packets use build_skb() */ - if (buff->is_eop) { + if (buff->is_eop && + buff->len <= AQ_CFG_RX_FRAME_MAX - AQ_SKB_ALIGN) { skb = build_skb(page_address(buff->page), - buff->len + AQ_SKB_ALIGN); + AQ_CFG_RX_FRAME_MAX); if (unlikely(!skb)) { err = -ENOMEM; goto err_exit; @@ -244,18 +245,21 @@ int aq_ring_rx_clean(struct aq_ring_s *self, buff->len - ETH_HLEN, SKB_TRUESIZE(buff->len - ETH_HLEN)); - for (i = 1U, next_ = buff->next, - buff_ = &self->buff_ring[next_]; true; - next_ = buff_->next, - buff_ = &self->buff_ring[next_], ++i) { - skb_add_rx_frag(skb, i, buff_->page, 0, - buff_->len, - SKB_TRUESIZE(buff->len - - ETH_HLEN)); - buff_->is_cleaned = 1; - - if (buff_->is_eop) - break; + if (!buff->is_eop) { + for (i = 1U, next_ = buff->next, + buff_ = &self->buff_ring[next_]; + true; next_ = buff_->next, + buff_ = &self->buff_ring[next_], ++i) { + skb_add_rx_frag(skb, i, + buff_->page, 0, + buff_->len, + SKB_TRUESIZE(buff->len - + ETH_HLEN)); + buff_->is_cleaned = 1; + + if (buff_->is_eop) + break; + } } } -- GitLab From 83eb2fdd0483ea9195fa4a46532e57b2df1b9974 Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Sun, 16 Sep 2018 14:45:27 +0300 Subject: [PATCH 0886/2269] net/mlx5: E-Switch, Fix out of bound access when setting vport rate [ Upstream commit 11aa5800ed66ed0415b7509f02881c76417d212a ] The code that deals with eswitch vport bw guarantee was going beyond the eswitch vport array limit, fix that. This was pointed out by the kernel address sanitizer (KASAN). The error from KASAN log: [2018-09-15 15:04:45] BUG: KASAN: slab-out-of-bounds in mlx5_eswitch_set_vport_rate+0x8c1/0xae0 [mlx5_core] Fixes: c9497c98901c ("net/mlx5: Add support for setting VF min rate") Signed-off-by: Eran Ben Elisha Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index de72b66df3e58..1af9894abd95f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1922,7 +1922,7 @@ static u32 calculate_vports_min_rate_divider(struct mlx5_eswitch *esw) u32 max_guarantee = 0; int i; - for (i = 0; i <= esw->total_vports; i++) { + for (i = 0; i < esw->total_vports; i++) { evport = &esw->vports[i]; if (!evport->enabled || evport->info.min_rate < max_guarantee) continue; @@ -1942,7 +1942,7 @@ static int normalize_vports_min_rate(struct mlx5_eswitch *esw, u32 divider) int err; int i; - for (i = 0; i <= esw->total_vports; i++) { + for (i = 0; i < esw->total_vports; i++) { evport = &esw->vports[i]; if (!evport->enabled) continue; -- GitLab From 90a3d8afe1f41ab7034d91c6550b63485ae5ae1b Mon Sep 17 00:00:00 2001 From: Mahesh Bandewar Date: Mon, 24 Sep 2018 14:39:42 -0700 Subject: [PATCH 0887/2269] bonding: pass link-local packets to bonding master also. [ Upstream commit 6a9e461f6fe4434e6172304b69774daff9a3ac4c ] Commit b89f04c61efe ("bonding: deliver link-local packets with skb->dev set to link that packets arrived on") changed the behavior of how link-local-multicast packets are processed. The change in the behavior broke some legacy use cases where these packets are expected to arrive on bonding master device also. This patch passes the packet to the stack with the link it arrived on as well as passes to the bonding-master device to preserve the legacy use case. Fixes: b89f04c61efe ("bonding: deliver link-local packets with skb->dev set to link that packets arrived on") Reported-by: Michal Soltys Signed-off-by: Mahesh Bandewar Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/bonding/bond_main.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 23c0b515d2627..b880126e616bf 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1177,9 +1177,26 @@ static rx_handler_result_t bond_handle_frame(struct sk_buff **pskb) } } - /* don't change skb->dev for link-local packets */ - if (is_link_local_ether_addr(eth_hdr(skb)->h_dest)) + /* Link-local multicast packets should be passed to the + * stack on the link they arrive as well as pass them to the + * bond-master device. These packets are mostly usable when + * stack receives it with the link on which they arrive + * (e.g. LLDP) they also must be available on master. Some of + * the use cases include (but are not limited to): LLDP agents + * that must be able to operate both on enslaved interfaces as + * well as on bonds themselves; linux bridges that must be able + * to process/pass BPDUs from attached bonds when any kind of + * STP version is enabled on the network. + */ + if (is_link_local_ether_addr(eth_hdr(skb)->h_dest)) { + struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC); + + if (nskb) { + nskb->dev = bond->dev; + netif_rx(nskb); + } return RX_HANDLER_PASS; + } if (bond_should_deliver_exact_match(skb, slave, bond)) return RX_HANDLER_EXACT; -- GitLab From f578e5b34c388a9281cfc613ed59aa9c414cc83f Mon Sep 17 00:00:00 2001 From: Mahesh Bandewar Date: Tue, 2 Oct 2018 12:14:34 -0700 Subject: [PATCH 0888/2269] bonding: fix warning message [ Upstream commit 0f3b914c9cfcd7bbedd445dc4ac5dd999fa213c2 ] RX queue config for bonding master could be different from its slave device(s). With the commit 6a9e461f6fe4 ("bonding: pass link-local packets to bonding master also."), the packet is reinjected into stack with skb->dev as bonding master. This potentially triggers the message: "bondX received packet on queue Y, but number of RX queues is Z" whenever the queue that packet is received on is higher than the numrxqueues on bonding master (Y > Z). Fixes: 6a9e461f6fe4 ("bonding: pass link-local packets to bonding master also.") Reported-by: John Sperbeck Signed-off-by: Eric Dumazet Signed-off-by: Mahesh Bandewar Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/bonding/bond_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index b880126e616bf..cf64a365362b9 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1193,6 +1193,7 @@ static rx_handler_result_t bond_handle_frame(struct sk_buff **pskb) if (nskb) { nskb->dev = bond->dev; + nskb->queue_mapping = 0; netif_rx(nskb); } return RX_HANDLER_PASS; -- GitLab From eb79c31aac15aad1ff700f1ac6c8223946b8553e Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 2 Oct 2018 10:10:14 -0700 Subject: [PATCH 0889/2269] nfp: avoid soft lockups under control message storm [ Upstream commit ff58e2df62ce29d0552278c290ae494b30fe0c6f ] When FW floods the driver with control messages try to exit the cmsg processing loop every now and then to avoid soft lockups. Cmsg processing is generally very lightweight so 512 seems like a reasonable budget, which should not be exceeded under normal conditions. Fixes: 77ece8d5f196 ("nfp: add control vNIC datapath") Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Tested-by: Pieter Jansen van Vuuren Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- .../net/ethernet/netronome/nfp/nfp_net_common.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 56751990bceef..6df2c8b2ce6f3 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -2058,14 +2058,17 @@ nfp_ctrl_rx_one(struct nfp_net *nn, struct nfp_net_dp *dp, return true; } -static void nfp_ctrl_rx(struct nfp_net_r_vector *r_vec) +static bool nfp_ctrl_rx(struct nfp_net_r_vector *r_vec) { struct nfp_net_rx_ring *rx_ring = r_vec->rx_ring; struct nfp_net *nn = r_vec->nfp_net; struct nfp_net_dp *dp = &nn->dp; + unsigned int budget = 512; - while (nfp_ctrl_rx_one(nn, dp, r_vec, rx_ring)) + while (nfp_ctrl_rx_one(nn, dp, r_vec, rx_ring) && budget--) continue; + + return budget; } static void nfp_ctrl_poll(unsigned long arg) @@ -2077,9 +2080,13 @@ static void nfp_ctrl_poll(unsigned long arg) __nfp_ctrl_tx_queued(r_vec); spin_unlock_bh(&r_vec->lock); - nfp_ctrl_rx(r_vec); - - nfp_net_irq_unmask(r_vec->nfp_net, r_vec->irq_entry); + if (nfp_ctrl_rx(r_vec)) { + nfp_net_irq_unmask(r_vec->nfp_net, r_vec->irq_entry); + } else { + tasklet_schedule(&r_vec->tasklet); + nn_dp_warn(&r_vec->nfp_net->dp, + "control message budget exceeded!\n"); + } } /* Setup and Configuration -- GitLab From 98c77f2eef29ffe9288edd84f8b5bc3fe2a3a614 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Wed, 19 Sep 2018 19:01:37 +0200 Subject: [PATCH 0890/2269] bnxt_en: don't try to offload VLAN 'modify' action [ Upstream commit 8c6ec3613e7b0aade20a3196169c0bab32ed3e3f ] bnxt offload code currently supports only 'push' and 'pop' operation: let .ndo_setup_tc() return -EOPNOTSUPP if VLAN 'modify' action is configured. Fixes: 2ae7408fedfe ("bnxt_en: bnxt: add TC flower filter offload support") Signed-off-by: Davide Caratti Acked-by: Sathya Perla Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c index 6a185344b378d..149d30f604590 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c @@ -78,17 +78,23 @@ static int bnxt_tc_parse_redir(struct bnxt *bp, return 0; } -static void bnxt_tc_parse_vlan(struct bnxt *bp, - struct bnxt_tc_actions *actions, - const struct tc_action *tc_act) +static int bnxt_tc_parse_vlan(struct bnxt *bp, + struct bnxt_tc_actions *actions, + const struct tc_action *tc_act) { - if (tcf_vlan_action(tc_act) == TCA_VLAN_ACT_POP) { + switch (tcf_vlan_action(tc_act)) { + case TCA_VLAN_ACT_POP: actions->flags |= BNXT_TC_ACTION_FLAG_POP_VLAN; - } else if (tcf_vlan_action(tc_act) == TCA_VLAN_ACT_PUSH) { + break; + case TCA_VLAN_ACT_PUSH: actions->flags |= BNXT_TC_ACTION_FLAG_PUSH_VLAN; actions->push_vlan_tci = htons(tcf_vlan_push_vid(tc_act)); actions->push_vlan_tpid = tcf_vlan_push_proto(tc_act); + break; + default: + return -EOPNOTSUPP; } + return 0; } static int bnxt_tc_parse_actions(struct bnxt *bp, @@ -122,7 +128,9 @@ static int bnxt_tc_parse_actions(struct bnxt *bp, /* Push/pop VLAN */ if (is_tcf_vlan(tc_act)) { - bnxt_tc_parse_vlan(bp, actions, tc_act); + rc = bnxt_tc_parse_vlan(bp, actions, tc_act); + if (rc) + return rc; continue; } } -- GitLab From 4b7b26024f52a5cdeca20b56657f90d74a0a1995 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20=C5=BBenczykowski?= Date: Sat, 22 Sep 2018 01:34:01 -0700 Subject: [PATCH 0891/2269] net-ethtool: ETHTOOL_GUFO did not and should not require CAP_NET_ADMIN MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 474ff2600889e16280dbc6ada8bfecb216169a70 ] So it should not fail with EPERM even though it is no longer implemented... This is a fix for: (userns)$ egrep ^Cap /proc/self/status CapInh: 0000003fffffffff CapPrm: 0000003fffffffff CapEff: 0000003fffffffff CapBnd: 0000003fffffffff CapAmb: 0000003fffffffff (userns)$ tcpdump -i usb_rndis0 tcpdump: WARNING: usb_rndis0: SIOCETHTOOL(ETHTOOL_GUFO) ioctl failed: Operation not permitted Warning: Kernel filter failed: Bad file descriptor tcpdump: can't remove kernel filter: Bad file descriptor With this change it returns EOPNOTSUPP instead of EPERM. See also https://github.com/the-tcpdump-group/libpcap/issues/689 Fixes: 08a00fea6de2 "net: Remove references to NETIF_F_UFO from ethtool." Cc: David S. Miller Signed-off-by: Maciej Żenczykowski Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/ethtool.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 490eab16b04b9..0ae5ac5e090fd 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -2572,6 +2572,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_GPHYSTATS: case ETHTOOL_GTSO: case ETHTOOL_GPERMADDR: + case ETHTOOL_GUFO: case ETHTOOL_GGSO: case ETHTOOL_GGRO: case ETHTOOL_GFLAGS: -- GitLab From 17af5475aef350b78875646b2795381f452de756 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 1 Oct 2018 15:02:26 -0700 Subject: [PATCH 0892/2269] tcp/dccp: fix lockdep issue when SYN is backlogged [ Upstream commit 1ad98e9d1bdf4724c0a8532fabd84bf3c457c2bc ] In normal SYN processing, packets are handled without listener lock and in RCU protected ingress path. But syzkaller is known to be able to trick us and SYN packets might be processed in process context, after being queued into socket backlog. In commit 06f877d613be ("tcp/dccp: fix other lockdep splats accessing ireq_opt") I made a very stupid fix, that happened to work mostly because of the regular path being RCU protected. Really the thing protecting ireq->ireq_opt is RCU read lock, and the pseudo request refcnt is not relevant. This patch extends what I did in commit 449809a66c1d ("tcp/dccp: block BH for SYN processing") by adding an extra rcu_read_{lock|unlock} pair in the paths that might be taken when processing SYN from socket backlog (thus possibly in process context) Fixes: 06f877d613be ("tcp/dccp: fix other lockdep splats accessing ireq_opt") Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/inet_sock.h | 3 +-- net/dccp/input.c | 4 +++- net/ipv4/tcp_input.c | 4 +++- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 8e51b4a69088c..1035525a4c111 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -131,8 +131,7 @@ static inline int inet_request_bound_dev_if(const struct sock *sk, static inline struct ip_options_rcu *ireq_opt_deref(const struct inet_request_sock *ireq) { - return rcu_dereference_check(ireq->ireq_opt, - refcount_read(&ireq->req.rsk_refcnt) > 0); + return rcu_dereference(ireq->ireq_opt); } struct inet_cork { diff --git a/net/dccp/input.c b/net/dccp/input.c index fa6be9750bb46..849f399aec21a 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -605,11 +605,13 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, if (sk->sk_state == DCCP_LISTEN) { if (dh->dccph_type == DCCP_PKT_REQUEST) { /* It is possible that we process SYN packets from backlog, - * so we need to make sure to disable BH right there. + * so we need to make sure to disable BH and RCU right there. */ + rcu_read_lock(); local_bh_disable(); acceptable = inet_csk(sk)->icsk_af_ops->conn_request(sk, skb) >= 0; local_bh_enable(); + rcu_read_unlock(); if (!acceptable) return 1; consume_skb(skb); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 991f382afc1b0..e24c0d7adf655 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5913,11 +5913,13 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) if (th->fin) goto discard; /* It is possible that we process SYN packets from backlog, - * so we need to make sure to disable BH right there. + * so we need to make sure to disable BH and RCU right there. */ + rcu_read_lock(); local_bh_disable(); acceptable = icsk->icsk_af_ops->conn_request(sk, skb) >= 0; local_bh_enable(); + rcu_read_unlock(); if (!acceptable) return 1; -- GitLab From c5df58138946fe24d3cb0c99bb6ce04130c657b7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 2 Oct 2018 12:35:05 -0700 Subject: [PATCH 0893/2269] inet: make sure to grab rcu_read_lock before using ireq->ireq_opt [ Upstream commit 2ab2ddd301a22ca3c5f0b743593e4ad2953dfa53 ] Timer handlers do not imply rcu_read_lock(), so my recent fix triggered a LOCKDEP warning when SYNACK is retransmit. Lets add rcu_read_lock()/rcu_read_unlock() pairs around ireq->ireq_opt usages instead of guessing what is done by callers, since it is not worth the pain. Get rid of ireq_opt_deref() helper since it hides the logic without real benefit, since it is now a standard rcu_dereference(). Fixes: 1ad98e9d1bdf ("tcp/dccp: fix lockdep issue when SYN is backlogged") Signed-off-by: Eric Dumazet Reported-by: Willem de Bruijn Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/inet_sock.h | 5 ----- net/dccp/ipv4.c | 4 +++- net/ipv4/inet_connection_sock.c | 5 ++++- net/ipv4/tcp_ipv4.c | 4 +++- 4 files changed, 10 insertions(+), 8 deletions(-) diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 1035525a4c111..16a1492a5bd33 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -129,11 +129,6 @@ static inline int inet_request_bound_dev_if(const struct sock *sk, return sk->sk_bound_dev_if; } -static inline struct ip_options_rcu *ireq_opt_deref(const struct inet_request_sock *ireq) -{ - return rcu_dereference(ireq->ireq_opt); -} - struct inet_cork { unsigned int flags; __be32 addr; diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index b08feb219b44b..8e08cea6f1786 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -493,9 +493,11 @@ static int dccp_v4_send_response(const struct sock *sk, struct request_sock *req dh->dccph_checksum = dccp_v4_csum_finish(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr); + rcu_read_lock(); err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr, ireq->ir_rmt_addr, - ireq_opt_deref(ireq)); + rcu_dereference(ireq->ireq_opt)); + rcu_read_unlock(); err = net_xmit_eval(err); } diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 0cc08c5122023..9d6b172caf6cf 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -542,7 +542,8 @@ struct dst_entry *inet_csk_route_req(const struct sock *sk, struct ip_options_rcu *opt; struct rtable *rt; - opt = ireq_opt_deref(ireq); + rcu_read_lock(); + opt = rcu_dereference(ireq->ireq_opt); flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark, RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, @@ -556,11 +557,13 @@ struct dst_entry *inet_csk_route_req(const struct sock *sk, goto no_route; if (opt && opt->opt.is_strictroute && rt->rt_uses_gateway) goto route_err; + rcu_read_unlock(); return &rt->dst; route_err: ip_rt_put(rt); no_route: + rcu_read_unlock(); __IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES); return NULL; } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 0e1a670dabd98..31b34c0c2d5f5 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -875,9 +875,11 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst, if (skb) { __tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr); + rcu_read_lock(); err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr, ireq->ir_rmt_addr, - ireq_opt_deref(ireq)); + rcu_dereference(ireq->ireq_opt)); + rcu_read_unlock(); err = net_xmit_eval(err); } -- GitLab From a15fac93a3e6019dac62b5c38700decd47849bf9 Mon Sep 17 00:00:00 2001 From: Oder Chiou Date: Wed, 15 Aug 2018 14:47:49 +0800 Subject: [PATCH 0894/2269] ASoC: rt5514: Fix the issue of the delay volume applied again [ Upstream commit 6f0a256253f48095ba2e5bcdfbed41f21643c105 ] After our evaluation, we need to modify the default values to make sure the volume applied immediately. Signed-off-by: Oder Chiou Signed-off-by: Mark Brown Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- sound/soc/codecs/rt5514.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sound/soc/codecs/rt5514.c b/sound/soc/codecs/rt5514.c index 1bfc8db1826a1..56ddab43da7e0 100644 --- a/sound/soc/codecs/rt5514.c +++ b/sound/soc/codecs/rt5514.c @@ -64,8 +64,8 @@ static const struct reg_sequence rt5514_patch[] = { {RT5514_ANA_CTRL_LDO10, 0x00028604}, {RT5514_ANA_CTRL_ADCFED, 0x00000800}, {RT5514_ASRC_IN_CTRL1, 0x00000003}, - {RT5514_DOWNFILTER0_CTRL3, 0x10000352}, - {RT5514_DOWNFILTER1_CTRL3, 0x10000352}, + {RT5514_DOWNFILTER0_CTRL3, 0x10000342}, + {RT5514_DOWNFILTER1_CTRL3, 0x10000342}, }; static const struct reg_default rt5514_reg[] = { @@ -92,10 +92,10 @@ static const struct reg_default rt5514_reg[] = { {RT5514_ASRC_IN_CTRL1, 0x00000003}, {RT5514_DOWNFILTER0_CTRL1, 0x00020c2f}, {RT5514_DOWNFILTER0_CTRL2, 0x00020c2f}, - {RT5514_DOWNFILTER0_CTRL3, 0x10000352}, + {RT5514_DOWNFILTER0_CTRL3, 0x10000342}, {RT5514_DOWNFILTER1_CTRL1, 0x00020c2f}, {RT5514_DOWNFILTER1_CTRL2, 0x00020c2f}, - {RT5514_DOWNFILTER1_CTRL3, 0x10000352}, + {RT5514_DOWNFILTER1_CTRL3, 0x10000342}, {RT5514_ANA_CTRL_LDO10, 0x00028604}, {RT5514_ANA_CTRL_LDO18_16, 0x02000345}, {RT5514_ANA_CTRL_ADC12, 0x0000a2a8}, -- GitLab From c08a99325a567d033b3a4549d8637191a135a827 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Wed, 22 Aug 2018 22:49:36 -0500 Subject: [PATCH 0895/2269] ASoC: wm8804: Add ACPI support [ Upstream commit 960cdd50ca9fdfeb82c2757107bcb7f93c8d7d41 ] HID made of either Wolfson/CirrusLogic PCI ID + 8804 identifier. This helps enumerate the HifiBerry Digi+ HAT boards on the Up2 platform. The scripts at https://github.com/thesofproject/acpi-scripts can be used to add the ACPI initrd overlays. Signed-off-by: Pierre-Louis Bossart Acked-by: Charles Keepax Signed-off-by: Mark Brown Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- sound/soc/codecs/wm8804-i2c.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/sound/soc/codecs/wm8804-i2c.c b/sound/soc/codecs/wm8804-i2c.c index f27464c2c5bad..79541960f45d9 100644 --- a/sound/soc/codecs/wm8804-i2c.c +++ b/sound/soc/codecs/wm8804-i2c.c @@ -13,6 +13,7 @@ #include #include #include +#include #include "wm8804.h" @@ -40,17 +41,29 @@ static const struct i2c_device_id wm8804_i2c_id[] = { }; MODULE_DEVICE_TABLE(i2c, wm8804_i2c_id); +#if defined(CONFIG_OF) static const struct of_device_id wm8804_of_match[] = { { .compatible = "wlf,wm8804", }, { } }; MODULE_DEVICE_TABLE(of, wm8804_of_match); +#endif + +#ifdef CONFIG_ACPI +static const struct acpi_device_id wm8804_acpi_match[] = { + { "1AEC8804", 0 }, /* Wolfson PCI ID + part ID */ + { "10138804", 0 }, /* Cirrus Logic PCI ID + part ID */ + { }, +}; +MODULE_DEVICE_TABLE(acpi, wm8804_acpi_match); +#endif static struct i2c_driver wm8804_i2c_driver = { .driver = { .name = "wm8804", .pm = &wm8804_pm, - .of_match_table = wm8804_of_match, + .of_match_table = of_match_ptr(wm8804_of_match), + .acpi_match_table = ACPI_PTR(wm8804_acpi_match), }, .probe = wm8804_i2c_probe, .remove = wm8804_i2c_remove, -- GitLab From c5f7b0d2ce9e523ff334b9ffee2e1145ac3532b9 Mon Sep 17 00:00:00 2001 From: Danny Smith Date: Thu, 23 Aug 2018 10:26:20 +0200 Subject: [PATCH 0896/2269] ASoC: sigmadsp: safeload should not have lower byte limit [ Upstream commit 5ea752c6efdf5aa8a57aed816d453a8f479f1b0a ] Fixed range in safeload conditional to allow safeload to up to 20 bytes, without a lower limit. Signed-off-by: Danny Smith Acked-by: Lars-Peter Clausen Signed-off-by: Mark Brown Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- sound/soc/codecs/sigmadsp.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sound/soc/codecs/sigmadsp.c b/sound/soc/codecs/sigmadsp.c index d53680ac78e42..6df158669420d 100644 --- a/sound/soc/codecs/sigmadsp.c +++ b/sound/soc/codecs/sigmadsp.c @@ -117,8 +117,7 @@ static int sigmadsp_ctrl_write(struct sigmadsp *sigmadsp, struct sigmadsp_control *ctrl, void *data) { /* safeload loads up to 20 bytes in a atomic operation */ - if (ctrl->num_bytes > 4 && ctrl->num_bytes <= 20 && sigmadsp->ops && - sigmadsp->ops->safeload) + if (ctrl->num_bytes <= 20 && sigmadsp->ops && sigmadsp->ops->safeload) return sigmadsp->ops->safeload(sigmadsp, ctrl->addr, data, ctrl->num_bytes); else -- GitLab From e121efd796c9f798a5f6ba4991c628bd23719071 Mon Sep 17 00:00:00 2001 From: Lei Yang Date: Wed, 5 Sep 2018 11:14:49 +0800 Subject: [PATCH 0897/2269] selftests/efivarfs: add required kernel configs [ Upstream commit 53cf59d6c0ad3edc4f4449098706a8f8986258b6 ] add config file Signed-off-by: Lei Yang Signed-off-by: Shuah Khan (Samsung OSG) Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/efivarfs/config | 1 + 1 file changed, 1 insertion(+) create mode 100644 tools/testing/selftests/efivarfs/config diff --git a/tools/testing/selftests/efivarfs/config b/tools/testing/selftests/efivarfs/config new file mode 100644 index 0000000000000..4e151f1005b2b --- /dev/null +++ b/tools/testing/selftests/efivarfs/config @@ -0,0 +1 @@ +CONFIG_EFIVAR_FS=y -- GitLab From 6d52f3e1e729116b686d61481f85ff9c204c77a3 Mon Sep 17 00:00:00 2001 From: Lei Yang Date: Wed, 5 Sep 2018 17:57:15 +0800 Subject: [PATCH 0898/2269] selftests: memory-hotplug: add required configs [ Upstream commit 4d85af102a66ee6aeefa596f273169e77fb2b48e ] add CONFIG_MEMORY_HOTREMOVE=y in config without this config, /sys/devices/system/memory/memory*/removable always return 0, I endup getting an early skip during test Signed-off-by: Lei Yang Signed-off-by: Shuah Khan (Samsung OSG) Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/memory-hotplug/config | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/memory-hotplug/config b/tools/testing/selftests/memory-hotplug/config index 2fde30191a47e..a7e8cd5bb265d 100644 --- a/tools/testing/selftests/memory-hotplug/config +++ b/tools/testing/selftests/memory-hotplug/config @@ -2,3 +2,4 @@ CONFIG_MEMORY_HOTPLUG=y CONFIG_MEMORY_HOTPLUG_SPARSE=y CONFIG_NOTIFIER_ERROR_INJECTION=y CONFIG_MEMORY_NOTIFIER_ERROR_INJECT=m +CONFIG_MEMORY_HOTREMOVE=y -- GitLab From ad9ad950a37bd6038ff61244d1a3c7c471399335 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Thu, 6 Sep 2018 03:21:33 +0000 Subject: [PATCH 0899/2269] ASoC: rsnd: adg: care clock-frequency size [ Upstream commit 69235ccf491d2e26aefd465c0d3ccd1e3b2a9a9c ] ADG has buffer over flow bug if DT has more than 3 clock-frequency. This patch fixup this issue, and uses first 2 values. clock-frequency = ; /* this is OK */ clock-frequency = ; /* this is NG */ Signed-off-by: Kuninori Morimoto Tested-by: Hiroyuki Yokoyama Signed-off-by: Mark Brown Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- sound/soc/sh/rcar/adg.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sound/soc/sh/rcar/adg.c b/sound/soc/sh/rcar/adg.c index e28edb1f72635..eb7879bcc6a79 100644 --- a/sound/soc/sh/rcar/adg.c +++ b/sound/soc/sh/rcar/adg.c @@ -467,6 +467,11 @@ static void rsnd_adg_get_clkout(struct rsnd_priv *priv, goto rsnd_adg_get_clkout_end; req_size = prop->length / sizeof(u32); + if (req_size > REQ_SIZE) { + dev_err(dev, + "too many clock-frequency, use top %d\n", REQ_SIZE); + req_size = REQ_SIZE; + } of_property_read_u32_array(np, "clock-frequency", req_rate, req_size); req_48kHz_rate = 0; -- GitLab From bac5611371559cd8b83fd6bdcb869b843a342500 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Thu, 6 Sep 2018 03:21:47 +0000 Subject: [PATCH 0900/2269] ASoC: rsnd: don't fallback to PIO mode when -EPROBE_DEFER [ Upstream commit 6c92d5a2744e27619a8fcc9d74b91ee9f1cdebd1 ] Current rsnd driver will fallback to PIO mode if it can't get DMA handler. But, DMA might return -EPROBE_DEFER when probe timing. This driver always fallback to PIO mode especially from commit ac6bbf0cdf4206c ("iommu: Remove IOMMU_OF_DECLARE") because of this reason. The DMA driver will be probed later, but sound driver might be probed as PIO mode in such case. This patch fixup this issue. Then, -EPROBE_DEFER is not error. Thus, let's don't indicate error message in such case. And it needs to call rsnd_adg_remove() individually if probe failed, because it registers clk which should be unregister. Maybe PIO fallback feature itself is not needed, but let's keep it so far. Signed-off-by: Kuninori Morimoto Signed-off-by: Mark Brown Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- sound/soc/sh/rcar/core.c | 10 +++++++++- sound/soc/sh/rcar/dma.c | 4 ++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/sound/soc/sh/rcar/core.c b/sound/soc/sh/rcar/core.c index 9896e736fa5cc..710c01cd2ad2e 100644 --- a/sound/soc/sh/rcar/core.c +++ b/sound/soc/sh/rcar/core.c @@ -486,7 +486,7 @@ static int rsnd_status_update(u32 *status, (func_call && (mod)->ops->fn) ? #fn : ""); \ if (func_call && (mod)->ops->fn) \ tmp = (mod)->ops->fn(mod, io, param); \ - if (tmp) \ + if (tmp && (tmp != -EPROBE_DEFER)) \ dev_err(dev, "%s[%d] : %s error %d\n", \ rsnd_mod_name(mod), rsnd_mod_id(mod), \ #fn, tmp); \ @@ -1469,6 +1469,14 @@ exit_snd_probe: rsnd_dai_call(remove, &rdai->capture, priv); } + /* + * adg is very special mod which can't use rsnd_dai_call(remove), + * and it registers ADG clock on probe. + * It should be unregister if probe failed. + * Mainly it is assuming -EPROBE_DEFER case + */ + rsnd_adg_remove(priv); + return ret; } diff --git a/sound/soc/sh/rcar/dma.c b/sound/soc/sh/rcar/dma.c index 041ec1080d52f..39a46e302babc 100644 --- a/sound/soc/sh/rcar/dma.c +++ b/sound/soc/sh/rcar/dma.c @@ -330,6 +330,10 @@ static int rsnd_dmaen_attach(struct rsnd_dai_stream *io, /* try to get DMAEngine channel */ chan = rsnd_dmaen_request_channel(io, mod_from, mod_to); if (IS_ERR_OR_NULL(chan)) { + /* Let's follow when -EPROBE_DEFER case */ + if (PTR_ERR(chan) == -EPROBE_DEFER) + return PTR_ERR(chan); + /* * DMA failed. try to PIO mode * see -- GitLab From e3583d7b1bd9869c371aa1164a085e0966a86391 Mon Sep 17 00:00:00 2001 From: Hermes Zhang Date: Tue, 28 Aug 2018 09:48:30 +0800 Subject: [PATCH 0901/2269] Bluetooth: hci_ldisc: Free rw_semaphore on close [ Upstream commit e6a57d22f787e73635ce0d29eef0abb77928b3e9 ] The percpu_rw_semaphore is not currently freed, and this leads to a crash when the stale rcu callback is invoked. DEBUG_OBJECTS detects this. ODEBUG: free active (active state 1) object type: rcu_head hint: (null) ------------[ cut here ]------------ WARNING: CPU: 1 PID: 2024 at debug_print_object+0xac/0xc8 PC is at debug_print_object+0xac/0xc8 LR is at debug_print_object+0xac/0xc8 Call trace: [] debug_print_object+0xac/0xc8 [] debug_check_no_obj_freed+0x1e8/0x228 [] kfree+0x1cc/0x250 [] hci_uart_tty_close+0x54/0x108 [] tty_ldisc_close.isra.1+0x40/0x58 [] tty_ldisc_kill+0x1c/0x40 [] tty_ldisc_release+0x94/0x170 [] tty_release_struct+0x1c/0x58 [] tty_release+0x3b0/0x490 [] __fput+0x88/0x1d0 [] ____fput+0xc/0x18 [] task_work_run+0x9c/0xc0 [] do_exit+0x24c/0x8a0 [] do_group_exit+0x38/0xa0 [] __wake_up_parent+0x0/0x28 [] el0_svc_naked+0x34/0x38 ---[ end trace bfe08cbd89098cdf ]--- Signed-off-by: Hermes Zhang Signed-off-by: Marcel Holtmann Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/hci_ldisc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/bluetooth/hci_ldisc.c b/drivers/bluetooth/hci_ldisc.c index c823914b3a80a..30bbe19b4b855 100644 --- a/drivers/bluetooth/hci_ldisc.c +++ b/drivers/bluetooth/hci_ldisc.c @@ -539,6 +539,8 @@ static void hci_uart_tty_close(struct tty_struct *tty) } clear_bit(HCI_UART_PROTO_SET, &hu->flags); + percpu_free_rwsem(&hu->proto_lock); + kfree(hu); } -- GitLab From 5d53f0d897c37b41a5b8cabc2ebae3bf4ce12eae Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Wed, 25 Apr 2018 07:29:22 -0700 Subject: [PATCH 0902/2269] mfd: omap-usb-host: Fix dts probe of children [ Upstream commit 10492ee8ed9188d6d420e1f79b2b9bdbc0624e65 ] It currently only works if the parent bus uses "simple-bus". We currently try to probe children with non-existing compatible values. And we're missing .probe. I noticed this while testing devices configured to probe using ti-sysc interconnect target module driver. For that we also may want to rebind the driver, so let's remove __init and __exit. Signed-off-by: Tony Lindgren Acked-by: Roger Quadros Signed-off-by: Lee Jones Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/mfd/omap-usb-host.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/mfd/omap-usb-host.c b/drivers/mfd/omap-usb-host.c index 7aab376ecb848..3785c638d5308 100644 --- a/drivers/mfd/omap-usb-host.c +++ b/drivers/mfd/omap-usb-host.c @@ -548,8 +548,8 @@ static int usbhs_omap_get_dt_pdata(struct device *dev, } static const struct of_device_id usbhs_child_match_table[] = { - { .compatible = "ti,omap-ehci", }, - { .compatible = "ti,omap-ohci", }, + { .compatible = "ti,ehci-omap", }, + { .compatible = "ti,ohci-omap3", }, { } }; @@ -875,6 +875,7 @@ static struct platform_driver usbhs_omap_driver = { .pm = &usbhsomap_dev_pm_ops, .of_match_table = usbhs_omap_dt_ids, }, + .probe = usbhs_omap_probe, .remove = usbhs_omap_remove, }; @@ -884,9 +885,9 @@ MODULE_ALIAS("platform:" USBHS_DRIVER_NAME); MODULE_LICENSE("GPL v2"); MODULE_DESCRIPTION("usb host common core driver for omap EHCI and OHCI"); -static int __init omap_usbhs_drvinit(void) +static int omap_usbhs_drvinit(void) { - return platform_driver_probe(&usbhs_omap_driver, usbhs_omap_probe); + return platform_driver_register(&usbhs_omap_driver); } /* @@ -898,7 +899,7 @@ static int __init omap_usbhs_drvinit(void) */ fs_initcall_sync(omap_usbhs_drvinit); -static void __exit omap_usbhs_drvexit(void) +static void omap_usbhs_drvexit(void) { platform_driver_unregister(&usbhs_omap_driver); } -- GitLab From 254cc00e53d7621d1d3b138db989bf47879e9ba9 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Tue, 4 Sep 2018 11:47:40 -0700 Subject: [PATCH 0903/2269] scsi: iscsi: target: Don't use stack buffer for scatterlist [ Upstream commit 679fcae46c8b2352bba3485d521da070cfbe68e6 ] Fedora got a bug report of a crash with iSCSI: kernel BUG at include/linux/scatterlist.h:143! ... RIP: 0010:iscsit_do_crypto_hash_buf+0x154/0x180 [iscsi_target_mod] ... Call Trace: ? iscsi_target_tx_thread+0x200/0x200 [iscsi_target_mod] iscsit_get_rx_pdu+0x4cd/0xa90 [iscsi_target_mod] ? native_sched_clock+0x3e/0xa0 ? iscsi_target_tx_thread+0x200/0x200 [iscsi_target_mod] iscsi_target_rx_thread+0x81/0xf0 [iscsi_target_mod] kthread+0x120/0x140 ? kthread_create_worker_on_cpu+0x70/0x70 ret_from_fork+0x3a/0x50 This is a BUG_ON for using a stack buffer with a scatterlist. There are two cases that trigger this bug. Switch to using a dynamically allocated buffer for one case and do not assign a NULL buffer in another case. Signed-off-by: Laura Abbott Reviewed-by: Mike Christie Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/target/iscsi/iscsi_target.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index 52fa52c20be05..d2cafdae8317d 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -1421,7 +1421,8 @@ static void iscsit_do_crypto_hash_buf( sg_init_table(sg, ARRAY_SIZE(sg)); sg_set_buf(sg, buf, payload_length); - sg_set_buf(sg + 1, pad_bytes, padding); + if (padding) + sg_set_buf(sg + 1, pad_bytes, padding); ahash_request_set_crypt(hash, sg, data_crc, payload_length + padding); @@ -3942,10 +3943,14 @@ static bool iscsi_target_check_conn_state(struct iscsi_conn *conn) static void iscsit_get_rx_pdu(struct iscsi_conn *conn) { int ret; - u8 buffer[ISCSI_HDR_LEN], opcode; + u8 *buffer, opcode; u32 checksum = 0, digest = 0; struct kvec iov; + buffer = kcalloc(ISCSI_HDR_LEN, sizeof(*buffer), GFP_KERNEL); + if (!buffer) + return; + while (!kthread_should_stop()) { /* * Ensure that both TX and RX per connection kthreads @@ -3953,7 +3958,6 @@ static void iscsit_get_rx_pdu(struct iscsi_conn *conn) */ iscsit_thread_check_cpumask(conn, current, 0); - memset(buffer, 0, ISCSI_HDR_LEN); memset(&iov, 0, sizeof(struct kvec)); iov.iov_base = buffer; @@ -3962,7 +3966,7 @@ static void iscsit_get_rx_pdu(struct iscsi_conn *conn) ret = rx_data(conn, &iov, 1, ISCSI_HDR_LEN); if (ret != ISCSI_HDR_LEN) { iscsit_rx_thread_wait_for_tcp(conn); - return; + break; } if (conn->conn_ops->HeaderDigest) { @@ -3972,7 +3976,7 @@ static void iscsit_get_rx_pdu(struct iscsi_conn *conn) ret = rx_data(conn, &iov, 1, ISCSI_CRC_LEN); if (ret != ISCSI_CRC_LEN) { iscsit_rx_thread_wait_for_tcp(conn); - return; + break; } iscsit_do_crypto_hash_buf(conn->conn_rx_hash, @@ -3996,7 +4000,7 @@ static void iscsit_get_rx_pdu(struct iscsi_conn *conn) } if (conn->conn_state == TARG_CONN_STATE_IN_LOGOUT) - return; + break; opcode = buffer[0] & ISCSI_OPCODE_MASK; @@ -4007,13 +4011,15 @@ static void iscsit_get_rx_pdu(struct iscsi_conn *conn) " while in Discovery Session, rejecting.\n", opcode); iscsit_add_reject(conn, ISCSI_REASON_PROTOCOL_ERROR, buffer); - return; + break; } ret = iscsi_target_rx_opcode(conn, buffer); if (ret < 0) - return; + break; } + + kfree(buffer); } int iscsi_target_rx_thread(void *arg) -- GitLab From a5733703e38c9a1b79ae6f086d3b427ae8b49e93 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 8 Sep 2018 11:42:27 +0300 Subject: [PATCH 0904/2269] scsi: qla2xxx: Fix an endian bug in fcpcmd_is_corrupted() [ Upstream commit cbe3fd39d223f14b1c60c80fe9347a3dd08c2edb ] We should first do the le16_to_cpu endian conversion and then apply the FCP_CMD_LENGTH_MASK mask. Fixes: 5f35509db179 ("qla2xxx: Terminate exchange if corrupted") Signed-off-by: Dan Carpenter Acked-by: Quinn Tran Acked-by: Himanshu Madhani Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/qla2xxx/qla_target.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_target.h b/drivers/scsi/qla2xxx/qla_target.h index aba58d3848a6e..511a31b359c7c 100644 --- a/drivers/scsi/qla2xxx/qla_target.h +++ b/drivers/scsi/qla2xxx/qla_target.h @@ -374,8 +374,8 @@ struct atio_from_isp { static inline int fcpcmd_is_corrupted(struct atio *atio) { if (atio->entry_type == ATIO_TYPE7 && - (le16_to_cpu(atio->attr_n_length & FCP_CMD_LENGTH_MASK) < - FCP_CMD_LENGTH_MIN)) + ((le16_to_cpu(atio->attr_n_length) & FCP_CMD_LENGTH_MASK) < + FCP_CMD_LENGTH_MIN)) return 1; else return 0; -- GitLab From 2af2b70c107b8a2e75e3c3351f3777e8cf26229c Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Tue, 11 Sep 2018 15:14:04 -0600 Subject: [PATCH 0905/2269] sound: enable interrupt after dma buffer initialization [ Upstream commit b61749a89f826eb61fc59794d9e4697bd246eb61 ] In snd_hdac_bus_init_chip(), we enable interrupt before snd_hdac_bus_init_cmd_io() initializing dma buffers. If irq has been acquired and irq handler uses the dma buffer, kernel may crash when interrupt comes in. Fix the problem by postponing enabling irq after dma buffer initialization. And warn once on null dma buffer pointer during the initialization. Reviewed-by: Takashi Iwai Signed-off-by: Yu Zhao Signed-off-by: Mark Brown Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- sound/hda/hdac_controller.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/sound/hda/hdac_controller.c b/sound/hda/hdac_controller.c index f6d2985b2520c..6da6f3b8a623d 100644 --- a/sound/hda/hdac_controller.c +++ b/sound/hda/hdac_controller.c @@ -40,6 +40,8 @@ static void azx_clear_corbrp(struct hdac_bus *bus) */ void snd_hdac_bus_init_cmd_io(struct hdac_bus *bus) { + WARN_ON_ONCE(!bus->rb.area); + spin_lock_irq(&bus->reg_lock); /* CORB set up */ bus->corb.addr = bus->rb.addr; @@ -478,13 +480,15 @@ bool snd_hdac_bus_init_chip(struct hdac_bus *bus, bool full_reset) /* reset controller */ azx_reset(bus, full_reset); - /* initialize interrupts */ + /* clear interrupts */ azx_int_clear(bus); - azx_int_enable(bus); /* initialize the codec command I/O */ snd_hdac_bus_init_cmd_io(bus); + /* enable interrupts after CORB/RIRB buffers are initialized above */ + azx_int_enable(bus); + /* program the position buffer */ if (bus->use_posbuf && bus->posbuf.addr) { snd_hdac_chip_writel(bus, DPLBASE, (u32)bus->posbuf.addr); -- GitLab From 37ca1cc8d4c021c8fe79e070ca7bc2fd26ed1530 Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Tue, 11 Sep 2018 15:15:16 -0600 Subject: [PATCH 0906/2269] sound: don't call skl_init_chip() to reset intel skl soc [ Upstream commit 75383f8d39d4c0fb96083dd460b7b139fbdac492 ] Internally, skl_init_chip() calls snd_hdac_bus_init_chip() which 1) sets bus->chip_init to prevent multiple entrances before device is stopped; 2) enables interrupt. We shouldn't use it for the purpose of resetting device only because 1) when we really want to initialize device, we won't be able to do so; 2) we are ready to handle interrupt yet, and kernel crashes when interrupt comes in. Rename azx_reset() to snd_hdac_bus_reset_link(), and use it to reset device properly. Fixes: 60767abcea3d ("ASoC: Intel: Skylake: Reset the controller in probe") Reviewed-by: Takashi Iwai Signed-off-by: Yu Zhao Signed-off-by: Mark Brown Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- include/sound/hdaudio.h | 1 + sound/hda/hdac_controller.c | 7 ++++--- sound/soc/intel/skylake/skl.c | 2 +- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/include/sound/hdaudio.h b/include/sound/hdaudio.h index d8afd8a5bd76f..926ea701cdc42 100644 --- a/include/sound/hdaudio.h +++ b/include/sound/hdaudio.h @@ -357,6 +357,7 @@ void snd_hdac_bus_init_cmd_io(struct hdac_bus *bus); void snd_hdac_bus_stop_cmd_io(struct hdac_bus *bus); void snd_hdac_bus_enter_link_reset(struct hdac_bus *bus); void snd_hdac_bus_exit_link_reset(struct hdac_bus *bus); +int snd_hdac_bus_reset_link(struct hdac_bus *bus, bool full_reset); void snd_hdac_bus_update_rirb(struct hdac_bus *bus); int snd_hdac_bus_handle_stream_irq(struct hdac_bus *bus, unsigned int status, diff --git a/sound/hda/hdac_controller.c b/sound/hda/hdac_controller.c index 6da6f3b8a623d..778b42ba90b80 100644 --- a/sound/hda/hdac_controller.c +++ b/sound/hda/hdac_controller.c @@ -384,7 +384,7 @@ void snd_hdac_bus_exit_link_reset(struct hdac_bus *bus) EXPORT_SYMBOL_GPL(snd_hdac_bus_exit_link_reset); /* reset codec link */ -static int azx_reset(struct hdac_bus *bus, bool full_reset) +int snd_hdac_bus_reset_link(struct hdac_bus *bus, bool full_reset) { if (!full_reset) goto skip_reset; @@ -409,7 +409,7 @@ static int azx_reset(struct hdac_bus *bus, bool full_reset) skip_reset: /* check to see if controller is ready */ if (!snd_hdac_chip_readb(bus, GCTL)) { - dev_dbg(bus->dev, "azx_reset: controller not ready!\n"); + dev_dbg(bus->dev, "controller not ready!\n"); return -EBUSY; } @@ -424,6 +424,7 @@ static int azx_reset(struct hdac_bus *bus, bool full_reset) return 0; } +EXPORT_SYMBOL_GPL(snd_hdac_bus_reset_link); /* enable interrupts */ static void azx_int_enable(struct hdac_bus *bus) @@ -478,7 +479,7 @@ bool snd_hdac_bus_init_chip(struct hdac_bus *bus, bool full_reset) return false; /* reset controller */ - azx_reset(bus, full_reset); + snd_hdac_bus_reset_link(bus, full_reset); /* clear interrupts */ azx_int_clear(bus); diff --git a/sound/soc/intel/skylake/skl.c b/sound/soc/intel/skylake/skl.c index f94b484abb992..a0bef63b8fb11 100644 --- a/sound/soc/intel/skylake/skl.c +++ b/sound/soc/intel/skylake/skl.c @@ -698,7 +698,7 @@ static int skl_first_init(struct hdac_ext_bus *ebus) return -ENXIO; } - skl_init_chip(bus, true); + snd_hdac_bus_reset_link(bus, true); snd_hdac_bus_parse_capabilities(bus); -- GitLab From 1826e55625164ab51a21e46eacac18b43899b65f Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 13 Sep 2018 08:03:43 -0700 Subject: [PATCH 0907/2269] hv_netvsc: fix schedule in RCU context [ Upstream commit 018349d70f28a78d5343b3660cb66e1667005f8a ] When netvsc device is removed it can call reschedule in RCU context. This happens because canceling the subchannel setup work could (in theory) cause a reschedule when manipulating the timer. To reproduce, run with lockdep enabled kernel and unbind a network device from hv_netvsc (via sysfs). [ 160.682011] WARNING: suspicious RCU usage [ 160.707466] 4.19.0-rc3-uio+ #2 Not tainted [ 160.709937] ----------------------------- [ 160.712352] ./include/linux/rcupdate.h:302 Illegal context switch in RCU read-side critical section! [ 160.723691] [ 160.723691] other info that might help us debug this: [ 160.723691] [ 160.730955] [ 160.730955] rcu_scheduler_active = 2, debug_locks = 1 [ 160.762813] 5 locks held by rebind-eth.sh/1812: [ 160.766851] #0: 000000008befa37a (sb_writers#6){.+.+}, at: vfs_write+0x184/0x1b0 [ 160.773416] #1: 00000000b097f236 (&of->mutex){+.+.}, at: kernfs_fop_write+0xe2/0x1a0 [ 160.783766] #2: 0000000041ee6889 (kn->count#3){++++}, at: kernfs_fop_write+0xeb/0x1a0 [ 160.787465] #3: 0000000056d92a74 (&dev->mutex){....}, at: device_release_driver_internal+0x39/0x250 [ 160.816987] #4: 0000000030f6031e (rcu_read_lock){....}, at: netvsc_remove+0x1e/0x250 [hv_netvsc] [ 160.828629] [ 160.828629] stack backtrace: [ 160.831966] CPU: 1 PID: 1812 Comm: rebind-eth.sh Not tainted 4.19.0-rc3-uio+ #2 [ 160.832952] Hardware name: Microsoft Corporation Virtual Machine/Virtual Machine, BIOS Hyper-V UEFI Release v1.0 11/26/2012 [ 160.832952] Call Trace: [ 160.832952] dump_stack+0x85/0xcb [ 160.832952] ___might_sleep+0x1a3/0x240 [ 160.832952] __flush_work+0x57/0x2e0 [ 160.832952] ? __mutex_lock+0x83/0x990 [ 160.832952] ? __kernfs_remove+0x24f/0x2e0 [ 160.832952] ? __kernfs_remove+0x1b2/0x2e0 [ 160.832952] ? mark_held_locks+0x50/0x80 [ 160.832952] ? get_work_pool+0x90/0x90 [ 160.832952] __cancel_work_timer+0x13c/0x1e0 [ 160.832952] ? netvsc_remove+0x1e/0x250 [hv_netvsc] [ 160.832952] ? __lock_is_held+0x55/0x90 [ 160.832952] netvsc_remove+0x9a/0x250 [hv_netvsc] [ 160.832952] vmbus_remove+0x26/0x30 [ 160.832952] device_release_driver_internal+0x18a/0x250 [ 160.832952] unbind_store+0xb4/0x180 [ 160.832952] kernfs_fop_write+0x113/0x1a0 [ 160.832952] __vfs_write+0x36/0x1a0 [ 160.832952] ? rcu_read_lock_sched_held+0x6b/0x80 [ 160.832952] ? rcu_sync_lockdep_assert+0x2e/0x60 [ 160.832952] ? __sb_start_write+0x141/0x1a0 [ 160.832952] ? vfs_write+0x184/0x1b0 [ 160.832952] vfs_write+0xbe/0x1b0 [ 160.832952] ksys_write+0x55/0xc0 [ 160.832952] do_syscall_64+0x60/0x1b0 [ 160.832952] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 160.832952] RIP: 0033:0x7fe48f4c8154 Resolve this by getting RTNL earlier. This is safe because the subchannel work queue does trylock on RTNL and will detect the race. Fixes: 7b2ee50c0cd5 ("hv_netvsc: common detach logic") Signed-off-by: Stephen Hemminger Reviewed-by: Haiyang Zhang Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/hyperv/netvsc_drv.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index aba16d81e9bba..2d90cffae9ff0 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -2110,17 +2110,15 @@ static int netvsc_remove(struct hv_device *dev) cancel_delayed_work_sync(&ndev_ctx->dwork); - rcu_read_lock(); - nvdev = rcu_dereference(ndev_ctx->nvdev); - - if (nvdev) + rtnl_lock(); + nvdev = rtnl_dereference(ndev_ctx->nvdev); + if (nvdev) cancel_work_sync(&nvdev->subchan_work); /* * Call to the vsc driver to let it know that the device is being * removed. Also blocks mtu and channel changes. */ - rtnl_lock(); vf_netdev = rtnl_dereference(ndev_ctx->vf_netdev); if (vf_netdev) netvsc_unregister_vf(vf_netdev); @@ -2132,7 +2130,6 @@ static int netvsc_remove(struct hv_device *dev) list_del(&ndev_ctx->list); rtnl_unlock(); - rcu_read_unlock(); hv_set_drvdata(dev, NULL); -- GitLab From 3265bda5bd9f1891f29c741a5fae3db73a882fc7 Mon Sep 17 00:00:00 2001 From: Jongsung Kim Date: Thu, 13 Sep 2018 18:32:21 +0900 Subject: [PATCH 0908/2269] stmmac: fix valid numbers of unicast filter entries [ Upstream commit edf2ef7242805e53ec2e0841db26e06d8bc7da70 ] Synopsys DWC Ethernet MAC can be configured to have 1..32, 64, or 128 unicast filter entries. (Table 7-8 MAC Address Registers from databook) Fix dwmac1000_validate_ucast_entries() to accept values between 1 and 32 in addition. Signed-off-by: Jongsung Kim Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index 195eb7e71473d..d48cc32dc5073 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -67,7 +67,7 @@ static int dwmac1000_validate_mcast_bins(int mcast_bins) * Description: * This function validates the number of Unicast address entries supported * by a particular Synopsys 10/100/1000 controller. The Synopsys controller - * supports 1, 32, 64, or 128 Unicast filter entries for it's Unicast filter + * supports 1..32, 64, or 128 Unicast filter entries for it's Unicast filter * logic. This function validates a valid, supported configuration is * selected, and defaults to 1 Unicast address if an unsupported * configuration is selected. @@ -77,8 +77,7 @@ static int dwmac1000_validate_ucast_entries(int ucast_entries) int x = ucast_entries; switch (x) { - case 1: - case 32: + case 1 ... 32: case 64: case 128: break; -- GitLab From c77295d6fa1c93a344dea79e9990351fc9cdffab Mon Sep 17 00:00:00 2001 From: Nicolas Ferre Date: Fri, 14 Sep 2018 17:48:10 +0200 Subject: [PATCH 0909/2269] net: macb: disable scatter-gather for macb on sama5d3 [ Upstream commit eb4ed8e2d7fecb5f40db38e4498b9ee23cddf196 ] Create a new configuration for the sama5d3-macb new compatibility string. This configuration disables scatter-gather because we experienced lock down of the macb interface of this particular SoC under very high load. Signed-off-by: Nicolas Ferre Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/cadence/macb_main.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index c1787be6a2582..b4f92de1efbd1 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -3301,6 +3301,13 @@ static const struct macb_config at91sam9260_config = { .init = macb_init, }; +static const struct macb_config sama5d3macb_config = { + .caps = MACB_CAPS_SG_DISABLED + | MACB_CAPS_USRIO_HAS_CLKEN | MACB_CAPS_USRIO_DEFAULT_IS_MII_GMII, + .clk_init = macb_clk_init, + .init = macb_init, +}; + static const struct macb_config pc302gem_config = { .caps = MACB_CAPS_SG_DISABLED | MACB_CAPS_GIGABIT_MODE_AVAILABLE, .dma_burst_length = 16, @@ -3368,6 +3375,7 @@ static const struct of_device_id macb_dt_ids[] = { { .compatible = "cdns,gem", .data = &pc302gem_config }, { .compatible = "atmel,sama5d2-gem", .data = &sama5d2_config }, { .compatible = "atmel,sama5d3-gem", .data = &sama5d3_config }, + { .compatible = "atmel,sama5d3-macb", .data = &sama5d3macb_config }, { .compatible = "atmel,sama5d4-gem", .data = &sama5d4_config }, { .compatible = "cdns,at91rm9200-emac", .data = &emac_config }, { .compatible = "cdns,emac", .data = &emac_config }, -- GitLab From 18918ed70db940b22aad40068a8e20749e5fee74 Mon Sep 17 00:00:00 2001 From: Nicolas Ferre Date: Fri, 14 Sep 2018 17:48:11 +0200 Subject: [PATCH 0910/2269] ARM: dts: at91: add new compatibility string for macb on sama5d3 [ Upstream commit 321cc359d899a8e988f3725d87c18a628e1cc624 ] We need this new compatibility string as we experienced different behavior for this 10/100Mbits/s macb interface on this particular SoC. Backward compatibility is preserved as we keep the alternative strings. Signed-off-by: Nicolas Ferre Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- Documentation/devicetree/bindings/net/macb.txt | 1 + arch/arm/boot/dts/sama5d3_emac.dtsi | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/net/macb.txt b/Documentation/devicetree/bindings/net/macb.txt index 27966ae741e09..141d8c1f714f1 100644 --- a/Documentation/devicetree/bindings/net/macb.txt +++ b/Documentation/devicetree/bindings/net/macb.txt @@ -10,6 +10,7 @@ Required properties: Use "cdns,pc302-gem" for Picochip picoXcell pc302 and later devices based on the Cadence GEM, or the generic form: "cdns,gem". Use "atmel,sama5d2-gem" for the GEM IP (10/100) available on Atmel sama5d2 SoCs. + Use "atmel,sama5d3-macb" for the 10/100Mbit IP available on Atmel sama5d3 SoCs. Use "atmel,sama5d3-gem" for the Gigabit IP available on Atmel sama5d3 SoCs. Use "atmel,sama5d4-gem" for the GEM IP (10/100) available on Atmel sama5d4 SoCs. Use "cdns,zynq-gem" Xilinx Zynq-7xxx SoC. diff --git a/arch/arm/boot/dts/sama5d3_emac.dtsi b/arch/arm/boot/dts/sama5d3_emac.dtsi index 7cb235ef0fb6d..6e9e1c2f9def9 100644 --- a/arch/arm/boot/dts/sama5d3_emac.dtsi +++ b/arch/arm/boot/dts/sama5d3_emac.dtsi @@ -41,7 +41,7 @@ }; macb1: ethernet@f802c000 { - compatible = "cdns,at91sam9260-macb", "cdns,macb"; + compatible = "atmel,sama5d3-macb", "cdns,at91sam9260-macb", "cdns,macb"; reg = <0xf802c000 0x100>; interrupts = <35 IRQ_TYPE_LEVEL_HIGH 3>; pinctrl-names = "default"; -- GitLab From cac34c122cf389f9537d20bada82ecf4939d9c4e Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Fri, 14 Sep 2018 12:54:56 -0700 Subject: [PATCH 0911/2269] PCI: hv: support reporting serial number as slot information [ Upstream commit a15f2c08c70811f120d99288d81f70d7f3d104f1 ] The Hyper-V host API for PCI provides a unique "serial number" which can be used as basis for sysfs PCI slot table. This can be useful for cases where userspace wants to find the PCI device based on serial number. When an SR-IOV NIC is added, the host sends an attach message with serial number. The kernel doesn't use the serial number, but it is useful when doing the same thing in a userspace driver such as the DPDK. By having /sys/bus/pci/slots/N it provides a direct way to find the matching PCI device. There maybe some cases where serial number is not unique such as when using GPU's. But the PCI slot infrastructure will handle that. This has a side effect which may also be useful. The common udev network device naming policy uses the slot information (rather than PCI address). Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/pci/host/pci-hyperv.c | 37 +++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/drivers/pci/host/pci-hyperv.c b/drivers/pci/host/pci-hyperv.c index ffc87a956d974..53d1c08cef4dc 100644 --- a/drivers/pci/host/pci-hyperv.c +++ b/drivers/pci/host/pci-hyperv.c @@ -100,6 +100,9 @@ static enum pci_protocol_version_t pci_protocol_version; #define STATUS_REVISION_MISMATCH 0xC0000059 +/* space for 32bit serial number as string */ +#define SLOT_NAME_SIZE 11 + /* * Message Types */ @@ -516,6 +519,7 @@ struct hv_pci_dev { struct list_head list_entry; refcount_t refs; enum hv_pcichild_state state; + struct pci_slot *pci_slot; struct pci_function_description desc; bool reported_missing; struct hv_pcibus_device *hbus; @@ -1481,6 +1485,34 @@ static void prepopulate_bars(struct hv_pcibus_device *hbus) spin_unlock_irqrestore(&hbus->device_list_lock, flags); } +/* + * Assign entries in sysfs pci slot directory. + * + * Note that this function does not need to lock the children list + * because it is called from pci_devices_present_work which + * is serialized with hv_eject_device_work because they are on the + * same ordered workqueue. Therefore hbus->children list will not change + * even when pci_create_slot sleeps. + */ +static void hv_pci_assign_slots(struct hv_pcibus_device *hbus) +{ + struct hv_pci_dev *hpdev; + char name[SLOT_NAME_SIZE]; + int slot_nr; + + list_for_each_entry(hpdev, &hbus->children, list_entry) { + if (hpdev->pci_slot) + continue; + + slot_nr = PCI_SLOT(wslot_to_devfn(hpdev->desc.win_slot.slot)); + snprintf(name, SLOT_NAME_SIZE, "%u", hpdev->desc.ser); + hpdev->pci_slot = pci_create_slot(hbus->pci_bus, slot_nr, + name, NULL); + if (!hpdev->pci_slot) + pr_warn("pci_create slot %s failed\n", name); + } +} + /** * create_root_hv_pci_bus() - Expose a new root PCI bus * @hbus: Root PCI bus, as understood by this driver @@ -1504,6 +1536,7 @@ static int create_root_hv_pci_bus(struct hv_pcibus_device *hbus) pci_lock_rescan_remove(); pci_scan_child_bus(hbus->pci_bus); pci_bus_assign_resources(hbus->pci_bus); + hv_pci_assign_slots(hbus); pci_bus_add_devices(hbus->pci_bus); pci_unlock_rescan_remove(); hbus->state = hv_pcibus_installed; @@ -1787,6 +1820,7 @@ static void pci_devices_present_work(struct work_struct *work) */ pci_lock_rescan_remove(); pci_scan_child_bus(hbus->pci_bus); + hv_pci_assign_slots(hbus); pci_unlock_rescan_remove(); break; @@ -1895,6 +1929,9 @@ static void hv_eject_device_work(struct work_struct *work) list_del(&hpdev->list_entry); spin_unlock_irqrestore(&hpdev->hbus->device_list_lock, flags); + if (hpdev->pci_slot) + pci_destroy_slot(hpdev->pci_slot); + memset(&ctxt, 0, sizeof(ctxt)); ejct_pkt = (struct pci_eject_response *)&ctxt.pkt.message; ejct_pkt->message_type.type = PCI_EJECTION_COMPLETE; -- GitLab From ba54417f8d01b44907464777e5280eb2216d2d2f Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 12 Sep 2018 11:34:54 +0200 Subject: [PATCH 0912/2269] clk: x86: add "ether_clk" alias for Bay Trail / Cherry Trail [ Upstream commit b1e3454d39f992e5409cd19f97782185950df6e7 ] Commit d31fd43c0f9a ("clk: x86: Do not gate clocks enabled by the firmware") causes all unclaimed PMC clocks on Cherry Trail devices to be on all the time, resulting on the device not being able to reach S0i2 or S0i3 when suspended. The reason for this commit is that on some Bay Trail / Cherry Trail devices the ethernet controller uses pmc_plt_clk_4. This commit adds an "ether_clk" alias, so that the relevant ethernet drivers can try to (optionally) use this, without needing X86 specific code / hacks, thus fixing ethernet on these devices without breaking S0i3 support. This commit uses clkdev_hw_create() to create the alias, mirroring the code for the already existing "mclk" alias for pmc_plt_clk_3. Buglink: https://bugzilla.kernel.org/show_bug.cgi?id=193891#c102 Buglink: https://bugzilla.kernel.org/show_bug.cgi?id=196861 Cc: Johannes Stezenbach Cc: Carlo Caione Reported-by: Johannes Stezenbach Acked-by: Stephen Boyd Reviewed-by: Andy Shevchenko Signed-off-by: Hans de Goede Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/clk/x86/clk-pmc-atom.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/clk/x86/clk-pmc-atom.c b/drivers/clk/x86/clk-pmc-atom.c index 08ef69945ffbf..75151901ff7df 100644 --- a/drivers/clk/x86/clk-pmc-atom.c +++ b/drivers/clk/x86/clk-pmc-atom.c @@ -55,6 +55,7 @@ struct clk_plt_data { u8 nparents; struct clk_plt *clks[PMC_CLK_NUM]; struct clk_lookup *mclk_lookup; + struct clk_lookup *ether_clk_lookup; }; /* Return an index in parent table */ @@ -351,11 +352,20 @@ static int plt_clk_probe(struct platform_device *pdev) goto err_unreg_clk_plt; } + data->ether_clk_lookup = clkdev_hw_create(&data->clks[4]->hw, + "ether_clk", NULL); + if (!data->ether_clk_lookup) { + err = -ENOMEM; + goto err_drop_mclk; + } + plt_clk_free_parent_names_loop(parent_names, data->nparents); platform_set_drvdata(pdev, data); return 0; +err_drop_mclk: + clkdev_drop(data->mclk_lookup); err_unreg_clk_plt: plt_clk_unregister_loop(data, i); plt_clk_unregister_parents(data); @@ -369,6 +379,7 @@ static int plt_clk_remove(struct platform_device *pdev) data = platform_get_drvdata(pdev); + clkdev_drop(data->ether_clk_lookup); clkdev_drop(data->mclk_lookup); plt_clk_unregister_loop(data, PMC_CLK_NUM); plt_clk_unregister_parents(data); -- GitLab From 88659387b9d509ee4bb8f0b0fe9ce2ff00988b46 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 12 Sep 2018 11:34:56 +0200 Subject: [PATCH 0913/2269] clk: x86: Stop marking clocks as CLK_IS_CRITICAL [ Upstream commit 648e921888ad96ea3dc922739e96716ad3225d7f ] Commit d31fd43c0f9a ("clk: x86: Do not gate clocks enabled by the firmware"), which added the code to mark clocks as CLK_IS_CRITICAL, causes all unclaimed PMC clocks on Cherry Trail devices to be on all the time, resulting on the device not being able to reach S0i3 when suspended. The reason for this commit is that on some Bay Trail / Cherry Trail devices the r8169 ethernet controller uses pmc_plt_clk_4. Now that the clk-pmc-atom driver exports an "ether_clk" alias for pmc_plt_clk_4 and the r8169 driver has been modified to get and enable this clock (if present) the marking of the clocks as CLK_IS_CRITICAL is no longer necessary. This commit removes the CLK_IS_CRITICAL marking, fixing Cherry Trail devices not being able to reach S0i3 greatly decreasing their battery drain when suspended. Buglink: https://bugzilla.kernel.org/show_bug.cgi?id=193891#c102 Buglink: https://bugzilla.kernel.org/show_bug.cgi?id=196861 Cc: Johannes Stezenbach Cc: Carlo Caione Reported-by: Johannes Stezenbach Reviewed-by: Andy Shevchenko Acked-by: Stephen Boyd Signed-off-by: Hans de Goede Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/clk/x86/clk-pmc-atom.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/clk/x86/clk-pmc-atom.c b/drivers/clk/x86/clk-pmc-atom.c index 75151901ff7df..d977193842dfe 100644 --- a/drivers/clk/x86/clk-pmc-atom.c +++ b/drivers/clk/x86/clk-pmc-atom.c @@ -187,13 +187,6 @@ static struct clk_plt *plt_clk_register(struct platform_device *pdev, int id, pclk->reg = base + PMC_CLK_CTL_OFFSET + id * PMC_CLK_CTL_SIZE; spin_lock_init(&pclk->lock); - /* - * If the clock was already enabled by the firmware mark it as critical - * to avoid it being gated by the clock framework if no driver owns it. - */ - if (plt_clk_is_enabled(&pclk->hw)) - init.flags |= CLK_IS_CRITICAL; - ret = devm_clk_hw_register(&pdev->dev, &pclk->hw); if (ret) { pclk = ERR_PTR(ret); -- GitLab From e4865b46e195e403915e6b630f187f80206b243d Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Thu, 2 Aug 2018 17:08:16 +0200 Subject: [PATCH 0914/2269] x86/kvm/lapic: always disable MMIO interface in x2APIC mode [ Upstream commit d1766202779e81d0f2a94c4650a6ba31497d369d ] When VMX is used with flexpriority disabled (because of no support or if disabled with module parameter) MMIO interface to lAPIC is still available in x2APIC mode while it shouldn't be (kvm-unit-tests): PASS: apic_disable: Local apic enabled in x2APIC mode PASS: apic_disable: CPUID.1H:EDX.APIC[bit 9] is set FAIL: apic_disable: *0xfee00030: 50014 The issue appears because we basically do nothing while switching to x2APIC mode when APIC access page is not used. apic_mmio_{read,write} only check if lAPIC is disabled before proceeding to actual write. When APIC access is virtualized we correctly manipulate with VMX controls in vmx_set_virtual_apic_mode() and we don't get vmexits from memory writes in x2APIC mode so there's no issue. Disabling MMIO interface seems to be easy. The question is: what do we do with these reads and writes? If we add apic_x2apic_mode() check to apic_mmio_in_range() and return -EOPNOTSUPP these reads and writes will go to userspace. When lAPIC is in kernel, Qemu uses this interface to inject MSIs only (see kvm_apic_mem_write() in hw/i386/kvm/apic.c). This somehow works with disabled lAPIC but when we're in xAPIC mode we will get a real injected MSI from every write to lAPIC. Not good. The simplest solution seems to be to just ignore writes to the region and return ~0 for all reads when we're in x2APIC mode. This is what this patch does. However, this approach is inconsistent with what currently happens when flexpriority is enabled: we allocate APIC access page and create KVM memory region so in x2APIC modes all reads and writes go to this pre-allocated page which is, btw, the same for all vCPUs. Signed-off-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/uapi/asm/kvm.h | 1 + arch/x86/kvm/lapic.c | 22 +++++++++++++++++++--- 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h index f3a960488eae0..dcf4dc9bf3278 100644 --- a/arch/x86/include/uapi/asm/kvm.h +++ b/arch/x86/include/uapi/asm/kvm.h @@ -360,5 +360,6 @@ struct kvm_sync_regs { #define KVM_X86_QUIRK_LINT0_REENABLED (1 << 0) #define KVM_X86_QUIRK_CD_NW_CLEARED (1 << 1) +#define KVM_X86_QUIRK_LAPIC_MMIO_HOLE (1 << 2) #endif /* _ASM_X86_KVM_H */ diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 6d0fbff71d7a5..13dfb55b84db5 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1282,9 +1282,8 @@ EXPORT_SYMBOL_GPL(kvm_lapic_reg_read); static int apic_mmio_in_range(struct kvm_lapic *apic, gpa_t addr) { - return kvm_apic_hw_enabled(apic) && - addr >= apic->base_address && - addr < apic->base_address + LAPIC_MMIO_LENGTH; + return addr >= apic->base_address && + addr < apic->base_address + LAPIC_MMIO_LENGTH; } static int apic_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *this, @@ -1296,6 +1295,15 @@ static int apic_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *this, if (!apic_mmio_in_range(apic, address)) return -EOPNOTSUPP; + if (!kvm_apic_hw_enabled(apic) || apic_x2apic_mode(apic)) { + if (!kvm_check_has_quirk(vcpu->kvm, + KVM_X86_QUIRK_LAPIC_MMIO_HOLE)) + return -EOPNOTSUPP; + + memset(data, 0xff, len); + return 0; + } + kvm_lapic_reg_read(apic, offset, len, data); return 0; @@ -1806,6 +1814,14 @@ static int apic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this, if (!apic_mmio_in_range(apic, address)) return -EOPNOTSUPP; + if (!kvm_apic_hw_enabled(apic) || apic_x2apic_mode(apic)) { + if (!kvm_check_has_quirk(vcpu->kvm, + KVM_X86_QUIRK_LAPIC_MMIO_HOLE)) + return -EOPNOTSUPP; + + return 0; + } + /* * APIC register must be aligned on 128-bits boundary. * 32/64/128 bits registers must be accessed thru 32 bits. -- GitLab From 059726864271e14a6fbc5f47bb15098a7e1d1d89 Mon Sep 17 00:00:00 2001 From: Amber Lin Date: Wed, 12 Sep 2018 21:42:18 -0400 Subject: [PATCH 0915/2269] drm/amdgpu: Fix SDMA HQD destroy error on gfx_v7 [ Upstream commit caaa4c8a6be2a275bd14f2369ee364978ff74704 ] A wrong register bit was examinated for checking SDMA status so it reports false failures. This typo only appears on gfx_v7. gfx_v8 checks the correct bit. Acked-by: Alex Deucher Signed-off-by: Amber Lin Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c index bdabaa3399db2..e2c0ff03f3860 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c @@ -576,7 +576,7 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, while (true) { temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS); - if (temp & SDMA0_STATUS_REG__RB_CMD_IDLE__SHIFT) + if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK) break; if (timeout <= 0) return -ETIME; -- GitLab From 8676e0b4a28fa12200ec742e7d8250b11d52916f Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Fri, 5 Oct 2018 15:52:03 -0700 Subject: [PATCH 0916/2269] mm/vmstat.c: fix outdated vmstat_text commit 28e2c4bb99aa40f9d5f07ac130cbc4da0ea93079 upstream. 7a9cdebdcc17 ("mm: get rid of vmacache_flush_all() entirely") removed the VMACACHE_FULL_FLUSHES statistics, but didn't remove the corresponding entry in vmstat_text. This causes an out-of-bounds access in vmstat_show(). Luckily this only affects kernels with CONFIG_DEBUG_VM_VMACACHE=y, which is probably very rare. Link: http://lkml.kernel.org/r/20181001143138.95119-1-jannh@google.com Fixes: 7a9cdebdcc17 ("mm: get rid of vmacache_flush_all() entirely") Signed-off-by: Jann Horn Reviewed-by: Kees Cook Reviewed-by: Andrew Morton Acked-by: Michal Hocko Acked-by: Roman Gushchin Cc: Davidlohr Bueso Cc: Oleg Nesterov Cc: Christoph Lameter Cc: Kemi Wang Cc: Andy Lutomirski Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- mm/vmstat.c | 1 - 1 file changed, 1 deletion(-) diff --git a/mm/vmstat.c b/mm/vmstat.c index 2bdc962b2dfe9..50b4c11fb6121 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1214,7 +1214,6 @@ const char * const vmstat_text[] = { #ifdef CONFIG_DEBUG_VM_VMACACHE "vmacache_find_calls", "vmacache_find_hits", - "vmacache_full_flushes", #endif #ifdef CONFIG_SWAP "swap_ra", -- GitLab From 86717a97f9218de798c6d33e0014ab9325e18f56 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Tue, 25 Sep 2018 15:51:26 -0700 Subject: [PATCH 0917/2269] MIPS: VDSO: Always map near top of user memory commit ea7e0480a4b695d0aa6b3fa99bd658a003122113 upstream. When using the legacy mmap layout, for example triggered using ulimit -s unlimited, get_unmapped_area() fills memory from bottom to top starting from a fairly low address near TASK_UNMAPPED_BASE. This placement is suboptimal if the user application wishes to allocate large amounts of heap memory using the brk syscall. With the VDSO being located low in the user's virtual address space, the amount of space available for access using brk is limited much more than it was prior to the introduction of the VDSO. For example: # ulimit -s unlimited; cat /proc/self/maps 00400000-004ec000 r-xp 00000000 08:00 71436 /usr/bin/coreutils 004fc000-004fd000 rwxp 000ec000 08:00 71436 /usr/bin/coreutils 004fd000-0050f000 rwxp 00000000 00:00 0 00cc3000-00ce4000 rwxp 00000000 00:00 0 [heap] 2ab96000-2ab98000 r--p 00000000 00:00 0 [vvar] 2ab98000-2ab99000 r-xp 00000000 00:00 0 [vdso] 2ab99000-2ab9d000 rwxp 00000000 00:00 0 ... Resolve this by adjusting STACK_TOP to reserve space for the VDSO & providing an address hint to get_unmapped_area() causing it to use this space even when using the legacy mmap layout. We reserve enough space for the VDSO, plus 1MB or 256MB for 32 bit & 64 bit systems respectively within which we randomize the VDSO base address. Previously this randomization was taken care of by the mmap base address randomization performed by arch_mmap_rnd(). The 1MB & 256MB sizes are somewhat arbitrary but chosen such that we have some randomization without taking up too much of the user's virtual address space, which is often in short supply for 32 bit systems. With this the VDSO is always mapped at a high address, leaving lots of space for statically linked programs to make use of brk: # ulimit -s unlimited; cat /proc/self/maps 00400000-004ec000 r-xp 00000000 08:00 71436 /usr/bin/coreutils 004fc000-004fd000 rwxp 000ec000 08:00 71436 /usr/bin/coreutils 004fd000-0050f000 rwxp 00000000 00:00 0 00c28000-00c49000 rwxp 00000000 00:00 0 [heap] ... 7f67c000-7f69d000 rwxp 00000000 00:00 0 [stack] 7f7fc000-7f7fd000 rwxp 00000000 00:00 0 7fcf1000-7fcf3000 r--p 00000000 00:00 0 [vvar] 7fcf3000-7fcf4000 r-xp 00000000 00:00 0 [vdso] Signed-off-by: Paul Burton Reported-by: Huacai Chen Fixes: ebb5e78cc634 ("MIPS: Initial implementation of a VDSO") Cc: Huacai Chen Cc: linux-mips@linux-mips.org Cc: stable@vger.kernel.org # v4.4+ Signed-off-by: Greg Kroah-Hartman --- arch/mips/include/asm/processor.h | 10 +++++----- arch/mips/kernel/process.c | 25 +++++++++++++++++++++++++ arch/mips/kernel/vdso.c | 18 +++++++++++++++++- 3 files changed, 47 insertions(+), 6 deletions(-) diff --git a/arch/mips/include/asm/processor.h b/arch/mips/include/asm/processor.h index eb1f6030ab853..8bbbab611a3f1 100644 --- a/arch/mips/include/asm/processor.h +++ b/arch/mips/include/asm/processor.h @@ -13,6 +13,7 @@ #include #include +#include #include #include @@ -80,11 +81,10 @@ extern unsigned int vced_count, vcei_count; #endif -/* - * One page above the stack is used for branch delay slot "emulation". - * See dsemul.c for details. - */ -#define STACK_TOP ((TASK_SIZE & PAGE_MASK) - PAGE_SIZE) +#define VDSO_RANDOMIZE_SIZE (TASK_IS_32BIT_ADDR ? SZ_1M : SZ_256M) + +extern unsigned long mips_stack_top(void); +#define STACK_TOP mips_stack_top() /* * This decides where the kernel will search for a free chunk of vm diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index e8d772a2597dc..e8b166e9146a3 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -31,6 +31,7 @@ #include #include +#include #include #include #include @@ -38,6 +39,7 @@ #include #include #include +#include #include #include #include @@ -644,6 +646,29 @@ out: return pc; } +unsigned long mips_stack_top(void) +{ + unsigned long top = TASK_SIZE & PAGE_MASK; + + /* One page for branch delay slot "emulation" */ + top -= PAGE_SIZE; + + /* Space for the VDSO, data page & GIC user page */ + top -= PAGE_ALIGN(current->thread.abi->vdso->size); + top -= PAGE_SIZE; + top -= mips_gic_present() ? PAGE_SIZE : 0; + + /* Space for cache colour alignment */ + if (cpu_has_dc_aliases) + top -= shm_align_mask + 1; + + /* Space to randomize the VDSO base */ + if (current->flags & PF_RANDOMIZE) + top -= VDSO_RANDOMIZE_SIZE; + + return top; +} + /* * Don't forget that the stack pointer must be aligned on a 8 bytes * boundary for 32-bits ABI and 16 bytes for 64-bits ABI. diff --git a/arch/mips/kernel/vdso.c b/arch/mips/kernel/vdso.c index 8f845f6e5f426..48a9c6b90e079 100644 --- a/arch/mips/kernel/vdso.c +++ b/arch/mips/kernel/vdso.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -97,6 +98,21 @@ void update_vsyscall_tz(void) } } +static unsigned long vdso_base(void) +{ + unsigned long base; + + /* Skip the delay slot emulation page */ + base = STACK_TOP + PAGE_SIZE; + + if (current->flags & PF_RANDOMIZE) { + base += get_random_int() & (VDSO_RANDOMIZE_SIZE - 1); + base = PAGE_ALIGN(base); + } + + return base; +} + int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) { struct mips_vdso_image *image = current->thread.abi->vdso; @@ -137,7 +153,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) if (cpu_has_dc_aliases) size += shm_align_mask + 1; - base = get_unmapped_area(NULL, 0, size, 0, 0); + base = get_unmapped_area(NULL, vdso_base(), size, 0, 0); if (IS_ERR_VALUE(base)) { ret = base; goto out; -- GitLab From 6c8f4babb57bc9ef0e59aa50d3157610029f01a7 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 17 Aug 2018 15:19:37 -0400 Subject: [PATCH 0918/2269] mach64: detect the dot clock divider correctly on sparc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 76ebebd2464c5c8a4453c98b6dbf9c95a599e810 upstream. On Sun Ultra 5, it happens that the dot clock is not set up properly for some videomodes. For example, if we set the videomode "r1024x768x60" in the firmware, Linux would incorrectly set a videomode with refresh rate 180Hz when booting (suprisingly, my LCD monitor can display it, although display quality is very low). The reason is this: Older mach64 cards set the divider in the register VCLK_POST_DIV. The register has four 2-bit fields (the field that is actually used is specified in the lowest two bits of the register CLOCK_CNTL). The 2 bits select divider "1, 2, 4, 8". On newer mach64 cards, there's another bit added - the top four bits of PLL_EXT_CNTL extend the divider selection, so we have possible dividers "1, 2, 4, 8, 3, 5, 6, 12". The Linux driver clears the top four bits of PLL_EXT_CNTL and never sets them, so it can work regardless if the card supports them. However, the sparc64 firmware may set these extended dividers during boot - and the mach64 driver detects incorrect dot clock in this case. This patch makes the driver read the additional divider bit from PLL_EXT_CNTL and calculate the initial refresh rate properly. Signed-off-by: Mikulas Patocka Cc: stable@vger.kernel.org Acked-by: David S. Miller Reviewed-by: Ville Syrjälä Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/video/fbdev/aty/atyfb.h | 3 ++- drivers/video/fbdev/aty/atyfb_base.c | 7 ++++--- drivers/video/fbdev/aty/mach64_ct.c | 10 +++++----- 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/drivers/video/fbdev/aty/atyfb.h b/drivers/video/fbdev/aty/atyfb.h index 8235b285dbb29..d09bab3bf2241 100644 --- a/drivers/video/fbdev/aty/atyfb.h +++ b/drivers/video/fbdev/aty/atyfb.h @@ -333,6 +333,8 @@ extern const struct aty_pll_ops aty_pll_ct; /* Integrated */ extern void aty_set_pll_ct(const struct fb_info *info, const union aty_pll *pll); extern u8 aty_ld_pll_ct(int offset, const struct atyfb_par *par); +extern const u8 aty_postdividers[8]; + /* * Hardware cursor support @@ -359,7 +361,6 @@ static inline void wait_for_idle(struct atyfb_par *par) extern void aty_reset_engine(const struct atyfb_par *par); extern void aty_init_engine(struct atyfb_par *par, struct fb_info *info); -extern u8 aty_ld_pll_ct(int offset, const struct atyfb_par *par); void atyfb_copyarea(struct fb_info *info, const struct fb_copyarea *area); void atyfb_fillrect(struct fb_info *info, const struct fb_fillrect *rect); diff --git a/drivers/video/fbdev/aty/atyfb_base.c b/drivers/video/fbdev/aty/atyfb_base.c index 3ec72f19114ba..d4b938276d238 100644 --- a/drivers/video/fbdev/aty/atyfb_base.c +++ b/drivers/video/fbdev/aty/atyfb_base.c @@ -3087,17 +3087,18 @@ static int atyfb_setup_sparc(struct pci_dev *pdev, struct fb_info *info, /* * PLL Reference Divider M: */ - M = pll_regs[2]; + M = pll_regs[PLL_REF_DIV]; /* * PLL Feedback Divider N (Dependent on CLOCK_CNTL): */ - N = pll_regs[7 + (clock_cntl & 3)]; + N = pll_regs[VCLK0_FB_DIV + (clock_cntl & 3)]; /* * PLL Post Divider P (Dependent on CLOCK_CNTL): */ - P = 1 << (pll_regs[6] >> ((clock_cntl & 3) << 1)); + P = aty_postdividers[((pll_regs[VCLK_POST_DIV] >> ((clock_cntl & 3) << 1)) & 3) | + ((pll_regs[PLL_EXT_CNTL] >> (2 + (clock_cntl & 3))) & 4)]; /* * PLL Divider Q: diff --git a/drivers/video/fbdev/aty/mach64_ct.c b/drivers/video/fbdev/aty/mach64_ct.c index 7d3bd723d3d58..d55f4bacb41c7 100644 --- a/drivers/video/fbdev/aty/mach64_ct.c +++ b/drivers/video/fbdev/aty/mach64_ct.c @@ -115,7 +115,7 @@ static void aty_st_pll_ct(int offset, u8 val, const struct atyfb_par *par) */ #define Maximum_DSP_PRECISION 7 -static u8 postdividers[] = {1,2,4,8,3}; +const u8 aty_postdividers[8] = {1,2,4,8,3,5,6,12}; static int aty_dsp_gt(const struct fb_info *info, u32 bpp, struct pll_ct *pll) { @@ -222,7 +222,7 @@ static int aty_valid_pll_ct(const struct fb_info *info, u32 vclk_per, struct pll pll->vclk_post_div += (q < 64*8); pll->vclk_post_div += (q < 32*8); } - pll->vclk_post_div_real = postdividers[pll->vclk_post_div]; + pll->vclk_post_div_real = aty_postdividers[pll->vclk_post_div]; // pll->vclk_post_div <<= 6; pll->vclk_fb_div = q * pll->vclk_post_div_real / 8; pllvclk = (1000000 * 2 * pll->vclk_fb_div) / @@ -513,7 +513,7 @@ static int aty_init_pll_ct(const struct fb_info *info, union aty_pll *pll) u8 mclk_fb_div, pll_ext_cntl; pll->ct.pll_ref_div = aty_ld_pll_ct(PLL_REF_DIV, par); pll_ext_cntl = aty_ld_pll_ct(PLL_EXT_CNTL, par); - pll->ct.xclk_post_div_real = postdividers[pll_ext_cntl & 0x07]; + pll->ct.xclk_post_div_real = aty_postdividers[pll_ext_cntl & 0x07]; mclk_fb_div = aty_ld_pll_ct(MCLK_FB_DIV, par); if (pll_ext_cntl & PLL_MFB_TIMES_4_2B) mclk_fb_div <<= 1; @@ -535,7 +535,7 @@ static int aty_init_pll_ct(const struct fb_info *info, union aty_pll *pll) xpost_div += (q < 64*8); xpost_div += (q < 32*8); } - pll->ct.xclk_post_div_real = postdividers[xpost_div]; + pll->ct.xclk_post_div_real = aty_postdividers[xpost_div]; pll->ct.mclk_fb_div = q * pll->ct.xclk_post_div_real / 8; #ifdef CONFIG_PPC @@ -584,7 +584,7 @@ static int aty_init_pll_ct(const struct fb_info *info, union aty_pll *pll) mpost_div += (q < 64*8); mpost_div += (q < 32*8); } - sclk_post_div_real = postdividers[mpost_div]; + sclk_post_div_real = aty_postdividers[mpost_div]; pll->ct.sclk_fb_div = q * sclk_post_div_real / 8; pll->ct.spll_cntl2 = mpost_div << 4; #ifdef DEBUG -- GitLab From 54886c97839732650f0aabb90e674cad791e82c0 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Sun, 7 Oct 2018 11:31:51 +0300 Subject: [PATCH 0919/2269] percpu: stop leaking bitmap metadata blocks commit 6685b357363bfe295e3ae73665014db4aed62c58 upstream. The commit ca460b3c9627 ("percpu: introduce bitmap metadata blocks") introduced bitmap metadata blocks. These metadata blocks are allocated whenever a new chunk is created, but they are never freed. Fix it. Fixes: ca460b3c9627 ("percpu: introduce bitmap metadata blocks") Signed-off-by: Mike Rapoport Cc: stable@vger.kernel.org Signed-off-by: Dennis Zhou Signed-off-by: Greg Kroah-Hartman --- mm/percpu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/percpu.c b/mm/percpu.c index 5fa5e79b69f05..3074148b7e0d3 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -1208,6 +1208,7 @@ static void pcpu_free_chunk(struct pcpu_chunk *chunk) { if (!chunk) return; + pcpu_mem_free(chunk->md_blocks); pcpu_mem_free(chunk->bound_map); pcpu_mem_free(chunk->alloc_map); pcpu_mem_free(chunk); -- GitLab From 82ac2740aa74668b694c04de659155571b6514e7 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 11 Sep 2018 14:45:03 +0300 Subject: [PATCH 0920/2269] perf script python: Fix export-to-postgresql.py occasional failure commit 25e11700b54c7b6b5ebfc4361981dae12299557b upstream. Occasional export failures were found to be caused by truncating 64-bit pointers to 32-bits. Fix by explicitly setting types for all ctype arguments and results. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/20180911114504.28516-2-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman --- tools/perf/scripts/python/export-to-postgresql.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tools/perf/scripts/python/export-to-postgresql.py b/tools/perf/scripts/python/export-to-postgresql.py index efcaf6cac2eb9..e46f51b175131 100644 --- a/tools/perf/scripts/python/export-to-postgresql.py +++ b/tools/perf/scripts/python/export-to-postgresql.py @@ -204,14 +204,23 @@ from ctypes import * libpq = CDLL("libpq.so.5") PQconnectdb = libpq.PQconnectdb PQconnectdb.restype = c_void_p +PQconnectdb.argtypes = [ c_char_p ] PQfinish = libpq.PQfinish +PQfinish.argtypes = [ c_void_p ] PQstatus = libpq.PQstatus +PQstatus.restype = c_int +PQstatus.argtypes = [ c_void_p ] PQexec = libpq.PQexec PQexec.restype = c_void_p +PQexec.argtypes = [ c_void_p, c_char_p ] PQresultStatus = libpq.PQresultStatus +PQresultStatus.restype = c_int +PQresultStatus.argtypes = [ c_void_p ] PQputCopyData = libpq.PQputCopyData +PQputCopyData.restype = c_int PQputCopyData.argtypes = [ c_void_p, c_void_p, c_int ] PQputCopyEnd = libpq.PQputCopyEnd +PQputCopyEnd.restype = c_int PQputCopyEnd.argtypes = [ c_void_p, c_void_p ] sys.path.append(os.environ['PERF_EXEC_PATH'] + \ -- GitLab From e3f725f5c46aa7e989430b0ecc3463a5c4e4cd49 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 11 Sep 2018 14:45:04 +0300 Subject: [PATCH 0921/2269] perf script python: Fix export-to-sqlite.py sample columns commit d005efe18db0b4a123dd92ea8e77e27aee8f99fd upstream. With the "branches" export option, not all sample columns are exported. However the unwanted columns are not at the end of the tuple, as assumed by the code. Fix by taking the first 15 and last 3 values, instead of the first 18. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/20180911114504.28516-3-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman --- tools/perf/scripts/python/export-to-sqlite.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tools/perf/scripts/python/export-to-sqlite.py b/tools/perf/scripts/python/export-to-sqlite.py index f827bf77e9d26..e4bb82c8aba9e 100644 --- a/tools/perf/scripts/python/export-to-sqlite.py +++ b/tools/perf/scripts/python/export-to-sqlite.py @@ -440,7 +440,11 @@ def branch_type_table(*x): def sample_table(*x): if branches: - bind_exec(sample_query, 18, x) + for xx in x[0:15]: + sample_query.addBindValue(str(xx)) + for xx in x[19:22]: + sample_query.addBindValue(str(xx)) + do_query_(sample_query) else: bind_exec(sample_query, 22, x) -- GitLab From 8d2f62cb2d463456a4dbc84c59d1d2a05947fe20 Mon Sep 17 00:00:00 2001 From: Eric Farman Date: Tue, 2 Oct 2018 03:02:35 +0200 Subject: [PATCH 0922/2269] s390/cio: Fix how vfio-ccw checks pinned pages commit 24abf2901b18bf941b9f21ea2ce5791f61097ae4 upstream. We have two nested loops to check the entries within the pfn_array_table arrays. But we mistakenly use the outer array as an index in our check, and completely ignore the indexing performed by the inner loop. Cc: stable@vger.kernel.org Signed-off-by: Eric Farman Message-Id: <20181002010235.42483-1-farman@linux.ibm.com> Signed-off-by: Cornelia Huck Signed-off-by: Greg Kroah-Hartman --- drivers/s390/cio/vfio_ccw_cp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/s390/cio/vfio_ccw_cp.c b/drivers/s390/cio/vfio_ccw_cp.c index 72ce6ad95767b..1419eaea03d84 100644 --- a/drivers/s390/cio/vfio_ccw_cp.c +++ b/drivers/s390/cio/vfio_ccw_cp.c @@ -172,7 +172,7 @@ static bool pfn_array_table_iova_pinned(struct pfn_array_table *pat, for (i = 0; i < pat->pat_nr; i++, pa++) for (j = 0; j < pa->pa_nr; j++) - if (pa->pa_iova_pfn[i] == iova_pfn) + if (pa->pa_iova_pfn[j] == iova_pfn) return true; return false; -- GitLab From 6c8faa19e9cc5fc0010737a2de38ad6b477bea30 Mon Sep 17 00:00:00 2001 From: Shenghui Wang Date: Sun, 7 Oct 2018 14:45:41 +0800 Subject: [PATCH 0923/2269] dm cache: destroy migration_cache if cache target registration failed commit c7cd55504a5b0fc826a2cd9540845979d24ae542 upstream. Commit 7e6358d244e47 ("dm: fix various targets to dm_register_target after module __init resources created") inadvertently introduced this bug when it moved dm_register_target() after the call to KMEM_CACHE(). Fixes: 7e6358d244e47 ("dm: fix various targets to dm_register_target after module __init resources created") Cc: stable@vger.kernel.org Signed-off-by: Shenghui Wang Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-cache-target.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index e2ea57d5376e8..b5f541112fca0 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -3571,14 +3571,13 @@ static int __init dm_cache_init(void) int r; migration_cache = KMEM_CACHE(dm_cache_migration, 0); - if (!migration_cache) { - dm_unregister_target(&cache_target); + if (!migration_cache) return -ENOMEM; - } r = dm_register_target(&cache_target); if (r) { DMERR("cache target registration failed: %d", r); + kmem_cache_destroy(migration_cache); return r; } -- GitLab From 261f2cba100bacbfa1e9d3f331461069450fc75d Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Tue, 9 Oct 2018 14:24:31 +0900 Subject: [PATCH 0924/2269] dm: fix report zone remapping to account for partition offset commit 9864cd5dc54cade89fd4b0954c2e522841aa247c upstream. If dm-linear or dm-flakey are layered on top of a partition of a zoned block device, remapping of the start sector and write pointer position of the zones reported by a report zones BIO must be modified to account for the target table entry mapping (start offset within the device and entry mapping with the dm device). If the target's backing device is a partition of a whole disk, the start sector on the physical device of the partition must also be accounted for when modifying the zone information. However, dm_remap_zone_report() was not considering this last case, resulting in incorrect zone information remapping with targets using disk partitions. Fix this by calculating the target backing device start sector using the position of the completed report zones BIO and the unchanged position and size of the original report zone BIO. With this value calculated, the start sector and write pointer position of the target zones can be correctly remapped. Fixes: 10999307c14e ("dm: introduce dm_remap_zone_report()") Cc: stable@vger.kernel.org Signed-off-by: Damien Le Moal Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm.c | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 24ec6e0394485..a56008b2e7c27 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1034,12 +1034,14 @@ void dm_accept_partial_bio(struct bio *bio, unsigned n_sectors) EXPORT_SYMBOL_GPL(dm_accept_partial_bio); /* - * The zone descriptors obtained with a zone report indicate - * zone positions within the target device. The zone descriptors - * must be remapped to match their position within the dm device. - * A target may call dm_remap_zone_report after completion of a - * REQ_OP_ZONE_REPORT bio to remap the zone descriptors obtained - * from the target device mapping to the dm device. + * The zone descriptors obtained with a zone report indicate zone positions + * within the target backing device, regardless of that device is a partition + * and regardless of the target mapping start sector on the device or partition. + * The zone descriptors start sector and write pointer position must be adjusted + * to match their relative position within the dm device. + * A target may call dm_remap_zone_report() after completion of a + * REQ_OP_ZONE_REPORT bio to remap the zone descriptors obtained from the + * backing device. */ void dm_remap_zone_report(struct dm_target *ti, struct bio *bio, sector_t start) { @@ -1050,6 +1052,7 @@ void dm_remap_zone_report(struct dm_target *ti, struct bio *bio, sector_t start) struct blk_zone *zone; unsigned int nr_rep = 0; unsigned int ofst; + sector_t part_offset; struct bio_vec bvec; struct bvec_iter iter; void *addr; @@ -1057,6 +1060,15 @@ void dm_remap_zone_report(struct dm_target *ti, struct bio *bio, sector_t start) if (bio->bi_status) return; + /* + * bio sector was incremented by the request size on completion. Taking + * into account the original request sector, the target start offset on + * the backing device and the target mapping offset (ti->begin), the + * start sector of the backing device. The partition offset is always 0 + * if the target uses a whole device. + */ + part_offset = bio->bi_iter.bi_sector + ti->begin - (start + bio_end_sector(report_bio)); + /* * Remap the start sector of the reported zones. For sequential zones, * also remap the write pointer position. @@ -1074,6 +1086,7 @@ void dm_remap_zone_report(struct dm_target *ti, struct bio *bio, sector_t start) /* Set zones start sector */ while (hdr->nr_zones && ofst < bvec.bv_len) { zone = addr + ofst; + zone->start -= part_offset; if (zone->start >= start + ti->len) { hdr->nr_zones = 0; break; @@ -1085,7 +1098,7 @@ void dm_remap_zone_report(struct dm_target *ti, struct bio *bio, sector_t start) else if (zone->cond == BLK_ZONE_COND_EMPTY) zone->wp = zone->start; else - zone->wp = zone->wp + ti->begin - start; + zone->wp = zone->wp + ti->begin - start - part_offset; } ofst += sizeof(struct blk_zone); hdr->nr_zones--; -- GitLab From efd6537984d5399b9225ac678f9ff5d0001e8f2b Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Wed, 10 Oct 2018 12:01:55 -0400 Subject: [PATCH 0925/2269] dm linear: eliminate linear_end_io call if CONFIG_DM_ZONED disabled commit beb9caac211c1be1bc118bb62d5cf09c4107e6a5 upstream. It is best to avoid any extra overhead associated with bio completion. DM core will indirectly call a DM target's .end_io if it is defined. In the case of DM linear, there is no need to do so (for every bio that completes) if CONFIG_DM_ZONED is not enabled. Avoiding an extra indirect call for every bio completion is very important for ensuring DM linear doesn't incur more overhead that further widens the performance gap between dm-linear and raw block devices. Fixes: 0be12c1c7fce7 ("dm linear: add support for zoned block devices") Cc: stable@vger.kernel.org Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-linear.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c index d5f8eff7c11d8..da40b1e852be6 100644 --- a/drivers/md/dm-linear.c +++ b/drivers/md/dm-linear.c @@ -101,6 +101,7 @@ static int linear_map(struct dm_target *ti, struct bio *bio) return DM_MAPIO_REMAPPED; } +#ifdef CONFIG_DM_ZONED static int linear_end_io(struct dm_target *ti, struct bio *bio, blk_status_t *error) { @@ -111,6 +112,7 @@ static int linear_end_io(struct dm_target *ti, struct bio *bio, return DM_ENDIO_DONE; } +#endif static void linear_status(struct dm_target *ti, status_type_t type, unsigned status_flags, char *result, unsigned maxlen) @@ -187,12 +189,16 @@ static size_t linear_dax_copy_from_iter(struct dm_target *ti, pgoff_t pgoff, static struct target_type linear_target = { .name = "linear", .version = {1, 4, 0}, +#ifdef CONFIG_DM_ZONED + .end_io = linear_end_io, .features = DM_TARGET_PASSES_INTEGRITY | DM_TARGET_ZONED_HM, +#else + .features = DM_TARGET_PASSES_INTEGRITY, +#endif .module = THIS_MODULE, .ctr = linear_ctr, .dtr = linear_dtr, .map = linear_map, - .end_io = linear_end_io, .status = linear_status, .prepare_ioctl = linear_prepare_ioctl, .iterate_devices = linear_iterate_devices, -- GitLab From c339fab172a98d0fe8fc7d87ba5d819026ccfa3b Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 11 Oct 2018 11:45:30 +0900 Subject: [PATCH 0926/2269] dm linear: fix linear_end_io conditional definition commit 118aa47c7072bce05fc39bd40a1c0a90caed72ab upstream. The dm-linear target is independent of the dm-zoned target. For code requiring support for zoned block devices, use CONFIG_BLK_DEV_ZONED instead of CONFIG_DM_ZONED. While at it, similarly to dm linear, also enable the DM_TARGET_ZONED_HM feature in dm-flakey only if CONFIG_BLK_DEV_ZONED is defined. Fixes: beb9caac211c1 ("dm linear: eliminate linear_end_io call if CONFIG_DM_ZONED disabled") Fixes: 0be12c1c7fce7 ("dm linear: add support for zoned block devices") Cc: stable@vger.kernel.org Signed-off-by: Damien Le Moal Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-flakey.c | 2 ++ drivers/md/dm-linear.c | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c index b82cb1ab1eaa3..0c1ef63c3461b 100644 --- a/drivers/md/dm-flakey.c +++ b/drivers/md/dm-flakey.c @@ -463,7 +463,9 @@ static int flakey_iterate_devices(struct dm_target *ti, iterate_devices_callout_ static struct target_type flakey_target = { .name = "flakey", .version = {1, 5, 0}, +#ifdef CONFIG_BLK_DEV_ZONED .features = DM_TARGET_ZONED_HM, +#endif .module = THIS_MODULE, .ctr = flakey_ctr, .dtr = flakey_dtr, diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c index da40b1e852be6..a53de71bc30c0 100644 --- a/drivers/md/dm-linear.c +++ b/drivers/md/dm-linear.c @@ -101,7 +101,7 @@ static int linear_map(struct dm_target *ti, struct bio *bio) return DM_MAPIO_REMAPPED; } -#ifdef CONFIG_DM_ZONED +#ifdef CONFIG_BLK_DEV_ZONED static int linear_end_io(struct dm_target *ti, struct bio *bio, blk_status_t *error) { @@ -189,7 +189,7 @@ static size_t linear_dax_copy_from_iter(struct dm_target *ti, pgoff_t pgoff, static struct target_type linear_target = { .name = "linear", .version = {1, 4, 0}, -#ifdef CONFIG_DM_ZONED +#ifdef CONFIG_BLK_DEV_ZONED .end_io = linear_end_io, .features = DM_TARGET_PASSES_INTEGRITY | DM_TARGET_ZONED_HM, #else -- GitLab From bc183079ddfdeda8282b30a7f9a64aaca11c19a1 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 4 Oct 2018 13:28:08 -0700 Subject: [PATCH 0927/2269] cgroup: Fix dom_cgrp propagation when enabling threaded mode commit 479adb89a97b0a33e5a9d702119872cc82ca21aa upstream. A cgroup which is already a threaded domain may be converted into a threaded cgroup if the prerequisite conditions are met. When this happens, all threaded descendant should also have their ->dom_cgrp updated to the new threaded domain cgroup. Unfortunately, this propagation was missing leading to the following failure. # cd /sys/fs/cgroup/unified # cat cgroup.subtree_control # show that no controllers are enabled # mkdir -p mycgrp/a/b/c # echo threaded > mycgrp/a/b/cgroup.type At this point, the hierarchy looks as follows: mycgrp [d] a [dt] b [t] c [inv] Now let's make node "a" threaded (and thus "mycgrp" s made "domain threaded"): # echo threaded > mycgrp/a/cgroup.type By this point, we now have a hierarchy that looks as follows: mycgrp [dt] a [t] b [t] c [inv] But, when we try to convert the node "c" from "domain invalid" to "threaded", we get ENOTSUP on the write(): # echo threaded > mycgrp/a/b/c/cgroup.type sh: echo: write error: Operation not supported This patch fixes the problem by * Moving the opencoded ->dom_cgrp save and restoration in cgroup_enable_threaded() into cgroup_{save|restore}_control() so that mulitple cgroups can be handled. * Updating all threaded descendants' ->dom_cgrp to point to the new dom_cgrp when enabling threaded mode. Signed-off-by: Tejun Heo Reported-and-tested-by: "Michael Kerrisk (man-pages)" Reported-by: Amin Jamali Reported-by: Joao De Almeida Pereira Link: https://lore.kernel.org/r/CAKgNAkhHYCMn74TCNiMJ=ccLd7DcmXSbvw3CbZ1YREeG7iJM5g@mail.gmail.com Fixes: 454000adaa2a ("cgroup: introduce cgroup->dom_cgrp and threaded css_set handling") Cc: stable@vger.kernel.org # v4.14+ Signed-off-by: Greg Kroah-Hartman --- include/linux/cgroup-defs.h | 1 + kernel/cgroup/cgroup.c | 25 ++++++++++++++++--------- 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 4e8f77504a579..e7905d9353e89 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -353,6 +353,7 @@ struct cgroup { * specific task are charged to the dom_cgrp. */ struct cgroup *dom_cgrp; + struct cgroup *old_dom_cgrp; /* used while enabling threaded */ /* * list of pidlists, up to two for each namespace (one for procs, one diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 76c0ef2cb5098..3fc11b8851acb 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -2780,11 +2780,12 @@ restart: } /** - * cgroup_save_control - save control masks of a subtree + * cgroup_save_control - save control masks and dom_cgrp of a subtree * @cgrp: root of the target subtree * - * Save ->subtree_control and ->subtree_ss_mask to the respective old_ - * prefixed fields for @cgrp's subtree including @cgrp itself. + * Save ->subtree_control, ->subtree_ss_mask and ->dom_cgrp to the + * respective old_ prefixed fields for @cgrp's subtree including @cgrp + * itself. */ static void cgroup_save_control(struct cgroup *cgrp) { @@ -2794,6 +2795,7 @@ static void cgroup_save_control(struct cgroup *cgrp) cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) { dsct->old_subtree_control = dsct->subtree_control; dsct->old_subtree_ss_mask = dsct->subtree_ss_mask; + dsct->old_dom_cgrp = dsct->dom_cgrp; } } @@ -2819,11 +2821,12 @@ static void cgroup_propagate_control(struct cgroup *cgrp) } /** - * cgroup_restore_control - restore control masks of a subtree + * cgroup_restore_control - restore control masks and dom_cgrp of a subtree * @cgrp: root of the target subtree * - * Restore ->subtree_control and ->subtree_ss_mask from the respective old_ - * prefixed fields for @cgrp's subtree including @cgrp itself. + * Restore ->subtree_control, ->subtree_ss_mask and ->dom_cgrp from the + * respective old_ prefixed fields for @cgrp's subtree including @cgrp + * itself. */ static void cgroup_restore_control(struct cgroup *cgrp) { @@ -2833,6 +2836,7 @@ static void cgroup_restore_control(struct cgroup *cgrp) cgroup_for_each_live_descendant_post(dsct, d_css, cgrp) { dsct->subtree_control = dsct->old_subtree_control; dsct->subtree_ss_mask = dsct->old_subtree_ss_mask; + dsct->dom_cgrp = dsct->old_dom_cgrp; } } @@ -3140,6 +3144,8 @@ static int cgroup_enable_threaded(struct cgroup *cgrp) { struct cgroup *parent = cgroup_parent(cgrp); struct cgroup *dom_cgrp = parent->dom_cgrp; + struct cgroup *dsct; + struct cgroup_subsys_state *d_css; int ret; lockdep_assert_held(&cgroup_mutex); @@ -3169,12 +3175,13 @@ static int cgroup_enable_threaded(struct cgroup *cgrp) */ cgroup_save_control(cgrp); - cgrp->dom_cgrp = dom_cgrp; + cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) + if (dsct == cgrp || cgroup_is_threaded(dsct)) + dsct->dom_cgrp = dom_cgrp; + ret = cgroup_apply_control(cgrp); if (!ret) parent->nr_threaded_children++; - else - cgrp->dom_cgrp = cgrp; cgroup_finalize_control(cgrp, ret); return ret; -- GitLab From d5833a50c6a323bfc42dffe2e78aa0f06bb8263b Mon Sep 17 00:00:00 2001 From: Chris Boot Date: Mon, 8 Oct 2018 17:07:30 +0200 Subject: [PATCH 0928/2269] mmc: block: avoid multiblock reads for the last sector in SPI mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 41591b38f5f8f78344954b68582b5f00e56ffe61 upstream. On some SD cards over SPI, reading with the multiblock read command the last sector will leave the card in a bad state. Remove last sectors from the multiblock reading cmd. Signed-off-by: Chris Boot Signed-off-by: Clément Péron Cc: stable@vger.kernel.org # v4.10+ Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/core/block.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index 4281fdc0a13c5..ce6dd49fbb98d 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -1613,6 +1613,16 @@ static void mmc_blk_data_prep(struct mmc_queue *mq, struct mmc_queue_req *mqrq, brq->data.blocks = card->host->max_blk_count; if (brq->data.blocks > 1) { + /* + * Some SD cards in SPI mode return a CRC error or even lock up + * completely when trying to read the last block using a + * multiblock read command. + */ + if (mmc_host_is_spi(card->host) && (rq_data_dir(req) == READ) && + (blk_rq_pos(req) + blk_rq_sectors(req) == + get_capacity(md->disk))) + brq->data.blocks--; + /* * After a read error, we redo the request one sector * at a time in order to accurately determine which -- GitLab From b3e4b3c70a0a4028d481b8b1e84b06acdc80d1ec Mon Sep 17 00:00:00 2001 From: Marco Felsch Date: Tue, 2 Oct 2018 10:06:46 +0200 Subject: [PATCH 0929/2269] pinctrl: mcp23s08: fix irq and irqchip setup order MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit f259f896f2348f0302f6f88d4382378cf9d23a7e upstream. Since 'commit 02e389e63e35 ("pinctrl: mcp23s08: fix irq setup order")' the irq request isn't the last devm_* allocation. Without a deeper look at the irq and testing this isn't a good solution. Since this driver relies on the devm mechanism, requesting a interrupt should be the last thing to avoid memory corruptions during unbinding. 'Commit 02e389e63e35 ("pinctrl: mcp23s08: fix irq setup order")' fixed the order for the interrupt-controller use case only. The mcp23s08_irq_setup() must be split into two to fix it for the interrupt-controller use case and to register the irq at last. So the irq will be freed first during unbind. Cc: stable@vger.kernel.org Cc: Jan Kundrát Cc: Dmitry Mastykin Cc: Sebastian Reichel Fixes: 82039d244f87 ("pinctrl: mcp23s08: add pinconf support") Fixes: 02e389e63e35 ("pinctrl: mcp23s08: fix irq setup order") Signed-off-by: Marco Felsch Tested-by: Phil Reid Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/pinctrl-mcp23s08.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/pinctrl/pinctrl-mcp23s08.c b/drivers/pinctrl/pinctrl-mcp23s08.c index db9cca4a83ff1..22558bf294246 100644 --- a/drivers/pinctrl/pinctrl-mcp23s08.c +++ b/drivers/pinctrl/pinctrl-mcp23s08.c @@ -643,6 +643,14 @@ static int mcp23s08_irq_setup(struct mcp23s08 *mcp) return err; } + return 0; +} + +static int mcp23s08_irqchip_setup(struct mcp23s08 *mcp) +{ + struct gpio_chip *chip = &mcp->chip; + int err; + err = gpiochip_irqchip_add_nested(chip, &mcp23s08_irq_chip, 0, @@ -907,7 +915,7 @@ static int mcp23s08_probe_one(struct mcp23s08 *mcp, struct device *dev, } if (mcp->irq && mcp->irq_controller) { - ret = mcp23s08_irq_setup(mcp); + ret = mcp23s08_irqchip_setup(mcp); if (ret) goto fail; } @@ -932,6 +940,9 @@ static int mcp23s08_probe_one(struct mcp23s08 *mcp, struct device *dev, goto fail; } + if (mcp->irq) + ret = mcp23s08_irq_setup(mcp); + fail: if (ret < 0) dev_dbg(dev, "can't setup chip %d, --> %d\n", addr, ret); -- GitLab From 3e6275d940a4419083de25de38d0cc9585117d25 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 5 Oct 2018 13:24:36 +0100 Subject: [PATCH 0930/2269] arm64: perf: Reject stand-alone CHAIN events for PMUv3 commit ca2b497253ad01c80061a1f3ee9eb91b5d54a849 upstream. It doesn't make sense for a perf event to be configured as a CHAIN event in isolation, so extend the arm_pmu structure with a ->filter_match() function to allow the backend PMU implementation to reject CHAIN events early. Cc: Reviewed-by: Suzuki K Poulose Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/perf_event.c | 7 +++++++ drivers/perf/arm_pmu.c | 8 +++++++- include/linux/perf/arm_pmu.h | 1 + 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 1984e739f155f..86249a24592d8 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -824,6 +824,12 @@ static int armv8pmu_set_event_filter(struct hw_perf_event *event, return 0; } +static int armv8pmu_filter_match(struct perf_event *event) +{ + unsigned long evtype = event->hw.config_base & ARMV8_PMU_EVTYPE_EVENT; + return evtype != ARMV8_PMUV3_PERFCTR_CHAIN; +} + static void armv8pmu_reset(void *info) { struct arm_pmu *cpu_pmu = (struct arm_pmu *)info; @@ -970,6 +976,7 @@ static int armv8_pmu_init(struct arm_pmu *cpu_pmu) cpu_pmu->reset = armv8pmu_reset, cpu_pmu->max_period = (1LLU << 32) - 1, cpu_pmu->set_event_filter = armv8pmu_set_event_filter; + cpu_pmu->filter_match = armv8pmu_filter_match; return 0; } diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index d14fc2e67f933..5e06917b4cefe 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -483,7 +483,13 @@ static int armpmu_filter_match(struct perf_event *event) { struct arm_pmu *armpmu = to_arm_pmu(event->pmu); unsigned int cpu = smp_processor_id(); - return cpumask_test_cpu(cpu, &armpmu->supported_cpus); + int ret; + + ret = cpumask_test_cpu(cpu, &armpmu->supported_cpus); + if (ret && armpmu->filter_match) + return armpmu->filter_match(event); + + return ret; } static ssize_t armpmu_cpumask_show(struct device *dev, diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index af0f44effd44a..251bc43fdcfc4 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -110,6 +110,7 @@ struct arm_pmu { void (*stop)(struct arm_pmu *); void (*reset)(void *); int (*map_event)(struct perf_event *event); + int (*filter_match)(struct perf_event *event); int num_events; u64 max_period; bool secure_access; /* 32-bit ARM only */ -- GitLab From 68ba0bdfe4941a4ef2423fdf900f9b22089ed227 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Glisse?= Date: Fri, 12 Oct 2018 21:34:36 -0700 Subject: [PATCH 0931/2269] mm/thp: fix call to mmu_notifier in set_pmd_migration_entry() v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit bfba8e5cf28f413aa05571af493871d74438979f upstream. Inside set_pmd_migration_entry() we are holding page table locks and thus we can not sleep so we can not call invalidate_range_start/end() So remove call to mmu_notifier_invalidate_range_start/end() because they are call inside the function calling set_pmd_migration_entry() (see try_to_unmap_one()). Link: http://lkml.kernel.org/r/20181012181056.7864-1-jglisse@redhat.com Signed-off-by: Jérôme Glisse Reported-by: Andrea Arcangeli Reviewed-by: Zi Yan Acked-by: Michal Hocko Cc: Greg Kroah-Hartman Cc: Kirill A. Shutemov Cc: "H. Peter Anvin" Cc: Anshuman Khandual Cc: Dave Hansen Cc: David Nellans Cc: Ingo Molnar Cc: Mel Gorman Cc: Minchan Kim Cc: Naoya Horiguchi Cc: Thomas Gleixner Cc: Vlastimil Babka Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- mm/huge_memory.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 174612f8339cf..39c1fedcfdb45 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2843,9 +2843,6 @@ void set_pmd_migration_entry(struct page_vma_mapped_walk *pvmw, if (!(pvmw->pmd && !pvmw->pte)) return; - mmu_notifier_invalidate_range_start(mm, address, - address + HPAGE_PMD_SIZE); - flush_cache_range(vma, address, address + HPAGE_PMD_SIZE); pmdval = *pvmw->pmd; pmdp_invalidate(vma, address, pvmw->pmd); @@ -2858,9 +2855,6 @@ void set_pmd_migration_entry(struct page_vma_mapped_walk *pvmw, set_pmd_at(mm, address, pvmw->pmd, pmdswp); page_remove_rmap(page, true); put_page(page); - - mmu_notifier_invalidate_range_end(mm, address, - address + HPAGE_PMD_SIZE); } void remove_migration_pmd(struct page_vma_mapped_walk *pvmw, struct page *new) -- GitLab From 1b7ff5208d2f154c26b9692ddf1b761c9b7339f2 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 9 Oct 2018 12:19:17 +0200 Subject: [PATCH 0932/2269] mm: Preserve _PAGE_DEVMAP across mprotect() calls commit 4628a64591e6cee181237060961e98c615c33966 upstream. Currently _PAGE_DEVMAP bit is not preserved in mprotect(2) calls. As a result we will see warnings such as: BUG: Bad page map in process JobWrk0013 pte:800001803875ea25 pmd:7624381067 addr:00007f0930720000 vm_flags:280000f9 anon_vma: (null) mapping:ffff97f2384056f0 index:0 file:457-000000fe00000030-00000009-000000ca-00000001_2001.fileblock fault:xfs_filemap_fault [xfs] mmap:xfs_file_mmap [xfs] readpage: (null) CPU: 3 PID: 15848 Comm: JobWrk0013 Tainted: G W 4.12.14-2.g7573215-default #1 SLE12-SP4 (unreleased) Hardware name: Intel Corporation S2600WFD/S2600WFD, BIOS SE5C620.86B.01.00.0833.051120182255 05/11/2018 Call Trace: dump_stack+0x5a/0x75 print_bad_pte+0x217/0x2c0 ? enqueue_task_fair+0x76/0x9f0 _vm_normal_page+0xe5/0x100 zap_pte_range+0x148/0x740 unmap_page_range+0x39a/0x4b0 unmap_vmas+0x42/0x90 unmap_region+0x99/0xf0 ? vma_gap_callbacks_rotate+0x1a/0x20 do_munmap+0x255/0x3a0 vm_munmap+0x54/0x80 SyS_munmap+0x1d/0x30 do_syscall_64+0x74/0x150 entry_SYSCALL_64_after_hwframe+0x3d/0xa2 ... when mprotect(2) gets used on DAX mappings. Also there is a wide variety of other failures that can result from the missing _PAGE_DEVMAP flag when the area gets used by get_user_pages() later. Fix the problem by including _PAGE_DEVMAP in a set of flags that get preserved by mprotect(2). Fixes: 69660fd797c3 ("x86, mm: introduce _PAGE_DEVMAP") Fixes: ebd31197931d ("powerpc/mm: Add devmap support for ppc64") Cc: Signed-off-by: Jan Kara Acked-by: Michal Hocko Reviewed-by: Johannes Thumshirn Signed-off-by: Dan Williams Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/include/asm/book3s/64/pgtable.h | 4 ++-- arch/x86/include/asm/pgtable_types.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 9a677cd5997f9..4dd13b503dbbd 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -102,7 +102,7 @@ */ #define _HPAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HPTEFLAGS | _PAGE_DIRTY | \ _PAGE_ACCESSED | H_PAGE_THP_HUGE | _PAGE_PTE | \ - _PAGE_SOFT_DIRTY) + _PAGE_SOFT_DIRTY | _PAGE_DEVMAP) /* * user access blocked by key */ @@ -120,7 +120,7 @@ */ #define _PAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HPTEFLAGS | _PAGE_DIRTY | \ _PAGE_ACCESSED | _PAGE_SPECIAL | _PAGE_PTE | \ - _PAGE_SOFT_DIRTY) + _PAGE_SOFT_DIRTY | _PAGE_DEVMAP) /* * Mask of bits returned by pte_pgprot() */ diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 246f15b4e64ce..85f8279c885ac 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -124,7 +124,7 @@ */ #define _PAGE_CHG_MASK (PTE_PFN_MASK | _PAGE_PCD | _PAGE_PWT | \ _PAGE_SPECIAL | _PAGE_ACCESSED | _PAGE_DIRTY | \ - _PAGE_SOFT_DIRTY) + _PAGE_SOFT_DIRTY | _PAGE_DEVMAP) #define _HPAGE_CHG_MASK (_PAGE_CHG_MASK | _PAGE_PSE) /* -- GitLab From 74a960430a8d01c33844884a382264599072d3c8 Mon Sep 17 00:00:00 2001 From: Edgar Cherkasov Date: Thu, 27 Sep 2018 11:56:03 +0300 Subject: [PATCH 0933/2269] i2c: i2c-scmi: fix for i2c_smbus_write_block_data commit 08d9db00fe0e300d6df976e6c294f974988226dd upstream. The i2c-scmi driver crashes when the SMBus Write Block transaction is executed: WARNING: CPU: 9 PID: 2194 at mm/page_alloc.c:3931 __alloc_pages_slowpath+0x9db/0xec0 Call Trace: ? get_page_from_freelist+0x49d/0x11f0 ? alloc_pages_current+0x6a/0xe0 ? new_slab+0x499/0x690 __alloc_pages_nodemask+0x265/0x280 alloc_pages_current+0x6a/0xe0 kmalloc_order+0x18/0x40 kmalloc_order_trace+0x24/0xb0 ? acpi_ut_allocate_object_desc_dbg+0x62/0x10c __kmalloc+0x203/0x220 acpi_os_allocate_zeroed+0x34/0x36 acpi_ut_copy_eobject_to_iobject+0x266/0x31e acpi_evaluate_object+0x166/0x3b2 acpi_smbus_cmi_access+0x144/0x530 [i2c_scmi] i2c_smbus_xfer+0xda/0x370 i2cdev_ioctl_smbus+0x1bd/0x270 i2cdev_ioctl+0xaa/0x250 do_vfs_ioctl+0xa4/0x600 SyS_ioctl+0x79/0x90 do_syscall_64+0x73/0x130 entry_SYSCALL_64_after_hwframe+0x3d/0xa2 ACPI Error: Evaluating _SBW: 4 (20170831/smbus_cmi-185) This problem occurs because the length of ACPI Buffer object is not defined/initialized in the code before a corresponding ACPI method is called. The obvious patch below fixes this issue. Signed-off-by: Edgar Cherkasov Acked-by: Viktor Krasnov Acked-by: Michael Brunner Signed-off-by: Wolfram Sang Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/busses/i2c-scmi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/i2c/busses/i2c-scmi.c b/drivers/i2c/busses/i2c-scmi.c index 7aa7b9cb6203f..efefcfa24a4c0 100644 --- a/drivers/i2c/busses/i2c-scmi.c +++ b/drivers/i2c/busses/i2c-scmi.c @@ -152,6 +152,7 @@ acpi_smbus_cmi_access(struct i2c_adapter *adap, u16 addr, unsigned short flags, mt_params[3].type = ACPI_TYPE_INTEGER; mt_params[3].integer.value = len; mt_params[4].type = ACPI_TYPE_BUFFER; + mt_params[4].buffer.length = len; mt_params[4].buffer.pointer = data->block + 1; } break; -- GitLab From 7a4f9efdb213fc5ae014b3a551a1364b94cd2533 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Mon, 12 Feb 2018 14:24:47 +0200 Subject: [PATCH 0934/2269] xhci: Don't print a warning when setting link state for disabled ports commit 1208d8a84fdcae6b395c57911cdf907450d30e70 upstream. When disabling a USB3 port the hub driver will set the port link state to U3 to prevent "ejected" or "safely removed" devices that are still physically connected from immediately re-enumerating. If the device was really unplugged, then error messages were printed as the hub tries to set the U3 link state for a port that is no longer enabled. xhci-hcd ee000000.usb: Cannot set link state. usb usb8-port1: cannot disable (err = -32) Don't print error message in xhci-hub if hub tries to set port link state for a disabled port. Return -ENODEV instead which also silences hub driver. Signed-off-by: Mathias Nyman Tested-by: Yoshihiro Shimoda Signed-off-by: Ross Zwisler Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-hub.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c index c01d1f3a1c7d1..d2a9767a8e9c5 100644 --- a/drivers/usb/host/xhci-hub.c +++ b/drivers/usb/host/xhci-hub.c @@ -1236,17 +1236,17 @@ int xhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, temp = readl(port_array[wIndex]); break; } - - /* Software should not attempt to set - * port link state above '3' (U3) and the port - * must be enabled. - */ - if ((temp & PORT_PE) == 0 || - (link_state > USB_SS_PORT_LS_U3)) { - xhci_warn(xhci, "Cannot set link state.\n"); + /* Port must be enabled */ + if (!(temp & PORT_PE)) { + retval = -ENODEV; + break; + } + /* Can't set port link state above '3' (U3) */ + if (link_state > USB_SS_PORT_LS_U3) { + xhci_warn(xhci, "Cannot set port %d link state %d\n", + wIndex, link_state); goto error; } - if (link_state == USB_SS_PORT_LS_U3) { slot_id = xhci_find_slot_id_by_port(hcd, xhci, wIndex + 1); -- GitLab From c605894c84b9d26324bdbc31a9526cb0a5c8d48e Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Tue, 10 Apr 2018 16:27:36 -0700 Subject: [PATCH 0935/2269] mm: introduce NR_INDIRECTLY_RECLAIMABLE_BYTES commit eb59254608bc1d42c4c6afdcdce9c0d3ce02b318 upstream. Patch series "indirectly reclaimable memory", v2. This patchset introduces the concept of indirectly reclaimable memory and applies it to fix the issue of when a big number of dentries with external names can significantly affect the MemAvailable value. This patch (of 3): Introduce a concept of indirectly reclaimable memory and adds the corresponding memory counter and /proc/vmstat item. Indirectly reclaimable memory is any sort of memory, used by the kernel (except of reclaimable slabs), which is actually reclaimable, i.e. will be released under memory pressure. The counter is in bytes, as it's not always possible to count such objects in pages. The name contains BYTES by analogy to NR_KERNEL_STACK_KB. Link: http://lkml.kernel.org/r/20180305133743.12746-2-guro@fb.com Signed-off-by: Roman Gushchin Reviewed-by: Andrew Morton Cc: Alexander Viro Cc: Michal Hocko Cc: Johannes Weiner Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- include/linux/mmzone.h | 1 + mm/vmstat.c | 1 + 2 files changed, 2 insertions(+) diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index f0938257ee6d3..f679f52684675 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -180,6 +180,7 @@ enum node_stat_item { NR_VMSCAN_IMMEDIATE, /* Prioritise for reclaim when writeback ends */ NR_DIRTIED, /* page dirtyings since bootup */ NR_WRITTEN, /* page writings since bootup */ + NR_INDIRECTLY_RECLAIMABLE_BYTES, /* measured in bytes */ NR_VM_NODE_STAT_ITEMS }; diff --git a/mm/vmstat.c b/mm/vmstat.c index 50b4c11fb6121..de547e1c7adc1 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1090,6 +1090,7 @@ const char * const vmstat_text[] = { "nr_vmscan_immediate_reclaim", "nr_dirtied", "nr_written", + "nr_indirectly_reclaimable", /* enum writeback_stat_item counters */ "nr_dirty_threshold", -- GitLab From dc09a5b68d830c2d25c4c1321d3db93b03b9be6a Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Tue, 10 Apr 2018 16:27:40 -0700 Subject: [PATCH 0936/2269] mm: treat indirectly reclaimable memory as available in MemAvailable commit 034ebf65c3c21d85b963d39f992258a64a85e3a9 upstream. Adjust /proc/meminfo MemAvailable calculation by adding the amount of indirectly reclaimable memory (rounded to the PAGE_SIZE). Link: http://lkml.kernel.org/r/20180305133743.12746-4-guro@fb.com Signed-off-by: Roman Gushchin Reviewed-by: Andrew Morton Cc: Alexander Viro Cc: Michal Hocko Cc: Johannes Weiner Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/page_alloc.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 59ccf455fcbd3..a604b5da6755e 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -4557,6 +4557,13 @@ long si_mem_available(void) min(global_node_page_state(NR_SLAB_RECLAIMABLE) / 2, wmark_low); + /* + * Part of the kernel memory, which can be released under memory + * pressure. + */ + available += global_node_page_state(NR_INDIRECTLY_RECLAIMABLE_BYTES) >> + PAGE_SHIFT; + if (available < 0) available = 0; return available; -- GitLab From 6d7942377c88ef51783057713f9610aa3e307f24 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Tue, 10 Apr 2018 16:27:44 -0700 Subject: [PATCH 0937/2269] dcache: account external names as indirectly reclaimable memory commit f1782c9bc547754f4bd3043fe8cfda53db85f13f upstream. I received a report about suspicious growth of unreclaimable slabs on some machines. I've found that it happens on machines with low memory pressure, and these unreclaimable slabs are external names attached to dentries. External names are allocated using generic kmalloc() function, so they are accounted as unreclaimable. But they are held by dentries, which are reclaimable, and they will be reclaimed under the memory pressure. In particular, this breaks MemAvailable calculation, as it doesn't take unreclaimable slabs into account. This leads to a silly situation, when a machine is almost idle, has no memory pressure and therefore has a big dentry cache. And the resulting MemAvailable is too low to start a new workload. To address the issue, the NR_INDIRECTLY_RECLAIMABLE_BYTES counter is used to track the amount of memory, consumed by external names. The counter is increased in the dentry allocation path, if an external name structure is allocated; and it's decreased in the dentry freeing path. To reproduce the problem I've used the following Python script: import os for iter in range (0, 10000000): try: name = ("/some_long_name_%d" % iter) + "_" * 220 os.stat(name) except Exception: pass Without this patch: $ cat /proc/meminfo | grep MemAvailable MemAvailable: 7811688 kB $ python indirect.py $ cat /proc/meminfo | grep MemAvailable MemAvailable: 2753052 kB With the patch: $ cat /proc/meminfo | grep MemAvailable MemAvailable: 7809516 kB $ python indirect.py $ cat /proc/meminfo | grep MemAvailable MemAvailable: 7749144 kB [guro@fb.com: fix indirectly reclaimable memory accounting for CONFIG_SLOB] Link: http://lkml.kernel.org/r/20180312194140.19517-1-guro@fb.com [guro@fb.com: fix indirectly reclaimable memory accounting] Link: http://lkml.kernel.org/r/20180313125701.7955-1-guro@fb.com Link: http://lkml.kernel.org/r/20180305133743.12746-5-guro@fb.com Signed-off-by: Roman Gushchin Reviewed-by: Andrew Morton Cc: Alexander Viro Cc: Michal Hocko Cc: Johannes Weiner Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/dcache.c | 38 +++++++++++++++++++++++++++++--------- 1 file changed, 29 insertions(+), 9 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index c1a7c174a9050..28b2e770bb69b 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -270,11 +270,25 @@ static void __d_free(struct rcu_head *head) kmem_cache_free(dentry_cache, dentry); } +static void __d_free_external_name(struct rcu_head *head) +{ + struct external_name *name = container_of(head, struct external_name, + u.head); + + mod_node_page_state(page_pgdat(virt_to_page(name)), + NR_INDIRECTLY_RECLAIMABLE_BYTES, + -ksize(name)); + + kfree(name); +} + static void __d_free_external(struct rcu_head *head) { struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu); - kfree(external_name(dentry)); - kmem_cache_free(dentry_cache, dentry); + + __d_free_external_name(&external_name(dentry)->u.head); + + kmem_cache_free(dentry_cache, dentry); } static inline int dname_external(const struct dentry *dentry) @@ -305,7 +319,7 @@ void release_dentry_name_snapshot(struct name_snapshot *name) struct external_name *p; p = container_of(name->name, struct external_name, name[0]); if (unlikely(atomic_dec_and_test(&p->u.count))) - kfree_rcu(p, u.head); + call_rcu(&p->u.head, __d_free_external_name); } } EXPORT_SYMBOL(release_dentry_name_snapshot); @@ -1605,6 +1619,7 @@ EXPORT_SYMBOL(d_invalidate); struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name) { + struct external_name *ext = NULL; struct dentry *dentry; char *dname; int err; @@ -1625,14 +1640,13 @@ struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name) dname = dentry->d_iname; } else if (name->len > DNAME_INLINE_LEN-1) { size_t size = offsetof(struct external_name, name[1]); - struct external_name *p = kmalloc(size + name->len, - GFP_KERNEL_ACCOUNT); - if (!p) { + ext = kmalloc(size + name->len, GFP_KERNEL_ACCOUNT); + if (!ext) { kmem_cache_free(dentry_cache, dentry); return NULL; } - atomic_set(&p->u.count, 1); - dname = p->name; + atomic_set(&ext->u.count, 1); + dname = ext->name; if (IS_ENABLED(CONFIG_DCACHE_WORD_ACCESS)) kasan_unpoison_shadow(dname, round_up(name->len + 1, sizeof(unsigned long))); @@ -1675,6 +1689,12 @@ struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name) } } + if (unlikely(ext)) { + pg_data_t *pgdat = page_pgdat(virt_to_page(ext)); + mod_node_page_state(pgdat, NR_INDIRECTLY_RECLAIMABLE_BYTES, + ksize(ext)); + } + this_cpu_inc(nr_dentry); return dentry; @@ -2769,7 +2789,7 @@ static void copy_name(struct dentry *dentry, struct dentry *target) dentry->d_name.hash_len = target->d_name.hash_len; } if (old_name && likely(atomic_dec_and_test(&old_name->u.count))) - kfree_rcu(old_name, u.head); + call_rcu(&old_name->u.head, __d_free_external_name); } static void dentry_lock_for_move(struct dentry *dentry, struct dentry *target) -- GitLab From 5de69d648a09acc3c58bcead8cfc9d19356f79e4 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Tue, 10 Apr 2018 16:27:47 -0700 Subject: [PATCH 0938/2269] mm: treat indirectly reclaimable memory as free in overcommit logic commit d79f7aa496fc94d763f67b833a1f36f4c171176f upstream. Indirectly reclaimable memory can consume a significant part of total memory and it's actually reclaimable (it will be released under actual memory pressure). So, the overcommit logic should treat it as free. Otherwise, it's possible to cause random system-wide memory allocation failures by consuming a significant amount of memory by indirectly reclaimable memory, e.g. dentry external names. If overcommit policy GUESS is used, it might be used for denial of service attack under some conditions. The following program illustrates the approach. It causes the kernel to allocate an unreclaimable kmalloc-256 chunk for each stat() call, so that at some point the overcommit logic may start blocking large allocation system-wide. int main() { char buf[256]; unsigned long i; struct stat statbuf; buf[0] = '/'; for (i = 1; i < sizeof(buf); i++) buf[i] = '_'; for (i = 0; 1; i++) { sprintf(&buf[248], "%8lu", i); stat(buf, &statbuf); } return 0; } This patch in combination with related indirectly reclaimable memory patches closes this issue. Link: http://lkml.kernel.org/r/20180313130041.8078-1-guro@fb.com Signed-off-by: Roman Gushchin Reviewed-by: Andrew Morton Cc: Alexander Viro Cc: Michal Hocko Cc: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/util.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/mm/util.c b/mm/util.c index 34e57fae959de..547e04b5cfff4 100644 --- a/mm/util.c +++ b/mm/util.c @@ -635,6 +635,13 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin) */ free += global_node_page_state(NR_SLAB_RECLAIMABLE); + /* + * Part of the kernel memory, which can be released + * under memory pressure. + */ + free += global_node_page_state( + NR_INDIRECTLY_RECLAIMABLE_BYTES) >> PAGE_SHIFT; + /* * Leave reserved pages. The pages are not for anonymous pages. */ -- GitLab From d62b8ac8cd540c39a10d94c1fe5389f994a82e1a Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Fri, 11 May 2018 16:01:53 -0700 Subject: [PATCH 0939/2269] mm: don't show nr_indirectly_reclaimable in /proc/vmstat commit 7aaf7727235870f497eb928f728f7773d6df3b40 upstream. Don't show nr_indirectly_reclaimable in /proc/vmstat, because there is no need to export this vm counter to userspace, and some changes are expected in reclaimable object accounting, which can alter this counter. Link: http://lkml.kernel.org/r/20180425191422.9159-1-guro@fb.com Signed-off-by: Roman Gushchin Acked-by: Vlastimil Babka Reviewed-by: Andrew Morton Cc: Matthew Wilcox Cc: Alexander Viro Cc: Michal Hocko Cc: Johannes Weiner Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/vmstat.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/mm/vmstat.c b/mm/vmstat.c index de547e1c7adc1..527ae727d547e 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1090,7 +1090,7 @@ const char * const vmstat_text[] = { "nr_vmscan_immediate_reclaim", "nr_dirtied", "nr_written", - "nr_indirectly_reclaimable", + "", /* nr_indirectly_reclaimable */ /* enum writeback_stat_item counters */ "nr_dirty_threshold", @@ -1673,6 +1673,10 @@ static int vmstat_show(struct seq_file *m, void *arg) unsigned long *l = arg; unsigned long off = l - (unsigned long *)m->private; + /* Skip hidden vmstat items. */ + if (*vmstat_text[off] == '\0') + return 0; + seq_puts(m, vmstat_text[off]); seq_put_decimal_ull(m, " ", *l); seq_putc(m, '\n'); -- GitLab From 1789de335428b57be254175aa5748a7620e5fb97 Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 15 Oct 2018 11:31:55 -0400 Subject: [PATCH 0940/2269] ARM: add more CPU part numbers for Cortex and Brahma B15 CPUs Commit f5683e76f35b4ec5891031b6a29036efe0a1ff84 upstream. Add CPU part numbers for Cortex A53, A57, A72, A73, A75 and the Broadcom Brahma B15 CPU. Signed-off-by: Russell King Acked-by: Florian Fainelli Boot-tested-by: Tony Lindgren Reviewed-by: Tony Lindgren Acked-by: Marc Zyngier Signed-off-by: David A. Long Signed-off-by: Greg Kroah-Hartman --- arch/arm/include/asm/cputype.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/arm/include/asm/cputype.h b/arch/arm/include/asm/cputype.h index 441933311bbf8..3379c2c684c23 100644 --- a/arch/arm/include/asm/cputype.h +++ b/arch/arm/include/asm/cputype.h @@ -77,8 +77,16 @@ #define ARM_CPU_PART_CORTEX_A12 0x4100c0d0 #define ARM_CPU_PART_CORTEX_A17 0x4100c0e0 #define ARM_CPU_PART_CORTEX_A15 0x4100c0f0 +#define ARM_CPU_PART_CORTEX_A53 0x4100d030 +#define ARM_CPU_PART_CORTEX_A57 0x4100d070 +#define ARM_CPU_PART_CORTEX_A72 0x4100d080 +#define ARM_CPU_PART_CORTEX_A73 0x4100d090 +#define ARM_CPU_PART_CORTEX_A75 0x4100d0a0 #define ARM_CPU_PART_MASK 0xff00fff0 +/* Broadcom cores */ +#define ARM_CPU_PART_BRAHMA_B15 0x420000f0 + /* DEC implemented cores */ #define ARM_CPU_PART_SA1100 0x4400a110 -- GitLab From 9a42b70744b1620ddfa8b2373a52e1d0552049da Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 15 Oct 2018 11:31:56 -0400 Subject: [PATCH 0941/2269] ARM: bugs: prepare processor bug infrastructure Commit a5b9177f69329314721aa7022b7e69dab23fa1f0 upstream. Prepare the processor bug infrastructure so that it can be expanded to check for per-processor bugs. Signed-off-by: Russell King Reviewed-by: Florian Fainelli Boot-tested-by: Tony Lindgren Reviewed-by: Tony Lindgren Acked-by: Marc Zyngier Signed-off-by: David A. Long Signed-off-by: Greg Kroah-Hartman --- arch/arm/include/asm/bugs.h | 4 ++-- arch/arm/kernel/Makefile | 1 + arch/arm/kernel/bugs.c | 9 +++++++++ 3 files changed, 12 insertions(+), 2 deletions(-) create mode 100644 arch/arm/kernel/bugs.c diff --git a/arch/arm/include/asm/bugs.h b/arch/arm/include/asm/bugs.h index a97f1ea708d19..ed122d294f3f9 100644 --- a/arch/arm/include/asm/bugs.h +++ b/arch/arm/include/asm/bugs.h @@ -10,10 +10,10 @@ #ifndef __ASM_BUGS_H #define __ASM_BUGS_H -#ifdef CONFIG_MMU extern void check_writebuffer_bugs(void); -#define check_bugs() check_writebuffer_bugs() +#ifdef CONFIG_MMU +extern void check_bugs(void); #else #define check_bugs() do { } while (0) #endif diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index 499f978fb1fd6..50de918252b71 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -31,6 +31,7 @@ else obj-y += entry-armv.o endif +obj-$(CONFIG_MMU) += bugs.o obj-$(CONFIG_CPU_IDLE) += cpuidle.o obj-$(CONFIG_ISA_DMA_API) += dma.o obj-$(CONFIG_FIQ) += fiq.o fiqasm.o diff --git a/arch/arm/kernel/bugs.c b/arch/arm/kernel/bugs.c new file mode 100644 index 0000000000000..88024028bb704 --- /dev/null +++ b/arch/arm/kernel/bugs.c @@ -0,0 +1,9 @@ +// SPDX-Identifier: GPL-2.0 +#include +#include +#include + +void __init check_bugs(void) +{ + check_writebuffer_bugs(); +} -- GitLab From c7825c277bad3fc28ec29d5d73d54b7f3b12c573 Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 15 Oct 2018 11:31:57 -0400 Subject: [PATCH 0942/2269] ARM: bugs: hook processor bug checking into SMP and suspend paths Commit 26602161b5ba795928a5a719fe1d5d9f2ab5c3ef upstream. Check for CPU bugs when secondary processors are being brought online, and also when CPUs are resuming from a low power mode. This gives an opportunity to check that processor specific bug workarounds are correctly enabled for all paths that a CPU re-enters the kernel. Signed-off-by: Russell King Reviewed-by: Florian Fainelli Boot-tested-by: Tony Lindgren Reviewed-by: Tony Lindgren Acked-by: Marc Zyngier Signed-off-by: David A. Long Signed-off-by: Greg Kroah-Hartman --- arch/arm/include/asm/bugs.h | 2 ++ arch/arm/kernel/bugs.c | 5 +++++ arch/arm/kernel/smp.c | 4 ++++ arch/arm/kernel/suspend.c | 2 ++ 4 files changed, 13 insertions(+) diff --git a/arch/arm/include/asm/bugs.h b/arch/arm/include/asm/bugs.h index ed122d294f3f9..73a99c72a930a 100644 --- a/arch/arm/include/asm/bugs.h +++ b/arch/arm/include/asm/bugs.h @@ -14,8 +14,10 @@ extern void check_writebuffer_bugs(void); #ifdef CONFIG_MMU extern void check_bugs(void); +extern void check_other_bugs(void); #else #define check_bugs() do { } while (0) +#define check_other_bugs() do { } while (0) #endif #endif diff --git a/arch/arm/kernel/bugs.c b/arch/arm/kernel/bugs.c index 88024028bb704..16e7ba2a9cc40 100644 --- a/arch/arm/kernel/bugs.c +++ b/arch/arm/kernel/bugs.c @@ -3,7 +3,12 @@ #include #include +void check_other_bugs(void) +{ +} + void __init check_bugs(void) { check_writebuffer_bugs(); + check_other_bugs(); } diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index c9a0a52998279..e61af0600133b 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -31,6 +31,7 @@ #include #include +#include #include #include #include @@ -402,6 +403,9 @@ asmlinkage void secondary_start_kernel(void) * before we continue - which happens after __cpu_up returns. */ set_cpu_online(cpu, true); + + check_other_bugs(); + complete(&cpu_running); local_irq_enable(); diff --git a/arch/arm/kernel/suspend.c b/arch/arm/kernel/suspend.c index a40ebb7c0896b..d08099269e35b 100644 --- a/arch/arm/kernel/suspend.c +++ b/arch/arm/kernel/suspend.c @@ -3,6 +3,7 @@ #include #include +#include #include #include #include @@ -36,6 +37,7 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long)) cpu_switch_mm(mm->pgd, mm); local_flush_bp_all(); local_flush_tlb_all(); + check_other_bugs(); } return ret; -- GitLab From 0d5360ee15e98cb04ee98b92a90b99cfd3e154a3 Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 15 Oct 2018 11:31:58 -0400 Subject: [PATCH 0943/2269] ARM: bugs: add support for per-processor bug checking Commit 9d3a04925deeabb97c8e26d940b501a2873e8af3 upstream. Add support for per-processor bug checking - each processor function descriptor gains a function pointer for this check, which must not be an __init function. If non-NULL, this will be called whenever a CPU enters the kernel via which ever path (boot CPU, secondary CPU startup, CPU resuming, etc.) This allows processor specific bug checks to validate that workaround bits are properly enabled by firmware via all entry paths to the kernel. Signed-off-by: Russell King Reviewed-by: Florian Fainelli Boot-tested-by: Tony Lindgren Reviewed-by: Tony Lindgren Acked-by: Marc Zyngier Signed-off-by: David A. Long Signed-off-by: Greg Kroah-Hartman --- arch/arm/include/asm/proc-fns.h | 4 ++++ arch/arm/kernel/bugs.c | 4 ++++ arch/arm/mm/proc-macros.S | 3 ++- 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/arm/include/asm/proc-fns.h b/arch/arm/include/asm/proc-fns.h index f2e1af45bd6fa..e25f4392e1b28 100644 --- a/arch/arm/include/asm/proc-fns.h +++ b/arch/arm/include/asm/proc-fns.h @@ -36,6 +36,10 @@ extern struct processor { * Set up any processor specifics */ void (*_proc_init)(void); + /* + * Check for processor bugs + */ + void (*check_bugs)(void); /* * Disable any processor specifics */ diff --git a/arch/arm/kernel/bugs.c b/arch/arm/kernel/bugs.c index 16e7ba2a9cc40..7be5113101915 100644 --- a/arch/arm/kernel/bugs.c +++ b/arch/arm/kernel/bugs.c @@ -5,6 +5,10 @@ void check_other_bugs(void) { +#ifdef MULTI_CPU + if (processor.check_bugs) + processor.check_bugs(); +#endif } void __init check_bugs(void) diff --git a/arch/arm/mm/proc-macros.S b/arch/arm/mm/proc-macros.S index f10e31d0730af..81d0efb055c66 100644 --- a/arch/arm/mm/proc-macros.S +++ b/arch/arm/mm/proc-macros.S @@ -273,13 +273,14 @@ mcr p15, 0, ip, c7, c10, 4 @ data write barrier .endm -.macro define_processor_functions name:req, dabort:req, pabort:req, nommu=0, suspend=0 +.macro define_processor_functions name:req, dabort:req, pabort:req, nommu=0, suspend=0, bugs=0 .type \name\()_processor_functions, #object .align 2 ENTRY(\name\()_processor_functions) .word \dabort .word \pabort .word cpu_\name\()_proc_init + .word \bugs .word cpu_\name\()_proc_fin .word cpu_\name\()_reset .word cpu_\name\()_do_idle -- GitLab From c0f64070a310c9f9c948841c0ec7a9635ad8c08d Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 15 Oct 2018 11:31:59 -0400 Subject: [PATCH 0944/2269] ARM: spectre: add Kconfig symbol for CPUs vulnerable to Spectre Commit c58d237d0852a57fde9bc2c310972e8f4e3d155d upstream. Add a Kconfig symbol for CPUs which are vulnerable to the Spectre attacks. Signed-off-by: Russell King Reviewed-by: Florian Fainelli Boot-tested-by: Tony Lindgren Reviewed-by: Tony Lindgren Acked-by: Marc Zyngier Signed-off-by: David A. Long Signed-off-by: Greg Kroah-Hartman --- arch/arm/mm/Kconfig | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig index fd9077a74fce9..575e11d91d8ac 100644 --- a/arch/arm/mm/Kconfig +++ b/arch/arm/mm/Kconfig @@ -415,6 +415,7 @@ config CPU_V7 select CPU_CP15_MPU if !MMU select CPU_HAS_ASID if MMU select CPU_PABRT_V7 + select CPU_SPECTRE if MMU select CPU_THUMB_CAPABLE select CPU_TLB_V7 if MMU @@ -826,6 +827,9 @@ config CPU_BPREDICT_DISABLE help Say Y here to disable branch prediction. If unsure, say N. +config CPU_SPECTRE + bool + config TLS_REG_EMUL bool select NEED_KUSER_HELPERS -- GitLab From 3e52aff79d5eeac0f71acd054997c0cffa8d6c55 Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 15 Oct 2018 11:32:00 -0400 Subject: [PATCH 0945/2269] ARM: spectre-v2: harden branch predictor on context switches Commit 06c23f5ffe7ad45b908d0fff604dae08a7e334b9 upstream. Required manual merge of arch/arm/mm/proc-v7.S. Harden the branch predictor against Spectre v2 attacks on context switches for ARMv7 and later CPUs. We do this by: Cortex A9, A12, A17, A73, A75: invalidating the BTB. Cortex A15, Brahma B15: invalidating the instruction cache. Cortex A57 and Cortex A72 are not addressed in this patch. Cortex R7 and Cortex R8 are also not addressed as we do not enforce memory protection on these cores. Signed-off-by: Russell King Boot-tested-by: Tony Lindgren Reviewed-by: Tony Lindgren Acked-by: Marc Zyngier Signed-off-by: David A. Long Signed-off-by: Greg Kroah-Hartman --- arch/arm/mm/Kconfig | 19 ++++++ arch/arm/mm/proc-v7-2level.S | 6 -- arch/arm/mm/proc-v7.S | 125 +++++++++++++++++++++++++++-------- 3 files changed, 115 insertions(+), 35 deletions(-) diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig index 575e11d91d8ac..50e0b45a22dba 100644 --- a/arch/arm/mm/Kconfig +++ b/arch/arm/mm/Kconfig @@ -830,6 +830,25 @@ config CPU_BPREDICT_DISABLE config CPU_SPECTRE bool +config HARDEN_BRANCH_PREDICTOR + bool "Harden the branch predictor against aliasing attacks" if EXPERT + depends on CPU_SPECTRE + default y + help + Speculation attacks against some high-performance processors rely + on being able to manipulate the branch predictor for a victim + context by executing aliasing branches in the attacker context. + Such attacks can be partially mitigated against by clearing + internal branch predictor state and limiting the prediction + logic in some situations. + + This config option will take CPU-specific actions to harden + the branch predictor against aliasing attacks and may rely on + specific instruction sequences or control bits being set by + the system firmware. + + If unsure, say Y. + config TLS_REG_EMUL bool select NEED_KUSER_HELPERS diff --git a/arch/arm/mm/proc-v7-2level.S b/arch/arm/mm/proc-v7-2level.S index c6141a5435c3c..f8d45ad2a515b 100644 --- a/arch/arm/mm/proc-v7-2level.S +++ b/arch/arm/mm/proc-v7-2level.S @@ -41,11 +41,6 @@ * even on Cortex-A8 revisions not affected by 430973. * If IBE is not set, the flush BTAC/BTB won't do anything. */ -ENTRY(cpu_ca8_switch_mm) -#ifdef CONFIG_MMU - mov r2, #0 - mcr p15, 0, r2, c7, c5, 6 @ flush BTAC/BTB -#endif ENTRY(cpu_v7_switch_mm) #ifdef CONFIG_MMU mmid r1, r1 @ get mm->context.id @@ -66,7 +61,6 @@ ENTRY(cpu_v7_switch_mm) #endif bx lr ENDPROC(cpu_v7_switch_mm) -ENDPROC(cpu_ca8_switch_mm) /* * cpu_v7_set_pte_ext(ptep, pte) diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S index 01d64c0b2563b..53ba29444a2b7 100644 --- a/arch/arm/mm/proc-v7.S +++ b/arch/arm/mm/proc-v7.S @@ -93,6 +93,17 @@ ENTRY(cpu_v7_dcache_clean_area) ret lr ENDPROC(cpu_v7_dcache_clean_area) +ENTRY(cpu_v7_iciallu_switch_mm) + mov r3, #0 + mcr p15, 0, r3, c7, c5, 0 @ ICIALLU + b cpu_v7_switch_mm +ENDPROC(cpu_v7_iciallu_switch_mm) +ENTRY(cpu_v7_bpiall_switch_mm) + mov r3, #0 + mcr p15, 0, r3, c7, c5, 6 @ flush BTAC/BTB + b cpu_v7_switch_mm +ENDPROC(cpu_v7_bpiall_switch_mm) + string cpu_v7_name, "ARMv7 Processor" .align @@ -158,31 +169,6 @@ ENTRY(cpu_v7_do_resume) ENDPROC(cpu_v7_do_resume) #endif -/* - * Cortex-A8 - */ - globl_equ cpu_ca8_proc_init, cpu_v7_proc_init - globl_equ cpu_ca8_proc_fin, cpu_v7_proc_fin - globl_equ cpu_ca8_reset, cpu_v7_reset - globl_equ cpu_ca8_do_idle, cpu_v7_do_idle - globl_equ cpu_ca8_dcache_clean_area, cpu_v7_dcache_clean_area - globl_equ cpu_ca8_set_pte_ext, cpu_v7_set_pte_ext - globl_equ cpu_ca8_suspend_size, cpu_v7_suspend_size -#ifdef CONFIG_ARM_CPU_SUSPEND - globl_equ cpu_ca8_do_suspend, cpu_v7_do_suspend - globl_equ cpu_ca8_do_resume, cpu_v7_do_resume -#endif - -/* - * Cortex-A9 processor functions - */ - globl_equ cpu_ca9mp_proc_init, cpu_v7_proc_init - globl_equ cpu_ca9mp_proc_fin, cpu_v7_proc_fin - globl_equ cpu_ca9mp_reset, cpu_v7_reset - globl_equ cpu_ca9mp_do_idle, cpu_v7_do_idle - globl_equ cpu_ca9mp_dcache_clean_area, cpu_v7_dcache_clean_area - globl_equ cpu_ca9mp_switch_mm, cpu_v7_switch_mm - globl_equ cpu_ca9mp_set_pte_ext, cpu_v7_set_pte_ext .globl cpu_ca9mp_suspend_size .equ cpu_ca9mp_suspend_size, cpu_v7_suspend_size + 4 * 2 #ifdef CONFIG_ARM_CPU_SUSPEND @@ -548,10 +534,75 @@ __v7_setup_stack: @ define struct processor (see and proc-macros.S) define_processor_functions v7, dabort=v7_early_abort, pabort=v7_pabort, suspend=1 + +#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR + @ generic v7 bpiall on context switch + globl_equ cpu_v7_bpiall_proc_init, cpu_v7_proc_init + globl_equ cpu_v7_bpiall_proc_fin, cpu_v7_proc_fin + globl_equ cpu_v7_bpiall_reset, cpu_v7_reset + globl_equ cpu_v7_bpiall_do_idle, cpu_v7_do_idle + globl_equ cpu_v7_bpiall_dcache_clean_area, cpu_v7_dcache_clean_area + globl_equ cpu_v7_bpiall_set_pte_ext, cpu_v7_set_pte_ext + globl_equ cpu_v7_bpiall_suspend_size, cpu_v7_suspend_size +#ifdef CONFIG_ARM_CPU_SUSPEND + globl_equ cpu_v7_bpiall_do_suspend, cpu_v7_do_suspend + globl_equ cpu_v7_bpiall_do_resume, cpu_v7_do_resume +#endif + define_processor_functions v7_bpiall, dabort=v7_early_abort, pabort=v7_pabort, suspend=1 + +#define HARDENED_BPIALL_PROCESSOR_FUNCTIONS v7_bpiall_processor_functions +#else +#define HARDENED_BPIALL_PROCESSOR_FUNCTIONS v7_processor_functions +#endif + #ifndef CONFIG_ARM_LPAE + @ Cortex-A8 - always needs bpiall switch_mm implementation + globl_equ cpu_ca8_proc_init, cpu_v7_proc_init + globl_equ cpu_ca8_proc_fin, cpu_v7_proc_fin + globl_equ cpu_ca8_reset, cpu_v7_reset + globl_equ cpu_ca8_do_idle, cpu_v7_do_idle + globl_equ cpu_ca8_dcache_clean_area, cpu_v7_dcache_clean_area + globl_equ cpu_ca8_set_pte_ext, cpu_v7_set_pte_ext + globl_equ cpu_ca8_switch_mm, cpu_v7_bpiall_switch_mm + globl_equ cpu_ca8_suspend_size, cpu_v7_suspend_size +#ifdef CONFIG_ARM_CPU_SUSPEND + globl_equ cpu_ca8_do_suspend, cpu_v7_do_suspend + globl_equ cpu_ca8_do_resume, cpu_v7_do_resume +#endif define_processor_functions ca8, dabort=v7_early_abort, pabort=v7_pabort, suspend=1 + + @ Cortex-A9 - needs more registers preserved across suspend/resume + @ and bpiall switch_mm for hardening + globl_equ cpu_ca9mp_proc_init, cpu_v7_proc_init + globl_equ cpu_ca9mp_proc_fin, cpu_v7_proc_fin + globl_equ cpu_ca9mp_reset, cpu_v7_reset + globl_equ cpu_ca9mp_do_idle, cpu_v7_do_idle + globl_equ cpu_ca9mp_dcache_clean_area, cpu_v7_dcache_clean_area +#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR + globl_equ cpu_ca9mp_switch_mm, cpu_v7_bpiall_switch_mm +#else + globl_equ cpu_ca9mp_switch_mm, cpu_v7_switch_mm +#endif + globl_equ cpu_ca9mp_set_pte_ext, cpu_v7_set_pte_ext define_processor_functions ca9mp, dabort=v7_early_abort, pabort=v7_pabort, suspend=1 #endif + + @ Cortex-A15 - needs iciallu switch_mm for hardening + globl_equ cpu_ca15_proc_init, cpu_v7_proc_init + globl_equ cpu_ca15_proc_fin, cpu_v7_proc_fin + globl_equ cpu_ca15_reset, cpu_v7_reset + globl_equ cpu_ca15_do_idle, cpu_v7_do_idle + globl_equ cpu_ca15_dcache_clean_area, cpu_v7_dcache_clean_area +#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR + globl_equ cpu_ca15_switch_mm, cpu_v7_iciallu_switch_mm +#else + globl_equ cpu_ca15_switch_mm, cpu_v7_switch_mm +#endif + globl_equ cpu_ca15_set_pte_ext, cpu_v7_set_pte_ext + globl_equ cpu_ca15_suspend_size, cpu_v7_suspend_size + globl_equ cpu_ca15_do_suspend, cpu_v7_do_suspend + globl_equ cpu_ca15_do_resume, cpu_v7_do_resume + define_processor_functions ca15, dabort=v7_early_abort, pabort=v7_pabort, suspend=1 #ifdef CONFIG_CPU_PJ4B define_processor_functions pj4b, dabort=v7_early_abort, pabort=v7_pabort, suspend=1 #endif @@ -658,7 +709,7 @@ __v7_ca7mp_proc_info: __v7_ca12mp_proc_info: .long 0x410fc0d0 .long 0xff0ffff0 - __v7_proc __v7_ca12mp_proc_info, __v7_ca12mp_setup + __v7_proc __v7_ca12mp_proc_info, __v7_ca12mp_setup, proc_fns = HARDENED_BPIALL_PROCESSOR_FUNCTIONS .size __v7_ca12mp_proc_info, . - __v7_ca12mp_proc_info /* @@ -668,7 +719,7 @@ __v7_ca12mp_proc_info: __v7_ca15mp_proc_info: .long 0x410fc0f0 .long 0xff0ffff0 - __v7_proc __v7_ca15mp_proc_info, __v7_ca15mp_setup + __v7_proc __v7_ca15mp_proc_info, __v7_ca15mp_setup, proc_fns = ca15_processor_functions .size __v7_ca15mp_proc_info, . - __v7_ca15mp_proc_info /* @@ -678,7 +729,7 @@ __v7_ca15mp_proc_info: __v7_b15mp_proc_info: .long 0x420f00f0 .long 0xff0ffff0 - __v7_proc __v7_b15mp_proc_info, __v7_b15mp_setup + __v7_proc __v7_b15mp_proc_info, __v7_b15mp_setup, proc_fns = ca15_processor_functions .size __v7_b15mp_proc_info, . - __v7_b15mp_proc_info /* @@ -688,9 +739,25 @@ __v7_b15mp_proc_info: __v7_ca17mp_proc_info: .long 0x410fc0e0 .long 0xff0ffff0 - __v7_proc __v7_ca17mp_proc_info, __v7_ca17mp_setup + __v7_proc __v7_ca17mp_proc_info, __v7_ca17mp_setup, proc_fns = HARDENED_BPIALL_PROCESSOR_FUNCTIONS .size __v7_ca17mp_proc_info, . - __v7_ca17mp_proc_info + /* ARM Ltd. Cortex A73 processor */ + .type __v7_ca73_proc_info, #object +__v7_ca73_proc_info: + .long 0x410fd090 + .long 0xff0ffff0 + __v7_proc __v7_ca73_proc_info, __v7_setup, proc_fns = HARDENED_BPIALL_PROCESSOR_FUNCTIONS + .size __v7_ca73_proc_info, . - __v7_ca73_proc_info + + /* ARM Ltd. Cortex A75 processor */ + .type __v7_ca75_proc_info, #object +__v7_ca75_proc_info: + .long 0x410fd0a0 + .long 0xff0ffff0 + __v7_proc __v7_ca75_proc_info, __v7_setup, proc_fns = HARDENED_BPIALL_PROCESSOR_FUNCTIONS + .size __v7_ca75_proc_info, . - __v7_ca75_proc_info + /* * Qualcomm Inc. Krait processors. */ -- GitLab From 81b215a5b80b32020daf481f9f037942e4552aa7 Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 15 Oct 2018 11:32:01 -0400 Subject: [PATCH 0946/2269] ARM: spectre-v2: add Cortex A8 and A15 validation of the IBE bit Commit e388b80288aade31135aca23d32eee93dd106795 upstream. When the branch predictor hardening is enabled, firmware must have set the IBE bit in the auxiliary control register. If this bit has not been set, the Spectre workarounds will not be functional. Add validation that this bit is set, and print a warning at alert level if this is not the case. Signed-off-by: Russell King Reviewed-by: Florian Fainelli Boot-tested-by: Tony Lindgren Reviewed-by: Tony Lindgren Signed-off-by: David A. Long Signed-off-by: Greg Kroah-Hartman --- arch/arm/mm/Makefile | 2 +- arch/arm/mm/proc-v7-bugs.c | 36 ++++++++++++++++++++++++++++++++++++ arch/arm/mm/proc-v7.S | 4 ++-- 3 files changed, 39 insertions(+), 3 deletions(-) create mode 100644 arch/arm/mm/proc-v7-bugs.c diff --git a/arch/arm/mm/Makefile b/arch/arm/mm/Makefile index f353ee569f6ba..93a622a18cbac 100644 --- a/arch/arm/mm/Makefile +++ b/arch/arm/mm/Makefile @@ -95,7 +95,7 @@ obj-$(CONFIG_CPU_MOHAWK) += proc-mohawk.o obj-$(CONFIG_CPU_FEROCEON) += proc-feroceon.o obj-$(CONFIG_CPU_V6) += proc-v6.o obj-$(CONFIG_CPU_V6K) += proc-v6.o -obj-$(CONFIG_CPU_V7) += proc-v7.o +obj-$(CONFIG_CPU_V7) += proc-v7.o proc-v7-bugs.o obj-$(CONFIG_CPU_V7M) += proc-v7m.o AFLAGS_proc-v6.o :=-Wa,-march=armv6 diff --git a/arch/arm/mm/proc-v7-bugs.c b/arch/arm/mm/proc-v7-bugs.c new file mode 100644 index 0000000000000..e46557db64465 --- /dev/null +++ b/arch/arm/mm/proc-v7-bugs.c @@ -0,0 +1,36 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include + +static __maybe_unused void cpu_v7_check_auxcr_set(bool *warned, + u32 mask, const char *msg) +{ + u32 aux_cr; + + asm("mrc p15, 0, %0, c1, c0, 1" : "=r" (aux_cr)); + + if ((aux_cr & mask) != mask) { + if (!*warned) + pr_err("CPU%u: %s", smp_processor_id(), msg); + *warned = true; + } +} + +static DEFINE_PER_CPU(bool, spectre_warned); + +static void check_spectre_auxcr(bool *warned, u32 bit) +{ + if (IS_ENABLED(CONFIG_HARDEN_BRANCH_PREDICTOR) && + cpu_v7_check_auxcr_set(warned, bit, + "Spectre v2: firmware did not set auxiliary control register IBE bit, system vulnerable\n"); +} + +void cpu_v7_ca8_ibe(void) +{ + check_spectre_auxcr(this_cpu_ptr(&spectre_warned), BIT(6)); +} + +void cpu_v7_ca15_ibe(void) +{ + check_spectre_auxcr(this_cpu_ptr(&spectre_warned), BIT(0)); +} diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S index 53ba29444a2b7..14cb0dbf21d5d 100644 --- a/arch/arm/mm/proc-v7.S +++ b/arch/arm/mm/proc-v7.S @@ -569,7 +569,7 @@ __v7_setup_stack: globl_equ cpu_ca8_do_suspend, cpu_v7_do_suspend globl_equ cpu_ca8_do_resume, cpu_v7_do_resume #endif - define_processor_functions ca8, dabort=v7_early_abort, pabort=v7_pabort, suspend=1 + define_processor_functions ca8, dabort=v7_early_abort, pabort=v7_pabort, suspend=1, bugs=cpu_v7_ca8_ibe @ Cortex-A9 - needs more registers preserved across suspend/resume @ and bpiall switch_mm for hardening @@ -602,7 +602,7 @@ __v7_setup_stack: globl_equ cpu_ca15_suspend_size, cpu_v7_suspend_size globl_equ cpu_ca15_do_suspend, cpu_v7_do_suspend globl_equ cpu_ca15_do_resume, cpu_v7_do_resume - define_processor_functions ca15, dabort=v7_early_abort, pabort=v7_pabort, suspend=1 + define_processor_functions ca15, dabort=v7_early_abort, pabort=v7_pabort, suspend=1, bugs=cpu_v7_ca15_ibe #ifdef CONFIG_CPU_PJ4B define_processor_functions pj4b, dabort=v7_early_abort, pabort=v7_pabort, suspend=1 #endif -- GitLab From 5ab8c6e8879c3eee7d70714b630a4770ff8a2678 Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 15 Oct 2018 11:32:02 -0400 Subject: [PATCH 0947/2269] ARM: spectre-v2: harden user aborts in kernel space Commit f5fe12b1eaee220ce62ff9afb8b90929c396595f upstream. In order to prevent aliasing attacks on the branch predictor, invalidate the BTB or instruction cache on CPUs that are known to be affected when taking an abort on a address that is outside of a user task limit: Cortex A8, A9, A12, A17, A73, A75: flush BTB. Cortex A15, Brahma B15: invalidate icache. If the IBE bit is not set, then there is little point to enabling the workaround. Signed-off-by: Russell King Boot-tested-by: Tony Lindgren Reviewed-by: Tony Lindgren Signed-off-by: David A. Long Signed-off-by: Greg Kroah-Hartman --- arch/arm/include/asm/cp15.h | 3 ++ arch/arm/include/asm/system_misc.h | 15 ++++++ arch/arm/mm/fault.c | 3 ++ arch/arm/mm/proc-v7-bugs.c | 73 ++++++++++++++++++++++++++++-- arch/arm/mm/proc-v7.S | 8 ++-- 5 files changed, 94 insertions(+), 8 deletions(-) diff --git a/arch/arm/include/asm/cp15.h b/arch/arm/include/asm/cp15.h index 4c9fa72b59f57..07e27f212dc75 100644 --- a/arch/arm/include/asm/cp15.h +++ b/arch/arm/include/asm/cp15.h @@ -65,6 +65,9 @@ #define __write_sysreg(v, r, w, c, t) asm volatile(w " " c : : "r" ((t)(v))) #define write_sysreg(v, ...) __write_sysreg(v, __VA_ARGS__) +#define BPIALL __ACCESS_CP15(c7, 0, c5, 6) +#define ICIALLU __ACCESS_CP15(c7, 0, c5, 0) + extern unsigned long cr_alignment; /* defined in entry-armv.S */ static inline unsigned long get_cr(void) diff --git a/arch/arm/include/asm/system_misc.h b/arch/arm/include/asm/system_misc.h index 78f6db114faf6..8e76db83c4987 100644 --- a/arch/arm/include/asm/system_misc.h +++ b/arch/arm/include/asm/system_misc.h @@ -8,6 +8,7 @@ #include #include #include +#include extern void cpu_init(void); @@ -15,6 +16,20 @@ void soft_restart(unsigned long); extern void (*arm_pm_restart)(enum reboot_mode reboot_mode, const char *cmd); extern void (*arm_pm_idle)(void); +#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR +typedef void (*harden_branch_predictor_fn_t)(void); +DECLARE_PER_CPU(harden_branch_predictor_fn_t, harden_branch_predictor_fn); +static inline void harden_branch_predictor(void) +{ + harden_branch_predictor_fn_t fn = per_cpu(harden_branch_predictor_fn, + smp_processor_id()); + if (fn) + fn(); +} +#else +#define harden_branch_predictor() do { } while (0) +#endif + #define UDBG_UNDEFINED (1 << 0) #define UDBG_SYSCALL (1 << 1) #define UDBG_BADABORT (1 << 2) diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c index 42f585379e19c..49b1b80486358 100644 --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c @@ -164,6 +164,9 @@ __do_user_fault(struct task_struct *tsk, unsigned long addr, { struct siginfo si; + if (addr > TASK_SIZE) + harden_branch_predictor(); + #ifdef CONFIG_DEBUG_USER if (((user_debug & UDBG_SEGV) && (sig == SIGSEGV)) || ((user_debug & UDBG_BUS) && (sig == SIGBUS))) { diff --git a/arch/arm/mm/proc-v7-bugs.c b/arch/arm/mm/proc-v7-bugs.c index e46557db64465..85a2e3d6263c2 100644 --- a/arch/arm/mm/proc-v7-bugs.c +++ b/arch/arm/mm/proc-v7-bugs.c @@ -2,7 +2,61 @@ #include #include -static __maybe_unused void cpu_v7_check_auxcr_set(bool *warned, +#include +#include +#include + +#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR +DEFINE_PER_CPU(harden_branch_predictor_fn_t, harden_branch_predictor_fn); + +static void harden_branch_predictor_bpiall(void) +{ + write_sysreg(0, BPIALL); +} + +static void harden_branch_predictor_iciallu(void) +{ + write_sysreg(0, ICIALLU); +} + +static void cpu_v7_spectre_init(void) +{ + const char *spectre_v2_method = NULL; + int cpu = smp_processor_id(); + + if (per_cpu(harden_branch_predictor_fn, cpu)) + return; + + switch (read_cpuid_part()) { + case ARM_CPU_PART_CORTEX_A8: + case ARM_CPU_PART_CORTEX_A9: + case ARM_CPU_PART_CORTEX_A12: + case ARM_CPU_PART_CORTEX_A17: + case ARM_CPU_PART_CORTEX_A73: + case ARM_CPU_PART_CORTEX_A75: + per_cpu(harden_branch_predictor_fn, cpu) = + harden_branch_predictor_bpiall; + spectre_v2_method = "BPIALL"; + break; + + case ARM_CPU_PART_CORTEX_A15: + case ARM_CPU_PART_BRAHMA_B15: + per_cpu(harden_branch_predictor_fn, cpu) = + harden_branch_predictor_iciallu; + spectre_v2_method = "ICIALLU"; + break; + } + if (spectre_v2_method) + pr_info("CPU%u: Spectre v2: using %s workaround\n", + smp_processor_id(), spectre_v2_method); +} +#else +static void cpu_v7_spectre_init(void) +{ +} +#endif + +static __maybe_unused bool cpu_v7_check_auxcr_set(bool *warned, u32 mask, const char *msg) { u32 aux_cr; @@ -13,24 +67,33 @@ static __maybe_unused void cpu_v7_check_auxcr_set(bool *warned, if (!*warned) pr_err("CPU%u: %s", smp_processor_id(), msg); *warned = true; + return false; } + return true; } static DEFINE_PER_CPU(bool, spectre_warned); -static void check_spectre_auxcr(bool *warned, u32 bit) +static bool check_spectre_auxcr(bool *warned, u32 bit) { - if (IS_ENABLED(CONFIG_HARDEN_BRANCH_PREDICTOR) && + return IS_ENABLED(CONFIG_HARDEN_BRANCH_PREDICTOR) && cpu_v7_check_auxcr_set(warned, bit, "Spectre v2: firmware did not set auxiliary control register IBE bit, system vulnerable\n"); } void cpu_v7_ca8_ibe(void) { - check_spectre_auxcr(this_cpu_ptr(&spectre_warned), BIT(6)); + if (check_spectre_auxcr(this_cpu_ptr(&spectre_warned), BIT(6))) + cpu_v7_spectre_init(); } void cpu_v7_ca15_ibe(void) { - check_spectre_auxcr(this_cpu_ptr(&spectre_warned), BIT(0)); + if (check_spectre_auxcr(this_cpu_ptr(&spectre_warned), BIT(0))) + cpu_v7_spectre_init(); +} + +void cpu_v7_bugs_init(void) +{ + cpu_v7_spectre_init(); } diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S index 14cb0dbf21d5d..562bcf2657f38 100644 --- a/arch/arm/mm/proc-v7.S +++ b/arch/arm/mm/proc-v7.S @@ -532,8 +532,10 @@ __v7_setup_stack: __INITDATA + .weak cpu_v7_bugs_init + @ define struct processor (see and proc-macros.S) - define_processor_functions v7, dabort=v7_early_abort, pabort=v7_pabort, suspend=1 + define_processor_functions v7, dabort=v7_early_abort, pabort=v7_pabort, suspend=1, bugs=cpu_v7_bugs_init #ifdef CONFIG_HARDEN_BRANCH_PREDICTOR @ generic v7 bpiall on context switch @@ -548,7 +550,7 @@ __v7_setup_stack: globl_equ cpu_v7_bpiall_do_suspend, cpu_v7_do_suspend globl_equ cpu_v7_bpiall_do_resume, cpu_v7_do_resume #endif - define_processor_functions v7_bpiall, dabort=v7_early_abort, pabort=v7_pabort, suspend=1 + define_processor_functions v7_bpiall, dabort=v7_early_abort, pabort=v7_pabort, suspend=1, bugs=cpu_v7_bugs_init #define HARDENED_BPIALL_PROCESSOR_FUNCTIONS v7_bpiall_processor_functions #else @@ -584,7 +586,7 @@ __v7_setup_stack: globl_equ cpu_ca9mp_switch_mm, cpu_v7_switch_mm #endif globl_equ cpu_ca9mp_set_pte_ext, cpu_v7_set_pte_ext - define_processor_functions ca9mp, dabort=v7_early_abort, pabort=v7_pabort, suspend=1 + define_processor_functions ca9mp, dabort=v7_early_abort, pabort=v7_pabort, suspend=1, bugs=cpu_v7_bugs_init #endif @ Cortex-A15 - needs iciallu switch_mm for hardening -- GitLab From 510155b2d95b3561ec214a16fa9e28ce10d0d9c7 Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 15 Oct 2018 11:32:03 -0400 Subject: [PATCH 0948/2269] ARM: spectre-v2: add firmware based hardening Commit 10115105cb3aa17b5da1cb726ae8dd5f6854bd93 upstream. Add firmware based hardening for cores that require more complex handling in firmware. Signed-off-by: Russell King Boot-tested-by: Tony Lindgren Reviewed-by: Tony Lindgren Reviewed-by: Marc Zyngier Signed-off-by: David A. Long Signed-off-by: Greg Kroah-Hartman --- arch/arm/mm/proc-v7-bugs.c | 60 ++++++++++++++++++++++++++++++++++++++ arch/arm/mm/proc-v7.S | 21 +++++++++++++ 2 files changed, 81 insertions(+) diff --git a/arch/arm/mm/proc-v7-bugs.c b/arch/arm/mm/proc-v7-bugs.c index 85a2e3d6263c2..da25a38e18976 100644 --- a/arch/arm/mm/proc-v7-bugs.c +++ b/arch/arm/mm/proc-v7-bugs.c @@ -1,14 +1,20 @@ // SPDX-License-Identifier: GPL-2.0 +#include #include +#include #include #include #include +#include #include #ifdef CONFIG_HARDEN_BRANCH_PREDICTOR DEFINE_PER_CPU(harden_branch_predictor_fn_t, harden_branch_predictor_fn); +extern void cpu_v7_smc_switch_mm(phys_addr_t pgd_phys, struct mm_struct *mm); +extern void cpu_v7_hvc_switch_mm(phys_addr_t pgd_phys, struct mm_struct *mm); + static void harden_branch_predictor_bpiall(void) { write_sysreg(0, BPIALL); @@ -19,6 +25,16 @@ static void harden_branch_predictor_iciallu(void) write_sysreg(0, ICIALLU); } +static void __maybe_unused call_smc_arch_workaround_1(void) +{ + arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_1, NULL); +} + +static void __maybe_unused call_hvc_arch_workaround_1(void) +{ + arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_WORKAROUND_1, NULL); +} + static void cpu_v7_spectre_init(void) { const char *spectre_v2_method = NULL; @@ -45,7 +61,51 @@ static void cpu_v7_spectre_init(void) harden_branch_predictor_iciallu; spectre_v2_method = "ICIALLU"; break; + +#ifdef CONFIG_ARM_PSCI + default: + /* Other ARM CPUs require no workaround */ + if (read_cpuid_implementor() == ARM_CPU_IMP_ARM) + break; + /* fallthrough */ + /* Cortex A57/A72 require firmware workaround */ + case ARM_CPU_PART_CORTEX_A57: + case ARM_CPU_PART_CORTEX_A72: { + struct arm_smccc_res res; + + if (psci_ops.smccc_version == SMCCC_VERSION_1_0) + break; + + switch (psci_ops.conduit) { + case PSCI_CONDUIT_HVC: + arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, + ARM_SMCCC_ARCH_WORKAROUND_1, &res); + if ((int)res.a0 != 0) + break; + per_cpu(harden_branch_predictor_fn, cpu) = + call_hvc_arch_workaround_1; + processor.switch_mm = cpu_v7_hvc_switch_mm; + spectre_v2_method = "hypervisor"; + break; + + case PSCI_CONDUIT_SMC: + arm_smccc_1_1_smc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, + ARM_SMCCC_ARCH_WORKAROUND_1, &res); + if ((int)res.a0 != 0) + break; + per_cpu(harden_branch_predictor_fn, cpu) = + call_smc_arch_workaround_1; + processor.switch_mm = cpu_v7_smc_switch_mm; + spectre_v2_method = "firmware"; + break; + + default: + break; + } } +#endif + } + if (spectre_v2_method) pr_info("CPU%u: Spectre v2: using %s workaround\n", smp_processor_id(), spectre_v2_method); diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S index 562bcf2657f38..12468d9378d88 100644 --- a/arch/arm/mm/proc-v7.S +++ b/arch/arm/mm/proc-v7.S @@ -9,6 +9,7 @@ * * This is the "shell" of the ARMv7 processor support. */ +#include #include #include #include @@ -93,6 +94,26 @@ ENTRY(cpu_v7_dcache_clean_area) ret lr ENDPROC(cpu_v7_dcache_clean_area) +#ifdef CONFIG_ARM_PSCI + .arch_extension sec +ENTRY(cpu_v7_smc_switch_mm) + stmfd sp!, {r0 - r3} + movw r0, #:lower16:ARM_SMCCC_ARCH_WORKAROUND_1 + movt r0, #:upper16:ARM_SMCCC_ARCH_WORKAROUND_1 + smc #0 + ldmfd sp!, {r0 - r3} + b cpu_v7_switch_mm +ENDPROC(cpu_v7_smc_switch_mm) + .arch_extension virt +ENTRY(cpu_v7_hvc_switch_mm) + stmfd sp!, {r0 - r3} + movw r0, #:lower16:ARM_SMCCC_ARCH_WORKAROUND_1 + movt r0, #:upper16:ARM_SMCCC_ARCH_WORKAROUND_1 + hvc #0 + ldmfd sp!, {r0 - r3} + b cpu_v7_switch_mm +ENDPROC(cpu_v7_smc_switch_mm) +#endif ENTRY(cpu_v7_iciallu_switch_mm) mov r3, #0 mcr p15, 0, r3, c7, c5, 0 @ ICIALLU -- GitLab From 6d75fe7ed2f69f5debc35098281516e6737a8229 Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 15 Oct 2018 11:32:04 -0400 Subject: [PATCH 0949/2269] ARM: spectre-v2: warn about incorrect context switching functions Commit c44f366ea7c85e1be27d08f2f0880f4120698125 upstream. Warn at error level if the context switching function is not what we are expecting. This can happen with big.Little systems, which we currently do not support. Signed-off-by: Russell King Boot-tested-by: Tony Lindgren Reviewed-by: Tony Lindgren Acked-by: Marc Zyngier Signed-off-by: David A. Long Signed-off-by: Greg Kroah-Hartman --- arch/arm/mm/proc-v7-bugs.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/arch/arm/mm/proc-v7-bugs.c b/arch/arm/mm/proc-v7-bugs.c index da25a38e18976..5544b82a2e7a5 100644 --- a/arch/arm/mm/proc-v7-bugs.c +++ b/arch/arm/mm/proc-v7-bugs.c @@ -12,6 +12,8 @@ #ifdef CONFIG_HARDEN_BRANCH_PREDICTOR DEFINE_PER_CPU(harden_branch_predictor_fn_t, harden_branch_predictor_fn); +extern void cpu_v7_iciallu_switch_mm(phys_addr_t pgd_phys, struct mm_struct *mm); +extern void cpu_v7_bpiall_switch_mm(phys_addr_t pgd_phys, struct mm_struct *mm); extern void cpu_v7_smc_switch_mm(phys_addr_t pgd_phys, struct mm_struct *mm); extern void cpu_v7_hvc_switch_mm(phys_addr_t pgd_phys, struct mm_struct *mm); @@ -50,6 +52,8 @@ static void cpu_v7_spectre_init(void) case ARM_CPU_PART_CORTEX_A17: case ARM_CPU_PART_CORTEX_A73: case ARM_CPU_PART_CORTEX_A75: + if (processor.switch_mm != cpu_v7_bpiall_switch_mm) + goto bl_error; per_cpu(harden_branch_predictor_fn, cpu) = harden_branch_predictor_bpiall; spectre_v2_method = "BPIALL"; @@ -57,6 +61,8 @@ static void cpu_v7_spectre_init(void) case ARM_CPU_PART_CORTEX_A15: case ARM_CPU_PART_BRAHMA_B15: + if (processor.switch_mm != cpu_v7_iciallu_switch_mm) + goto bl_error; per_cpu(harden_branch_predictor_fn, cpu) = harden_branch_predictor_iciallu; spectre_v2_method = "ICIALLU"; @@ -82,6 +88,8 @@ static void cpu_v7_spectre_init(void) ARM_SMCCC_ARCH_WORKAROUND_1, &res); if ((int)res.a0 != 0) break; + if (processor.switch_mm != cpu_v7_hvc_switch_mm && cpu) + goto bl_error; per_cpu(harden_branch_predictor_fn, cpu) = call_hvc_arch_workaround_1; processor.switch_mm = cpu_v7_hvc_switch_mm; @@ -93,6 +101,8 @@ static void cpu_v7_spectre_init(void) ARM_SMCCC_ARCH_WORKAROUND_1, &res); if ((int)res.a0 != 0) break; + if (processor.switch_mm != cpu_v7_smc_switch_mm && cpu) + goto bl_error; per_cpu(harden_branch_predictor_fn, cpu) = call_smc_arch_workaround_1; processor.switch_mm = cpu_v7_smc_switch_mm; @@ -109,6 +119,11 @@ static void cpu_v7_spectre_init(void) if (spectre_v2_method) pr_info("CPU%u: Spectre v2: using %s workaround\n", smp_processor_id(), spectre_v2_method); + return; + +bl_error: + pr_err("CPU%u: Spectre v2: incorrect context switching function, system vulnerable\n", + cpu); } #else static void cpu_v7_spectre_init(void) -- GitLab From 75e48eff8aae50cabc9124cb39b31ef94c713c6a Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 15 Oct 2018 11:32:05 -0400 Subject: [PATCH 0950/2269] ARM: KVM: invalidate BTB on guest exit for Cortex-A12/A17 Commit 3f7e8e2e1ebda787f156ce46e3f0a9ce2833fa4f upstream. In order to avoid aliasing attacks against the branch predictor, let's invalidate the BTB on guest exit. This is made complicated by the fact that we cannot take a branch before invalidating the BTB. We only apply this to A12 and A17, which are the only two ARM cores on which this useful. Signed-off-by: Marc Zyngier Signed-off-by: Russell King Boot-tested-by: Tony Lindgren Reviewed-by: Tony Lindgren Signed-off-by: David A. Long Signed-off-by: Greg Kroah-Hartman --- arch/arm/include/asm/kvm_asm.h | 2 - arch/arm/include/asm/kvm_mmu.h | 17 +++++++- arch/arm/kvm/hyp/hyp-entry.S | 71 +++++++++++++++++++++++++++++++++- 3 files changed, 85 insertions(+), 5 deletions(-) diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h index 14d68a4d826f6..b598e666da4c4 100644 --- a/arch/arm/include/asm/kvm_asm.h +++ b/arch/arm/include/asm/kvm_asm.h @@ -61,8 +61,6 @@ struct kvm_vcpu; extern char __kvm_hyp_init[]; extern char __kvm_hyp_init_end[]; -extern char __kvm_hyp_vector[]; - extern void __kvm_flush_vm_context(void); extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa); extern void __kvm_tlb_flush_vmid(struct kvm *kvm); diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index 8a098e65f5f85..85d48c9e1587a 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -246,7 +246,22 @@ static inline int kvm_read_guest_lock(struct kvm *kvm, static inline void *kvm_get_hyp_vector(void) { - return kvm_ksym_ref(__kvm_hyp_vector); + switch(read_cpuid_part()) { +#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR + case ARM_CPU_PART_CORTEX_A12: + case ARM_CPU_PART_CORTEX_A17: + { + extern char __kvm_hyp_vector_bp_inv[]; + return kvm_ksym_ref(__kvm_hyp_vector_bp_inv); + } + +#endif + default: + { + extern char __kvm_hyp_vector[]; + return kvm_ksym_ref(__kvm_hyp_vector); + } + } } static inline int kvm_map_vectors(void) diff --git a/arch/arm/kvm/hyp/hyp-entry.S b/arch/arm/kvm/hyp/hyp-entry.S index 95a2faefc070f..e789f52a51290 100644 --- a/arch/arm/kvm/hyp/hyp-entry.S +++ b/arch/arm/kvm/hyp/hyp-entry.S @@ -71,6 +71,66 @@ __kvm_hyp_vector: W(b) hyp_irq W(b) hyp_fiq +#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR + .align 5 +__kvm_hyp_vector_bp_inv: + .global __kvm_hyp_vector_bp_inv + + /* + * We encode the exception entry in the bottom 3 bits of + * SP, and we have to guarantee to be 8 bytes aligned. + */ + W(add) sp, sp, #1 /* Reset 7 */ + W(add) sp, sp, #1 /* Undef 6 */ + W(add) sp, sp, #1 /* Syscall 5 */ + W(add) sp, sp, #1 /* Prefetch abort 4 */ + W(add) sp, sp, #1 /* Data abort 3 */ + W(add) sp, sp, #1 /* HVC 2 */ + W(add) sp, sp, #1 /* IRQ 1 */ + W(nop) /* FIQ 0 */ + + mcr p15, 0, r0, c7, c5, 6 /* BPIALL */ + isb + +#ifdef CONFIG_THUMB2_KERNEL + /* + * Yet another silly hack: Use VPIDR as a temp register. + * Thumb2 is really a pain, as SP cannot be used with most + * of the bitwise instructions. The vect_br macro ensures + * things gets cleaned-up. + */ + mcr p15, 4, r0, c0, c0, 0 /* VPIDR */ + mov r0, sp + and r0, r0, #7 + sub sp, sp, r0 + push {r1, r2} + mov r1, r0 + mrc p15, 4, r0, c0, c0, 0 /* VPIDR */ + mrc p15, 0, r2, c0, c0, 0 /* MIDR */ + mcr p15, 4, r2, c0, c0, 0 /* VPIDR */ +#endif + +.macro vect_br val, targ +ARM( eor sp, sp, #\val ) +ARM( tst sp, #7 ) +ARM( eorne sp, sp, #\val ) + +THUMB( cmp r1, #\val ) +THUMB( popeq {r1, r2} ) + + beq \targ +.endm + + vect_br 0, hyp_fiq + vect_br 1, hyp_irq + vect_br 2, hyp_hvc + vect_br 3, hyp_dabt + vect_br 4, hyp_pabt + vect_br 5, hyp_svc + vect_br 6, hyp_undef + vect_br 7, hyp_reset +#endif + .macro invalid_vector label, cause .align \label: mov r0, #\cause @@ -149,7 +209,14 @@ hyp_hvc: bx ip 1: - push {lr} + /* + * Pushing r2 here is just a way of keeping the stack aligned to + * 8 bytes on any path that can trigger a HYP exception. Here, + * we may well be about to jump into the guest, and the guest + * exit would otherwise be badly decoded by our fancy + * "decode-exception-without-a-branch" code... + */ + push {r2, lr} mov lr, r0 mov r0, r1 @@ -159,7 +226,7 @@ hyp_hvc: THUMB( orr lr, #1) blx lr @ Call the HYP function - pop {lr} + pop {r2, lr} eret guest_trap: -- GitLab From 1df9a0a8201b4ba2e58f54c0b88810a8cc4f3fbd Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 15 Oct 2018 11:32:06 -0400 Subject: [PATCH 0951/2269] ARM: KVM: invalidate icache on guest exit for Cortex-A15 Commit 0c47ac8cd157727e7a532d665d6fb1b5fd333977 upstream. In order to avoid aliasing attacks against the branch predictor on Cortex-A15, let's invalidate the BTB on guest exit, which can only be done by invalidating the icache (with ACTLR[0] being set). We use the same hack as for A12/A17 to perform the vector decoding. Signed-off-by: Marc Zyngier Signed-off-by: Russell King Boot-tested-by: Tony Lindgren Reviewed-by: Tony Lindgren Signed-off-by: David A. Long Signed-off-by: Greg Kroah-Hartman --- arch/arm/include/asm/kvm_mmu.h | 5 +++++ arch/arm/kvm/hyp/hyp-entry.S | 24 ++++++++++++++++++++++++ 2 files changed, 29 insertions(+) diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index 85d48c9e1587a..082b2861926df 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -255,6 +255,11 @@ static inline void *kvm_get_hyp_vector(void) return kvm_ksym_ref(__kvm_hyp_vector_bp_inv); } + case ARM_CPU_PART_CORTEX_A15: + { + extern char __kvm_hyp_vector_ic_inv[]; + return kvm_ksym_ref(__kvm_hyp_vector_ic_inv); + } #endif default: { diff --git a/arch/arm/kvm/hyp/hyp-entry.S b/arch/arm/kvm/hyp/hyp-entry.S index e789f52a51290..918a05dd2d63b 100644 --- a/arch/arm/kvm/hyp/hyp-entry.S +++ b/arch/arm/kvm/hyp/hyp-entry.S @@ -72,6 +72,28 @@ __kvm_hyp_vector: W(b) hyp_fiq #ifdef CONFIG_HARDEN_BRANCH_PREDICTOR + .align 5 +__kvm_hyp_vector_ic_inv: + .global __kvm_hyp_vector_ic_inv + + /* + * We encode the exception entry in the bottom 3 bits of + * SP, and we have to guarantee to be 8 bytes aligned. + */ + W(add) sp, sp, #1 /* Reset 7 */ + W(add) sp, sp, #1 /* Undef 6 */ + W(add) sp, sp, #1 /* Syscall 5 */ + W(add) sp, sp, #1 /* Prefetch abort 4 */ + W(add) sp, sp, #1 /* Data abort 3 */ + W(add) sp, sp, #1 /* HVC 2 */ + W(add) sp, sp, #1 /* IRQ 1 */ + W(nop) /* FIQ 0 */ + + mcr p15, 0, r0, c7, c5, 0 /* ICIALLU */ + isb + + b decode_vectors + .align 5 __kvm_hyp_vector_bp_inv: .global __kvm_hyp_vector_bp_inv @@ -92,6 +114,8 @@ __kvm_hyp_vector_bp_inv: mcr p15, 0, r0, c7, c5, 6 /* BPIALL */ isb +decode_vectors: + #ifdef CONFIG_THUMB2_KERNEL /* * Yet another silly hack: Use VPIDR as a temp register. -- GitLab From ee4e537d3aa18e9a35fef400961ba0ce8edaf7b9 Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 15 Oct 2018 11:32:07 -0400 Subject: [PATCH 0952/2269] ARM: spectre-v2: KVM: invalidate icache on guest exit for Brahma B15 Commit 3c908e16396d130608e831b7fac4b167a2ede6ba upstream. Include Brahma B15 in the Spectre v2 KVM workarounds. Signed-off-by: Russell King Acked-by: Florian Fainelli Boot-tested-by: Tony Lindgren Reviewed-by: Tony Lindgren Acked-by: Marc Zyngier Signed-off-by: David A. Long Signed-off-by: Greg Kroah-Hartman --- arch/arm/include/asm/kvm_mmu.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index 082b2861926df..ca62f95f3b4c9 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -255,6 +255,7 @@ static inline void *kvm_get_hyp_vector(void) return kvm_ksym_ref(__kvm_hyp_vector_bp_inv); } + case ARM_CPU_PART_BRAHMA_B15: case ARM_CPU_PART_CORTEX_A15: { extern char __kvm_hyp_vector_ic_inv[]; -- GitLab From 8502541ee21650603972834c3560df9329f3a1a4 Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 15 Oct 2018 11:32:08 -0400 Subject: [PATCH 0953/2269] ARM: KVM: Add SMCCC_ARCH_WORKAROUND_1 fast handling Commit b800acfc70d9fb81fbd6df70f2cf5e20f70023d0 upstream. We want SMCCC_ARCH_WORKAROUND_1 to be fast. As fast as possible. So let's intercept it as early as we can by testing for the function call number as soon as we've identified a HVC call coming from the guest. Signed-off-by: Russell King Boot-tested-by: Tony Lindgren Reviewed-by: Tony Lindgren Reviewed-by: Marc Zyngier Signed-off-by: David A. Long Signed-off-by: Greg Kroah-Hartman --- arch/arm/kvm/hyp/hyp-entry.S | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/arch/arm/kvm/hyp/hyp-entry.S b/arch/arm/kvm/hyp/hyp-entry.S index 918a05dd2d63b..aa3f9a9837aca 100644 --- a/arch/arm/kvm/hyp/hyp-entry.S +++ b/arch/arm/kvm/hyp/hyp-entry.S @@ -16,6 +16,7 @@ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ +#include #include #include #include @@ -202,7 +203,7 @@ hyp_hvc: lsr r2, r2, #16 and r2, r2, #0xff cmp r2, #0 - bne guest_trap @ Guest called HVC + bne guest_hvc_trap @ Guest called HVC /* * Getting here means host called HVC, we shift parameters and branch @@ -253,6 +254,20 @@ THUMB( orr lr, #1) pop {r2, lr} eret +guest_hvc_trap: + movw r2, #:lower16:ARM_SMCCC_ARCH_WORKAROUND_1 + movt r2, #:upper16:ARM_SMCCC_ARCH_WORKAROUND_1 + ldr r0, [sp] @ Guest's r0 + teq r0, r2 + bne guest_trap + add sp, sp, #12 + @ Returns: + @ r0 = 0 + @ r1 = HSR value (perfectly predictable) + @ r2 = ARM_SMCCC_ARCH_WORKAROUND_1 + mov r0, #0 + eret + guest_trap: load_vcpu r0 @ Load VCPU pointer to r0 -- GitLab From e7fc401a8800b9d657e2d1ba7188592b449c9c84 Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 15 Oct 2018 11:32:09 -0400 Subject: [PATCH 0954/2269] ARM: KVM: report support for SMCCC_ARCH_WORKAROUND_1 Commit add5609877c6785cc002c6ed7e008b1d61064439 upstream. Report support for SMCCC_ARCH_WORKAROUND_1 to KVM guests for affected CPUs. Signed-off-by: Russell King Boot-tested-by: Tony Lindgren Reviewed-by: Tony Lindgren Reviewed-by: Marc Zyngier Signed-off-by: David A. Long Signed-off-by: Greg Kroah-Hartman --- arch/arm/include/asm/kvm_host.h | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 65572e14306c8..b602326399845 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -21,6 +21,7 @@ #include #include +#include #include #include #include @@ -298,8 +299,17 @@ int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu, static inline bool kvm_arm_harden_branch_predictor(void) { - /* No way to detect it yet, pretend it is not there. */ - return false; + switch(read_cpuid_part()) { +#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR + case ARM_CPU_PART_BRAHMA_B15: + case ARM_CPU_PART_CORTEX_A12: + case ARM_CPU_PART_CORTEX_A15: + case ARM_CPU_PART_CORTEX_A17: + return true; +#endif + default: + return false; + } } #define KVM_SSBD_UNKNOWN -1 -- GitLab From f6909113ad1ff2a168643e7fee3136b8adbb4aa9 Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 15 Oct 2018 11:32:10 -0400 Subject: [PATCH 0955/2269] ARM: spectre-v1: add speculation barrier (csdb) macros Commit a78d156587931a2c3b354534aa772febf6c9e855 upstream. Add assembly and C macros for the new CSDB instruction. Signed-off-by: Russell King Acked-by: Mark Rutland Boot-tested-by: Tony Lindgren Reviewed-by: Tony Lindgren Signed-off-by: David A. Long Signed-off-by: Greg Kroah-Hartman --- arch/arm/include/asm/assembler.h | 8 ++++++++ arch/arm/include/asm/barrier.h | 13 +++++++++++++ 2 files changed, 21 insertions(+) diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h index 9342904cccca6..0cd4dccbae788 100644 --- a/arch/arm/include/asm/assembler.h +++ b/arch/arm/include/asm/assembler.h @@ -447,6 +447,14 @@ THUMB( orr \reg , \reg , #PSR_T_BIT ) .size \name , . - \name .endm + .macro csdb +#ifdef CONFIG_THUMB2_KERNEL + .inst.w 0xf3af8014 +#else + .inst 0xe320f014 +#endif + .endm + .macro check_uaccess, addr:req, size:req, limit:req, tmp:req, bad:req #ifndef CONFIG_CPU_USE_DOMAINS adds \tmp, \addr, #\size - 1 diff --git a/arch/arm/include/asm/barrier.h b/arch/arm/include/asm/barrier.h index 40f5c410fd8c3..3d9c1d4b7e753 100644 --- a/arch/arm/include/asm/barrier.h +++ b/arch/arm/include/asm/barrier.h @@ -17,6 +17,12 @@ #define isb(option) __asm__ __volatile__ ("isb " #option : : : "memory") #define dsb(option) __asm__ __volatile__ ("dsb " #option : : : "memory") #define dmb(option) __asm__ __volatile__ ("dmb " #option : : : "memory") +#ifdef CONFIG_THUMB2_KERNEL +#define CSDB ".inst.w 0xf3af8014" +#else +#define CSDB ".inst 0xe320f014" +#endif +#define csdb() __asm__ __volatile__(CSDB : : : "memory") #elif defined(CONFIG_CPU_XSC3) || __LINUX_ARM_ARCH__ == 6 #define isb(x) __asm__ __volatile__ ("mcr p15, 0, %0, c7, c5, 4" \ : : "r" (0) : "memory") @@ -37,6 +43,13 @@ #define dmb(x) __asm__ __volatile__ ("" : : : "memory") #endif +#ifndef CSDB +#define CSDB +#endif +#ifndef csdb +#define csdb() +#endif + #ifdef CONFIG_ARM_HEAVY_MB extern void (*soc_mb)(void); extern void arm_heavy_mb(void); -- GitLab From 4186f7cfa1d6083e969638c8e98b205163da7e2a Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 15 Oct 2018 11:32:11 -0400 Subject: [PATCH 0956/2269] ARM: spectre-v1: add array_index_mask_nospec() implementation Commit 1d4238c56f9816ce0f9c8dbe42d7f2ad81cb6613 upstream. Add an implementation of the array_index_mask_nospec() function for mitigating Spectre variant 1 throughout the kernel. Signed-off-by: Russell King Acked-by: Mark Rutland Boot-tested-by: Tony Lindgren Reviewed-by: Tony Lindgren Signed-off-by: David A. Long Signed-off-by: Greg Kroah-Hartman --- arch/arm/include/asm/barrier.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/arch/arm/include/asm/barrier.h b/arch/arm/include/asm/barrier.h index 3d9c1d4b7e753..69772e742a0ac 100644 --- a/arch/arm/include/asm/barrier.h +++ b/arch/arm/include/asm/barrier.h @@ -76,6 +76,25 @@ extern void arm_heavy_mb(void); #define __smp_rmb() __smp_mb() #define __smp_wmb() dmb(ishst) +#ifdef CONFIG_CPU_SPECTRE +static inline unsigned long array_index_mask_nospec(unsigned long idx, + unsigned long sz) +{ + unsigned long mask; + + asm volatile( + "cmp %1, %2\n" + " sbc %0, %1, %1\n" + CSDB + : "=r" (mask) + : "r" (idx), "Ir" (sz) + : "cc"); + + return mask; +} +#define array_index_mask_nospec array_index_mask_nospec +#endif + #include #endif /* !__ASSEMBLY__ */ -- GitLab From b690ec0dc735545bca2b78bee639d1545bda97e6 Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 15 Oct 2018 11:32:12 -0400 Subject: [PATCH 0957/2269] ARM: spectre-v1: fix syscall entry Commit 10573ae547c85b2c61417ff1a106cffbfceada35 upstream. Prevent speculation at the syscall table decoding by clamping the index used to zero on invalid system call numbers, and using the csdb speculative barrier. Signed-off-by: Russell King Acked-by: Mark Rutland Boot-tested-by: Tony Lindgren Reviewed-by: Tony Lindgren Signed-off-by: David A. Long Signed-off-by: Greg Kroah-Hartman --- arch/arm/kernel/entry-common.S | 18 +++++++----------- arch/arm/kernel/entry-header.S | 25 +++++++++++++++++++++++++ 2 files changed, 32 insertions(+), 11 deletions(-) diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index 99c908226065c..54c10503d71ff 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -241,9 +241,7 @@ local_restart: tst r10, #_TIF_SYSCALL_WORK @ are we tracing syscalls? bne __sys_trace - cmp scno, #NR_syscalls @ check upper syscall limit - badr lr, ret_fast_syscall @ return address - ldrcc pc, [tbl, scno, lsl #2] @ call sys_* routine + invoke_syscall tbl, scno, r10, ret_fast_syscall add r1, sp, #S_OFF 2: cmp scno, #(__ARM_NR_BASE - __NR_SYSCALL_BASE) @@ -277,14 +275,8 @@ __sys_trace: mov r1, scno add r0, sp, #S_OFF bl syscall_trace_enter - - badr lr, __sys_trace_return @ return address - mov scno, r0 @ syscall number (possibly new) - add r1, sp, #S_R0 + S_OFF @ pointer to regs - cmp scno, #NR_syscalls @ check upper syscall limit - ldmccia r1, {r0 - r6} @ have to reload r0 - r6 - stmccia sp, {r4, r5} @ and update the stack args - ldrcc pc, [tbl, scno, lsl #2] @ call sys_* routine + mov scno, r0 + invoke_syscall tbl, scno, r10, __sys_trace_return, reload=1 cmp scno, #-1 @ skip the syscall? bne 2b add sp, sp, #S_OFF @ restore stack @@ -362,6 +354,10 @@ sys_syscall: bic scno, r0, #__NR_OABI_SYSCALL_BASE cmp scno, #__NR_syscall - __NR_SYSCALL_BASE cmpne scno, #NR_syscalls @ check range +#ifdef CONFIG_CPU_SPECTRE + movhs scno, #0 + csdb +#endif stmloia sp, {r5, r6} @ shuffle args movlo r0, r1 movlo r1, r2 diff --git a/arch/arm/kernel/entry-header.S b/arch/arm/kernel/entry-header.S index 0f07579af472c..773424843d6ef 100644 --- a/arch/arm/kernel/entry-header.S +++ b/arch/arm/kernel/entry-header.S @@ -378,6 +378,31 @@ #endif .endm + .macro invoke_syscall, table, nr, tmp, ret, reload=0 +#ifdef CONFIG_CPU_SPECTRE + mov \tmp, \nr + cmp \tmp, #NR_syscalls @ check upper syscall limit + movcs \tmp, #0 + csdb + badr lr, \ret @ return address + .if \reload + add r1, sp, #S_R0 + S_OFF @ pointer to regs + ldmccia r1, {r0 - r6} @ reload r0-r6 + stmccia sp, {r4, r5} @ update stack arguments + .endif + ldrcc pc, [\table, \tmp, lsl #2] @ call sys_* routine +#else + cmp \nr, #NR_syscalls @ check upper syscall limit + badr lr, \ret @ return address + .if \reload + add r1, sp, #S_R0 + S_OFF @ pointer to regs + ldmccia r1, {r0 - r6} @ reload r0-r6 + stmccia sp, {r4, r5} @ update stack arguments + .endif + ldrcc pc, [\table, \nr, lsl #2] @ call sys_* routine +#endif + .endm + /* * These are the registers used in the syscall handler, and allow us to * have in theory up to 7 arguments to a function - r0 to r6. -- GitLab From faac72dc91507049c55f3de6731428f50f33fc88 Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 15 Oct 2018 11:32:13 -0400 Subject: [PATCH 0958/2269] ARM: signal: copy registers using __copy_from_user() Commit c32cd419d6650e42b9cdebb83c672ec945e6bd7e upstream. __get_user_error() is used as a fast accessor to make copying structure members in the signal handling path as efficient as possible. However, with software PAN and the recent Spectre variant 1, the efficiency is reduced as these are no longer fast accessors. In the case of software PAN, it has to switch the domain register around each access, and with Spectre variant 1, it would have to repeat the access_ok() check for each access. It becomes much more efficient to use __copy_from_user() instead, so let's use this for the ARM integer registers. Acked-by: Mark Rutland Signed-off-by: Russell King Signed-off-by: David A. Long Signed-off-by: Greg Kroah-Hartman --- arch/arm/kernel/signal.c | 38 +++++++++++++++++++++----------------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c index b67ae12503f30..80da0cdac6623 100644 --- a/arch/arm/kernel/signal.c +++ b/arch/arm/kernel/signal.c @@ -184,6 +184,7 @@ struct rt_sigframe { static int restore_sigframe(struct pt_regs *regs, struct sigframe __user *sf) { + struct sigcontext context; char __user *aux; sigset_t set; int err; @@ -192,23 +193,26 @@ static int restore_sigframe(struct pt_regs *regs, struct sigframe __user *sf) if (err == 0) set_current_blocked(&set); - __get_user_error(regs->ARM_r0, &sf->uc.uc_mcontext.arm_r0, err); - __get_user_error(regs->ARM_r1, &sf->uc.uc_mcontext.arm_r1, err); - __get_user_error(regs->ARM_r2, &sf->uc.uc_mcontext.arm_r2, err); - __get_user_error(regs->ARM_r3, &sf->uc.uc_mcontext.arm_r3, err); - __get_user_error(regs->ARM_r4, &sf->uc.uc_mcontext.arm_r4, err); - __get_user_error(regs->ARM_r5, &sf->uc.uc_mcontext.arm_r5, err); - __get_user_error(regs->ARM_r6, &sf->uc.uc_mcontext.arm_r6, err); - __get_user_error(regs->ARM_r7, &sf->uc.uc_mcontext.arm_r7, err); - __get_user_error(regs->ARM_r8, &sf->uc.uc_mcontext.arm_r8, err); - __get_user_error(regs->ARM_r9, &sf->uc.uc_mcontext.arm_r9, err); - __get_user_error(regs->ARM_r10, &sf->uc.uc_mcontext.arm_r10, err); - __get_user_error(regs->ARM_fp, &sf->uc.uc_mcontext.arm_fp, err); - __get_user_error(regs->ARM_ip, &sf->uc.uc_mcontext.arm_ip, err); - __get_user_error(regs->ARM_sp, &sf->uc.uc_mcontext.arm_sp, err); - __get_user_error(regs->ARM_lr, &sf->uc.uc_mcontext.arm_lr, err); - __get_user_error(regs->ARM_pc, &sf->uc.uc_mcontext.arm_pc, err); - __get_user_error(regs->ARM_cpsr, &sf->uc.uc_mcontext.arm_cpsr, err); + err |= __copy_from_user(&context, &sf->uc.uc_mcontext, sizeof(context)); + if (err == 0) { + regs->ARM_r0 = context.arm_r0; + regs->ARM_r1 = context.arm_r1; + regs->ARM_r2 = context.arm_r2; + regs->ARM_r3 = context.arm_r3; + regs->ARM_r4 = context.arm_r4; + regs->ARM_r5 = context.arm_r5; + regs->ARM_r6 = context.arm_r6; + regs->ARM_r7 = context.arm_r7; + regs->ARM_r8 = context.arm_r8; + regs->ARM_r9 = context.arm_r9; + regs->ARM_r10 = context.arm_r10; + regs->ARM_fp = context.arm_fp; + regs->ARM_ip = context.arm_ip; + regs->ARM_sp = context.arm_sp; + regs->ARM_lr = context.arm_lr; + regs->ARM_pc = context.arm_pc; + regs->ARM_cpsr = context.arm_cpsr; + } err |= !valid_user_regs(regs); -- GitLab From 38752f41748728cbd176a50d10f02f1dda1c1a90 Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 15 Oct 2018 11:32:14 -0400 Subject: [PATCH 0959/2269] ARM: vfp: use __copy_from_user() when restoring VFP state Commit 42019fc50dfadb219f9e6ddf4c354f3837057d80 upstream. __get_user_error() is used as a fast accessor to make copying structure members in the signal handling path as efficient as possible. However, with software PAN and the recent Spectre variant 1, the efficiency is reduced as these are no longer fast accessors. In the case of software PAN, it has to switch the domain register around each access, and with Spectre variant 1, it would have to repeat the access_ok() check for each access. Use __copy_from_user() rather than __get_user_err() for individual members when restoring VFP state. Acked-by: Mark Rutland Signed-off-by: Russell King Signed-off-by: David A. Long Signed-off-by: Greg Kroah-Hartman --- arch/arm/include/asm/thread_info.h | 4 ++-- arch/arm/kernel/signal.c | 20 ++++++++------------ arch/arm/vfp/vfpmodule.c | 17 +++++++---------- 3 files changed, 17 insertions(+), 24 deletions(-) diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h index 776757d1604ab..57d2ad9c75cae 100644 --- a/arch/arm/include/asm/thread_info.h +++ b/arch/arm/include/asm/thread_info.h @@ -126,8 +126,8 @@ struct user_vfp_exc; extern int vfp_preserve_user_clear_hwstate(struct user_vfp __user *, struct user_vfp_exc __user *); -extern int vfp_restore_user_hwstate(struct user_vfp __user *, - struct user_vfp_exc __user *); +extern int vfp_restore_user_hwstate(struct user_vfp *, + struct user_vfp_exc *); #endif /* diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c index 80da0cdac6623..cdfe52b15a0a9 100644 --- a/arch/arm/kernel/signal.c +++ b/arch/arm/kernel/signal.c @@ -149,22 +149,18 @@ static int preserve_vfp_context(struct vfp_sigframe __user *frame) static int restore_vfp_context(char __user **auxp) { - struct vfp_sigframe __user *frame = - (struct vfp_sigframe __user *)*auxp; - unsigned long magic; - unsigned long size; - int err = 0; - - __get_user_error(magic, &frame->magic, err); - __get_user_error(size, &frame->size, err); + struct vfp_sigframe frame; + int err; + err = __copy_from_user(&frame, *auxp, sizeof(frame)); if (err) - return -EFAULT; - if (magic != VFP_MAGIC || size != VFP_STORAGE_SIZE) + return err; + + if (frame.magic != VFP_MAGIC || frame.size != VFP_STORAGE_SIZE) return -EINVAL; - *auxp += size; - return vfp_restore_user_hwstate(&frame->ufp, &frame->ufp_exc); + *auxp += sizeof(frame); + return vfp_restore_user_hwstate(&frame.ufp, &frame.ufp_exc); } #endif diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c index aa7496be311df..6abcd4af8274a 100644 --- a/arch/arm/vfp/vfpmodule.c +++ b/arch/arm/vfp/vfpmodule.c @@ -597,13 +597,11 @@ int vfp_preserve_user_clear_hwstate(struct user_vfp __user *ufp, } /* Sanitise and restore the current VFP state from the provided structures. */ -int vfp_restore_user_hwstate(struct user_vfp __user *ufp, - struct user_vfp_exc __user *ufp_exc) +int vfp_restore_user_hwstate(struct user_vfp *ufp, struct user_vfp_exc *ufp_exc) { struct thread_info *thread = current_thread_info(); struct vfp_hard_struct *hwstate = &thread->vfpstate.hard; unsigned long fpexc; - int err = 0; /* Disable VFP to avoid corrupting the new thread state. */ vfp_flush_hwstate(thread); @@ -612,17 +610,16 @@ int vfp_restore_user_hwstate(struct user_vfp __user *ufp, * Copy the floating point registers. There can be unused * registers see asm/hwcap.h for details. */ - err |= __copy_from_user(&hwstate->fpregs, &ufp->fpregs, - sizeof(hwstate->fpregs)); + memcpy(&hwstate->fpregs, &ufp->fpregs, sizeof(hwstate->fpregs)); /* * Copy the status and control register. */ - __get_user_error(hwstate->fpscr, &ufp->fpscr, err); + hwstate->fpscr = ufp->fpscr; /* * Sanitise and restore the exception registers. */ - __get_user_error(fpexc, &ufp_exc->fpexc, err); + fpexc = ufp_exc->fpexc; /* Ensure the VFP is enabled. */ fpexc |= FPEXC_EN; @@ -631,10 +628,10 @@ int vfp_restore_user_hwstate(struct user_vfp __user *ufp, fpexc &= ~(FPEXC_EX | FPEXC_FP2V); hwstate->fpexc = fpexc; - __get_user_error(hwstate->fpinst, &ufp_exc->fpinst, err); - __get_user_error(hwstate->fpinst2, &ufp_exc->fpinst2, err); + hwstate->fpinst = ufp_exc->fpinst; + hwstate->fpinst2 = ufp_exc->fpinst2; - return err ? -EFAULT : 0; + return 0; } /* -- GitLab From 70b96be10d151cf088c991a018889ee76b1c6c0e Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 15 Oct 2018 11:32:15 -0400 Subject: [PATCH 0960/2269] ARM: oabi-compat: copy semops using __copy_from_user() Commit 8c8484a1c18e3231648f5ba7cc5ffb7fd70b3ca4 upstream. __get_user_error() is used as a fast accessor to make copying structure members as efficient as possible. However, with software PAN and the recent Spectre variant 1, the efficiency is reduced as these are no longer fast accessors. In the case of software PAN, it has to switch the domain register around each access, and with Spectre variant 1, it would have to repeat the access_ok() check for each access. Rather than using __get_user_error() to copy each semops element member, copy each semops element in full using __copy_from_user(). Acked-by: Mark Rutland Signed-off-by: Russell King Signed-off-by: David A. Long Signed-off-by: Greg Kroah-Hartman --- arch/arm/kernel/sys_oabi-compat.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/arm/kernel/sys_oabi-compat.c b/arch/arm/kernel/sys_oabi-compat.c index b9786f491873f..4abe4909417fd 100644 --- a/arch/arm/kernel/sys_oabi-compat.c +++ b/arch/arm/kernel/sys_oabi-compat.c @@ -329,9 +329,11 @@ asmlinkage long sys_oabi_semtimedop(int semid, return -ENOMEM; err = 0; for (i = 0; i < nsops; i++) { - __get_user_error(sops[i].sem_num, &tsops->sem_num, err); - __get_user_error(sops[i].sem_op, &tsops->sem_op, err); - __get_user_error(sops[i].sem_flg, &tsops->sem_flg, err); + struct oabi_sembuf osb; + err |= __copy_from_user(&osb, tsops, sizeof(osb)); + sops[i].sem_num = osb.sem_num; + sops[i].sem_op = osb.sem_op; + sops[i].sem_flg = osb.sem_flg; tsops++; } if (timeout) { -- GitLab From f64824a3d475b573cbab5c35942223e0474096be Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 15 Oct 2018 11:32:16 -0400 Subject: [PATCH 0961/2269] ARM: use __inttype() in get_user() Commit d09fbb327d670737ab40fd8bbb0765ae06b8b739 upstream. Borrow the x86 implementation of __inttype() to use in get_user() to select an integer type suitable to temporarily hold the result value. This is necessary to avoid propagating the volatile nature of the result argument, which can cause the following warning: lib/iov_iter.c:413:5: warning: optimization may eliminate reads and/or writes to register variables [-Wvolatile-register-var] Acked-by: Mark Rutland Signed-off-by: Russell King Signed-off-by: David A. Long Signed-off-by: Greg Kroah-Hartman --- arch/arm/include/asm/uaccess.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h index 0bf2347495f13..29fa6f3ea25ab 100644 --- a/arch/arm/include/asm/uaccess.h +++ b/arch/arm/include/asm/uaccess.h @@ -84,6 +84,13 @@ static inline void set_fs(mm_segment_t fs) : "cc"); \ flag; }) +/* + * This is a type: either unsigned long, if the argument fits into + * that type, or otherwise unsigned long long. + */ +#define __inttype(x) \ + __typeof__(__builtin_choose_expr(sizeof(x) > sizeof(0UL), 0ULL, 0UL)) + /* * Single-value transfer routines. They automatically use the right * size if we just have the right pointer type. Note that the functions @@ -153,7 +160,7 @@ extern int __get_user_64t_4(void *); ({ \ unsigned long __limit = current_thread_info()->addr_limit - 1; \ register const typeof(*(p)) __user *__p asm("r0") = (p);\ - register typeof(x) __r2 asm("r2"); \ + register __inttype(x) __r2 asm("r2"); \ register unsigned long __l asm("r1") = __limit; \ register int __e asm("r0"); \ unsigned int __ua_flags = uaccess_save_and_enable(); \ -- GitLab From 4a1948d692f13cafaf2ca5c228f789a7ee74f6c7 Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 15 Oct 2018 11:32:17 -0400 Subject: [PATCH 0962/2269] ARM: spectre-v1: use get_user() for __get_user() Commit b1cd0a14806321721aae45f5446ed83a3647c914 upstream. Fixing __get_user() for spectre variant 1 is not sane: we would have to add address space bounds checking in order to validate that the location should be accessed, and then zero the address if found to be invalid. Since __get_user() is supposed to avoid the bounds check, and this is exactly what get_user() does, there's no point having two different implementations that are doing the same thing. So, when the Spectre workarounds are required, make __get_user() an alias of get_user(). Acked-by: Mark Rutland Signed-off-by: Russell King Signed-off-by: David A. Long Signed-off-by: Greg Kroah-Hartman --- arch/arm/include/asm/uaccess.h | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h index 29fa6f3ea25ab..4140be4310873 100644 --- a/arch/arm/include/asm/uaccess.h +++ b/arch/arm/include/asm/uaccess.h @@ -250,6 +250,16 @@ static inline void set_fs(mm_segment_t fs) #define user_addr_max() \ (uaccess_kernel() ? ~0UL : get_fs()) +#ifdef CONFIG_CPU_SPECTRE +/* + * When mitigating Spectre variant 1, it is not worth fixing the non- + * verifying accessors, because we need to add verification of the + * address space there. Force these to use the standard get_user() + * version instead. + */ +#define __get_user(x, ptr) get_user(x, ptr) +#else + /* * The "__xxx" versions of the user access functions do not verify the * address space - it must have been done previously with a separate @@ -266,12 +276,6 @@ static inline void set_fs(mm_segment_t fs) __gu_err; \ }) -#define __get_user_error(x, ptr, err) \ -({ \ - __get_user_err((x), (ptr), err); \ - (void) 0; \ -}) - #define __get_user_err(x, ptr, err) \ do { \ unsigned long __gu_addr = (unsigned long)(ptr); \ @@ -331,6 +335,7 @@ do { \ #define __get_user_asm_word(x, addr, err) \ __get_user_asm(x, addr, err, ldr) +#endif #define __put_user_switch(x, ptr, __err, __fn) \ -- GitLab From 57bff812c4e2ca91a876f759198f2cd9e15967ad Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 15 Oct 2018 11:32:18 -0400 Subject: [PATCH 0963/2269] ARM: spectre-v1: mitigate user accesses Commit a3c0f84765bb429ba0fd23de1c57b5e1591c9389 upstream. Spectre variant 1 attacks are about this sequence of pseudo-code: index = load(user-manipulated pointer); access(base + index * stride); In order for the cache side-channel to work, the access() must me made to memory which userspace can detect whether cache lines have been loaded. On 32-bit ARM, this must be either user accessible memory, or a kernel mapping of that same user accessible memory. The problem occurs when the load() speculatively loads privileged data, and the subsequent access() is made to user accessible memory. Any load() which makes use of a user-maniplated pointer is a potential problem if the data it has loaded is used in a subsequent access. This also applies for the access() if the data loaded by that access is used by a subsequent access. Harden the get_user() accessors against Spectre attacks by forcing out of bounds addresses to a NULL pointer. This prevents get_user() being used as the load() step above. As a side effect, put_user() will also be affected even though it isn't implicated. Also harden copy_from_user() by redoing the bounds check within the arm_copy_from_user() code, and NULLing the pointer if out of bounds. Acked-by: Mark Rutland Signed-off-by: Russell King Signed-off-by: David A. Long Signed-off-by: Greg Kroah-Hartman --- arch/arm/include/asm/assembler.h | 4 ++++ arch/arm/lib/copy_from_user.S | 9 +++++++++ 2 files changed, 13 insertions(+) diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h index 0cd4dccbae788..b17ee03d280b6 100644 --- a/arch/arm/include/asm/assembler.h +++ b/arch/arm/include/asm/assembler.h @@ -460,6 +460,10 @@ THUMB( orr \reg , \reg , #PSR_T_BIT ) adds \tmp, \addr, #\size - 1 sbcccs \tmp, \tmp, \limit bcs \bad +#ifdef CONFIG_CPU_SPECTRE + movcs \addr, #0 + csdb +#endif #endif .endm diff --git a/arch/arm/lib/copy_from_user.S b/arch/arm/lib/copy_from_user.S index 7a4b060490012..a826df3d3814b 100644 --- a/arch/arm/lib/copy_from_user.S +++ b/arch/arm/lib/copy_from_user.S @@ -90,6 +90,15 @@ .text ENTRY(arm_copy_from_user) +#ifdef CONFIG_CPU_SPECTRE + get_thread_info r3 + ldr r3, [r3, #TI_ADDR_LIMIT] + adds ip, r1, r2 @ ip=addr+size + sub r3, r3, #1 @ addr_limit - 1 + cmpcc ip, r3 @ if (addr+size > addr_limit - 1) + movcs r1, #0 @ addr = NULL + csdb +#endif #include "copy_template.S" -- GitLab From d0c9f9f9fb446e31b36d76e9a47001188f961a44 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 19 Mar 2018 09:29:01 +0100 Subject: [PATCH 0964/2269] perf tools: Fix snprint warnings for gcc 8 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 77f18153c080855e1c3fb520ca31a4e61530121d upstream. With gcc 8 we get new set of snprintf() warnings that breaks the compilation, one example: tests/mem.c: In function ‘check’: tests/mem.c:19:48: error: ‘%s’ directive output may be truncated writing \ up to 99 bytes into a region of size 89 [-Werror=format-truncation=] snprintf(failure, sizeof failure, "unexpected %s", out); The gcc docs says: To avoid the warning either use a bigger buffer or handle the function's return value which indicates whether or not its output has been truncated. Given that all these warnings are harmless, because the code either properly fails due to uncomplete file path or we don't care for truncated output at all, I'm changing all those snprintf() calls to scnprintf(), which actually 'checks' for the snprint return value so the gcc stays silent. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: David Ahern Cc: Josh Poimboeuf Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Sergey Senozhatsky Link: http://lkml.kernel.org/r/20180319082902.4518-1-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo Cc: Ignat Korchagin Signed-off-by: Greg Kroah-Hartman --- tools/perf/builtin-script.c | 22 +++++++++++----------- tools/perf/tests/attr.c | 4 ++-- tools/perf/tests/mem.c | 2 +- tools/perf/tests/pmu.c | 2 +- tools/perf/util/cgroup.c | 2 +- tools/perf/util/parse-events.c | 4 ++-- tools/perf/util/pmu.c | 2 +- 7 files changed, 19 insertions(+), 19 deletions(-) diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index e37653b0f2d0c..76789523429ad 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -2304,8 +2304,8 @@ static int list_available_scripts(const struct option *opt __maybe_unused, } for_each_lang(scripts_path, scripts_dir, lang_dirent) { - snprintf(lang_path, MAXPATHLEN, "%s/%s/bin", scripts_path, - lang_dirent->d_name); + scnprintf(lang_path, MAXPATHLEN, "%s/%s/bin", scripts_path, + lang_dirent->d_name); lang_dir = opendir(lang_path); if (!lang_dir) continue; @@ -2314,8 +2314,8 @@ static int list_available_scripts(const struct option *opt __maybe_unused, script_root = get_script_root(script_dirent, REPORT_SUFFIX); if (script_root) { desc = script_desc__findnew(script_root); - snprintf(script_path, MAXPATHLEN, "%s/%s", - lang_path, script_dirent->d_name); + scnprintf(script_path, MAXPATHLEN, "%s/%s", + lang_path, script_dirent->d_name); read_script_info(desc, script_path); free(script_root); } @@ -2351,7 +2351,7 @@ static int check_ev_match(char *dir_name, char *scriptname, int match, len; FILE *fp; - sprintf(filename, "%s/bin/%s-record", dir_name, scriptname); + scnprintf(filename, MAXPATHLEN, "%s/bin/%s-record", dir_name, scriptname); fp = fopen(filename, "r"); if (!fp) @@ -2427,8 +2427,8 @@ int find_scripts(char **scripts_array, char **scripts_path_array) } for_each_lang(scripts_path, scripts_dir, lang_dirent) { - snprintf(lang_path, MAXPATHLEN, "%s/%s", scripts_path, - lang_dirent->d_name); + scnprintf(lang_path, MAXPATHLEN, "%s/%s", scripts_path, + lang_dirent->d_name); #ifdef NO_LIBPERL if (strstr(lang_path, "perl")) continue; @@ -2483,8 +2483,8 @@ static char *get_script_path(const char *script_root, const char *suffix) return NULL; for_each_lang(scripts_path, scripts_dir, lang_dirent) { - snprintf(lang_path, MAXPATHLEN, "%s/%s/bin", scripts_path, - lang_dirent->d_name); + scnprintf(lang_path, MAXPATHLEN, "%s/%s/bin", scripts_path, + lang_dirent->d_name); lang_dir = opendir(lang_path); if (!lang_dir) continue; @@ -2495,8 +2495,8 @@ static char *get_script_path(const char *script_root, const char *suffix) free(__script_root); closedir(lang_dir); closedir(scripts_dir); - snprintf(script_path, MAXPATHLEN, "%s/%s", - lang_path, script_dirent->d_name); + scnprintf(script_path, MAXPATHLEN, "%s/%s", + lang_path, script_dirent->d_name); return strdup(script_path); } free(__script_root); diff --git a/tools/perf/tests/attr.c b/tools/perf/tests/attr.c index 0e1367f90af53..60fea0a376fc8 100644 --- a/tools/perf/tests/attr.c +++ b/tools/perf/tests/attr.c @@ -164,8 +164,8 @@ static int run_dir(const char *d, const char *perf) if (verbose > 0) vcnt++; - snprintf(cmd, 3*PATH_MAX, PYTHON " %s/attr.py -d %s/attr/ -p %s %.*s", - d, d, perf, vcnt, v); + scnprintf(cmd, 3*PATH_MAX, PYTHON " %s/attr.py -d %s/attr/ -p %s %.*s", + d, d, perf, vcnt, v); return system(cmd) ? TEST_FAIL : TEST_OK; } diff --git a/tools/perf/tests/mem.c b/tools/perf/tests/mem.c index 21952e1e6e6d8..0f82ee9fd3f72 100644 --- a/tools/perf/tests/mem.c +++ b/tools/perf/tests/mem.c @@ -16,7 +16,7 @@ static int check(union perf_mem_data_src data_src, n = perf_mem__snp_scnprintf(out, sizeof out, &mi); n += perf_mem__lvl_scnprintf(out + n, sizeof out - n, &mi); - snprintf(failure, sizeof failure, "unexpected %s", out); + scnprintf(failure, sizeof failure, "unexpected %s", out); TEST_ASSERT_VAL(failure, !strcmp(string, out)); return 0; } diff --git a/tools/perf/tests/pmu.c b/tools/perf/tests/pmu.c index 9abca267afa91..7bedf8608fdde 100644 --- a/tools/perf/tests/pmu.c +++ b/tools/perf/tests/pmu.c @@ -98,7 +98,7 @@ static char *test_format_dir_get(void) struct test_format *format = &test_formats[i]; FILE *file; - snprintf(name, PATH_MAX, "%s/%s", dir, format->name); + scnprintf(name, PATH_MAX, "%s/%s", dir, format->name); file = fopen(name, "w"); if (!file) diff --git a/tools/perf/util/cgroup.c b/tools/perf/util/cgroup.c index d9ffc1e6eb399..ce6bcb0a5368e 100644 --- a/tools/perf/util/cgroup.c +++ b/tools/perf/util/cgroup.c @@ -78,7 +78,7 @@ static int open_cgroup(char *name) if (cgroupfs_find_mountpoint(mnt, PATH_MAX + 1)) return -1; - snprintf(path, PATH_MAX, "%s/%s", mnt, name); + scnprintf(path, PATH_MAX, "%s/%s", mnt, name); fd = open(path, O_RDONLY); if (fd == -1) diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index b25635e945f30..53f620472151f 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -202,8 +202,8 @@ struct tracepoint_path *tracepoint_id_to_path(u64 config) for_each_event(sys_dirent, evt_dir, evt_dirent) { - snprintf(evt_path, MAXPATHLEN, "%s/%s/id", dir_path, - evt_dirent->d_name); + scnprintf(evt_path, MAXPATHLEN, "%s/%s/id", dir_path, + evt_dirent->d_name); fd = open(evt_path, O_RDONLY); if (fd < 0) continue; diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 9dff41bcc776a..d87d458996b7a 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -349,7 +349,7 @@ static int pmu_aliases_parse(char *dir, struct list_head *head) if (pmu_alias_info_file(name)) continue; - snprintf(path, PATH_MAX, "%s/%s", dir, name); + scnprintf(path, PATH_MAX, "%s/%s", dir, name); file = fopen(path, "r"); if (!file) { -- GitLab From 8263087bf62739362d50ec965c8c34fe3ee7a7cd Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 18 Oct 2018 09:16:28 +0200 Subject: [PATCH 0965/2269] Linux 4.14.77 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 332dd011b3b93..16d1a18496fb1 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 76 +SUBLEVEL = 77 EXTRAVERSION = NAME = Petit Gorille -- GitLab From 88be243901ff08770981d497b188a8a323cd7f56 Mon Sep 17 00:00:00 2001 From: Jozef Balga Date: Tue, 21 Aug 2018 05:01:04 -0400 Subject: [PATCH 0966/2269] media: af9035: prevent buffer overflow on write [ Upstream commit 312f73b648626a0526a3aceebb0a3192aaba05ce ] When less than 3 bytes are written to the device, memcpy is called with negative array size which leads to buffer overflow and kernel panic. This patch adds a condition and returns -EOPNOTSUPP instead. Fixes bugzilla issue 64871 [mchehab+samsung@kernel.org: fix a merge conflict and changed the condition to match the patch's comment, e. g. len == 3 could also be valid] Signed-off-by: Jozef Balga Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/media/usb/dvb-usb-v2/af9035.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/media/usb/dvb-usb-v2/af9035.c b/drivers/media/usb/dvb-usb-v2/af9035.c index 666d319d3d1ab..1f6c1eefe3892 100644 --- a/drivers/media/usb/dvb-usb-v2/af9035.c +++ b/drivers/media/usb/dvb-usb-v2/af9035.c @@ -402,8 +402,10 @@ static int af9035_i2c_master_xfer(struct i2c_adapter *adap, if (msg[0].addr == state->af9033_i2c_addr[1]) reg |= 0x100000; - ret = af9035_wr_regs(d, reg, &msg[0].buf[3], - msg[0].len - 3); + ret = (msg[0].len >= 3) ? af9035_wr_regs(d, reg, + &msg[0].buf[3], + msg[0].len - 3) + : -EOPNOTSUPP; } else { /* I2C write */ u8 buf[MAX_XFER_SIZE]; -- GitLab From fa04cc5e7b817ddf11b6d174de0cda257ae33aa6 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Fri, 31 Aug 2018 15:08:44 +0200 Subject: [PATCH 0967/2269] batman-adv: Avoid probe ELP information leak [ Upstream commit 88d0895d0ea9d4431507d576c963f2ff9918144d ] The probe ELPs for WiFi interfaces are expanded to contain at least BATADV_ELP_MIN_PROBE_SIZE bytes. This is usually a lot more than the number of bytes which the template ELP packet requires. These extra padding bytes were not initialized and thus could contain data which were previously stored at the same location. It is therefore required to set it to some predefined or random values to avoid leaking private information from the system transmitting these kind of packets. Fixes: e4623c913508 ("batman-adv: Avoid probe ELP information leak") Signed-off-by: Sven Eckelmann Acked-by: Antonio Quartulli Signed-off-by: Simon Wunderlich Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/batman-adv/bat_v_elp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c index bd1064d98e16a..bfc49d48eb57d 100644 --- a/net/batman-adv/bat_v_elp.c +++ b/net/batman-adv/bat_v_elp.c @@ -227,7 +227,7 @@ batadv_v_elp_wifi_neigh_probe(struct batadv_hardif_neigh_node *neigh) * the packet to be exactly of that size to make the link * throughput estimation effective. */ - skb_put(skb, probe_len - hard_iface->bat_v.elp_skb->len); + skb_put_zero(skb, probe_len - hard_iface->bat_v.elp_skb->len); batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "Sending unicast (probe) ELP packet on interface %s to %pM\n", -- GitLab From acdaf28d8cf511142a2e3c6ec1c3c9e65852a1d3 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Fri, 31 Aug 2018 16:46:47 +0200 Subject: [PATCH 0968/2269] batman-adv: Fix segfault when writing to throughput_override [ Upstream commit b9fd14c20871e6189f635e49b32d7789e430b3c8 ] The per hardif sysfs file "batman_adv/throughput_override" prints the resulting change as info text when the users writes to this file. It uses the helper function batadv_info to add it at the same time to the kernel ring buffer and to the batman-adv debug log (when CONFIG_BATMAN_ADV_DEBUG is enabled). The function batadv_info requires as first parameter the batman-adv softif net_device. This parameter is then used to find the private buffer which contains the debug log for this batman-adv interface. But batadv_store_throughput_override used as first argument the slave net_device. This slave device doesn't have the batadv_priv private data which is access by batadv_info. Writing to this file with CONFIG_BATMAN_ADV_DEBUG enabled can either lead to a segfault or to memory corruption. Fixes: 0b5ecc6811bd ("batman-adv: add throughput override attribute to hard_ifaces") Signed-off-by: Sven Eckelmann Acked-by: Marek Lindner Signed-off-by: Simon Wunderlich Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/batman-adv/sysfs.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c index 0ae8b30e4eaaf..ec412c5b86974 100644 --- a/net/batman-adv/sysfs.c +++ b/net/batman-adv/sysfs.c @@ -1078,8 +1078,9 @@ static ssize_t batadv_store_throughput_override(struct kobject *kobj, if (old_tp_override == tp_override) goto out; - batadv_info(net_dev, "%s: Changing from: %u.%u MBit to: %u.%u MBit\n", - "throughput_override", + batadv_info(hard_iface->soft_iface, + "%s: %s: Changing from: %u.%u MBit to: %u.%u MBit\n", + "throughput_override", net_dev->name, old_tp_override / 10, old_tp_override % 10, tp_override / 10, tp_override % 10); -- GitLab From c553f7e514b9b1f83275c668cd6f10dca71f1eb5 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Fri, 31 Aug 2018 16:56:29 +0200 Subject: [PATCH 0969/2269] batman-adv: Fix segfault when writing to sysfs elp_interval [ Upstream commit a25bab9d723a08bd0bdafb1529faf9094c690b70 ] The per hardif sysfs file "batman_adv/elp_interval" is using the generic functions to store/show uint values. The helper __batadv_store_uint_attr requires the softif net_device as parameter to print the resulting change as info text when the users writes to this file. It uses the helper function batadv_info to add it at the same time to the kernel ring buffer and to the batman-adv debug log (when CONFIG_BATMAN_ADV_DEBUG is enabled). The function batadv_info requires as first parameter the batman-adv softif net_device. This parameter is then used to find the private buffer which contains the debug log for this batman-adv interface. But batadv_store_throughput_override used as first argument the slave net_device. This slave device doesn't have the batadv_priv private data which is access by batadv_info. Writing to this file with CONFIG_BATMAN_ADV_DEBUG enabled can either lead to a segfault or to memory corruption. Fixes: 0744ff8fa8fa ("batman-adv: Add hard_iface specific sysfs wrapper macros for UINT") Signed-off-by: Sven Eckelmann Acked-by: Marek Lindner Signed-off-by: Simon Wunderlich Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/batman-adv/sysfs.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c index ec412c5b86974..2ef9b136fc394 100644 --- a/net/batman-adv/sysfs.c +++ b/net/batman-adv/sysfs.c @@ -186,7 +186,8 @@ ssize_t batadv_store_##_name(struct kobject *kobj, \ \ return __batadv_store_uint_attr(buff, count, _min, _max, \ _post_func, attr, \ - &bat_priv->_var, net_dev); \ + &bat_priv->_var, net_dev, \ + NULL); \ } #define BATADV_ATTR_SIF_SHOW_UINT(_name, _var) \ @@ -260,7 +261,9 @@ ssize_t batadv_store_##_name(struct kobject *kobj, \ \ length = __batadv_store_uint_attr(buff, count, _min, _max, \ _post_func, attr, \ - &hard_iface->_var, net_dev); \ + &hard_iface->_var, \ + hard_iface->soft_iface, \ + net_dev); \ \ batadv_hardif_put(hard_iface); \ return length; \ @@ -354,10 +357,12 @@ __batadv_store_bool_attr(char *buff, size_t count, static int batadv_store_uint_attr(const char *buff, size_t count, struct net_device *net_dev, + struct net_device *slave_dev, const char *attr_name, unsigned int min, unsigned int max, atomic_t *attr) { + char ifname[IFNAMSIZ + 3] = ""; unsigned long uint_val; int ret; @@ -383,8 +388,11 @@ static int batadv_store_uint_attr(const char *buff, size_t count, if (atomic_read(attr) == uint_val) return count; - batadv_info(net_dev, "%s: Changing from: %i to: %lu\n", - attr_name, atomic_read(attr), uint_val); + if (slave_dev) + snprintf(ifname, sizeof(ifname), "%s: ", slave_dev->name); + + batadv_info(net_dev, "%s: %sChanging from: %i to: %lu\n", + attr_name, ifname, atomic_read(attr), uint_val); atomic_set(attr, uint_val); return count; @@ -395,12 +403,13 @@ static ssize_t __batadv_store_uint_attr(const char *buff, size_t count, void (*post_func)(struct net_device *), const struct attribute *attr, atomic_t *attr_store, - struct net_device *net_dev) + struct net_device *net_dev, + struct net_device *slave_dev) { int ret; - ret = batadv_store_uint_attr(buff, count, net_dev, attr->name, min, max, - attr_store); + ret = batadv_store_uint_attr(buff, count, net_dev, slave_dev, + attr->name, min, max, attr_store); if (post_func && ret) post_func(net_dev); @@ -569,7 +578,7 @@ static ssize_t batadv_store_gw_sel_class(struct kobject *kobj, return __batadv_store_uint_attr(buff, count, 1, BATADV_TQ_MAX_VALUE, batadv_post_gw_reselect, attr, &bat_priv->gw.sel_class, - bat_priv->soft_iface); + bat_priv->soft_iface, NULL); } static ssize_t batadv_show_gw_bwidth(struct kobject *kobj, -- GitLab From 3b38bed3304d6ac7bee3b23a67b5f3173fbb2f6e Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sun, 12 Aug 2018 21:04:41 +0200 Subject: [PATCH 0970/2269] batman-adv: Prevent duplicated gateway_node entry [ Upstream commit dff9bc42ab0b2d38c5e90ddd79b238fed5b4c7ad ] The function batadv_gw_node_add is responsible for adding new gw_node to the gateway_list. It is expecting that the caller already checked that there is not already an entry with the same key or not. But the lock for the list is only held when the list is really modified. This could lead to duplicated entries because another context could create an entry with the same key between the check and the list manipulation. The check and the manipulation of the list must therefore be in the same locked code section. Fixes: c6c8fea29769 ("net: Add batman-adv meshing protocol") Signed-off-by: Sven Eckelmann Acked-by: Marek Lindner Signed-off-by: Simon Wunderlich Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/batman-adv/gateway_client.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c index 06276ae9f7529..c6a7341f05270 100644 --- a/net/batman-adv/gateway_client.c +++ b/net/batman-adv/gateway_client.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -325,6 +326,9 @@ out: * @bat_priv: the bat priv with all the soft interface information * @orig_node: originator announcing gateway capabilities * @gateway: announced bandwidth information + * + * Has to be called with the appropriate locks being acquired + * (gw.list_lock). */ static void batadv_gw_node_add(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, @@ -332,6 +336,8 @@ static void batadv_gw_node_add(struct batadv_priv *bat_priv, { struct batadv_gw_node *gw_node; + lockdep_assert_held(&bat_priv->gw.list_lock); + if (gateway->bandwidth_down == 0) return; @@ -346,10 +352,8 @@ static void batadv_gw_node_add(struct batadv_priv *bat_priv, gw_node->bandwidth_down = ntohl(gateway->bandwidth_down); gw_node->bandwidth_up = ntohl(gateway->bandwidth_up); - spin_lock_bh(&bat_priv->gw.list_lock); kref_get(&gw_node->refcount); hlist_add_head_rcu(&gw_node->list, &bat_priv->gw.gateway_list); - spin_unlock_bh(&bat_priv->gw.list_lock); batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "Found new gateway %pM -> gw bandwidth: %u.%u/%u.%u MBit\n", @@ -405,11 +409,14 @@ void batadv_gw_node_update(struct batadv_priv *bat_priv, { struct batadv_gw_node *gw_node, *curr_gw = NULL; + spin_lock_bh(&bat_priv->gw.list_lock); gw_node = batadv_gw_node_get(bat_priv, orig_node); if (!gw_node) { batadv_gw_node_add(bat_priv, orig_node, gateway); + spin_unlock_bh(&bat_priv->gw.list_lock); goto out; } + spin_unlock_bh(&bat_priv->gw.list_lock); if ((gw_node->bandwidth_down == ntohl(gateway->bandwidth_down)) && (gw_node->bandwidth_up == ntohl(gateway->bandwidth_up))) -- GitLab From 18733cfe4126eef747ff5dee6884290f386ce07c Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sun, 12 Aug 2018 21:04:42 +0200 Subject: [PATCH 0971/2269] batman-adv: Prevent duplicated nc_node entry [ Upstream commit fa122fec8640eb7186ce5a41b83a4c1744ceef8f ] The function batadv_nc_get_nc_node is responsible for adding new nc_nodes to the in_coding_list and out_coding_list. It first checks whether the entry already is in the list or not. If it is, then the creation of a new entry is aborted. But the lock for the list is only held when the list is really modified. This could lead to duplicated entries because another context could create an entry with the same key between the check and the list manipulation. The check and the manipulation of the list must therefore be in the same locked code section. Fixes: d56b1705e28c ("batman-adv: network coding - detect coding nodes and remove these after timeout") Signed-off-by: Sven Eckelmann Acked-by: Marek Lindner Signed-off-by: Simon Wunderlich Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/batman-adv/network-coding.c | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c index 3604d7899e2cb..7a7dcac205667 100644 --- a/net/batman-adv/network-coding.c +++ b/net/batman-adv/network-coding.c @@ -850,16 +850,27 @@ batadv_nc_get_nc_node(struct batadv_priv *bat_priv, spinlock_t *lock; /* Used to lock list selected by "int in_coding" */ struct list_head *list; + /* Select ingoing or outgoing coding node */ + if (in_coding) { + lock = &orig_neigh_node->in_coding_list_lock; + list = &orig_neigh_node->in_coding_list; + } else { + lock = &orig_neigh_node->out_coding_list_lock; + list = &orig_neigh_node->out_coding_list; + } + + spin_lock_bh(lock); + /* Check if nc_node is already added */ nc_node = batadv_nc_find_nc_node(orig_node, orig_neigh_node, in_coding); /* Node found */ if (nc_node) - return nc_node; + goto unlock; nc_node = kzalloc(sizeof(*nc_node), GFP_ATOMIC); if (!nc_node) - return NULL; + goto unlock; /* Initialize nc_node */ INIT_LIST_HEAD(&nc_node->list); @@ -868,22 +879,14 @@ batadv_nc_get_nc_node(struct batadv_priv *bat_priv, kref_get(&orig_neigh_node->refcount); nc_node->orig_node = orig_neigh_node; - /* Select ingoing or outgoing coding node */ - if (in_coding) { - lock = &orig_neigh_node->in_coding_list_lock; - list = &orig_neigh_node->in_coding_list; - } else { - lock = &orig_neigh_node->out_coding_list_lock; - list = &orig_neigh_node->out_coding_list; - } - batadv_dbg(BATADV_DBG_NC, bat_priv, "Adding nc_node %pM -> %pM\n", nc_node->addr, nc_node->orig_node->orig); /* Add nc_node to orig_node */ - spin_lock_bh(lock); kref_get(&nc_node->refcount); list_add_tail_rcu(&nc_node->list, list); + +unlock: spin_unlock_bh(lock); return nc_node; -- GitLab From 2a544b2da44b1aa330a6d0448f92cc4be1380ba7 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sun, 12 Aug 2018 21:04:43 +0200 Subject: [PATCH 0972/2269] batman-adv: Prevent duplicated softif_vlan entry [ Upstream commit 94cb82f594ed86be303398d6dfc7640a6f1d45d4 ] The function batadv_softif_vlan_get is responsible for adding new softif_vlan to the softif_vlan_list. It first checks whether the entry already is in the list or not. If it is, then the creation of a new entry is aborted. But the lock for the list is only held when the list is really modified. This could lead to duplicated entries because another context could create an entry with the same key between the check and the list manipulation. The check and the manipulation of the list must therefore be in the same locked code section. Fixes: 5d2c05b21337 ("batman-adv: add per VLAN interface attribute framework") Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/batman-adv/soft-interface.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index aa2c49fa31cec..8cedb5db1ab31 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -566,15 +566,20 @@ int batadv_softif_create_vlan(struct batadv_priv *bat_priv, unsigned short vid) struct batadv_softif_vlan *vlan; int err; + spin_lock_bh(&bat_priv->softif_vlan_list_lock); + vlan = batadv_softif_vlan_get(bat_priv, vid); if (vlan) { batadv_softif_vlan_put(vlan); + spin_unlock_bh(&bat_priv->softif_vlan_list_lock); return -EEXIST; } vlan = kzalloc(sizeof(*vlan), GFP_ATOMIC); - if (!vlan) + if (!vlan) { + spin_unlock_bh(&bat_priv->softif_vlan_list_lock); return -ENOMEM; + } vlan->bat_priv = bat_priv; vlan->vid = vid; @@ -582,17 +587,23 @@ int batadv_softif_create_vlan(struct batadv_priv *bat_priv, unsigned short vid) atomic_set(&vlan->ap_isolation, 0); + kref_get(&vlan->refcount); + hlist_add_head_rcu(&vlan->list, &bat_priv->softif_vlan_list); + spin_unlock_bh(&bat_priv->softif_vlan_list_lock); + + /* batadv_sysfs_add_vlan cannot be in the spinlock section due to the + * sleeping behavior of the sysfs functions and the fs_reclaim lock + */ err = batadv_sysfs_add_vlan(bat_priv->soft_iface, vlan); if (err) { - kfree(vlan); + /* ref for the function */ + batadv_softif_vlan_put(vlan); + + /* ref for the list */ + batadv_softif_vlan_put(vlan); return err; } - spin_lock_bh(&bat_priv->softif_vlan_list_lock); - kref_get(&vlan->refcount); - hlist_add_head_rcu(&vlan->list, &bat_priv->softif_vlan_list); - spin_unlock_bh(&bat_priv->softif_vlan_list_lock); - /* add a new TT local entry. This one will be marked with the NOPURGE * flag */ -- GitLab From b46fd722b7bc1ad491f5d7df681c73111c1bb2cf Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sun, 12 Aug 2018 21:04:44 +0200 Subject: [PATCH 0973/2269] batman-adv: Prevent duplicated global TT entry [ Upstream commit e7136e48ffdfb9f37b0820f619380485eb407361 ] The function batadv_tt_global_orig_entry_add is responsible for adding new tt_orig_list_entry to the orig_list. It first checks whether the entry already is in the list or not. If it is, then the creation of a new entry is aborted. But the lock for the list is only held when the list is really modified. This could lead to duplicated entries because another context could create an entry with the same key between the check and the list manipulation. The check and the manipulation of the list must therefore be in the same locked code section. Fixes: d657e621a0f5 ("batman-adv: add reference counting for type batadv_tt_orig_list_entry") Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/batman-adv/translation-table.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 0f4d4eece3e4d..9da3455847ffa 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -1587,6 +1587,8 @@ batadv_tt_global_orig_entry_add(struct batadv_tt_global_entry *tt_global, { struct batadv_tt_orig_list_entry *orig_entry; + spin_lock_bh(&tt_global->list_lock); + orig_entry = batadv_tt_global_orig_entry_find(tt_global, orig_node); if (orig_entry) { /* refresh the ttvn: the current value could be a bogus one that @@ -1609,11 +1611,9 @@ batadv_tt_global_orig_entry_add(struct batadv_tt_global_entry *tt_global, orig_entry->flags = flags; kref_init(&orig_entry->refcount); - spin_lock_bh(&tt_global->list_lock); kref_get(&orig_entry->refcount); hlist_add_head_rcu(&orig_entry->list, &tt_global->orig_list); - spin_unlock_bh(&tt_global->list_lock); atomic_inc(&tt_global->orig_list_count); sync_flags: @@ -1621,6 +1621,8 @@ sync_flags: out: if (orig_entry) batadv_tt_orig_list_entry_put(orig_entry); + + spin_unlock_bh(&tt_global->list_lock); } /** -- GitLab From 2b1c4897f2566956a1e0a59a63e58d043b4be009 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sun, 12 Aug 2018 21:04:45 +0200 Subject: [PATCH 0974/2269] batman-adv: Prevent duplicated tvlv handler [ Upstream commit ae3cdc97dc10c7a3b31f297dab429bfb774c9ccb ] The function batadv_tvlv_handler_register is responsible for adding new tvlv_handler to the handler_list. It first checks whether the entry already is in the list or not. If it is, then the creation of a new entry is aborted. But the lock for the list is only held when the list is really modified. This could lead to duplicated entries because another context could create an entry with the same key between the check and the list manipulation. The check and the manipulation of the list must therefore be in the same locked code section. Fixes: ef26157747d4 ("batman-adv: tvlv - basic infrastructure") Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/batman-adv/tvlv.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/batman-adv/tvlv.c b/net/batman-adv/tvlv.c index 1d9e267caec92..d6d6d95e48aa8 100644 --- a/net/batman-adv/tvlv.c +++ b/net/batman-adv/tvlv.c @@ -528,15 +528,20 @@ void batadv_tvlv_handler_register(struct batadv_priv *bat_priv, { struct batadv_tvlv_handler *tvlv_handler; + spin_lock_bh(&bat_priv->tvlv.handler_list_lock); + tvlv_handler = batadv_tvlv_handler_get(bat_priv, type, version); if (tvlv_handler) { + spin_unlock_bh(&bat_priv->tvlv.handler_list_lock); batadv_tvlv_handler_put(tvlv_handler); return; } tvlv_handler = kzalloc(sizeof(*tvlv_handler), GFP_ATOMIC); - if (!tvlv_handler) + if (!tvlv_handler) { + spin_unlock_bh(&bat_priv->tvlv.handler_list_lock); return; + } tvlv_handler->ogm_handler = optr; tvlv_handler->unicast_handler = uptr; @@ -546,7 +551,6 @@ void batadv_tvlv_handler_register(struct batadv_priv *bat_priv, kref_init(&tvlv_handler->refcount); INIT_HLIST_NODE(&tvlv_handler->list); - spin_lock_bh(&bat_priv->tvlv.handler_list_lock); kref_get(&tvlv_handler->refcount); hlist_add_head_rcu(&tvlv_handler->list, &bat_priv->tvlv.handler_list); spin_unlock_bh(&bat_priv->tvlv.handler_list_lock); -- GitLab From 3e9f714fb58b14d0cf5465dbec55b20b5446d607 Mon Sep 17 00:00:00 2001 From: Marek Lindner Date: Fri, 7 Sep 2018 05:45:54 +0800 Subject: [PATCH 0975/2269] batman-adv: fix backbone_gw refcount on queue_work() failure [ Upstream commit 5af96b9c59c72fb2af2d19c5cc2f3cdcee391dff ] The backbone_gw refcounter is to be decreased by the queued work and currently is never decreased if the queue_work() call fails. Fix by checking the queue_work() return value and decrease refcount if necessary. Signed-off-by: Marek Lindner Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/batman-adv/bridge_loop_avoidance.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index 422ee16b7854d..c3c848f64fdd5 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -1772,6 +1772,7 @@ batadv_bla_loopdetect_check(struct batadv_priv *bat_priv, struct sk_buff *skb, { struct batadv_bla_backbone_gw *backbone_gw; struct ethhdr *ethhdr; + bool ret; ethhdr = eth_hdr(skb); @@ -1795,8 +1796,13 @@ batadv_bla_loopdetect_check(struct batadv_priv *bat_priv, struct sk_buff *skb, if (unlikely(!backbone_gw)) return true; - queue_work(batadv_event_workqueue, &backbone_gw->report_work); - /* backbone_gw is unreferenced in the report work function function */ + ret = queue_work(batadv_event_workqueue, &backbone_gw->report_work); + + /* backbone_gw is unreferenced in the report work function function + * if queue_work() call was successful + */ + if (!ret) + batadv_backbone_gw_put(backbone_gw); return true; } -- GitLab From 498c536f59315a7403d9689f8088d3a8df7f5031 Mon Sep 17 00:00:00 2001 From: Marek Lindner Date: Fri, 7 Sep 2018 05:45:55 +0800 Subject: [PATCH 0976/2269] batman-adv: fix hardif_neigh refcount on queue_work() failure [ Upstream commit 4c4af6900844ab04c9434c972021d7b48610e06a ] The hardif_neigh refcounter is to be decreased by the queued work and currently is never decreased if the queue_work() call fails. Fix by checking the queue_work() return value and decrease refcount if necessary. Signed-off-by: Marek Lindner Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/batman-adv/bat_v_elp.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c index bfc49d48eb57d..e92dfedccc165 100644 --- a/net/batman-adv/bat_v_elp.c +++ b/net/batman-adv/bat_v_elp.c @@ -254,6 +254,7 @@ static void batadv_v_elp_periodic_work(struct work_struct *work) struct batadv_priv *bat_priv; struct sk_buff *skb; u32 elp_interval; + bool ret; bat_v = container_of(work, struct batadv_hard_iface_bat_v, elp_wq.work); hard_iface = container_of(bat_v, struct batadv_hard_iface, bat_v); @@ -315,8 +316,11 @@ static void batadv_v_elp_periodic_work(struct work_struct *work) * may sleep and that is not allowed in an rcu protected * context. Therefore schedule a task for that. */ - queue_work(batadv_event_workqueue, - &hardif_neigh->bat_v.metric_work); + ret = queue_work(batadv_event_workqueue, + &hardif_neigh->bat_v.metric_work); + + if (!ret) + batadv_hardif_neigh_put(hardif_neigh); } rcu_read_unlock(); -- GitLab From 89450e431a6314514f85d9e7dd11621b1b005754 Mon Sep 17 00:00:00 2001 From: Keerthy Date: Wed, 8 Aug 2018 18:44:59 +0530 Subject: [PATCH 0977/2269] clocksource/drivers/ti-32k: Add CLOCK_SOURCE_SUSPEND_NONSTOP flag for non-am43 SoCs [ Upstream commit 3b7d96a0dbb6b630878597a1838fc39f808b761b ] The 32k clocksource is NONSTOP for non-am43 SoCs. Hence add the flag for all the other SoCs. Reported-by: Tony Lindgren Signed-off-by: Keerthy Acked-by: Tony Lindgren Signed-off-by: Daniel Lezcano Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/clocksource/timer-ti-32k.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/clocksource/timer-ti-32k.c b/drivers/clocksource/timer-ti-32k.c index 880a861ab3c82..713214d085e06 100644 --- a/drivers/clocksource/timer-ti-32k.c +++ b/drivers/clocksource/timer-ti-32k.c @@ -98,6 +98,9 @@ static int __init ti_32k_timer_init(struct device_node *np) return -ENXIO; } + if (!of_machine_is_compatible("ti,am43")) + ti_32k_timer.cs.flags |= CLOCK_SOURCE_SUSPEND_NONSTOP; + ti_32k_timer.counter = ti_32k_timer.base; /* -- GitLab From 40e49c1553134ca442602dffe707afef3e5a52d0 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Tue, 11 Sep 2018 12:22:25 -0700 Subject: [PATCH 0978/2269] scsi: ibmvscsis: Fix a stringop-overflow warning [ Upstream commit d792d4c4fc866ae224b0b0ca2aabd87d23b4d6cc ] There's currently a warning about string overflow with strncat: drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c: In function 'ibmvscsis_probe': drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c:3479:2: error: 'strncat' specified bound 64 equals destination size [-Werror=stringop-overflow=] strncat(vscsi->eye, vdev->name, MAX_EYE); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Switch to a single snprintf instead of a strcpy + strcat to handle this cleanly. Signed-off-by: Laura Abbott Suggested-by: Kees Cook Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c index 2799a6b08f736..c6878f5965207 100644 --- a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c +++ b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c @@ -3465,8 +3465,7 @@ static int ibmvscsis_probe(struct vio_dev *vdev, vscsi->dds.window[LOCAL].liobn, vscsi->dds.window[REMOTE].liobn); - strcpy(vscsi->eye, "VSCSI "); - strncat(vscsi->eye, vdev->name, MAX_EYE); + snprintf(vscsi->eye, sizeof(vscsi->eye), "VSCSI %s", vdev->name); vscsi->dds.unit_id = vdev->unit_address; strncpy(vscsi->dds.partition_name, partition_name, -- GitLab From 3353c04dd4d754ac6cf0068d42d0fc9df9ae79bb Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Tue, 11 Sep 2018 12:22:26 -0700 Subject: [PATCH 0979/2269] scsi: ibmvscsis: Ensure partition name is properly NUL terminated [ Upstream commit adad633af7b970bfa5dd1b624a4afc83cac9b235 ] While reviewing another part of the code, Kees noticed that the strncpy of the partition name might not always be NUL terminated. Switch to using strscpy which does this safely. Reported-by: Kees Cook Signed-off-by: Laura Abbott Reviewed-by: Kees Cook Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c index c6878f5965207..25d2741cdf968 100644 --- a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c +++ b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c @@ -3468,7 +3468,7 @@ static int ibmvscsis_probe(struct vio_dev *vdev, snprintf(vscsi->eye, sizeof(vscsi->eye), "VSCSI %s", vdev->name); vscsi->dds.unit_id = vdev->unit_address; - strncpy(vscsi->dds.partition_name, partition_name, + strscpy(vscsi->dds.partition_name, partition_name, sizeof(vscsi->dds.partition_name)); vscsi->dds.partition_num = partition_number; -- GitLab From 2b8b339c53d2d8096a43f3c9c7f8386bf646816c Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Tue, 18 Sep 2018 16:10:49 +0300 Subject: [PATCH 0980/2269] intel_th: pci: Add Ice Lake PCH support [ Upstream commit 59d08d00d43c644ee2011d7ff1807bdd69f31fe0 ] This adds Intel(R) Trace Hub PCI ID for Ice Lake PCH. Signed-off-by: Alexander Shishkin Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/hwtracing/intel_th/pci.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/hwtracing/intel_th/pci.c b/drivers/hwtracing/intel_th/pci.c index c2a2ce8ee5410..ef699477d94a8 100644 --- a/drivers/hwtracing/intel_th/pci.c +++ b/drivers/hwtracing/intel_th/pci.c @@ -168,6 +168,11 @@ static const struct pci_device_id intel_th_pci_id_table[] = { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x18e1), .driver_data = (kernel_ulong_t)&intel_th_2x, }, + { + /* Ice Lake PCH */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x34a6), + .driver_data = (kernel_ulong_t)&intel_th_2x, + }, { 0 }, }; -- GitLab From 8b8ab47a093d827991e33af55dc41304fca5ef0e Mon Sep 17 00:00:00 2001 From: Andreas Schwab Date: Mon, 17 Sep 2018 12:43:34 -0700 Subject: [PATCH 0981/2269] Input: atakbd - fix Atari keymap [ Upstream commit 9e62df51be993035c577371ffee5477697a56aad ] Fix errors in Atari keymap (mostly in keypad, help and undo keys). Patch provided on debian-68k ML by Andreas Schwab , keymap array size and unhandled scancode limit adjusted to 0x73 by me. Tested-by: Michael Schmitz Signed-off-by: Michael Schmitz Signed-off-by: Andreas Schwab Signed-off-by: Dmitry Torokhov Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/input/keyboard/atakbd.c | 64 ++++++++++++++------------------- 1 file changed, 26 insertions(+), 38 deletions(-) diff --git a/drivers/input/keyboard/atakbd.c b/drivers/input/keyboard/atakbd.c index f1235831283d7..524a72bee55aa 100644 --- a/drivers/input/keyboard/atakbd.c +++ b/drivers/input/keyboard/atakbd.c @@ -79,8 +79,7 @@ MODULE_LICENSE("GPL"); */ -static unsigned char atakbd_keycode[0x72] = { /* American layout */ - [0] = KEY_GRAVE, +static unsigned char atakbd_keycode[0x73] = { /* American layout */ [1] = KEY_ESC, [2] = KEY_1, [3] = KEY_2, @@ -121,9 +120,9 @@ static unsigned char atakbd_keycode[0x72] = { /* American layout */ [38] = KEY_L, [39] = KEY_SEMICOLON, [40] = KEY_APOSTROPHE, - [41] = KEY_BACKSLASH, /* FIXME, '#' */ + [41] = KEY_GRAVE, [42] = KEY_LEFTSHIFT, - [43] = KEY_GRAVE, /* FIXME: '~' */ + [43] = KEY_BACKSLASH, [44] = KEY_Z, [45] = KEY_X, [46] = KEY_C, @@ -149,45 +148,34 @@ static unsigned char atakbd_keycode[0x72] = { /* American layout */ [66] = KEY_F8, [67] = KEY_F9, [68] = KEY_F10, - [69] = KEY_ESC, - [70] = KEY_DELETE, - [71] = KEY_KP7, - [72] = KEY_KP8, - [73] = KEY_KP9, + [71] = KEY_HOME, + [72] = KEY_UP, [74] = KEY_KPMINUS, - [75] = KEY_KP4, - [76] = KEY_KP5, - [77] = KEY_KP6, + [75] = KEY_LEFT, + [77] = KEY_RIGHT, [78] = KEY_KPPLUS, - [79] = KEY_KP1, - [80] = KEY_KP2, - [81] = KEY_KP3, - [82] = KEY_KP0, - [83] = KEY_KPDOT, - [90] = KEY_KPLEFTPAREN, - [91] = KEY_KPRIGHTPAREN, - [92] = KEY_KPASTERISK, /* FIXME */ - [93] = KEY_KPASTERISK, - [94] = KEY_KPPLUS, - [95] = KEY_HELP, + [80] = KEY_DOWN, + [82] = KEY_INSERT, + [83] = KEY_DELETE, [96] = KEY_102ND, - [97] = KEY_KPASTERISK, /* FIXME */ - [98] = KEY_KPSLASH, + [97] = KEY_UNDO, + [98] = KEY_HELP, [99] = KEY_KPLEFTPAREN, [100] = KEY_KPRIGHTPAREN, [101] = KEY_KPSLASH, [102] = KEY_KPASTERISK, - [103] = KEY_UP, - [104] = KEY_KPASTERISK, /* FIXME */ - [105] = KEY_LEFT, - [106] = KEY_RIGHT, - [107] = KEY_KPASTERISK, /* FIXME */ - [108] = KEY_DOWN, - [109] = KEY_KPASTERISK, /* FIXME */ - [110] = KEY_KPASTERISK, /* FIXME */ - [111] = KEY_KPASTERISK, /* FIXME */ - [112] = KEY_KPASTERISK, /* FIXME */ - [113] = KEY_KPASTERISK /* FIXME */ + [103] = KEY_KP7, + [104] = KEY_KP8, + [105] = KEY_KP9, + [106] = KEY_KP4, + [107] = KEY_KP5, + [108] = KEY_KP6, + [109] = KEY_KP1, + [110] = KEY_KP2, + [111] = KEY_KP3, + [112] = KEY_KP0, + [113] = KEY_KPDOT, + [114] = KEY_KPENTER, }; static struct input_dev *atakbd_dev; @@ -195,7 +183,7 @@ static struct input_dev *atakbd_dev; static void atakbd_interrupt(unsigned char scancode, char down) { - if (scancode < 0x72) { /* scancodes < 0xf2 are keys */ + if (scancode < 0x73) { /* scancodes < 0xf3 are keys */ // report raw events here? @@ -209,7 +197,7 @@ static void atakbd_interrupt(unsigned char scancode, char down) input_report_key(atakbd_dev, scancode, down); input_sync(atakbd_dev); } - } else /* scancodes >= 0xf2 are mouse data, most likely */ + } else /* scancodes >= 0xf3 are mouse data, most likely */ printk(KERN_INFO "atakbd: unhandled scancode %x\n", scancode); return; -- GitLab From 1c22d2cbd9f2d944e123af0c94d538f005e03ff4 Mon Sep 17 00:00:00 2001 From: Michael Schmitz Date: Mon, 17 Sep 2018 15:27:49 -0700 Subject: [PATCH 0982/2269] Input: atakbd - fix Atari CapsLock behaviour [ Upstream commit 52d2c7bf7c90217fbe875d2d76f310979c48eb83 ] The CapsLock key on Atari keyboards is not a toggle, it does send the normal make and break scancodes. Drop the CapsLock toggle handling code, which did cause the CapsLock key to merely act as a Shift key. Tested-by: Michael Schmitz Signed-off-by: Michael Schmitz Signed-off-by: Andreas Schwab Signed-off-by: Dmitry Torokhov Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/input/keyboard/atakbd.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/drivers/input/keyboard/atakbd.c b/drivers/input/keyboard/atakbd.c index 524a72bee55aa..fdeda0b0fbd61 100644 --- a/drivers/input/keyboard/atakbd.c +++ b/drivers/input/keyboard/atakbd.c @@ -189,14 +189,8 @@ static void atakbd_interrupt(unsigned char scancode, char down) scancode = atakbd_keycode[scancode]; - if (scancode == KEY_CAPSLOCK) { /* CapsLock is a toggle switch key on Amiga */ - input_report_key(atakbd_dev, scancode, 1); - input_report_key(atakbd_dev, scancode, 0); - input_sync(atakbd_dev); - } else { - input_report_key(atakbd_dev, scancode, down); - input_sync(atakbd_dev); - } + input_report_key(atakbd_dev, scancode, down); + input_sync(atakbd_dev); } else /* scancodes >= 0xf3 are mouse data, most likely */ printk(KERN_INFO "atakbd: unhandled scancode %x\n", scancode); -- GitLab From 997cdc46f4362d40d39b55efbce2badb39ca16ce Mon Sep 17 00:00:00 2001 From: Christian Lamparter Date: Mon, 17 Sep 2018 17:22:40 +0200 Subject: [PATCH 0983/2269] net: emac: fix fixed-link setup for the RTL8363SB switch [ Upstream commit 08e39982ef64f800fd1f9b9b92968d14d5fafa82 ] On the Netgear WNDAP620, the emac ethernet isn't receiving nor xmitting any frames from/to the RTL8363SB (identifies itself as a RTL8367RB). This is caused by the emac hardware not knowing the forced link parameters for speed, duplex, pause, etc. This begs the question, how this was working on the original driver code, when it was necessary to set the phy_address and phy_map to 0xffffffff. But I guess without access to the old PPC405/440/460 hardware, it's not possible to know. Signed-off-by: Christian Lamparter Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/ibm/emac/core.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/ibm/emac/core.c b/drivers/net/ethernet/ibm/emac/core.c index 7feff2450ed68..d1da8f05ef85b 100644 --- a/drivers/net/ethernet/ibm/emac/core.c +++ b/drivers/net/ethernet/ibm/emac/core.c @@ -2671,12 +2671,17 @@ static int emac_init_phy(struct emac_instance *dev) if (of_phy_is_fixed_link(np)) { int res = emac_dt_mdio_probe(dev); - if (!res) { - res = of_phy_register_fixed_link(np); - if (res) - mdiobus_unregister(dev->mii_bus); + if (res) + return res; + + res = of_phy_register_fixed_link(np); + dev->phy_dev = of_phy_find_device(np); + if (res || !dev->phy_dev) { + mdiobus_unregister(dev->mii_bus); + return res ? res : -EINVAL; } - return res; + emac_adjust_link(dev->ndev); + put_device(&dev->phy_dev->mdio.dev); } return 0; } -- GitLab From 48047b847808a33c02292d9bb8d10b24a1633fab Mon Sep 17 00:00:00 2001 From: Kazuya Mizuguchi Date: Tue, 18 Sep 2018 12:22:26 +0200 Subject: [PATCH 0984/2269] ravb: do not write 1 to reserved bits [ Upstream commit 2fe397a3959de8a472f165e6d152f64cb77fa2cc ] EtherAVB hardware requires 0 to be written to status register bits in order to clear them, however, care must be taken not to: 1. Clear other bits, by writing zero to them 2. Write one to reserved bits This patch corrects the ravb driver with respect to the second point above. This is done by defining reserved bit masks for the affected registers and, after auditing the code, ensure all sites that may write a one to a reserved bit use are suitably masked. Signed-off-by: Kazuya Mizuguchi Signed-off-by: Simon Horman Reviewed-by: Sergei Shtylyov Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/renesas/ravb.h | 5 +++++ drivers/net/ethernet/renesas/ravb_main.c | 11 ++++++----- drivers/net/ethernet/renesas/ravb_ptp.c | 2 +- 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/renesas/ravb.h b/drivers/net/ethernet/renesas/ravb.h index 96a27b00c90e2..897bd33c2c508 100644 --- a/drivers/net/ethernet/renesas/ravb.h +++ b/drivers/net/ethernet/renesas/ravb.h @@ -431,6 +431,7 @@ enum EIS_BIT { EIS_CULF1 = 0x00000080, EIS_TFFF = 0x00000100, EIS_QFS = 0x00010000, + EIS_RESERVED = (GENMASK(31, 17) | GENMASK(15, 11)), }; /* RIC0 */ @@ -475,6 +476,7 @@ enum RIS0_BIT { RIS0_FRF15 = 0x00008000, RIS0_FRF16 = 0x00010000, RIS0_FRF17 = 0x00020000, + RIS0_RESERVED = GENMASK(31, 18), }; /* RIC1 */ @@ -531,6 +533,7 @@ enum RIS2_BIT { RIS2_QFF16 = 0x00010000, RIS2_QFF17 = 0x00020000, RIS2_RFFF = 0x80000000, + RIS2_RESERVED = GENMASK(30, 18), }; /* TIC */ @@ -547,6 +550,7 @@ enum TIS_BIT { TIS_FTF1 = 0x00000002, /* Undocumented? */ TIS_TFUF = 0x00000100, TIS_TFWF = 0x00000200, + TIS_RESERVED = (GENMASK(31, 20) | GENMASK(15, 12) | GENMASK(7, 4)) }; /* ISS */ @@ -620,6 +624,7 @@ enum GIC_BIT { enum GIS_BIT { GIS_PTCF = 0x00000001, /* Undocumented? */ GIS_PTMF = 0x00000004, + GIS_RESERVED = GENMASK(15, 10), }; /* GIE (R-Car Gen3 only) */ diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index e87a779bfcfe5..ff3a293ffe368 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -721,10 +721,11 @@ static void ravb_error_interrupt(struct net_device *ndev) u32 eis, ris2; eis = ravb_read(ndev, EIS); - ravb_write(ndev, ~EIS_QFS, EIS); + ravb_write(ndev, ~(EIS_QFS | EIS_RESERVED), EIS); if (eis & EIS_QFS) { ris2 = ravb_read(ndev, RIS2); - ravb_write(ndev, ~(RIS2_QFF0 | RIS2_RFFF), RIS2); + ravb_write(ndev, ~(RIS2_QFF0 | RIS2_RFFF | RIS2_RESERVED), + RIS2); /* Receive Descriptor Empty int */ if (ris2 & RIS2_QFF0) @@ -777,7 +778,7 @@ static bool ravb_timestamp_interrupt(struct net_device *ndev) u32 tis = ravb_read(ndev, TIS); if (tis & TIS_TFUF) { - ravb_write(ndev, ~TIS_TFUF, TIS); + ravb_write(ndev, ~(TIS_TFUF | TIS_RESERVED), TIS); ravb_get_tx_tstamp(ndev); return true; } @@ -912,7 +913,7 @@ static int ravb_poll(struct napi_struct *napi, int budget) /* Processing RX Descriptor Ring */ if (ris0 & mask) { /* Clear RX interrupt */ - ravb_write(ndev, ~mask, RIS0); + ravb_write(ndev, ~(mask | RIS0_RESERVED), RIS0); if (ravb_rx(ndev, "a, q)) goto out; } @@ -920,7 +921,7 @@ static int ravb_poll(struct napi_struct *napi, int budget) if (tis & mask) { spin_lock_irqsave(&priv->lock, flags); /* Clear TX interrupt */ - ravb_write(ndev, ~mask, TIS); + ravb_write(ndev, ~(mask | TIS_RESERVED), TIS); ravb_tx_free(ndev, q, true); netif_wake_subqueue(ndev, q); mmiowb(); diff --git a/drivers/net/ethernet/renesas/ravb_ptp.c b/drivers/net/ethernet/renesas/ravb_ptp.c index eede70ec37f8c..9e3222fd69f9e 100644 --- a/drivers/net/ethernet/renesas/ravb_ptp.c +++ b/drivers/net/ethernet/renesas/ravb_ptp.c @@ -319,7 +319,7 @@ void ravb_ptp_interrupt(struct net_device *ndev) } } - ravb_write(ndev, ~gis, GIS); + ravb_write(ndev, ~(gis | GIS_RESERVED), GIS); } void ravb_ptp_init(struct net_device *ndev, struct platform_device *pdev) -- GitLab From 940d35f3a57296bebc0828e7cc4a274c507d499f Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Thu, 20 Sep 2018 16:32:52 -0500 Subject: [PATCH 0985/2269] PCI: dwc: Fix scheduling while atomic issues [ Upstream commit 9024143e700f89d74b8cdaf316a3499d74fc56fe ] When programming the inbound/outbound ATUs, we call usleep_range() after each checking PCIE_ATU_ENABLE bit. Unfortunately, the ATU programming can be executed in atomic context: inbound ATU programming could be called through pci_epc_write_header() =>dw_pcie_ep_write_header() =>dw_pcie_prog_inbound_atu() outbound ATU programming could be called through pci_bus_read_config_dword() =>dw_pcie_rd_conf() =>dw_pcie_prog_outbound_atu() Fix this issue by calling mdelay() instead. Fixes: f8aed6ec624f ("PCI: dwc: designware: Add EP mode support") Fixes: d8bbeb39fbf3 ("PCI: designware: Wait for iATU enable") Signed-off-by: Jisheng Zhang [lorenzo.pieralisi@arm.com: commit log update] Signed-off-by: Lorenzo Pieralisi Signed-off-by: Bjorn Helgaas Acked-by: Gustavo Pimentel Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/pci/dwc/pcie-designware.c | 8 ++++---- drivers/pci/dwc/pcie-designware.h | 3 +-- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/pci/dwc/pcie-designware.c b/drivers/pci/dwc/pcie-designware.c index 88abdddee2ad7..a06ad2c65174f 100644 --- a/drivers/pci/dwc/pcie-designware.c +++ b/drivers/pci/dwc/pcie-designware.c @@ -138,7 +138,7 @@ static void dw_pcie_prog_outbound_atu_unroll(struct dw_pcie *pci, int index, if (val & PCIE_ATU_ENABLE) return; - usleep_range(LINK_WAIT_IATU_MIN, LINK_WAIT_IATU_MAX); + mdelay(LINK_WAIT_IATU); } dev_err(pci->dev, "outbound iATU is not being enabled\n"); } @@ -181,7 +181,7 @@ void dw_pcie_prog_outbound_atu(struct dw_pcie *pci, int index, int type, if (val & PCIE_ATU_ENABLE) return; - usleep_range(LINK_WAIT_IATU_MIN, LINK_WAIT_IATU_MAX); + mdelay(LINK_WAIT_IATU); } dev_err(pci->dev, "outbound iATU is not being enabled\n"); } @@ -239,7 +239,7 @@ static int dw_pcie_prog_inbound_atu_unroll(struct dw_pcie *pci, int index, if (val & PCIE_ATU_ENABLE) return 0; - usleep_range(LINK_WAIT_IATU_MIN, LINK_WAIT_IATU_MAX); + mdelay(LINK_WAIT_IATU); } dev_err(pci->dev, "inbound iATU is not being enabled\n"); @@ -285,7 +285,7 @@ int dw_pcie_prog_inbound_atu(struct dw_pcie *pci, int index, int bar, if (val & PCIE_ATU_ENABLE) return 0; - usleep_range(LINK_WAIT_IATU_MIN, LINK_WAIT_IATU_MAX); + mdelay(LINK_WAIT_IATU); } dev_err(pci->dev, "inbound iATU is not being enabled\n"); diff --git a/drivers/pci/dwc/pcie-designware.h b/drivers/pci/dwc/pcie-designware.h index cb493bcae8b4f..3551dd607b907 100644 --- a/drivers/pci/dwc/pcie-designware.h +++ b/drivers/pci/dwc/pcie-designware.h @@ -28,8 +28,7 @@ /* Parameters for the waiting for iATU enabled routine */ #define LINK_WAIT_MAX_IATU_RETRIES 5 -#define LINK_WAIT_IATU_MIN 9000 -#define LINK_WAIT_IATU_MAX 10000 +#define LINK_WAIT_IATU 9 /* Synopsys-specific PCIe configuration registers */ #define PCIE_PORT_LINK_CONTROL 0x710 -- GitLab From 26377fe2f7b00993c3f6bb5d95cff81385c72d12 Mon Sep 17 00:00:00 2001 From: Alexandru Gheorghe Date: Mon, 16 Jul 2018 11:07:07 +0100 Subject: [PATCH 0986/2269] drm: mali-dp: Call drm_crtc_vblank_reset on device init [ Upstream commit 69be1984ded00a11b1ed0888c6d8e4f35370372f ] Currently, if userspace calls drm_wait_vblank before the crtc is activated the crtc vblank_enable hook is called, which in case of malidp driver triggers some warninngs. This happens because on device init we don't inform the drm core about the vblank state by calling drm_crtc_vblank_on/off/reset which together with drm_vblank_get have some magic that prevents calling drm_vblank_enable when crtc is off. Signed-off-by: Alexandru Gheorghe Acked-by: Liviu Dudau Signed-off-by: Liviu Dudau Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/arm/malidp_drv.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/arm/malidp_drv.c b/drivers/gpu/drm/arm/malidp_drv.c index 1a57cc28955e7..ff3348ee9595f 100644 --- a/drivers/gpu/drm/arm/malidp_drv.c +++ b/drivers/gpu/drm/arm/malidp_drv.c @@ -617,6 +617,7 @@ static int malidp_bind(struct device *dev) drm->irq_enabled = true; ret = drm_vblank_init(drm, drm->mode_config.num_crtc); + drm_crtc_vblank_reset(&malidp->crtc); if (ret < 0) { DRM_ERROR("failed to initialise vblank\n"); goto vblank_fail; -- GitLab From b6b5bc245dc23cfa6dcbf0bb1e13b606066471af Mon Sep 17 00:00:00 2001 From: Wen Xiong Date: Thu, 20 Sep 2018 19:32:12 -0500 Subject: [PATCH 0987/2269] scsi: ipr: System hung while dlpar adding primary ipr adapter back [ Upstream commit 318ddb34b2052f838aa243d07173e2badf3e630e ] While dlpar adding primary ipr adapter back, driver goes through adapter initialization then schedule ipr_worker_thread to start te disk scan by dropping the host lock, calling scsi_add_device. Then get the adapter reset request again, so driver does scsi_block_requests, this will cause the scsi_add_device get hung until we unblock. But we can't run ipr_worker_thread to do the unblock because its stuck in scsi_add_device. This patch fixes the issue. [mkp: typo and whitespace fixes] Signed-off-by: Wen Xiong Acked-by: Brian King Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/ipr.c | 106 ++++++++++++++++++++++++++------------------- drivers/scsi/ipr.h | 1 + 2 files changed, 62 insertions(+), 45 deletions(-) diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c index f838bd73befa8..35d54ee1c5c74 100644 --- a/drivers/scsi/ipr.c +++ b/drivers/scsi/ipr.c @@ -3308,6 +3308,65 @@ static void ipr_release_dump(struct kref *kref) LEAVE; } +static void ipr_add_remove_thread(struct work_struct *work) +{ + unsigned long lock_flags; + struct ipr_resource_entry *res; + struct scsi_device *sdev; + struct ipr_ioa_cfg *ioa_cfg = + container_of(work, struct ipr_ioa_cfg, scsi_add_work_q); + u8 bus, target, lun; + int did_work; + + ENTER; + spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags); + +restart: + do { + did_work = 0; + if (!ioa_cfg->hrrq[IPR_INIT_HRRQ].allow_cmds) { + spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags); + return; + } + + list_for_each_entry(res, &ioa_cfg->used_res_q, queue) { + if (res->del_from_ml && res->sdev) { + did_work = 1; + sdev = res->sdev; + if (!scsi_device_get(sdev)) { + if (!res->add_to_ml) + list_move_tail(&res->queue, &ioa_cfg->free_res_q); + else + res->del_from_ml = 0; + spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags); + scsi_remove_device(sdev); + scsi_device_put(sdev); + spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags); + } + break; + } + } + } while (did_work); + + list_for_each_entry(res, &ioa_cfg->used_res_q, queue) { + if (res->add_to_ml) { + bus = res->bus; + target = res->target; + lun = res->lun; + res->add_to_ml = 0; + spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags); + scsi_add_device(ioa_cfg->host, bus, target, lun); + spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags); + goto restart; + } + } + + ioa_cfg->scan_done = 1; + spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags); + kobject_uevent(&ioa_cfg->host->shost_dev.kobj, KOBJ_CHANGE); + LEAVE; +} + /** * ipr_worker_thread - Worker thread * @work: ioa config struct @@ -3322,13 +3381,9 @@ static void ipr_release_dump(struct kref *kref) static void ipr_worker_thread(struct work_struct *work) { unsigned long lock_flags; - struct ipr_resource_entry *res; - struct scsi_device *sdev; struct ipr_dump *dump; struct ipr_ioa_cfg *ioa_cfg = container_of(work, struct ipr_ioa_cfg, work_q); - u8 bus, target, lun; - int did_work; ENTER; spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags); @@ -3366,49 +3421,9 @@ static void ipr_worker_thread(struct work_struct *work) return; } -restart: - do { - did_work = 0; - if (!ioa_cfg->hrrq[IPR_INIT_HRRQ].allow_cmds) { - spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags); - return; - } + schedule_work(&ioa_cfg->scsi_add_work_q); - list_for_each_entry(res, &ioa_cfg->used_res_q, queue) { - if (res->del_from_ml && res->sdev) { - did_work = 1; - sdev = res->sdev; - if (!scsi_device_get(sdev)) { - if (!res->add_to_ml) - list_move_tail(&res->queue, &ioa_cfg->free_res_q); - else - res->del_from_ml = 0; - spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags); - scsi_remove_device(sdev); - scsi_device_put(sdev); - spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags); - } - break; - } - } - } while (did_work); - - list_for_each_entry(res, &ioa_cfg->used_res_q, queue) { - if (res->add_to_ml) { - bus = res->bus; - target = res->target; - lun = res->lun; - res->add_to_ml = 0; - spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags); - scsi_add_device(ioa_cfg->host, bus, target, lun); - spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags); - goto restart; - } - } - - ioa_cfg->scan_done = 1; spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags); - kobject_uevent(&ioa_cfg->host->shost_dev.kobj, KOBJ_CHANGE); LEAVE; } @@ -9937,6 +9952,7 @@ static void ipr_init_ioa_cfg(struct ipr_ioa_cfg *ioa_cfg, INIT_LIST_HEAD(&ioa_cfg->free_res_q); INIT_LIST_HEAD(&ioa_cfg->used_res_q); INIT_WORK(&ioa_cfg->work_q, ipr_worker_thread); + INIT_WORK(&ioa_cfg->scsi_add_work_q, ipr_add_remove_thread); init_waitqueue_head(&ioa_cfg->reset_wait_q); init_waitqueue_head(&ioa_cfg->msi_wait_q); init_waitqueue_head(&ioa_cfg->eeh_wait_q); diff --git a/drivers/scsi/ipr.h b/drivers/scsi/ipr.h index c7f0e9e3cd7d4..085e6c90f9e6a 100644 --- a/drivers/scsi/ipr.h +++ b/drivers/scsi/ipr.h @@ -1568,6 +1568,7 @@ struct ipr_ioa_cfg { u8 saved_mode_page_len; struct work_struct work_q; + struct work_struct scsi_add_work_q; struct workqueue_struct *reset_work_q; wait_queue_head_t reset_wait_q; -- GitLab From 4c0e2e4a4b54d2893d2664edf6f043199803b9bc Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Fri, 21 Sep 2018 09:01:01 +0200 Subject: [PATCH 0988/2269] scsi: sd: don't crash the host on invalid commands [ Upstream commit f1f1fadacaf08b7cf11714c0c29f8fa4d4ef68a9 ] When sd_init_command() get's a command with a unknown req_op() it crashes the system via BUG(). This makes debugging the actual reason for the broken request cmd_flags pretty hard as the system is down before it's able to write out debugging data on the serial console or the trace buffer. Change the BUG() to a WARN_ON() and return BLKPREP_KILL to fail gracefully and return an I/O error to the producer of the request. Signed-off-by: Johannes Thumshirn Cc: Hannes Reinecke Cc: Bart Van Assche Cc: Christoph Hellwig Reviewed-by: Christoph Hellwig Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/sd.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 4a532318b2119..6d3091ff9b928 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -1285,7 +1285,8 @@ static int sd_init_command(struct scsi_cmnd *cmd) case REQ_OP_ZONE_RESET: return sd_zbc_setup_reset_cmnd(cmd); default: - BUG(); + WARN_ON_ONCE(1); + return BLKPREP_KILL; } } -- GitLab From f75ad0441a0aec523b83bc4ec09abffd3840cfdc Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Fri, 21 Sep 2018 02:44:12 -0700 Subject: [PATCH 0989/2269] net/mlx4: Use cpumask_available for eq->affinity_mask [ Upstream commit 8ac1ee6f4d62e781e3b3fd8b9c42b70371427669 ] Clang warns that the address of a pointer will always evaluated as true in a boolean context: drivers/net/ethernet/mellanox/mlx4/eq.c:243:11: warning: address of array 'eq->affinity_mask' will always evaluate to 'true' [-Wpointer-bool-conversion] if (!eq->affinity_mask || cpumask_empty(eq->affinity_mask)) ~~~~~^~~~~~~~~~~~~ 1 warning generated. Use cpumask_available, introduced in commit f7e30f01a9e2 ("cpumask: Add helper cpumask_available()"), which does the proper checking and avoids this warning. Link: https://github.com/ClangBuiltLinux/linux/issues/86 Signed-off-by: Nathan Chancellor Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx4/eq.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c index 6f57c052053e2..050dc213e8dbe 100644 --- a/drivers/net/ethernet/mellanox/mlx4/eq.c +++ b/drivers/net/ethernet/mellanox/mlx4/eq.c @@ -240,7 +240,8 @@ static void mlx4_set_eq_affinity_hint(struct mlx4_priv *priv, int vec) struct mlx4_dev *dev = &priv->dev; struct mlx4_eq *eq = &priv->eq_table.eq[vec]; - if (!eq->affinity_mask || cpumask_empty(eq->affinity_mask)) + if (!cpumask_available(eq->affinity_mask) || + cpumask_empty(eq->affinity_mask)) return; hint_err = irq_set_affinity_hint(eq->irq, eq->affinity_mask); -- GitLab From 4f5dbf26a1bdb1a3395b0de9eb20d22b47736656 Mon Sep 17 00:00:00 2001 From: Tao Ren Date: Wed, 19 Sep 2018 15:13:31 -0700 Subject: [PATCH 0990/2269] clocksource/drivers/fttmr010: Fix set_next_event handler [ Upstream commit 4451d3f59f2a6f95e5d205c2d04ea072955d080d ] Currently, the aspeed MATCH1 register is updated to in set_next_event handler, with the assumption that COUNT register value is preserved when the timer is disabled and it continues decrementing after the timer is enabled. But the assumption is wrong: RELOAD register is loaded into COUNT register when the aspeed timer is enabled, which means the next event may be delayed because timer interrupt won't be generated until <0xFFFFFFFF - current_count + cycles>. The problem can be fixed by updating RELOAD register to , and COUNT register will be re-loaded when the timer is enabled and interrupt is generated when COUNT register overflows. The test result on Facebook Backpack-CMM BMC hardware (AST2500) shows the issue is fixed: without the patch, usleep(100) suspends the process for several milliseconds (and sometimes even over 40 milliseconds); after applying the fix, usleep(100) takes averagely 240 microseconds to return under the same workload level. Signed-off-by: Tao Ren Reviewed-by: Linus Walleij Tested-by: Lei YU Signed-off-by: Daniel Lezcano Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/clocksource/timer-fttmr010.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/clocksource/timer-fttmr010.c b/drivers/clocksource/timer-fttmr010.c index cdf23b6286882..cdfe1c82f3f00 100644 --- a/drivers/clocksource/timer-fttmr010.c +++ b/drivers/clocksource/timer-fttmr010.c @@ -130,13 +130,17 @@ static int fttmr010_timer_set_next_event(unsigned long cycles, cr &= ~fttmr010->t1_enable_val; writel(cr, fttmr010->base + TIMER_CR); - /* Setup the match register forward/backward in time */ - cr = readl(fttmr010->base + TIMER1_COUNT); - if (fttmr010->count_down) - cr -= cycles; - else - cr += cycles; - writel(cr, fttmr010->base + TIMER1_MATCH1); + if (fttmr010->count_down) { + /* + * ASPEED Timer Controller will load TIMER1_LOAD register + * into TIMER1_COUNT register when the timer is re-enabled. + */ + writel(cycles, fttmr010->base + TIMER1_LOAD); + } else { + /* Setup the match register forward in time */ + cr = readl(fttmr010->base + TIMER1_COUNT); + writel(cr + cycles, fttmr010->base + TIMER1_MATCH1); + } /* Start */ cr = readl(fttmr010->base + TIMER_CR); -- GitLab From 256707d3b89b945920e96a9edebe518c1054f84a Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Mon, 24 Sep 2018 17:27:04 +1000 Subject: [PATCH 0991/2269] powerpc/tm: Fix userspace r13 corruption [ Upstream commit cf13435b730a502e814c63c84d93db131e563f5f ] When we treclaim we store the userspace checkpointed r13 to a scratch SPR and then later save the scratch SPR to the user thread struct. Unfortunately, this doesn't work as accessing the user thread struct can take an SLB fault and the SLB fault handler will write the same scratch SPRG that now contains the userspace r13. To fix this, we store r13 to the kernel stack (which can't fault) before we access the user thread struct. Found by running P8 guest + powervm + disable_1tb_segments + TM. Seen as a random userspace segfault with r13 looking like a kernel address. Signed-off-by: Michael Neuling Reviewed-by: Breno Leitao Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/tm.S | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S index 1da12f521cb74..69eacb85ebca0 100644 --- a/arch/powerpc/kernel/tm.S +++ b/arch/powerpc/kernel/tm.S @@ -167,13 +167,20 @@ _GLOBAL(tm_reclaim) std r1, PACATMSCRATCH(r13) ld r1, PACAR1(r13) - /* Store the PPR in r11 and reset to decent value */ std r11, GPR11(r1) /* Temporary stash */ + /* + * Store r13 away so we can free up the scratch SPR for the SLB fault + * handler (needed once we start accessing the thread_struct). + */ + GET_SCRATCH0(r11) + std r11, GPR13(r1) + /* Reset MSR RI so we can take SLB faults again */ li r11, MSR_RI mtmsrd r11, 1 + /* Store the PPR in r11 and reset to decent value */ mfspr r11, SPRN_PPR HMT_MEDIUM @@ -202,7 +209,7 @@ _GLOBAL(tm_reclaim) ld r4, GPR7(r1) /* user r7 */ ld r5, GPR11(r1) /* user r11 */ ld r6, GPR12(r1) /* user r12 */ - GET_SCRATCH0(8) /* user r13 */ + ld r8, GPR13(r1) /* user r13 */ std r3, GPR1(r7) std r4, GPR7(r7) std r5, GPR11(r7) -- GitLab From 62c15d061f5fd540ff62aed9464f44f7a1363b6d Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Tue, 25 Sep 2018 19:36:47 +1000 Subject: [PATCH 0992/2269] powerpc/tm: Avoid possible userspace r1 corruption on reclaim [ Upstream commit 96dc89d526ef77604376f06220e3d2931a0bfd58 ] Current we store the userspace r1 to PACATMSCRATCH before finally saving it to the thread struct. In theory an exception could be taken here (like a machine check or SLB miss) that could write PACATMSCRATCH and hence corrupt the userspace r1. The SLB fault currently doesn't touch PACATMSCRATCH, but others do. We've never actually seen this happen but it's theoretically possible. Either way, the code is fragile as it is. This patch saves r1 to the kernel stack (which can't fault) before we turn MSR[RI] back on. PACATMSCRATCH is still used but only with MSR[RI] off. We then copy r1 from the kernel stack to the thread struct once we have MSR[RI] back on. Suggested-by: Breno Leitao Signed-off-by: Michael Neuling Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/tm.S | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S index 69eacb85ebca0..b735b727ed2b5 100644 --- a/arch/powerpc/kernel/tm.S +++ b/arch/powerpc/kernel/tm.S @@ -169,6 +169,13 @@ _GLOBAL(tm_reclaim) std r11, GPR11(r1) /* Temporary stash */ + /* + * Move the saved user r1 to the kernel stack in case PACATMSCRATCH is + * clobbered by an exception once we turn on MSR_RI below. + */ + ld r11, PACATMSCRATCH(r13) + std r11, GPR1(r1) + /* * Store r13 away so we can free up the scratch SPR for the SLB fault * handler (needed once we start accessing the thread_struct). @@ -205,7 +212,7 @@ _GLOBAL(tm_reclaim) SAVE_GPR(8, r7) /* user r8 */ SAVE_GPR(9, r7) /* user r9 */ SAVE_GPR(10, r7) /* user r10 */ - ld r3, PACATMSCRATCH(r13) /* user r1 */ + ld r3, GPR1(r1) /* user r1 */ ld r4, GPR7(r1) /* user r7 */ ld r5, GPR11(r1) /* user r11 */ ld r6, GPR12(r1) /* user r12 */ -- GitLab From f78ae314c6e39702e4112008c6987a377b4e7019 Mon Sep 17 00:00:00 2001 From: Arindam Nath Date: Tue, 18 Sep 2018 15:40:58 +0530 Subject: [PATCH 0993/2269] iommu/amd: Return devid as alias for ACPI HID devices [ Upstream commit 5ebb1bc2d63d90dd204169e21fd7a0b4bb8c776e ] ACPI HID devices do not actually have an alias for them in the IVRS. But dev_data->alias is still used for indexing into the IOMMU device table for devices being handled by the IOMMU. So for ACPI HID devices, we simply return the corresponding devid as an alias, as parsed from IVRS table. Signed-off-by: Arindam Nath Fixes: 2bf9a0a12749 ('iommu/amd: Add iommu support for ACPI HID devices') Signed-off-by: Joerg Roedel Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/amd_iommu.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 9137030423cdc..efa6cd2500b93 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -253,7 +253,13 @@ static u16 get_alias(struct device *dev) /* The callers make sure that get_device_id() does not fail here */ devid = get_device_id(dev); + + /* For ACPI HID devices, we simply return the devid as such */ + if (!dev_is_pci(dev)) + return devid; + ivrs_alias = amd_iommu_alias_table[devid]; + pci_for_each_dma_alias(pdev, __last_alias, &pci_alias); if (ivrs_alias == pci_alias) -- GitLab From e261faa2eccbf1c77eabfbacee3904108f41df66 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 24 Nov 2017 08:31:09 +0100 Subject: [PATCH 0994/2269] powerpc/lib/feature-fixups: use raw_patch_instruction() commit 8183d99f4a22c2abbc543847a588df3666ef0c0c upstream. feature fixups need to use patch_instruction() early in the boot, even before the code is relocated to its final address, requiring patch_instruction() to use PTRRELOC() in order to address data. But feature fixups applies on code before it is set to read only, even for modules. Therefore, feature fixups can use raw_patch_instruction() instead. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Reported-by: David Gounaris Tested-by: David Gounaris Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/include/asm/code-patching.h | 1 + arch/powerpc/lib/code-patching.c | 4 ++-- arch/powerpc/lib/feature-fixups.c | 8 ++++---- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/include/asm/code-patching.h b/arch/powerpc/include/asm/code-patching.h index 2c895e8d07f79..812535f40124e 100644 --- a/arch/powerpc/include/asm/code-patching.h +++ b/arch/powerpc/include/asm/code-patching.h @@ -31,6 +31,7 @@ unsigned int create_cond_branch(const unsigned int *addr, unsigned long target, int flags); int patch_branch(unsigned int *addr, unsigned long target, int flags); int patch_instruction(unsigned int *addr, unsigned int instr); +int raw_patch_instruction(unsigned int *addr, unsigned int instr); int instr_is_relative_branch(unsigned int instr); int instr_is_relative_link_branch(unsigned int instr); diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c index 882c750dc519e..130405158afa2 100644 --- a/arch/powerpc/lib/code-patching.c +++ b/arch/powerpc/lib/code-patching.c @@ -39,7 +39,7 @@ static int __patch_instruction(unsigned int *exec_addr, unsigned int instr, return 0; } -static int raw_patch_instruction(unsigned int *addr, unsigned int instr) +int raw_patch_instruction(unsigned int *addr, unsigned int instr) { return __patch_instruction(addr, instr, addr); } @@ -156,7 +156,7 @@ static int do_patch_instruction(unsigned int *addr, unsigned int instr) * when text_poke_area is not ready, but we still need * to allow patching. We just do the plain old patching */ - if (!this_cpu_read(*PTRRELOC(&text_poke_area))) + if (!this_cpu_read(text_poke_area)) return raw_patch_instruction(addr, instr); local_irq_save(flags); diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c index 762a899e85a45..e1bcdc32a851c 100644 --- a/arch/powerpc/lib/feature-fixups.c +++ b/arch/powerpc/lib/feature-fixups.c @@ -63,7 +63,7 @@ static int patch_alt_instruction(unsigned int *src, unsigned int *dest, } } - patch_instruction(dest, instr); + raw_patch_instruction(dest, instr); return 0; } @@ -92,7 +92,7 @@ static int patch_feature_section(unsigned long value, struct fixup_entry *fcur) } for (; dest < end; dest++) - patch_instruction(dest, PPC_INST_NOP); + raw_patch_instruction(dest, PPC_INST_NOP); return 0; } @@ -292,7 +292,7 @@ void do_lwsync_fixups(unsigned long value, void *fixup_start, void *fixup_end) for (; start < end; start++) { dest = (void *)start + *start; - patch_instruction(dest, PPC_INST_LWSYNC); + raw_patch_instruction(dest, PPC_INST_LWSYNC); } } @@ -310,7 +310,7 @@ static void do_final_fixups(void) length = (__end_interrupts - _stext) / sizeof(int); while (length--) { - patch_instruction(dest, *src); + raw_patch_instruction(dest, *src); src++; dest++; } -- GitLab From f473bf92169633719acc3b43617c1dd20107770a Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 18 Oct 2018 14:22:54 +0200 Subject: [PATCH 0995/2269] Revert "vfs: fix freeze protection in mnt_want_write_file() for overlayfs" This reverts commit 4f4374a9bd25b333971e6f2656b642d29e2efe7b which was commit a6795a585929d94ca3e931bc8518f8deb8bbe627 upstream. Turns out this causes problems and was to fix a patch only in the 4.19 and newer tree. Reported-by: Amir Goldstein Cc: Miklos Szeredi Cc: Christoph Hellwig Cc: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/namespace.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/fs/namespace.c b/fs/namespace.c index 3ee3ee5819bc1..9dc146e7b5e0a 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -446,10 +446,10 @@ int mnt_want_write_file_path(struct file *file) { int ret; - sb_start_write(file_inode(file)->i_sb); + sb_start_write(file->f_path.mnt->mnt_sb); ret = __mnt_want_write_file(file); if (ret) - sb_end_write(file_inode(file)->i_sb); + sb_end_write(file->f_path.mnt->mnt_sb); return ret; } @@ -540,8 +540,7 @@ void __mnt_drop_write_file(struct file *file) void mnt_drop_write_file_path(struct file *file) { - __mnt_drop_write_file(file); - sb_end_write(file_inode(file)->i_sb); + mnt_drop_write(file->f_path.mnt); } void mnt_drop_write_file(struct file *file) -- GitLab From 541500abfe9eb30a89ff0a6eb42a21521996d68d Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 12 Oct 2018 15:22:59 -0700 Subject: [PATCH 0996/2269] mremap: properly flush TLB before releasing the page commit eb66ae030829605d61fbef1909ce310e29f78821 upstream. Jann Horn points out that our TLB flushing was subtly wrong for the mremap() case. What makes mremap() special is that we don't follow the usual "add page to list of pages to be freed, then flush tlb, and then free pages". No, mremap() obviously just _moves_ the page from one page table location to another. That matters, because mremap() thus doesn't directly control the lifetime of the moved page with a freelist: instead, the lifetime of the page is controlled by the page table locking, that serializes access to the entry. As a result, we need to flush the TLB not just before releasing the lock for the source location (to avoid any concurrent accesses to the entry), but also before we release the destination page table lock (to avoid the TLB being flushed after somebody else has already done something to that page). This also makes the whole "need_flush" logic unnecessary, since we now always end up flushing the TLB for every valid entry. Reported-and-tested-by: Jann Horn Acked-by: Will Deacon Tested-by: Ingo Molnar Acked-by: Peter Zijlstra (Intel) Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- include/linux/huge_mm.h | 2 +- mm/huge_memory.c | 10 ++++------ mm/mremap.c | 30 +++++++++++++----------------- 3 files changed, 18 insertions(+), 24 deletions(-) diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 87067d23a48b8..bfa38da4c261f 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -42,7 +42,7 @@ extern int mincore_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, unsigned char *vec); extern bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr, unsigned long new_addr, unsigned long old_end, - pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush); + pmd_t *old_pmd, pmd_t *new_pmd); extern int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr, pgprot_t newprot, int prot_numa); diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 39c1fedcfdb45..adacfe66cf3d9 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1765,7 +1765,7 @@ static pmd_t move_soft_dirty_pmd(pmd_t pmd) bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr, unsigned long new_addr, unsigned long old_end, - pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush) + pmd_t *old_pmd, pmd_t *new_pmd) { spinlock_t *old_ptl, *new_ptl; pmd_t pmd; @@ -1796,7 +1796,7 @@ bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr, if (new_ptl != old_ptl) spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING); pmd = pmdp_huge_get_and_clear(mm, old_addr, old_pmd); - if (pmd_present(pmd) && pmd_dirty(pmd)) + if (pmd_present(pmd)) force_flush = true; VM_BUG_ON(!pmd_none(*new_pmd)); @@ -1807,12 +1807,10 @@ bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr, } pmd = move_soft_dirty_pmd(pmd); set_pmd_at(mm, new_addr, new_pmd, pmd); - if (new_ptl != old_ptl) - spin_unlock(new_ptl); if (force_flush) flush_tlb_range(vma, old_addr, old_addr + PMD_SIZE); - else - *need_flush = true; + if (new_ptl != old_ptl) + spin_unlock(new_ptl); spin_unlock(old_ptl); return true; } diff --git a/mm/mremap.c b/mm/mremap.c index 049470aa1e3ee..88ceeb4ef817e 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -115,7 +115,7 @@ static pte_t move_soft_dirty_pte(pte_t pte) static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd, unsigned long old_addr, unsigned long old_end, struct vm_area_struct *new_vma, pmd_t *new_pmd, - unsigned long new_addr, bool need_rmap_locks, bool *need_flush) + unsigned long new_addr, bool need_rmap_locks) { struct mm_struct *mm = vma->vm_mm; pte_t *old_pte, *new_pte, pte; @@ -163,15 +163,17 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd, pte = ptep_get_and_clear(mm, old_addr, old_pte); /* - * If we are remapping a dirty PTE, make sure + * If we are remapping a valid PTE, make sure * to flush TLB before we drop the PTL for the - * old PTE or we may race with page_mkclean(). + * PTE. * - * This check has to be done after we removed the - * old PTE from page tables or another thread may - * dirty it after the check and before the removal. + * NOTE! Both old and new PTL matter: the old one + * for racing with page_mkclean(), the new one to + * make sure the physical page stays valid until + * the TLB entry for the old mapping has been + * flushed. */ - if (pte_present(pte) && pte_dirty(pte)) + if (pte_present(pte)) force_flush = true; pte = move_pte(pte, new_vma->vm_page_prot, old_addr, new_addr); pte = move_soft_dirty_pte(pte); @@ -179,13 +181,11 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd, } arch_leave_lazy_mmu_mode(); + if (force_flush) + flush_tlb_range(vma, old_end - len, old_end); if (new_ptl != old_ptl) spin_unlock(new_ptl); pte_unmap(new_pte - 1); - if (force_flush) - flush_tlb_range(vma, old_end - len, old_end); - else - *need_flush = true; pte_unmap_unlock(old_pte - 1, old_ptl); if (need_rmap_locks) drop_rmap_locks(vma); @@ -200,7 +200,6 @@ unsigned long move_page_tables(struct vm_area_struct *vma, { unsigned long extent, next, old_end; pmd_t *old_pmd, *new_pmd; - bool need_flush = false; unsigned long mmun_start; /* For mmu_notifiers */ unsigned long mmun_end; /* For mmu_notifiers */ @@ -231,8 +230,7 @@ unsigned long move_page_tables(struct vm_area_struct *vma, if (need_rmap_locks) take_rmap_locks(vma); moved = move_huge_pmd(vma, old_addr, new_addr, - old_end, old_pmd, new_pmd, - &need_flush); + old_end, old_pmd, new_pmd); if (need_rmap_locks) drop_rmap_locks(vma); if (moved) @@ -250,10 +248,8 @@ unsigned long move_page_tables(struct vm_area_struct *vma, if (extent > LATENCY_LIMIT) extent = LATENCY_LIMIT; move_ptes(vma, old_pmd, old_addr, old_addr + extent, new_vma, - new_pmd, new_addr, need_rmap_locks, &need_flush); + new_pmd, new_addr, need_rmap_locks); } - if (need_flush) - flush_tlb_range(vma, old_end-len, old_addr); mmu_notifier_invalidate_range_end(vma->vm_mm, mmun_start, mmun_end); -- GitLab From b6d06b180ff818dff2b4d0499f269718d67673d0 Mon Sep 17 00:00:00 2001 From: Alexey Brodkin Date: Thu, 13 Sep 2018 23:24:28 +0300 Subject: [PATCH 0997/2269] ARC: build: Get rid of toolchain check commit 615f64458ad890ef94abc879a66d8b27236e733a upstream. This check is very naive: we simply test if GCC invoked without "-mcpu=XXX" has ARC700 define set. In that case we think that GCC was built with "--with-cpu=arc700" and has libgcc built for ARC700. Otherwise if ARC700 is not defined we think that everythng was built for ARCv2. But in reality our life is much more interesting. 1. Regardless of GCC configuration (i.e. what we pass in "--with-cpu" it may generate code for any ARC core). 2. libgcc might be built with explicitly specified "--mcpu=YYY" That's exactly what happens in case of multilibbed toolchains: - GCC is configured with default settings - All the libs built for many different CPU flavors I.e. that check gets in the way of usage of multilibbed toolchains. And even non-multilibbed toolchains are affected. OpenEmbedded also builds GCC without "--with-cpu" because each and every target component later is compiled with explicitly set "-mcpu=ZZZ". Acked-by: Rob Herring Signed-off-by: Alexey Brodkin Signed-off-by: Vineet Gupta Signed-off-by: Greg Kroah-Hartman --- arch/arc/Makefile | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/arch/arc/Makefile b/arch/arc/Makefile index 6c1b20dd76ad9..16ae8675116e3 100644 --- a/arch/arc/Makefile +++ b/arch/arc/Makefile @@ -20,20 +20,6 @@ cflags-y += -fno-common -pipe -fno-builtin -mmedium-calls -D__linux__ cflags-$(CONFIG_ISA_ARCOMPACT) += -mA7 cflags-$(CONFIG_ISA_ARCV2) += -mcpu=archs -is_700 = $(shell $(CC) -dM -E - < /dev/null | grep -q "ARC700" && echo 1 || echo 0) - -ifdef CONFIG_ISA_ARCOMPACT -ifeq ($(is_700), 0) - $(error Toolchain not configured for ARCompact builds) -endif -endif - -ifdef CONFIG_ISA_ARCV2 -ifeq ($(is_700), 1) - $(error Toolchain not configured for ARCv2 builds) -endif -endif - ifdef CONFIG_ARC_CURR_IN_REG # For a global register defintion, make sure it gets passed to every file # We had a customer reported bug where some code built in kernel was NOT using -- GitLab From 7b9f716d55500d72d9db5b6e47c72e1b4da30ddd Mon Sep 17 00:00:00 2001 From: Alexey Brodkin Date: Sun, 16 Sep 2018 23:47:57 +0300 Subject: [PATCH 0998/2269] ARC: build: Don't set CROSS_COMPILE in arch's Makefile commit 40660f1fcee8d524a60b5101538e42b1f39f106d upstream. There's not much sense in doing that because if user or his build-system didn't set CROSS_COMPILE we still may very well make incorrect guess. But as it turned out setting CROSS_COMPILE is not as harmless as one may think: with recent changes that implemented automatic discovery of __host__ gcc features unconditional setup of CROSS_COMPILE leads to failures on execution of "make xxx_defconfig" with absent cross-compiler, for more info see [1]. Set CROSS_COMPILE as well gets in the way if we want only to build .dtb's (again with absent cross-compiler which is not really needed for building .dtb's), see [2]. Note, we had to change LIBGCC assignment type from ":=" to "=" so that is is resolved on its usage, otherwise if it is resolved at declaration time with missing CROSS_COMPILE we're getting this error message from host GCC: | gcc: error: unrecognized command line option -mmedium-calls | gcc: error: unrecognized command line option -mno-sdata [1] http://lists.infradead.org/pipermail/linux-snps-arc/2018-September/004308.html [2] http://lists.infradead.org/pipermail/linux-snps-arc/2018-September/004320.html Signed-off-by: Alexey Brodkin Cc: Masahiro Yamada Cc: Rob Herring Signed-off-by: Vineet Gupta Signed-off-by: Greg Kroah-Hartman --- arch/arc/Makefile | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/arch/arc/Makefile b/arch/arc/Makefile index 16ae8675116e3..7c6c977820225 100644 --- a/arch/arc/Makefile +++ b/arch/arc/Makefile @@ -6,14 +6,6 @@ # published by the Free Software Foundation. # -ifeq ($(CROSS_COMPILE),) -ifndef CONFIG_CPU_BIG_ENDIAN -CROSS_COMPILE := arc-linux- -else -CROSS_COMPILE := arceb-linux- -endif -endif - KBUILD_DEFCONFIG := nsim_700_defconfig cflags-y += -fno-common -pipe -fno-builtin -mmedium-calls -D__linux__ @@ -73,7 +65,7 @@ ldflags-$(CONFIG_CPU_BIG_ENDIAN) += -EB # --build-id w/o "-marclinux". Default arc-elf32-ld is OK ldflags-$(upto_gcc44) += -marclinux -LIBGCC := $(shell $(CC) $(cflags-y) --print-libgcc-file-name) +LIBGCC = $(shell $(CC) $(cflags-y) --print-libgcc-file-name) # Modules with short calls might break for calls into builtin-kernel KBUILD_CFLAGS_MODULE += -mlong-calls -mno-millicode -- GitLab From e896a2adba930ec5c80b5d5b19e66932d29c4053 Mon Sep 17 00:00:00 2001 From: Natanael Copa Date: Thu, 18 Oct 2018 17:04:17 +0200 Subject: [PATCH 0999/2269] HID: quirks: fix support for Apple Magic Keyboards Commit b6cc0ba2cbf4 (HID: add support for Apple Magic Keyboards) backported support for the Magic Keyboard over Bluetooth, but did not add the BT_VENDOR_ID_APPLE to hid_have_special_driver[] so the hid-apple driver is never loaded and Fn key does not work at all. Adding BT_VENDOR_ID_APPLE to hid_have_special_driver[] is not needed after commit e04a0442d33b (HID: core: remove the absolute need of hid_have_special_driver[]), so 4.16 kernels and newer does not need it. Fixes: b6cc0ba2cbf4 (HID: add support for Apple Magic Keyboards) Bugzilla-id: https://bugzilla.kernel.org/show_bug.cgi?id=99881 Signed-off-by: Natanael Copa Acked-by: Benjamin Tissoires Signed-off-by: Greg Kroah-Hartman --- drivers/hid/hid-core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index 672b0be41d442..a306493e2e970 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -1964,6 +1964,9 @@ static const struct hid_device_id hid_have_special_driver[] = { { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_2011_ISO) }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_2011_JIS) }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_ANSI) }, + { HID_BLUETOOTH_DEVICE(BT_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_ANSI) }, + { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_NUMPAD_ANSI) }, + { HID_BLUETOOTH_DEVICE(BT_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_NUMPAD_ANSI) }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_FOUNTAIN_TP_ONLY) }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER1_TP_ONLY) }, #endif -- GitLab From 61377bc808612a9ff40fb4929a5a7bc662c531a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 17 Jul 2018 20:42:14 +0300 Subject: [PATCH 1000/2269] drm/i915: Nuke the LVDS lid notifier MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 05c72e77ccda89ff624108b1b59a0fc43843f343 upstream. We broke the LVDS notifier resume thing in (presumably) commit e2c8b8701e2d ("drm/i915: Use atomic helpers for suspend, v2.") as we no longer duplicate the current state in the LVDS notifier and thus we never resume it properly either. Instead of trying to fix it again let's just kill off the lid notifier entirely. None of the machines tested thus far have apparently needed it. Originally the lid notifier was added to work around cases where the VBIOS was clobbering some of the hardware state behind the driver's back, mostly on Thinkpads. We now have a few report of Thinkpads working just fine without the notifier. So maybe it was misdiagnosed originally, or something else has changed (ACPI video stuff perhaps?). If we do end up finding a machine where the VBIOS is still causing problems I would suggest that we first try setting various bits in the VBIOS scratch registers. There are several to choose from that may instruct the VBIOS to steer clear. With the notifier gone we'll also stop looking at the panel status in ->detect(). v2: Nuke enum modeset_restore (Rodrigo) Cc: stable@vger.kernel.org Cc: Wolfgang Draxinger Cc: Vito Caputo Cc: kitsunyan Cc: Joonas Saarinen Tested-by: Vito Caputo # Thinkapd X61s Tested-by: kitsunyan # ThinkPad X200 Tested-by: Joonas Saarinen # Fujitsu Siemens U9210 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105902 References: https://lists.freedesktop.org/archives/intel-gfx/2018-June/169315.html References: https://bugs.freedesktop.org/show_bug.cgi?id=21230 Fixes: e2c8b8701e2d ("drm/i915: Use atomic helpers for suspend, v2.") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20180717174216.22252-1-ville.syrjala@linux.intel.com Reviewed-by: Rodrigo Vivi Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/i915_drv.c | 10 --- drivers/gpu/drm/i915/i915_drv.h | 8 -- drivers/gpu/drm/i915/intel_lvds.c | 136 +----------------------------- 3 files changed, 2 insertions(+), 152 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 562220ec9d411..c75f4ccbcdefe 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -878,7 +878,6 @@ static int i915_driver_init_early(struct drm_i915_private *dev_priv, spin_lock_init(&dev_priv->mm.object_stat_lock); mutex_init(&dev_priv->sb_lock); - mutex_init(&dev_priv->modeset_restore_lock); mutex_init(&dev_priv->av_mutex); mutex_init(&dev_priv->wm.wm_mutex); mutex_init(&dev_priv->pps_mutex); @@ -1505,11 +1504,6 @@ static int i915_drm_suspend(struct drm_device *dev) pci_power_t opregion_target_state; int error; - /* ignore lid events during suspend */ - mutex_lock(&dev_priv->modeset_restore_lock); - dev_priv->modeset_restore = MODESET_SUSPENDED; - mutex_unlock(&dev_priv->modeset_restore_lock); - disable_rpm_wakeref_asserts(dev_priv); /* We do a lot of poking in a lot of registers, make sure they work @@ -1718,10 +1712,6 @@ static int i915_drm_resume(struct drm_device *dev) intel_fbdev_set_suspend(dev, FBINFO_STATE_RUNNING, false); - mutex_lock(&dev_priv->modeset_restore_lock); - dev_priv->modeset_restore = MODESET_DONE; - mutex_unlock(&dev_priv->modeset_restore_lock); - intel_opregion_notify_adapter(dev_priv, PCI_D0); intel_autoenable_gt_powersave(dev_priv); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 51411894d2cd5..1d19bb53dfb02 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1614,12 +1614,6 @@ struct i915_gpu_error { unsigned long test_irq_rings; }; -enum modeset_restore { - MODESET_ON_LID_OPEN, - MODESET_DONE, - MODESET_SUSPENDED, -}; - #define DP_AUX_A 0x40 #define DP_AUX_B 0x10 #define DP_AUX_C 0x20 @@ -2296,8 +2290,6 @@ struct drm_i915_private { unsigned long quirks; - enum modeset_restore modeset_restore; - struct mutex modeset_restore_lock; struct drm_atomic_state *modeset_restore_state; struct drm_modeset_acquire_ctx reset_ctx; diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c index dae4e22a2c3f8..fe67e458b0031 100644 --- a/drivers/gpu/drm/i915/intel_lvds.c +++ b/drivers/gpu/drm/i915/intel_lvds.c @@ -44,8 +44,6 @@ /* Private structure for the integrated LVDS support */ struct intel_lvds_connector { struct intel_connector base; - - struct notifier_block lid_notifier; }; struct intel_lvds_pps { @@ -440,26 +438,9 @@ static bool intel_lvds_compute_config(struct intel_encoder *intel_encoder, return true; } -/** - * Detect the LVDS connection. - * - * Since LVDS doesn't have hotlug, we use the lid as a proxy. Open means - * connected and closed means disconnected. We also send hotplug events as - * needed, using lid status notification from the input layer. - */ static enum drm_connector_status intel_lvds_detect(struct drm_connector *connector, bool force) { - struct drm_i915_private *dev_priv = to_i915(connector->dev); - enum drm_connector_status status; - - DRM_DEBUG_KMS("[CONNECTOR:%d:%s]\n", - connector->base.id, connector->name); - - status = intel_panel_detect(dev_priv); - if (status != connector_status_unknown) - return status; - return connector_status_connected; } @@ -484,117 +465,6 @@ static int intel_lvds_get_modes(struct drm_connector *connector) return 1; } -static int intel_no_modeset_on_lid_dmi_callback(const struct dmi_system_id *id) -{ - DRM_INFO("Skipping forced modeset for %s\n", id->ident); - return 1; -} - -/* The GPU hangs up on these systems if modeset is performed on LID open */ -static const struct dmi_system_id intel_no_modeset_on_lid[] = { - { - .callback = intel_no_modeset_on_lid_dmi_callback, - .ident = "Toshiba Tecra A11", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "TOSHIBA"), - DMI_MATCH(DMI_PRODUCT_NAME, "TECRA A11"), - }, - }, - - { } /* terminating entry */ -}; - -/* - * Lid events. Note the use of 'modeset': - * - we set it to MODESET_ON_LID_OPEN on lid close, - * and set it to MODESET_DONE on open - * - we use it as a "only once" bit (ie we ignore - * duplicate events where it was already properly set) - * - the suspend/resume paths will set it to - * MODESET_SUSPENDED and ignore the lid open event, - * because they restore the mode ("lid open"). - */ -static int intel_lid_notify(struct notifier_block *nb, unsigned long val, - void *unused) -{ - struct intel_lvds_connector *lvds_connector = - container_of(nb, struct intel_lvds_connector, lid_notifier); - struct drm_connector *connector = &lvds_connector->base.base; - struct drm_device *dev = connector->dev; - struct drm_i915_private *dev_priv = to_i915(dev); - - if (dev->switch_power_state != DRM_SWITCH_POWER_ON) - return NOTIFY_OK; - - mutex_lock(&dev_priv->modeset_restore_lock); - if (dev_priv->modeset_restore == MODESET_SUSPENDED) - goto exit; - /* - * check and update the status of LVDS connector after receiving - * the LID nofication event. - */ - connector->status = connector->funcs->detect(connector, false); - - /* Don't force modeset on machines where it causes a GPU lockup */ - if (dmi_check_system(intel_no_modeset_on_lid)) - goto exit; - if (!acpi_lid_open()) { - /* do modeset on next lid open event */ - dev_priv->modeset_restore = MODESET_ON_LID_OPEN; - goto exit; - } - - if (dev_priv->modeset_restore == MODESET_DONE) - goto exit; - - /* - * Some old platform's BIOS love to wreak havoc while the lid is closed. - * We try to detect this here and undo any damage. The split for PCH - * platforms is rather conservative and a bit arbitrary expect that on - * those platforms VGA disabling requires actual legacy VGA I/O access, - * and as part of the cleanup in the hw state restore we also redisable - * the vga plane. - */ - if (!HAS_PCH_SPLIT(dev_priv)) - intel_display_resume(dev); - - dev_priv->modeset_restore = MODESET_DONE; - -exit: - mutex_unlock(&dev_priv->modeset_restore_lock); - return NOTIFY_OK; -} - -static int -intel_lvds_connector_register(struct drm_connector *connector) -{ - struct intel_lvds_connector *lvds = to_lvds_connector(connector); - int ret; - - ret = intel_connector_register(connector); - if (ret) - return ret; - - lvds->lid_notifier.notifier_call = intel_lid_notify; - if (acpi_lid_notifier_register(&lvds->lid_notifier)) { - DRM_DEBUG_KMS("lid notifier registration failed\n"); - lvds->lid_notifier.notifier_call = NULL; - } - - return 0; -} - -static void -intel_lvds_connector_unregister(struct drm_connector *connector) -{ - struct intel_lvds_connector *lvds = to_lvds_connector(connector); - - if (lvds->lid_notifier.notifier_call) - acpi_lid_notifier_unregister(&lvds->lid_notifier); - - intel_connector_unregister(connector); -} - /** * intel_lvds_destroy - unregister and free LVDS structures * @connector: connector to free @@ -627,8 +497,8 @@ static const struct drm_connector_funcs intel_lvds_connector_funcs = { .fill_modes = drm_helper_probe_single_connector_modes, .atomic_get_property = intel_digital_connector_atomic_get_property, .atomic_set_property = intel_digital_connector_atomic_set_property, - .late_register = intel_lvds_connector_register, - .early_unregister = intel_lvds_connector_unregister, + .late_register = intel_connector_register, + .early_unregister = intel_connector_unregister, .destroy = intel_lvds_destroy, .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, .atomic_duplicate_state = intel_digital_connector_duplicate_state, @@ -1091,8 +961,6 @@ void intel_lvds_init(struct drm_i915_private *dev_priv) * 2) check for VBT data * 3) check to see if LVDS is already on * if none of the above, no panel - * 4) make sure lid is open - * if closed, act like it's not there for now */ /* -- GitLab From 8e6173c1246786d344548db3d10745ae250adb41 Mon Sep 17 00:00:00 2001 From: Gilad Ben-Yossef Date: Sun, 7 Jan 2018 12:14:24 +0000 Subject: [PATCH 1001/2269] staging: ccree: check DMA pool buf !NULL before free commit 2f7caf6b02145bd9cd9d0b56204f51a5fefe7790 upstream. If we ran out of DMA pool buffers, we get into the unmap code path with a NULL before. Deal with this by checking the virtual mapping is not NULL. Cc: stable@vger.kernel.org Signed-off-by: Gilad Ben-Yossef Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- drivers/staging/ccree/ssi_buffer_mgr.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/staging/ccree/ssi_buffer_mgr.c b/drivers/staging/ccree/ssi_buffer_mgr.c index 63936091d524e..4ba6e9c422c46 100644 --- a/drivers/staging/ccree/ssi_buffer_mgr.c +++ b/drivers/staging/ccree/ssi_buffer_mgr.c @@ -492,7 +492,8 @@ void ssi_buffer_mgr_unmap_blkcipher_request( DMA_TO_DEVICE); } /* Release pool */ - if (req_ctx->dma_buf_type == SSI_DMA_BUF_MLLI) { + if (req_ctx->dma_buf_type == SSI_DMA_BUF_MLLI && + req_ctx->mlli_params.mlli_virt_addr) { dma_pool_free(req_ctx->mlli_params.curr_pool, req_ctx->mlli_params.mlli_virt_addr, req_ctx->mlli_params.mlli_dma_addr); -- GitLab From 2a797fd8f8aeca8d120209d137b241d60bb48063 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 26 Jul 2018 16:37:15 -0700 Subject: [PATCH 1002/2269] mm: disallow mappings that conflict for devm_memremap_pages() commit 15d36fecd0bdc7510b70a0e5ec6671140b3fce0c upstream. When pmem namespaces created are smaller than section size, this can cause an issue during removal and gpf was observed: general protection fault: 0000 1 SMP PTI CPU: 36 PID: 3941 Comm: ndctl Tainted: G W 4.14.28-1.el7uek.x86_64 #2 task: ffff88acda150000 task.stack: ffffc900233a4000 RIP: 0010:__put_page+0x56/0x79 Call Trace: devm_memremap_pages_release+0x155/0x23a release_nodes+0x21e/0x260 devres_release_all+0x3c/0x48 device_release_driver_internal+0x15c/0x207 device_release_driver+0x12/0x14 unbind_store+0xba/0xd8 drv_attr_store+0x27/0x31 sysfs_kf_write+0x3f/0x46 kernfs_fop_write+0x10f/0x18b __vfs_write+0x3a/0x16d vfs_write+0xb2/0x1a1 SyS_write+0x55/0xb9 do_syscall_64+0x79/0x1ae entry_SYSCALL_64_after_hwframe+0x3d/0x0 Add code to check whether we have a mapping already in the same section and prevent additional mappings from being created if that is the case. Link: http://lkml.kernel.org/r/152909478401.50143.312364396244072931.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Dave Jiang Cc: Dan Williams Cc: Robert Elliott Cc: Jeff Moyer Cc: Matthew Wilcox Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- kernel/memremap.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/kernel/memremap.c b/kernel/memremap.c index 2b136d4988f78..790ddf3bce19f 100644 --- a/kernel/memremap.c +++ b/kernel/memremap.c @@ -355,10 +355,27 @@ void *devm_memremap_pages(struct device *dev, struct resource *res, struct dev_pagemap *pgmap; struct page_map *page_map; int error, nid, is_ram, i = 0; + struct dev_pagemap *conflict_pgmap; align_start = res->start & ~(SECTION_SIZE - 1); align_size = ALIGN(res->start + resource_size(res), SECTION_SIZE) - align_start; + align_end = align_start + align_size - 1; + + conflict_pgmap = get_dev_pagemap(PHYS_PFN(align_start), NULL); + if (conflict_pgmap) { + dev_WARN(dev, "Conflicting mapping in same section\n"); + put_dev_pagemap(conflict_pgmap); + return ERR_PTR(-ENOMEM); + } + + conflict_pgmap = get_dev_pagemap(PHYS_PFN(align_end), NULL); + if (conflict_pgmap) { + dev_WARN(dev, "Conflicting mapping in same section\n"); + put_dev_pagemap(conflict_pgmap); + return ERR_PTR(-ENOMEM); + } + is_ram = region_intersects(align_start, align_size, IORESOURCE_SYSTEM_RAM, IORES_DESC_NONE); @@ -396,7 +413,6 @@ void *devm_memremap_pages(struct device *dev, struct resource *res, mutex_lock(&pgmap_lock); error = 0; - align_end = align_start + align_size - 1; foreach_order_pgoff(res, order, pgoff) { struct dev_pagemap *dup; -- GitLab From faf578e3593e69613aed559dc96c3594f27324f3 Mon Sep 17 00:00:00 2001 From: Clint Taylor Date: Tue, 10 Jul 2018 13:02:05 -0700 Subject: [PATCH 1003/2269] drm/i915/glk: Add Quirk for GLK NUC HDMI port issues. commit 0ca9488193e61ec5f31a631d8147f74525629e8a upstream. On GLK NUC platforms the HDMI retiming buffer needs additional disabled time to correctly sync to a faster incoming signal. When measured on a scope the highspeed lines of the HDMI clock turn off for ~400uS during a normal resolution change. The HDMI retimer on the GLK NUC appears to require at least a full frame of quiet time before a new faster clock can be correctly sync'd. Wait 100ms due to msleep inaccuracies while waiting for a completed frame. Add a quirk to the driver for GLK boards that use ITE66317 HDMI retimers. V2: Add more devices to the quirk list V3: Delay increased to 100ms, check to confirm crtc type is HDMI. V4: crtc type check extended to include _DDI and whitespace fixes v5: Fix white spaces, remove the macro for delay. Revert the crtc type check introduced in v4. Cc: Imre Deak Cc: # v4.14+ Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105887 Signed-off-by: Clint Taylor Tested-by: Daniel Scheller Signed-off-by: Radhakrishna Sripada Signed-off-by: Imre Deak Reviewed-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20180710200205.1478-1-radhakrishna.sripada@intel.com (cherry picked from commit 90c3e2198777aaa355b6994a31a79c636c8d4306) Signed-off-by: Rodrigo Vivi Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/intel_ddi.c | 13 +++++++++++-- drivers/gpu/drm/i915/intel_display.c | 21 ++++++++++++++++++++- drivers/gpu/drm/i915/intel_drv.h | 3 +-- 4 files changed, 33 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 1d19bb53dfb02..41f51509c9e42 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1183,6 +1183,7 @@ enum intel_sbi_destination { #define QUIRK_BACKLIGHT_PRESENT (1<<3) #define QUIRK_PIN_SWIZZLED_PAGES (1<<5) #define QUIRK_INCREASE_T12_DELAY (1<<6) +#define QUIRK_INCREASE_DDI_DISABLED_TIME (1<<7) struct intel_fbdev; struct intel_fbc_work; diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 3a4a581345c43..77085b9bcb306 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -1526,15 +1526,24 @@ void intel_ddi_enable_transcoder_func(const struct intel_crtc_state *crtc_state) I915_WRITE(TRANS_DDI_FUNC_CTL(cpu_transcoder), temp); } -void intel_ddi_disable_transcoder_func(struct drm_i915_private *dev_priv, - enum transcoder cpu_transcoder) +void intel_ddi_disable_transcoder_func(const struct intel_crtc_state *crtc_state) { + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; i915_reg_t reg = TRANS_DDI_FUNC_CTL(cpu_transcoder); uint32_t val = I915_READ(reg); val &= ~(TRANS_DDI_FUNC_ENABLE | TRANS_DDI_PORT_MASK | TRANS_DDI_DP_VC_PAYLOAD_ALLOC); val |= TRANS_DDI_PORT_NONE; I915_WRITE(reg, val); + + if (dev_priv->quirks & QUIRK_INCREASE_DDI_DISABLED_TIME && + intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) { + DRM_DEBUG_KMS("Quirk Increase DDI disabled time\n"); + /* Quirk time at 100ms for reliable operation */ + msleep(100); + } } bool intel_ddi_connector_get_hw_state(struct intel_connector *intel_connector) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index cf648c526e123..2006ab44fbf9b 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -5653,7 +5653,7 @@ static void haswell_crtc_disable(struct intel_crtc_state *old_crtc_state, intel_ddi_set_vc_payload_alloc(intel_crtc->config, false); if (!transcoder_is_dsi(cpu_transcoder)) - intel_ddi_disable_transcoder_func(dev_priv, cpu_transcoder); + intel_ddi_disable_transcoder_func(old_crtc_state); if (INTEL_GEN(dev_priv) >= 9) skylake_scaler_disable(intel_crtc); @@ -14286,6 +14286,18 @@ static void quirk_increase_t12_delay(struct drm_device *dev) DRM_INFO("Applying T12 delay quirk\n"); } +/* + * GeminiLake NUC HDMI outputs require additional off time + * this allows the onboard retimer to correctly sync to signal + */ +static void quirk_increase_ddi_disabled_time(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = to_i915(dev); + + dev_priv->quirks |= QUIRK_INCREASE_DDI_DISABLED_TIME; + DRM_INFO("Applying Increase DDI Disabled quirk\n"); +} + struct intel_quirk { int device; int subsystem_vendor; @@ -14372,6 +14384,13 @@ static struct intel_quirk intel_quirks[] = { /* Toshiba Satellite P50-C-18C */ { 0x191B, 0x1179, 0xF840, quirk_increase_t12_delay }, + + /* GeminiLake NUC */ + { 0x3185, 0x8086, 0x2072, quirk_increase_ddi_disabled_time }, + { 0x3184, 0x8086, 0x2072, quirk_increase_ddi_disabled_time }, + /* ASRock ITX*/ + { 0x3185, 0x1849, 0x2212, quirk_increase_ddi_disabled_time }, + { 0x3184, 0x1849, 0x2212, quirk_increase_ddi_disabled_time }, }; static void intel_init_quirks(struct drm_device *dev) diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 589905aab1851..3adb9c3b412e7 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1254,8 +1254,7 @@ void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port); enum port intel_ddi_get_encoder_port(struct intel_encoder *intel_encoder); bool intel_ddi_get_hw_state(struct intel_encoder *encoder, enum pipe *pipe); void intel_ddi_enable_transcoder_func(const struct intel_crtc_state *crtc_state); -void intel_ddi_disable_transcoder_func(struct drm_i915_private *dev_priv, - enum transcoder cpu_transcoder); +void intel_ddi_disable_transcoder_func(const struct intel_crtc_state *crtc_state); void intel_ddi_enable_pipe_clock(const struct intel_crtc_state *crtc_state); void intel_ddi_disable_pipe_clock(const struct intel_crtc_state *crtc_state); struct intel_encoder * -- GitLab From 4d5af836674f58daf5f7bf6b5536f58406d400d7 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Thu, 28 Jun 2018 22:45:38 +0200 Subject: [PATCH 1004/2269] i2c: rcar: handle RXDMA HW behaviour on Gen3 commit 2b16fd63059ab9a46d473620749672dc342e1d21 upstream. On Gen3, we can only do RXDMA once per transfer reliably. For that, we must reset the device, then we can have RXDMA once. This patch implements this. When there is no reset controller or the reset fails, RXDMA will be blocked completely. Otherwise, it will be disabled after the first RXDMA transfer. Based on a commit from the BSP by Hiromitsu Yamasaki, yet completely refactored to handle multiple read messages within one transfer. Signed-off-by: Wolfram Sang Reviewed-by: Geert Uytterhoeven Signed-off-by: Wolfram Sang Cc: stable@kernel.org Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/busses/i2c-rcar.c | 54 +++++++++++++++++++++++++++++++++-- 1 file changed, 51 insertions(+), 3 deletions(-) diff --git a/drivers/i2c/busses/i2c-rcar.c b/drivers/i2c/busses/i2c-rcar.c index 15d764afec3b2..7f044df1ea070 100644 --- a/drivers/i2c/busses/i2c-rcar.c +++ b/drivers/i2c/busses/i2c-rcar.c @@ -32,6 +32,7 @@ #include #include #include +#include #include /* register offsets */ @@ -111,8 +112,9 @@ #define ID_ARBLOST (1 << 3) #define ID_NACK (1 << 4) /* persistent flags */ +#define ID_P_NO_RXDMA (1 << 30) /* HW forbids RXDMA sometimes */ #define ID_P_PM_BLOCKED (1 << 31) -#define ID_P_MASK ID_P_PM_BLOCKED +#define ID_P_MASK (ID_P_PM_BLOCKED | ID_P_NO_RXDMA) enum rcar_i2c_type { I2C_RCAR_GEN1, @@ -140,6 +142,8 @@ struct rcar_i2c_priv { struct dma_chan *dma_rx; struct scatterlist sg; enum dma_data_direction dma_direction; + + struct reset_control *rstc; }; #define rcar_i2c_priv_to_dev(p) ((p)->adap.dev.parent) @@ -321,6 +325,11 @@ static void rcar_i2c_dma_unmap(struct rcar_i2c_priv *priv) dma_unmap_single(chan->device->dev, sg_dma_address(&priv->sg), sg_dma_len(&priv->sg), priv->dma_direction); + /* Gen3 can only do one RXDMA per transfer and we just completed it */ + if (priv->devtype == I2C_RCAR_GEN3 && + priv->dma_direction == DMA_FROM_DEVICE) + priv->flags |= ID_P_NO_RXDMA; + priv->dma_direction = DMA_NONE; } @@ -358,8 +367,9 @@ static void rcar_i2c_dma(struct rcar_i2c_priv *priv) unsigned char *buf; int len; - /* Do not use DMA if it's not available or for messages < 8 bytes */ - if (IS_ERR(chan) || msg->len < 8) + /* Do various checks to see if DMA is feasible at all */ + if (IS_ERR(chan) || msg->len < 8 || + (read && priv->flags & ID_P_NO_RXDMA)) return; if (read) { @@ -688,6 +698,25 @@ static void rcar_i2c_release_dma(struct rcar_i2c_priv *priv) } } +/* I2C is a special case, we need to poll the status of a reset */ +static int rcar_i2c_do_reset(struct rcar_i2c_priv *priv) +{ + int i, ret; + + ret = reset_control_reset(priv->rstc); + if (ret) + return ret; + + for (i = 0; i < LOOP_TIMEOUT; i++) { + ret = reset_control_status(priv->rstc); + if (ret == 0) + return 0; + udelay(1); + } + + return -ETIMEDOUT; +} + static int rcar_i2c_master_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int num) @@ -699,6 +728,16 @@ static int rcar_i2c_master_xfer(struct i2c_adapter *adap, pm_runtime_get_sync(dev); + /* Gen3 needs a reset before allowing RXDMA once */ + if (priv->devtype == I2C_RCAR_GEN3) { + priv->flags |= ID_P_NO_RXDMA; + if (!IS_ERR(priv->rstc)) { + ret = rcar_i2c_do_reset(priv); + if (ret == 0) + priv->flags &= ~ID_P_NO_RXDMA; + } + } + rcar_i2c_init(priv); ret = rcar_i2c_bus_barrier(priv); @@ -868,6 +907,15 @@ static int rcar_i2c_probe(struct platform_device *pdev) if (ret < 0) goto out_pm_put; + if (priv->devtype == I2C_RCAR_GEN3) { + priv->rstc = devm_reset_control_get_exclusive(&pdev->dev, NULL); + if (!IS_ERR(priv->rstc)) { + ret = reset_control_status(priv->rstc); + if (ret < 0) + priv->rstc = ERR_PTR(-ENOTSUPP); + } + } + /* Stay always active when multi-master to keep arbitration working */ if (of_property_read_bool(dev->of_node, "multi-master")) priv->flags |= ID_P_PM_BLOCKED; -- GitLab From 6edd85a7870a1ed85598caf87a19d72a919c8b3d Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Thu, 20 Sep 2018 12:59:14 -0700 Subject: [PATCH 1005/2269] IB/hfi1: Fix destroy_qp hang after a link down commit b4a4957d3d1c328b733fce783b7264996f866ad2 upstream. rvt_destroy_qp() cannot complete until all in process packets have been released from the underlying hardware. If a link down event occurs, an application can hang with a kernel stack similar to: cat /proc//stack quiesce_qp+0x178/0x250 [hfi1] rvt_reset_qp+0x23d/0x400 [rdmavt] rvt_destroy_qp+0x69/0x210 [rdmavt] ib_destroy_qp+0xba/0x1c0 [ib_core] nvme_rdma_destroy_queue_ib+0x46/0x80 [nvme_rdma] nvme_rdma_free_queue+0x3c/0xd0 [nvme_rdma] nvme_rdma_destroy_io_queues+0x88/0xd0 [nvme_rdma] nvme_rdma_error_recovery_work+0x52/0xf0 [nvme_rdma] process_one_work+0x17a/0x440 worker_thread+0x126/0x3c0 kthread+0xcf/0xe0 ret_from_fork+0x58/0x90 0xffffffffffffffff quiesce_qp() waits until all outstanding packets have been freed. This wait should be momentary. During a link down event, the cleanup handling does not ensure that all packets caught by the link down are flushed properly. This is caused by the fact that the freeze path and the link down event is handled the same. This is not correct. The freeze path waits until the HFI is unfrozen and then restarts PIO. A link down is not a freeze event. The link down path cannot restart the PIO until link is restored. If the PIO path is restarted before the link comes up, the application (QP) using the PIO path will hang (until link is restored). Fix by separating the linkdown path from the freeze path and use the link down path for link down events. Close a race condition sc_disable() by acquiring both the progress and release locks. Close a race condition in sc_stop() by moving the setting of the flag bits under the alloc lock. Cc: # 4.9.x+ Fixes: 7724105686e7 ("IB/hfi1: add driver files") Reviewed-by: Mike Marciniszyn Signed-off-by: Michael J. Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/hfi1/chip.c | 7 +++++- drivers/infiniband/hw/hfi1/pio.c | 42 +++++++++++++++++++++++++------ drivers/infiniband/hw/hfi1/pio.h | 2 ++ 3 files changed, 42 insertions(+), 9 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 33cf1734c4e5d..f9faacce92509 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -6722,6 +6722,7 @@ void start_freeze_handling(struct hfi1_pportdata *ppd, int flags) struct hfi1_devdata *dd = ppd->dd; struct send_context *sc; int i; + int sc_flags; if (flags & FREEZE_SELF) write_csr(dd, CCE_CTRL, CCE_CTRL_SPC_FREEZE_SMASK); @@ -6732,11 +6733,13 @@ void start_freeze_handling(struct hfi1_pportdata *ppd, int flags) /* notify all SDMA engines that they are going into a freeze */ sdma_freeze_notify(dd, !!(flags & FREEZE_LINK_DOWN)); + sc_flags = SCF_FROZEN | SCF_HALTED | (flags & FREEZE_LINK_DOWN ? + SCF_LINK_DOWN : 0); /* do halt pre-handling on all enabled send contexts */ for (i = 0; i < dd->num_send_contexts; i++) { sc = dd->send_contexts[i].sc; if (sc && (sc->flags & SCF_ENABLED)) - sc_stop(sc, SCF_FROZEN | SCF_HALTED); + sc_stop(sc, sc_flags); } /* Send context are frozen. Notify user space */ @@ -10646,6 +10649,8 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state) add_rcvctrl(dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK); handle_linkup_change(dd, 1); + pio_kernel_linkup(dd); + ppd->host_link_state = HLS_UP_INIT; break; case HLS_UP_ARMED: diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c index 19a8e6052820f..07bf282fd8aaf 100644 --- a/drivers/infiniband/hw/hfi1/pio.c +++ b/drivers/infiniband/hw/hfi1/pio.c @@ -942,20 +942,18 @@ void sc_free(struct send_context *sc) void sc_disable(struct send_context *sc) { u64 reg; - unsigned long flags; struct pio_buf *pbuf; if (!sc) return; /* do all steps, even if already disabled */ - spin_lock_irqsave(&sc->alloc_lock, flags); + spin_lock_irq(&sc->alloc_lock); reg = read_kctxt_csr(sc->dd, sc->hw_context, SC(CTRL)); reg &= ~SC(CTRL_CTXT_ENABLE_SMASK); sc->flags &= ~SCF_ENABLED; sc_wait_for_packet_egress(sc, 1); write_kctxt_csr(sc->dd, sc->hw_context, SC(CTRL), reg); - spin_unlock_irqrestore(&sc->alloc_lock, flags); /* * Flush any waiters. Once the context is disabled, @@ -965,7 +963,7 @@ void sc_disable(struct send_context *sc) * proceed with the flush. */ udelay(1); - spin_lock_irqsave(&sc->release_lock, flags); + spin_lock(&sc->release_lock); if (sc->sr) { /* this context has a shadow ring */ while (sc->sr_tail != sc->sr_head) { pbuf = &sc->sr[sc->sr_tail].pbuf; @@ -976,7 +974,8 @@ void sc_disable(struct send_context *sc) sc->sr_tail = 0; } } - spin_unlock_irqrestore(&sc->release_lock, flags); + spin_unlock(&sc->release_lock); + spin_unlock_irq(&sc->alloc_lock); } /* return SendEgressCtxtStatus.PacketOccupancy */ @@ -1199,11 +1198,39 @@ void pio_kernel_unfreeze(struct hfi1_devdata *dd) sc = dd->send_contexts[i].sc; if (!sc || !(sc->flags & SCF_FROZEN) || sc->type == SC_USER) continue; + if (sc->flags & SCF_LINK_DOWN) + continue; sc_enable(sc); /* will clear the sc frozen flag */ } } +/** + * pio_kernel_linkup() - Re-enable send contexts after linkup event + * @dd: valid devive data + * + * When the link goes down, the freeze path is taken. However, a link down + * event is different from a freeze because if the send context is re-enabled + * whowever is sending data will start sending data again, which will hang + * any QP that is sending data. + * + * The freeze path now looks at the type of event that occurs and takes this + * path for link down event. + */ +void pio_kernel_linkup(struct hfi1_devdata *dd) +{ + struct send_context *sc; + int i; + + for (i = 0; i < dd->num_send_contexts; i++) { + sc = dd->send_contexts[i].sc; + if (!sc || !(sc->flags & SCF_LINK_DOWN) || sc->type == SC_USER) + continue; + + sc_enable(sc); /* will clear the sc link down flag */ + } +} + /* * Wait for the SendPioInitCtxt.PioInitInProgress bit to clear. * Returns: @@ -1403,11 +1430,10 @@ void sc_stop(struct send_context *sc, int flag) { unsigned long flags; - /* mark the context */ - sc->flags |= flag; - /* stop buffer allocations */ spin_lock_irqsave(&sc->alloc_lock, flags); + /* mark the context */ + sc->flags |= flag; sc->flags &= ~SCF_ENABLED; spin_unlock_irqrestore(&sc->alloc_lock, flags); wake_up(&sc->halt_wait); diff --git a/drivers/infiniband/hw/hfi1/pio.h b/drivers/infiniband/hw/hfi1/pio.h index 99ca5edb0b435..c7c4e6e5d3174 100644 --- a/drivers/infiniband/hw/hfi1/pio.h +++ b/drivers/infiniband/hw/hfi1/pio.h @@ -145,6 +145,7 @@ struct send_context { #define SCF_IN_FREE 0x02 #define SCF_HALTED 0x04 #define SCF_FROZEN 0x08 +#define SCF_LINK_DOWN 0x10 struct send_context_info { struct send_context *sc; /* allocated working context */ @@ -312,6 +313,7 @@ void set_pio_integrity(struct send_context *sc); void pio_reset_all(struct hfi1_devdata *dd); void pio_freeze(struct hfi1_devdata *dd); void pio_kernel_unfreeze(struct hfi1_devdata *dd); +void pio_kernel_linkup(struct hfi1_devdata *dd); /* global PIO send control operations */ #define PSC_GLOBAL_ENABLE 0 -- GitLab From e7405910ca5553eae8744af4e5c03e64ee048cb1 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 20 Oct 2018 09:48:54 +0200 Subject: [PATCH 1006/2269] Linux 4.14.78 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 16d1a18496fb1..89574ee68d6b4 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 77 +SUBLEVEL = 78 EXTRAVERSION = NAME = Petit Gorille -- GitLab From 561e1f842f30222389e0910c43610f11e3475406 Mon Sep 17 00:00:00 2001 From: Alistair Strachan Date: Mon, 15 Oct 2018 14:53:36 -0700 Subject: [PATCH 1007/2269] Revert "fscrypt: add Speck128/256 support" This reverts commit e81950ade16d103cfebc396ef90cd00169561eb8. Resolves conflicts with a later CL, 755a8a8a3bfc "fscrypt: log the crypto algorithm implementations". Also leaves the include/uapi/linux/fs.h constants in place to prevent future accidental re-use. Bug: 116008047 Change-Id: I2d64d8d3e384400b7bdfc06a353c3844d4ebb377 Signed-off-by: Alistair Strachan --- Documentation/filesystems/fscrypt.rst | 626 -------------------------- fs/crypto/fscrypt_private.h | 4 - fs/crypto/keyinfo.c | 10 - 3 files changed, 640 deletions(-) delete mode 100644 Documentation/filesystems/fscrypt.rst diff --git a/Documentation/filesystems/fscrypt.rst b/Documentation/filesystems/fscrypt.rst deleted file mode 100644 index 48b424de85bbc..0000000000000 --- a/Documentation/filesystems/fscrypt.rst +++ /dev/null @@ -1,626 +0,0 @@ -===================================== -Filesystem-level encryption (fscrypt) -===================================== - -Introduction -============ - -fscrypt is a library which filesystems can hook into to support -transparent encryption of files and directories. - -Note: "fscrypt" in this document refers to the kernel-level portion, -implemented in ``fs/crypto/``, as opposed to the userspace tool -`fscrypt `_. This document only -covers the kernel-level portion. For command-line examples of how to -use encryption, see the documentation for the userspace tool `fscrypt -`_. Also, it is recommended to use -the fscrypt userspace tool, or other existing userspace tools such as -`fscryptctl `_ or `Android's key -management system -`_, over -using the kernel's API directly. Using existing tools reduces the -chance of introducing your own security bugs. (Nevertheless, for -completeness this documentation covers the kernel's API anyway.) - -Unlike dm-crypt, fscrypt operates at the filesystem level rather than -at the block device level. This allows it to encrypt different files -with different keys and to have unencrypted files on the same -filesystem. This is useful for multi-user systems where each user's -data-at-rest needs to be cryptographically isolated from the others. -However, except for filenames, fscrypt does not encrypt filesystem -metadata. - -Unlike eCryptfs, which is a stacked filesystem, fscrypt is integrated -directly into supported filesystems --- currently ext4, F2FS, and -UBIFS. This allows encrypted files to be read and written without -caching both the decrypted and encrypted pages in the pagecache, -thereby nearly halving the memory used and bringing it in line with -unencrypted files. Similarly, half as many dentries and inodes are -needed. eCryptfs also limits encrypted filenames to 143 bytes, -causing application compatibility issues; fscrypt allows the full 255 -bytes (NAME_MAX). Finally, unlike eCryptfs, the fscrypt API can be -used by unprivileged users, with no need to mount anything. - -fscrypt does not support encrypting files in-place. Instead, it -supports marking an empty directory as encrypted. Then, after -userspace provides the key, all regular files, directories, and -symbolic links created in that directory tree are transparently -encrypted. - -Threat model -============ - -Offline attacks ---------------- - -Provided that userspace chooses a strong encryption key, fscrypt -protects the confidentiality of file contents and filenames in the -event of a single point-in-time permanent offline compromise of the -block device content. fscrypt does not protect the confidentiality of -non-filename metadata, e.g. file sizes, file permissions, file -timestamps, and extended attributes. Also, the existence and location -of holes (unallocated blocks which logically contain all zeroes) in -files is not protected. - -fscrypt is not guaranteed to protect confidentiality or authenticity -if an attacker is able to manipulate the filesystem offline prior to -an authorized user later accessing the filesystem. - -Online attacks --------------- - -fscrypt (and storage encryption in general) can only provide limited -protection, if any at all, against online attacks. In detail: - -fscrypt is only resistant to side-channel attacks, such as timing or -electromagnetic attacks, to the extent that the underlying Linux -Cryptographic API algorithms are. If a vulnerable algorithm is used, -such as a table-based implementation of AES, it may be possible for an -attacker to mount a side channel attack against the online system. -Side channel attacks may also be mounted against applications -consuming decrypted data. - -After an encryption key has been provided, fscrypt is not designed to -hide the plaintext file contents or filenames from other users on the -same system, regardless of the visibility of the keyring key. -Instead, existing access control mechanisms such as file mode bits, -POSIX ACLs, LSMs, or mount namespaces should be used for this purpose. -Also note that as long as the encryption keys are *anywhere* in -memory, an online attacker can necessarily compromise them by mounting -a physical attack or by exploiting any kernel security vulnerability -which provides an arbitrary memory read primitive. - -While it is ostensibly possible to "evict" keys from the system, -recently accessed encrypted files will remain accessible at least -until the filesystem is unmounted or the VFS caches are dropped, e.g. -using ``echo 2 > /proc/sys/vm/drop_caches``. Even after that, if the -RAM is compromised before being powered off, it will likely still be -possible to recover portions of the plaintext file contents, if not -some of the encryption keys as well. (Since Linux v4.12, all -in-kernel keys related to fscrypt are sanitized before being freed. -However, userspace would need to do its part as well.) - -Currently, fscrypt does not prevent a user from maliciously providing -an incorrect key for another user's existing encrypted files. A -protection against this is planned. - -Key hierarchy -============= - -Master Keys ------------ - -Each encrypted directory tree is protected by a *master key*. Master -keys can be up to 64 bytes long, and must be at least as long as the -greater of the key length needed by the contents and filenames -encryption modes being used. For example, if AES-256-XTS is used for -contents encryption, the master key must be 64 bytes (512 bits). Note -that the XTS mode is defined to require a key twice as long as that -required by the underlying block cipher. - -To "unlock" an encrypted directory tree, userspace must provide the -appropriate master key. There can be any number of master keys, each -of which protects any number of directory trees on any number of -filesystems. - -Userspace should generate master keys either using a cryptographically -secure random number generator, or by using a KDF (Key Derivation -Function). Note that whenever a KDF is used to "stretch" a -lower-entropy secret such as a passphrase, it is critical that a KDF -designed for this purpose be used, such as scrypt, PBKDF2, or Argon2. - -Per-file keys -------------- - -Master keys are not used to encrypt file contents or names directly. -Instead, a unique key is derived for each encrypted file, including -each regular file, directory, and symbolic link. This has several -advantages: - -- In cryptosystems, the same key material should never be used for - different purposes. Using the master key as both an XTS key for - contents encryption and as a CTS-CBC key for filenames encryption - would violate this rule. -- Per-file keys simplify the choice of IVs (Initialization Vectors) - for contents encryption. Without per-file keys, to ensure IV - uniqueness both the inode and logical block number would need to be - encoded in the IVs. This would make it impossible to renumber - inodes, which e.g. ``resize2fs`` can do when resizing an ext4 - filesystem. With per-file keys, it is sufficient to encode just the - logical block number in the IVs. -- Per-file keys strengthen the encryption of filenames, where IVs are - reused out of necessity. With a unique key per directory, IV reuse - is limited to within a single directory. -- Per-file keys allow individual files to be securely erased simply by - securely erasing their keys. (Not yet implemented.) - -A KDF (Key Derivation Function) is used to derive per-file keys from -the master key. This is done instead of wrapping a randomly-generated -key for each file because it reduces the size of the encryption xattr, -which for some filesystems makes the xattr more likely to fit in-line -in the filesystem's inode table. With a KDF, only a 16-byte nonce is -required --- long enough to make key reuse extremely unlikely. A -wrapped key, on the other hand, would need to be up to 64 bytes --- -the length of an AES-256-XTS key. Furthermore, currently there is no -requirement to support unlocking a file with multiple alternative -master keys or to support rotating master keys. Instead, the master -keys may be wrapped in userspace, e.g. as done by the `fscrypt -`_ tool. - -The current KDF encrypts the master key using the 16-byte nonce as an -AES-128-ECB key. The output is used as the derived key. If the -output is longer than needed, then it is truncated to the needed -length. Truncation is the norm for directories and symlinks, since -those use the CTS-CBC encryption mode which requires a key half as -long as that required by the XTS encryption mode. - -Note: this KDF meets the primary security requirement, which is to -produce unique derived keys that preserve the entropy of the master -key, assuming that the master key is already a good pseudorandom key. -However, it is nonstandard and has some problems such as being -reversible, so it is generally considered to be a mistake! It may be -replaced with HKDF or another more standard KDF in the future. - -Encryption modes and usage -========================== - -fscrypt allows one encryption mode to be specified for file contents -and one encryption mode to be specified for filenames. Different -directory trees are permitted to use different encryption modes. -Currently, the following pairs of encryption modes are supported: - -- AES-256-XTS for contents and AES-256-CTS-CBC for filenames -- AES-128-CBC for contents and AES-128-CTS-CBC for filenames -- Speck128/256-XTS for contents and Speck128/256-CTS-CBC for filenames - -It is strongly recommended to use AES-256-XTS for contents encryption. -AES-128-CBC was added only for low-powered embedded devices with -crypto accelerators such as CAAM or CESA that do not support XTS. - -Similarly, Speck128/256 support was only added for older or low-end -CPUs which cannot do AES fast enough -- especially ARM CPUs which have -NEON instructions but not the Cryptography Extensions -- and for which -it would not otherwise be feasible to use encryption at all. It is -not recommended to use Speck on CPUs that have AES instructions. -Speck support is only available if it has been enabled in the crypto -API via CONFIG_CRYPTO_SPECK. Also, on ARM platforms, to get -acceptable performance CONFIG_CRYPTO_SPECK_NEON must be enabled. - -New encryption modes can be added relatively easily, without changes -to individual filesystems. However, authenticated encryption (AE) -modes are not currently supported because of the difficulty of dealing -with ciphertext expansion. - -For file contents, each filesystem block is encrypted independently. -Currently, only the case where the filesystem block size is equal to -the system's page size (usually 4096 bytes) is supported. With the -XTS mode of operation (recommended), the logical block number within -the file is used as the IV. With the CBC mode of operation (not -recommended), ESSIV is used; specifically, the IV for CBC is the -logical block number encrypted with AES-256, where the AES-256 key is -the SHA-256 hash of the inode's data encryption key. - -For filenames, the full filename is encrypted at once. Because of the -requirements to retain support for efficient directory lookups and -filenames of up to 255 bytes, a constant initialization vector (IV) is -used. However, each encrypted directory uses a unique key, which -limits IV reuse to within a single directory. Note that IV reuse in -the context of CTS-CBC encryption means that when the original -filenames share a common prefix at least as long as the cipher block -size (16 bytes for AES), the corresponding encrypted filenames will -also share a common prefix. This is undesirable; it may be fixed in -the future by switching to an encryption mode that is a strong -pseudorandom permutation on arbitrary-length messages, e.g. the HEH -(Hash-Encrypt-Hash) mode. - -Since filenames are encrypted with the CTS-CBC mode of operation, the -plaintext and ciphertext filenames need not be multiples of the AES -block size, i.e. 16 bytes. However, the minimum size that can be -encrypted is 16 bytes, so shorter filenames are NUL-padded to 16 bytes -before being encrypted. In addition, to reduce leakage of filename -lengths via their ciphertexts, all filenames are NUL-padded to the -next 4, 8, 16, or 32-byte boundary (configurable). 32 is recommended -since this provides the best confidentiality, at the cost of making -directory entries consume slightly more space. Note that since NUL -(``\0``) is not otherwise a valid character in filenames, the padding -will never produce duplicate plaintexts. - -Symbolic link targets are considered a type of filename and are -encrypted in the same way as filenames in directory entries. Each -symlink also uses a unique key; hence, the hardcoded IV is not a -problem for symlinks. - -User API -======== - -Setting an encryption policy ----------------------------- - -The FS_IOC_SET_ENCRYPTION_POLICY ioctl sets an encryption policy on an -empty directory or verifies that a directory or regular file already -has the specified encryption policy. It takes in a pointer to a -:c:type:`struct fscrypt_policy`, defined as follows:: - - #define FS_KEY_DESCRIPTOR_SIZE 8 - - struct fscrypt_policy { - __u8 version; - __u8 contents_encryption_mode; - __u8 filenames_encryption_mode; - __u8 flags; - __u8 master_key_descriptor[FS_KEY_DESCRIPTOR_SIZE]; - }; - -This structure must be initialized as follows: - -- ``version`` must be 0. - -- ``contents_encryption_mode`` and ``filenames_encryption_mode`` must - be set to constants from ```` which identify the - encryption modes to use. If unsure, use - FS_ENCRYPTION_MODE_AES_256_XTS (1) for ``contents_encryption_mode`` - and FS_ENCRYPTION_MODE_AES_256_CTS (4) for - ``filenames_encryption_mode``. - -- ``flags`` must be set to a value from ```` which - identifies the amount of NUL-padding to use when encrypting - filenames. If unsure, use FS_POLICY_FLAGS_PAD_32 (0x3). - -- ``master_key_descriptor`` specifies how to find the master key in - the keyring; see `Adding keys`_. It is up to userspace to choose a - unique ``master_key_descriptor`` for each master key. The e4crypt - and fscrypt tools use the first 8 bytes of - ``SHA-512(SHA-512(master_key))``, but this particular scheme is not - required. Also, the master key need not be in the keyring yet when - FS_IOC_SET_ENCRYPTION_POLICY is executed. However, it must be added - before any files can be created in the encrypted directory. - -If the file is not yet encrypted, then FS_IOC_SET_ENCRYPTION_POLICY -verifies that the file is an empty directory. If so, the specified -encryption policy is assigned to the directory, turning it into an -encrypted directory. After that, and after providing the -corresponding master key as described in `Adding keys`_, all regular -files, directories (recursively), and symlinks created in the -directory will be encrypted, inheriting the same encryption policy. -The filenames in the directory's entries will be encrypted as well. - -Alternatively, if the file is already encrypted, then -FS_IOC_SET_ENCRYPTION_POLICY validates that the specified encryption -policy exactly matches the actual one. If they match, then the ioctl -returns 0. Otherwise, it fails with EEXIST. This works on both -regular files and directories, including nonempty directories. - -Note that the ext4 filesystem does not allow the root directory to be -encrypted, even if it is empty. Users who want to encrypt an entire -filesystem with one key should consider using dm-crypt instead. - -FS_IOC_SET_ENCRYPTION_POLICY can fail with the following errors: - -- ``EACCES``: the file is not owned by the process's uid, nor does the - process have the CAP_FOWNER capability in a namespace with the file - owner's uid mapped -- ``EEXIST``: the file is already encrypted with an encryption policy - different from the one specified -- ``EINVAL``: an invalid encryption policy was specified (invalid - version, mode(s), or flags) -- ``ENOTDIR``: the file is unencrypted and is a regular file, not a - directory -- ``ENOTEMPTY``: the file is unencrypted and is a nonempty directory -- ``ENOTTY``: this type of filesystem does not implement encryption -- ``EOPNOTSUPP``: the kernel was not configured with encryption - support for this filesystem, or the filesystem superblock has not - had encryption enabled on it. (For example, to use encryption on an - ext4 filesystem, CONFIG_EXT4_ENCRYPTION must be enabled in the - kernel config, and the superblock must have had the "encrypt" - feature flag enabled using ``tune2fs -O encrypt`` or ``mkfs.ext4 -O - encrypt``.) -- ``EPERM``: this directory may not be encrypted, e.g. because it is - the root directory of an ext4 filesystem -- ``EROFS``: the filesystem is readonly - -Getting an encryption policy ----------------------------- - -The FS_IOC_GET_ENCRYPTION_POLICY ioctl retrieves the :c:type:`struct -fscrypt_policy`, if any, for a directory or regular file. See above -for the struct definition. No additional permissions are required -beyond the ability to open the file. - -FS_IOC_GET_ENCRYPTION_POLICY can fail with the following errors: - -- ``EINVAL``: the file is encrypted, but it uses an unrecognized - encryption context format -- ``ENODATA``: the file is not encrypted -- ``ENOTTY``: this type of filesystem does not implement encryption -- ``EOPNOTSUPP``: the kernel was not configured with encryption - support for this filesystem - -Note: if you only need to know whether a file is encrypted or not, on -most filesystems it is also possible to use the FS_IOC_GETFLAGS ioctl -and check for FS_ENCRYPT_FL, or to use the statx() system call and -check for STATX_ATTR_ENCRYPTED in stx_attributes. - -Getting the per-filesystem salt -------------------------------- - -Some filesystems, such as ext4 and F2FS, also support the deprecated -ioctl FS_IOC_GET_ENCRYPTION_PWSALT. This ioctl retrieves a randomly -generated 16-byte value stored in the filesystem superblock. This -value is intended to used as a salt when deriving an encryption key -from a passphrase or other low-entropy user credential. - -FS_IOC_GET_ENCRYPTION_PWSALT is deprecated. Instead, prefer to -generate and manage any needed salt(s) in userspace. - -Adding keys ------------ - -To provide a master key, userspace must add it to an appropriate -keyring using the add_key() system call (see: -``Documentation/security/keys/core.rst``). The key type must be -"logon"; keys of this type are kept in kernel memory and cannot be -read back by userspace. The key description must be "fscrypt:" -followed by the 16-character lower case hex representation of the -``master_key_descriptor`` that was set in the encryption policy. The -key payload must conform to the following structure:: - - #define FS_MAX_KEY_SIZE 64 - - struct fscrypt_key { - u32 mode; - u8 raw[FS_MAX_KEY_SIZE]; - u32 size; - }; - -``mode`` is ignored; just set it to 0. The actual key is provided in -``raw`` with ``size`` indicating its size in bytes. That is, the -bytes ``raw[0..size-1]`` (inclusive) are the actual key. - -The key description prefix "fscrypt:" may alternatively be replaced -with a filesystem-specific prefix such as "ext4:". However, the -filesystem-specific prefixes are deprecated and should not be used in -new programs. - -There are several different types of keyrings in which encryption keys -may be placed, such as a session keyring, a user session keyring, or a -user keyring. Each key must be placed in a keyring that is "attached" -to all processes that might need to access files encrypted with it, in -the sense that request_key() will find the key. Generally, if only -processes belonging to a specific user need to access a given -encrypted directory and no session keyring has been installed, then -that directory's key should be placed in that user's user session -keyring or user keyring. Otherwise, a session keyring should be -installed if needed, and the key should be linked into that session -keyring, or in a keyring linked into that session keyring. - -Note: introducing the complex visibility semantics of keyrings here -was arguably a mistake --- especially given that by design, after any -process successfully opens an encrypted file (thereby setting up the -per-file key), possessing the keyring key is not actually required for -any process to read/write the file until its in-memory inode is -evicted. In the future there probably should be a way to provide keys -directly to the filesystem instead, which would make the intended -semantics clearer. - -Access semantics -================ - -With the key ------------- - -With the encryption key, encrypted regular files, directories, and -symlinks behave very similarly to their unencrypted counterparts --- -after all, the encryption is intended to be transparent. However, -astute users may notice some differences in behavior: - -- Unencrypted files, or files encrypted with a different encryption - policy (i.e. different key, modes, or flags), cannot be renamed or - linked into an encrypted directory; see `Encryption policy - enforcement`_. Attempts to do so will fail with EPERM. However, - encrypted files can be renamed within an encrypted directory, or - into an unencrypted directory. - -- Direct I/O is not supported on encrypted files. Attempts to use - direct I/O on such files will fall back to buffered I/O. - -- The fallocate operations FALLOC_FL_COLLAPSE_RANGE, - FALLOC_FL_INSERT_RANGE, and FALLOC_FL_ZERO_RANGE are not supported - on encrypted files and will fail with EOPNOTSUPP. - -- Online defragmentation of encrypted files is not supported. The - EXT4_IOC_MOVE_EXT and F2FS_IOC_MOVE_RANGE ioctls will fail with - EOPNOTSUPP. - -- The ext4 filesystem does not support data journaling with encrypted - regular files. It will fall back to ordered data mode instead. - -- DAX (Direct Access) is not supported on encrypted files. - -- The st_size of an encrypted symlink will not necessarily give the - length of the symlink target as required by POSIX. It will actually - give the length of the ciphertext, which will be slightly longer - than the plaintext due to NUL-padding and an extra 2-byte overhead. - -- The maximum length of an encrypted symlink is 2 bytes shorter than - the maximum length of an unencrypted symlink. For example, on an - EXT4 filesystem with a 4K block size, unencrypted symlinks can be up - to 4095 bytes long, while encrypted symlinks can only be up to 4093 - bytes long (both lengths excluding the terminating null). - -Note that mmap *is* supported. This is possible because the pagecache -for an encrypted file contains the plaintext, not the ciphertext. - -Without the key ---------------- - -Some filesystem operations may be performed on encrypted regular -files, directories, and symlinks even before their encryption key has -been provided: - -- File metadata may be read, e.g. using stat(). - -- Directories may be listed, in which case the filenames will be - listed in an encoded form derived from their ciphertext. The - current encoding algorithm is described in `Filename hashing and - encoding`_. The algorithm is subject to change, but it is - guaranteed that the presented filenames will be no longer than - NAME_MAX bytes, will not contain the ``/`` or ``\0`` characters, and - will uniquely identify directory entries. - - The ``.`` and ``..`` directory entries are special. They are always - present and are not encrypted or encoded. - -- Files may be deleted. That is, nondirectory files may be deleted - with unlink() as usual, and empty directories may be deleted with - rmdir() as usual. Therefore, ``rm`` and ``rm -r`` will work as - expected. - -- Symlink targets may be read and followed, but they will be presented - in encrypted form, similar to filenames in directories. Hence, they - are unlikely to point to anywhere useful. - -Without the key, regular files cannot be opened or truncated. -Attempts to do so will fail with ENOKEY. This implies that any -regular file operations that require a file descriptor, such as -read(), write(), mmap(), fallocate(), and ioctl(), are also forbidden. - -Also without the key, files of any type (including directories) cannot -be created or linked into an encrypted directory, nor can a name in an -encrypted directory be the source or target of a rename, nor can an -O_TMPFILE temporary file be created in an encrypted directory. All -such operations will fail with ENOKEY. - -It is not currently possible to backup and restore encrypted files -without the encryption key. This would require special APIs which -have not yet been implemented. - -Encryption policy enforcement -============================= - -After an encryption policy has been set on a directory, all regular -files, directories, and symbolic links created in that directory -(recursively) will inherit that encryption policy. Special files --- -that is, named pipes, device nodes, and UNIX domain sockets --- will -not be encrypted. - -Except for those special files, it is forbidden to have unencrypted -files, or files encrypted with a different encryption policy, in an -encrypted directory tree. Attempts to link or rename such a file into -an encrypted directory will fail with EPERM. This is also enforced -during ->lookup() to provide limited protection against offline -attacks that try to disable or downgrade encryption in known locations -where applications may later write sensitive data. It is recommended -that systems implementing a form of "verified boot" take advantage of -this by validating all top-level encryption policies prior to access. - -Implementation details -====================== - -Encryption context ------------------- - -An encryption policy is represented on-disk by a :c:type:`struct -fscrypt_context`. It is up to individual filesystems to decide where -to store it, but normally it would be stored in a hidden extended -attribute. It should *not* be exposed by the xattr-related system -calls such as getxattr() and setxattr() because of the special -semantics of the encryption xattr. (In particular, there would be -much confusion if an encryption policy were to be added to or removed -from anything other than an empty directory.) The struct is defined -as follows:: - - #define FS_KEY_DESCRIPTOR_SIZE 8 - #define FS_KEY_DERIVATION_NONCE_SIZE 16 - - struct fscrypt_context { - u8 format; - u8 contents_encryption_mode; - u8 filenames_encryption_mode; - u8 flags; - u8 master_key_descriptor[FS_KEY_DESCRIPTOR_SIZE]; - u8 nonce[FS_KEY_DERIVATION_NONCE_SIZE]; - }; - -Note that :c:type:`struct fscrypt_context` contains the same -information as :c:type:`struct fscrypt_policy` (see `Setting an -encryption policy`_), except that :c:type:`struct fscrypt_context` -also contains a nonce. The nonce is randomly generated by the kernel -and is used to derive the inode's encryption key as described in -`Per-file keys`_. - -Data path changes ------------------ - -For the read path (->readpage()) of regular files, filesystems can -read the ciphertext into the page cache and decrypt it in-place. The -page lock must be held until decryption has finished, to prevent the -page from becoming visible to userspace prematurely. - -For the write path (->writepage()) of regular files, filesystems -cannot encrypt data in-place in the page cache, since the cached -plaintext must be preserved. Instead, filesystems must encrypt into a -temporary buffer or "bounce page", then write out the temporary -buffer. Some filesystems, such as UBIFS, already use temporary -buffers regardless of encryption. Other filesystems, such as ext4 and -F2FS, have to allocate bounce pages specially for encryption. - -Filename hashing and encoding ------------------------------ - -Modern filesystems accelerate directory lookups by using indexed -directories. An indexed directory is organized as a tree keyed by -filename hashes. When a ->lookup() is requested, the filesystem -normally hashes the filename being looked up so that it can quickly -find the corresponding directory entry, if any. - -With encryption, lookups must be supported and efficient both with and -without the encryption key. Clearly, it would not work to hash the -plaintext filenames, since the plaintext filenames are unavailable -without the key. (Hashing the plaintext filenames would also make it -impossible for the filesystem's fsck tool to optimize encrypted -directories.) Instead, filesystems hash the ciphertext filenames, -i.e. the bytes actually stored on-disk in the directory entries. When -asked to do a ->lookup() with the key, the filesystem just encrypts -the user-supplied name to get the ciphertext. - -Lookups without the key are more complicated. The raw ciphertext may -contain the ``\0`` and ``/`` characters, which are illegal in -filenames. Therefore, readdir() must base64-encode the ciphertext for -presentation. For most filenames, this works fine; on ->lookup(), the -filesystem just base64-decodes the user-supplied name to get back to -the raw ciphertext. - -However, for very long filenames, base64 encoding would cause the -filename length to exceed NAME_MAX. To prevent this, readdir() -actually presents long filenames in an abbreviated form which encodes -a strong "hash" of the ciphertext filename, along with the optional -filesystem-specific hash(es) needed for directory lookups. This -allows the filesystem to still, with a high degree of confidence, map -the filename given in ->lookup() back to a particular directory entry -that was previously listed by readdir(). See :c:type:`struct -fscrypt_digested_name` in the source for more details. - -Note that the precise way that filenames are presented to userspace -without the key is subject to change in the future. It is only meant -as a way to temporarily present valid filenames so that commands like -``rm -r`` work as expected on encrypted directories. diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h index 39c20ef26db41..79debfc9cef91 100644 --- a/fs/crypto/fscrypt_private.h +++ b/fs/crypto/fscrypt_private.h @@ -83,10 +83,6 @@ static inline bool fscrypt_valid_enc_modes(u32 contents_mode, filenames_mode == FS_ENCRYPTION_MODE_AES_256_CTS) return true; - if (contents_mode == FS_ENCRYPTION_MODE_SPECK128_256_XTS && - filenames_mode == FS_ENCRYPTION_MODE_SPECK128_256_CTS) - return true; - return false; } diff --git a/fs/crypto/keyinfo.c b/fs/crypto/keyinfo.c index e997ca51192f5..7874c9bb2fc53 100644 --- a/fs/crypto/keyinfo.c +++ b/fs/crypto/keyinfo.c @@ -174,16 +174,6 @@ static struct fscrypt_mode { .cipher_str = "cts(cbc(aes))", .keysize = 16, }, - [FS_ENCRYPTION_MODE_SPECK128_256_XTS] = { - .friendly_name = "Speck128/256-XTS", - .cipher_str = "xts(speck128)", - .keysize = 64, - }, - [FS_ENCRYPTION_MODE_SPECK128_256_CTS] = { - .friendly_name = "Speck128/256-CTS-CBC", - .cipher_str = "cts(cbc(speck128))", - .keysize = 32, - }, }; static struct fscrypt_mode * -- GitLab From 0e6d8d3812170328e0ea1e9b0026d09c18d322b9 Mon Sep 17 00:00:00 2001 From: Alistair Strachan Date: Mon, 15 Oct 2018 14:47:10 -0700 Subject: [PATCH 1008/2269] Revert "FROMLIST: crypto: arm64/speck - add NEON-accelerated implementation of Speck-XTS" This reverts commit f595eebe1e8fdea685d4a86d45b21ac0419cf077. Bug: 116008047 Change-Id: Ic47509910c162a35f6fba10a196f4369299451ac Signed-off-by: Alistair Strachan --- arch/arm64/crypto/Kconfig | 6 - arch/arm64/crypto/Makefile | 3 - arch/arm64/crypto/speck-neon-core.S | 352 ---------------------------- arch/arm64/crypto/speck-neon-glue.c | 282 ---------------------- 4 files changed, 643 deletions(-) delete mode 100644 arch/arm64/crypto/speck-neon-core.S delete mode 100644 arch/arm64/crypto/speck-neon-glue.c diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig index f856628e53fcc..70c517aa45016 100644 --- a/arch/arm64/crypto/Kconfig +++ b/arch/arm64/crypto/Kconfig @@ -95,10 +95,4 @@ config CRYPTO_AES_ARM64_BS select CRYPTO_AES_ARM64 select CRYPTO_SIMD -config CRYPTO_SPECK_NEON - tristate "NEON accelerated Speck cipher algorithms" - depends on KERNEL_MODE_NEON - select CRYPTO_BLKCIPHER - select CRYPTO_SPECK - endif diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile index e761c0a7a181c..93dbf4889eb66 100644 --- a/arch/arm64/crypto/Makefile +++ b/arch/arm64/crypto/Makefile @@ -44,9 +44,6 @@ sha512-arm64-y := sha512-glue.o sha512-core.o obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha20-neon.o chacha20-neon-y := chacha20-neon-core.o chacha20-neon-glue.o -obj-$(CONFIG_CRYPTO_SPECK_NEON) += speck-neon.o -speck-neon-y := speck-neon-core.o speck-neon-glue.o - obj-$(CONFIG_CRYPTO_AES_ARM64) += aes-arm64.o aes-arm64-y := aes-cipher-core.o aes-cipher-glue.o diff --git a/arch/arm64/crypto/speck-neon-core.S b/arch/arm64/crypto/speck-neon-core.S deleted file mode 100644 index b14463438b096..0000000000000 --- a/arch/arm64/crypto/speck-neon-core.S +++ /dev/null @@ -1,352 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * ARM64 NEON-accelerated implementation of Speck128-XTS and Speck64-XTS - * - * Copyright (c) 2018 Google, Inc - * - * Author: Eric Biggers - */ - -#include - - .text - - // arguments - ROUND_KEYS .req x0 // const {u64,u32} *round_keys - NROUNDS .req w1 // int nrounds - NROUNDS_X .req x1 - DST .req x2 // void *dst - SRC .req x3 // const void *src - NBYTES .req w4 // unsigned int nbytes - TWEAK .req x5 // void *tweak - - // registers which hold the data being encrypted/decrypted - // (underscores avoid a naming collision with ARM64 registers x0-x3) - X_0 .req v0 - Y_0 .req v1 - X_1 .req v2 - Y_1 .req v3 - X_2 .req v4 - Y_2 .req v5 - X_3 .req v6 - Y_3 .req v7 - - // the round key, duplicated in all lanes - ROUND_KEY .req v8 - - // index vector for tbl-based 8-bit rotates - ROTATE_TABLE .req v9 - ROTATE_TABLE_Q .req q9 - - // temporary registers - TMP0 .req v10 - TMP1 .req v11 - TMP2 .req v12 - TMP3 .req v13 - - // multiplication table for updating XTS tweaks - GFMUL_TABLE .req v14 - GFMUL_TABLE_Q .req q14 - - // next XTS tweak value(s) - TWEAKV_NEXT .req v15 - - // XTS tweaks for the blocks currently being encrypted/decrypted - TWEAKV0 .req v16 - TWEAKV1 .req v17 - TWEAKV2 .req v18 - TWEAKV3 .req v19 - TWEAKV4 .req v20 - TWEAKV5 .req v21 - TWEAKV6 .req v22 - TWEAKV7 .req v23 - - .align 4 -.Lror64_8_table: - .octa 0x080f0e0d0c0b0a090007060504030201 -.Lror32_8_table: - .octa 0x0c0f0e0d080b0a090407060500030201 -.Lrol64_8_table: - .octa 0x0e0d0c0b0a09080f0605040302010007 -.Lrol32_8_table: - .octa 0x0e0d0c0f0a09080b0605040702010003 -.Lgf128mul_table: - .octa 0x00000000000000870000000000000001 -.Lgf64mul_table: - .octa 0x0000000000000000000000002d361b00 - -/* - * _speck_round_128bytes() - Speck encryption round on 128 bytes at a time - * - * Do one Speck encryption round on the 128 bytes (8 blocks for Speck128, 16 for - * Speck64) stored in X0-X3 and Y0-Y3, using the round key stored in all lanes - * of ROUND_KEY. 'n' is the lane size: 64 for Speck128, or 32 for Speck64. - * 'lanes' is the lane specifier: "2d" for Speck128 or "4s" for Speck64. - */ -.macro _speck_round_128bytes n, lanes - - // x = ror(x, 8) - tbl X_0.16b, {X_0.16b}, ROTATE_TABLE.16b - tbl X_1.16b, {X_1.16b}, ROTATE_TABLE.16b - tbl X_2.16b, {X_2.16b}, ROTATE_TABLE.16b - tbl X_3.16b, {X_3.16b}, ROTATE_TABLE.16b - - // x += y - add X_0.\lanes, X_0.\lanes, Y_0.\lanes - add X_1.\lanes, X_1.\lanes, Y_1.\lanes - add X_2.\lanes, X_2.\lanes, Y_2.\lanes - add X_3.\lanes, X_3.\lanes, Y_3.\lanes - - // x ^= k - eor X_0.16b, X_0.16b, ROUND_KEY.16b - eor X_1.16b, X_1.16b, ROUND_KEY.16b - eor X_2.16b, X_2.16b, ROUND_KEY.16b - eor X_3.16b, X_3.16b, ROUND_KEY.16b - - // y = rol(y, 3) - shl TMP0.\lanes, Y_0.\lanes, #3 - shl TMP1.\lanes, Y_1.\lanes, #3 - shl TMP2.\lanes, Y_2.\lanes, #3 - shl TMP3.\lanes, Y_3.\lanes, #3 - sri TMP0.\lanes, Y_0.\lanes, #(\n - 3) - sri TMP1.\lanes, Y_1.\lanes, #(\n - 3) - sri TMP2.\lanes, Y_2.\lanes, #(\n - 3) - sri TMP3.\lanes, Y_3.\lanes, #(\n - 3) - - // y ^= x - eor Y_0.16b, TMP0.16b, X_0.16b - eor Y_1.16b, TMP1.16b, X_1.16b - eor Y_2.16b, TMP2.16b, X_2.16b - eor Y_3.16b, TMP3.16b, X_3.16b -.endm - -/* - * _speck_unround_128bytes() - Speck decryption round on 128 bytes at a time - * - * This is the inverse of _speck_round_128bytes(). - */ -.macro _speck_unround_128bytes n, lanes - - // y ^= x - eor TMP0.16b, Y_0.16b, X_0.16b - eor TMP1.16b, Y_1.16b, X_1.16b - eor TMP2.16b, Y_2.16b, X_2.16b - eor TMP3.16b, Y_3.16b, X_3.16b - - // y = ror(y, 3) - ushr Y_0.\lanes, TMP0.\lanes, #3 - ushr Y_1.\lanes, TMP1.\lanes, #3 - ushr Y_2.\lanes, TMP2.\lanes, #3 - ushr Y_3.\lanes, TMP3.\lanes, #3 - sli Y_0.\lanes, TMP0.\lanes, #(\n - 3) - sli Y_1.\lanes, TMP1.\lanes, #(\n - 3) - sli Y_2.\lanes, TMP2.\lanes, #(\n - 3) - sli Y_3.\lanes, TMP3.\lanes, #(\n - 3) - - // x ^= k - eor X_0.16b, X_0.16b, ROUND_KEY.16b - eor X_1.16b, X_1.16b, ROUND_KEY.16b - eor X_2.16b, X_2.16b, ROUND_KEY.16b - eor X_3.16b, X_3.16b, ROUND_KEY.16b - - // x -= y - sub X_0.\lanes, X_0.\lanes, Y_0.\lanes - sub X_1.\lanes, X_1.\lanes, Y_1.\lanes - sub X_2.\lanes, X_2.\lanes, Y_2.\lanes - sub X_3.\lanes, X_3.\lanes, Y_3.\lanes - - // x = rol(x, 8) - tbl X_0.16b, {X_0.16b}, ROTATE_TABLE.16b - tbl X_1.16b, {X_1.16b}, ROTATE_TABLE.16b - tbl X_2.16b, {X_2.16b}, ROTATE_TABLE.16b - tbl X_3.16b, {X_3.16b}, ROTATE_TABLE.16b -.endm - -.macro _next_xts_tweak next, cur, tmp, n -.if \n == 64 - /* - * Calculate the next tweak by multiplying the current one by x, - * modulo p(x) = x^128 + x^7 + x^2 + x + 1. - */ - sshr \tmp\().2d, \cur\().2d, #63 - and \tmp\().16b, \tmp\().16b, GFMUL_TABLE.16b - shl \next\().2d, \cur\().2d, #1 - ext \tmp\().16b, \tmp\().16b, \tmp\().16b, #8 - eor \next\().16b, \next\().16b, \tmp\().16b -.else - /* - * Calculate the next two tweaks by multiplying the current ones by x^2, - * modulo p(x) = x^64 + x^4 + x^3 + x + 1. - */ - ushr \tmp\().2d, \cur\().2d, #62 - shl \next\().2d, \cur\().2d, #2 - tbl \tmp\().16b, {GFMUL_TABLE.16b}, \tmp\().16b - eor \next\().16b, \next\().16b, \tmp\().16b -.endif -.endm - -/* - * _speck_xts_crypt() - Speck-XTS encryption/decryption - * - * Encrypt or decrypt NBYTES bytes of data from the SRC buffer to the DST buffer - * using Speck-XTS, specifically the variant with a block size of '2n' and round - * count given by NROUNDS. The expanded round keys are given in ROUND_KEYS, and - * the current XTS tweak value is given in TWEAK. It's assumed that NBYTES is a - * nonzero multiple of 128. - */ -.macro _speck_xts_crypt n, lanes, decrypting - - /* - * If decrypting, modify the ROUND_KEYS parameter to point to the last - * round key rather than the first, since for decryption the round keys - * are used in reverse order. - */ -.if \decrypting - mov NROUNDS, NROUNDS /* zero the high 32 bits */ -.if \n == 64 - add ROUND_KEYS, ROUND_KEYS, NROUNDS_X, lsl #3 - sub ROUND_KEYS, ROUND_KEYS, #8 -.else - add ROUND_KEYS, ROUND_KEYS, NROUNDS_X, lsl #2 - sub ROUND_KEYS, ROUND_KEYS, #4 -.endif -.endif - - // Load the index vector for tbl-based 8-bit rotates -.if \decrypting - ldr ROTATE_TABLE_Q, .Lrol\n\()_8_table -.else - ldr ROTATE_TABLE_Q, .Lror\n\()_8_table -.endif - - // One-time XTS preparation -.if \n == 64 - // Load first tweak - ld1 {TWEAKV0.16b}, [TWEAK] - - // Load GF(2^128) multiplication table - ldr GFMUL_TABLE_Q, .Lgf128mul_table -.else - // Load first tweak - ld1 {TWEAKV0.8b}, [TWEAK] - - // Load GF(2^64) multiplication table - ldr GFMUL_TABLE_Q, .Lgf64mul_table - - // Calculate second tweak, packing it together with the first - ushr TMP0.2d, TWEAKV0.2d, #63 - shl TMP1.2d, TWEAKV0.2d, #1 - tbl TMP0.8b, {GFMUL_TABLE.16b}, TMP0.8b - eor TMP0.8b, TMP0.8b, TMP1.8b - mov TWEAKV0.d[1], TMP0.d[0] -.endif - -.Lnext_128bytes_\@: - - // Calculate XTS tweaks for next 128 bytes - _next_xts_tweak TWEAKV1, TWEAKV0, TMP0, \n - _next_xts_tweak TWEAKV2, TWEAKV1, TMP0, \n - _next_xts_tweak TWEAKV3, TWEAKV2, TMP0, \n - _next_xts_tweak TWEAKV4, TWEAKV3, TMP0, \n - _next_xts_tweak TWEAKV5, TWEAKV4, TMP0, \n - _next_xts_tweak TWEAKV6, TWEAKV5, TMP0, \n - _next_xts_tweak TWEAKV7, TWEAKV6, TMP0, \n - _next_xts_tweak TWEAKV_NEXT, TWEAKV7, TMP0, \n - - // Load the next source blocks into {X,Y}[0-3] - ld1 {X_0.16b-Y_1.16b}, [SRC], #64 - ld1 {X_2.16b-Y_3.16b}, [SRC], #64 - - // XOR the source blocks with their XTS tweaks - eor TMP0.16b, X_0.16b, TWEAKV0.16b - eor Y_0.16b, Y_0.16b, TWEAKV1.16b - eor TMP1.16b, X_1.16b, TWEAKV2.16b - eor Y_1.16b, Y_1.16b, TWEAKV3.16b - eor TMP2.16b, X_2.16b, TWEAKV4.16b - eor Y_2.16b, Y_2.16b, TWEAKV5.16b - eor TMP3.16b, X_3.16b, TWEAKV6.16b - eor Y_3.16b, Y_3.16b, TWEAKV7.16b - - /* - * De-interleave the 'x' and 'y' elements of each block, i.e. make it so - * that the X[0-3] registers contain only the second halves of blocks, - * and the Y[0-3] registers contain only the first halves of blocks. - * (Speck uses the order (y, x) rather than the more intuitive (x, y).) - */ - uzp2 X_0.\lanes, TMP0.\lanes, Y_0.\lanes - uzp1 Y_0.\lanes, TMP0.\lanes, Y_0.\lanes - uzp2 X_1.\lanes, TMP1.\lanes, Y_1.\lanes - uzp1 Y_1.\lanes, TMP1.\lanes, Y_1.\lanes - uzp2 X_2.\lanes, TMP2.\lanes, Y_2.\lanes - uzp1 Y_2.\lanes, TMP2.\lanes, Y_2.\lanes - uzp2 X_3.\lanes, TMP3.\lanes, Y_3.\lanes - uzp1 Y_3.\lanes, TMP3.\lanes, Y_3.\lanes - - // Do the cipher rounds - mov x6, ROUND_KEYS - mov w7, NROUNDS -.Lnext_round_\@: -.if \decrypting - ld1r {ROUND_KEY.\lanes}, [x6] - sub x6, x6, #( \n / 8 ) - _speck_unround_128bytes \n, \lanes -.else - ld1r {ROUND_KEY.\lanes}, [x6], #( \n / 8 ) - _speck_round_128bytes \n, \lanes -.endif - subs w7, w7, #1 - bne .Lnext_round_\@ - - // Re-interleave the 'x' and 'y' elements of each block - zip1 TMP0.\lanes, Y_0.\lanes, X_0.\lanes - zip2 Y_0.\lanes, Y_0.\lanes, X_0.\lanes - zip1 TMP1.\lanes, Y_1.\lanes, X_1.\lanes - zip2 Y_1.\lanes, Y_1.\lanes, X_1.\lanes - zip1 TMP2.\lanes, Y_2.\lanes, X_2.\lanes - zip2 Y_2.\lanes, Y_2.\lanes, X_2.\lanes - zip1 TMP3.\lanes, Y_3.\lanes, X_3.\lanes - zip2 Y_3.\lanes, Y_3.\lanes, X_3.\lanes - - // XOR the encrypted/decrypted blocks with the tweaks calculated earlier - eor X_0.16b, TMP0.16b, TWEAKV0.16b - eor Y_0.16b, Y_0.16b, TWEAKV1.16b - eor X_1.16b, TMP1.16b, TWEAKV2.16b - eor Y_1.16b, Y_1.16b, TWEAKV3.16b - eor X_2.16b, TMP2.16b, TWEAKV4.16b - eor Y_2.16b, Y_2.16b, TWEAKV5.16b - eor X_3.16b, TMP3.16b, TWEAKV6.16b - eor Y_3.16b, Y_3.16b, TWEAKV7.16b - mov TWEAKV0.16b, TWEAKV_NEXT.16b - - // Store the ciphertext in the destination buffer - st1 {X_0.16b-Y_1.16b}, [DST], #64 - st1 {X_2.16b-Y_3.16b}, [DST], #64 - - // Continue if there are more 128-byte chunks remaining - subs NBYTES, NBYTES, #128 - bne .Lnext_128bytes_\@ - - // Store the next tweak and return -.if \n == 64 - st1 {TWEAKV_NEXT.16b}, [TWEAK] -.else - st1 {TWEAKV_NEXT.8b}, [TWEAK] -.endif - ret -.endm - -ENTRY(speck128_xts_encrypt_neon) - _speck_xts_crypt n=64, lanes=2d, decrypting=0 -ENDPROC(speck128_xts_encrypt_neon) - -ENTRY(speck128_xts_decrypt_neon) - _speck_xts_crypt n=64, lanes=2d, decrypting=1 -ENDPROC(speck128_xts_decrypt_neon) - -ENTRY(speck64_xts_encrypt_neon) - _speck_xts_crypt n=32, lanes=4s, decrypting=0 -ENDPROC(speck64_xts_encrypt_neon) - -ENTRY(speck64_xts_decrypt_neon) - _speck_xts_crypt n=32, lanes=4s, decrypting=1 -ENDPROC(speck64_xts_decrypt_neon) diff --git a/arch/arm64/crypto/speck-neon-glue.c b/arch/arm64/crypto/speck-neon-glue.c deleted file mode 100644 index 6e233aeb4ff48..0000000000000 --- a/arch/arm64/crypto/speck-neon-glue.c +++ /dev/null @@ -1,282 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * NEON-accelerated implementation of Speck128-XTS and Speck64-XTS - * (64-bit version; based on the 32-bit version) - * - * Copyright (c) 2018 Google, Inc - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* The assembly functions only handle multiples of 128 bytes */ -#define SPECK_NEON_CHUNK_SIZE 128 - -/* Speck128 */ - -struct speck128_xts_tfm_ctx { - struct speck128_tfm_ctx main_key; - struct speck128_tfm_ctx tweak_key; -}; - -asmlinkage void speck128_xts_encrypt_neon(const u64 *round_keys, int nrounds, - void *dst, const void *src, - unsigned int nbytes, void *tweak); - -asmlinkage void speck128_xts_decrypt_neon(const u64 *round_keys, int nrounds, - void *dst, const void *src, - unsigned int nbytes, void *tweak); - -typedef void (*speck128_crypt_one_t)(const struct speck128_tfm_ctx *, - u8 *, const u8 *); -typedef void (*speck128_xts_crypt_many_t)(const u64 *, int, void *, - const void *, unsigned int, void *); - -static __always_inline int -__speck128_xts_crypt(struct skcipher_request *req, - speck128_crypt_one_t crypt_one, - speck128_xts_crypt_many_t crypt_many) -{ - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - const struct speck128_xts_tfm_ctx *ctx = crypto_skcipher_ctx(tfm); - struct skcipher_walk walk; - le128 tweak; - int err; - - err = skcipher_walk_virt(&walk, req, true); - - crypto_speck128_encrypt(&ctx->tweak_key, (u8 *)&tweak, walk.iv); - - while (walk.nbytes > 0) { - unsigned int nbytes = walk.nbytes; - u8 *dst = walk.dst.virt.addr; - const u8 *src = walk.src.virt.addr; - - if (nbytes >= SPECK_NEON_CHUNK_SIZE && may_use_simd()) { - unsigned int count; - - count = round_down(nbytes, SPECK_NEON_CHUNK_SIZE); - kernel_neon_begin(); - (*crypt_many)(ctx->main_key.round_keys, - ctx->main_key.nrounds, - dst, src, count, &tweak); - kernel_neon_end(); - dst += count; - src += count; - nbytes -= count; - } - - /* Handle any remainder with generic code */ - while (nbytes >= sizeof(tweak)) { - le128_xor((le128 *)dst, (const le128 *)src, &tweak); - (*crypt_one)(&ctx->main_key, dst, dst); - le128_xor((le128 *)dst, (const le128 *)dst, &tweak); - gf128mul_x_ble(&tweak, &tweak); - - dst += sizeof(tweak); - src += sizeof(tweak); - nbytes -= sizeof(tweak); - } - err = skcipher_walk_done(&walk, nbytes); - } - - return err; -} - -static int speck128_xts_encrypt(struct skcipher_request *req) -{ - return __speck128_xts_crypt(req, crypto_speck128_encrypt, - speck128_xts_encrypt_neon); -} - -static int speck128_xts_decrypt(struct skcipher_request *req) -{ - return __speck128_xts_crypt(req, crypto_speck128_decrypt, - speck128_xts_decrypt_neon); -} - -static int speck128_xts_setkey(struct crypto_skcipher *tfm, const u8 *key, - unsigned int keylen) -{ - struct speck128_xts_tfm_ctx *ctx = crypto_skcipher_ctx(tfm); - int err; - - err = xts_verify_key(tfm, key, keylen); - if (err) - return err; - - keylen /= 2; - - err = crypto_speck128_setkey(&ctx->main_key, key, keylen); - if (err) - return err; - - return crypto_speck128_setkey(&ctx->tweak_key, key + keylen, keylen); -} - -/* Speck64 */ - -struct speck64_xts_tfm_ctx { - struct speck64_tfm_ctx main_key; - struct speck64_tfm_ctx tweak_key; -}; - -asmlinkage void speck64_xts_encrypt_neon(const u32 *round_keys, int nrounds, - void *dst, const void *src, - unsigned int nbytes, void *tweak); - -asmlinkage void speck64_xts_decrypt_neon(const u32 *round_keys, int nrounds, - void *dst, const void *src, - unsigned int nbytes, void *tweak); - -typedef void (*speck64_crypt_one_t)(const struct speck64_tfm_ctx *, - u8 *, const u8 *); -typedef void (*speck64_xts_crypt_many_t)(const u32 *, int, void *, - const void *, unsigned int, void *); - -static __always_inline int -__speck64_xts_crypt(struct skcipher_request *req, speck64_crypt_one_t crypt_one, - speck64_xts_crypt_many_t crypt_many) -{ - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - const struct speck64_xts_tfm_ctx *ctx = crypto_skcipher_ctx(tfm); - struct skcipher_walk walk; - __le64 tweak; - int err; - - err = skcipher_walk_virt(&walk, req, true); - - crypto_speck64_encrypt(&ctx->tweak_key, (u8 *)&tweak, walk.iv); - - while (walk.nbytes > 0) { - unsigned int nbytes = walk.nbytes; - u8 *dst = walk.dst.virt.addr; - const u8 *src = walk.src.virt.addr; - - if (nbytes >= SPECK_NEON_CHUNK_SIZE && may_use_simd()) { - unsigned int count; - - count = round_down(nbytes, SPECK_NEON_CHUNK_SIZE); - kernel_neon_begin(); - (*crypt_many)(ctx->main_key.round_keys, - ctx->main_key.nrounds, - dst, src, count, &tweak); - kernel_neon_end(); - dst += count; - src += count; - nbytes -= count; - } - - /* Handle any remainder with generic code */ - while (nbytes >= sizeof(tweak)) { - *(__le64 *)dst = *(__le64 *)src ^ tweak; - (*crypt_one)(&ctx->main_key, dst, dst); - *(__le64 *)dst ^= tweak; - tweak = cpu_to_le64((le64_to_cpu(tweak) << 1) ^ - ((tweak & cpu_to_le64(1ULL << 63)) ? - 0x1B : 0)); - dst += sizeof(tweak); - src += sizeof(tweak); - nbytes -= sizeof(tweak); - } - err = skcipher_walk_done(&walk, nbytes); - } - - return err; -} - -static int speck64_xts_encrypt(struct skcipher_request *req) -{ - return __speck64_xts_crypt(req, crypto_speck64_encrypt, - speck64_xts_encrypt_neon); -} - -static int speck64_xts_decrypt(struct skcipher_request *req) -{ - return __speck64_xts_crypt(req, crypto_speck64_decrypt, - speck64_xts_decrypt_neon); -} - -static int speck64_xts_setkey(struct crypto_skcipher *tfm, const u8 *key, - unsigned int keylen) -{ - struct speck64_xts_tfm_ctx *ctx = crypto_skcipher_ctx(tfm); - int err; - - err = xts_verify_key(tfm, key, keylen); - if (err) - return err; - - keylen /= 2; - - err = crypto_speck64_setkey(&ctx->main_key, key, keylen); - if (err) - return err; - - return crypto_speck64_setkey(&ctx->tweak_key, key + keylen, keylen); -} - -static struct skcipher_alg speck_algs[] = { - { - .base.cra_name = "xts(speck128)", - .base.cra_driver_name = "xts-speck128-neon", - .base.cra_priority = 300, - .base.cra_blocksize = SPECK128_BLOCK_SIZE, - .base.cra_ctxsize = sizeof(struct speck128_xts_tfm_ctx), - .base.cra_alignmask = 7, - .base.cra_module = THIS_MODULE, - .min_keysize = 2 * SPECK128_128_KEY_SIZE, - .max_keysize = 2 * SPECK128_256_KEY_SIZE, - .ivsize = SPECK128_BLOCK_SIZE, - .walksize = SPECK_NEON_CHUNK_SIZE, - .setkey = speck128_xts_setkey, - .encrypt = speck128_xts_encrypt, - .decrypt = speck128_xts_decrypt, - }, { - .base.cra_name = "xts(speck64)", - .base.cra_driver_name = "xts-speck64-neon", - .base.cra_priority = 300, - .base.cra_blocksize = SPECK64_BLOCK_SIZE, - .base.cra_ctxsize = sizeof(struct speck64_xts_tfm_ctx), - .base.cra_alignmask = 7, - .base.cra_module = THIS_MODULE, - .min_keysize = 2 * SPECK64_96_KEY_SIZE, - .max_keysize = 2 * SPECK64_128_KEY_SIZE, - .ivsize = SPECK64_BLOCK_SIZE, - .walksize = SPECK_NEON_CHUNK_SIZE, - .setkey = speck64_xts_setkey, - .encrypt = speck64_xts_encrypt, - .decrypt = speck64_xts_decrypt, - } -}; - -static int __init speck_neon_module_init(void) -{ - if (!(elf_hwcap & HWCAP_ASIMD)) - return -ENODEV; - return crypto_register_skciphers(speck_algs, ARRAY_SIZE(speck_algs)); -} - -static void __exit speck_neon_module_exit(void) -{ - crypto_unregister_skciphers(speck_algs, ARRAY_SIZE(speck_algs)); -} - -module_init(speck_neon_module_init); -module_exit(speck_neon_module_exit); - -MODULE_DESCRIPTION("Speck block cipher (NEON-accelerated)"); -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Eric Biggers "); -MODULE_ALIAS_CRYPTO("xts(speck128)"); -MODULE_ALIAS_CRYPTO("xts-speck128-neon"); -MODULE_ALIAS_CRYPTO("xts(speck64)"); -MODULE_ALIAS_CRYPTO("xts-speck64-neon"); -- GitLab From 436beae530ff53e0f060ae47d23f7d40bd4f3f4a Mon Sep 17 00:00:00 2001 From: Alistair Strachan Date: Mon, 15 Oct 2018 14:48:44 -0700 Subject: [PATCH 1009/2269] Revert "FROMGIT: crypto: speck - add test vectors for Speck64-XTS" This reverts commit 921c88669c515e534d520df3e81b5405cd81bab7. Bug: 116008047 Change-Id: I96897223f5daced94e3d62ae817c2de2b59b417f Signed-off-by: Alistair Strachan --- crypto/testmgr.c | 9 - crypto/testmgr.h | 671 ----------------------------------------------- 2 files changed, 680 deletions(-) diff --git a/crypto/testmgr.c b/crypto/testmgr.c index ec5d0035d819e..4fc1c990690b2 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -3611,15 +3611,6 @@ static const struct alg_test_desc alg_test_descs[] = { .dec = __VECS(speck128_xts_dec_tv_template) } } - }, { - .alg = "xts(speck64)", - .test = alg_test_skcipher, - .suite = { - .cipher = { - .enc = __VECS(speck64_xts_enc_tv_template), - .dec = __VECS(speck64_xts_dec_tv_template) - } - } }, { .alg = "xts(twofish)", .test = alg_test_skcipher, diff --git a/crypto/testmgr.h b/crypto/testmgr.h index 5d634ece1c071..f165574ef5587 100644 --- a/crypto/testmgr.h +++ b/crypto/testmgr.h @@ -14521,677 +14521,6 @@ static const struct cipher_testvec speck64_dec_tv_template[] = { }, }; -/* - * Speck64-XTS test vectors, taken from the AES-XTS test vectors with the result - * recomputed with Speck64 as the cipher, and key lengths adjusted - */ - -static const struct cipher_testvec speck64_xts_enc_tv_template[] = { - { - .key = "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00", - .klen = 24, - .iv = "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00", - .input = "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00", - .ilen = 32, - .result = "\x84\xaf\x54\x07\x19\xd4\x7c\xa6" - "\xe4\xfe\xdf\xc4\x1f\x34\xc3\xc2" - "\x80\xf5\x72\xe7\xcd\xf0\x99\x22" - "\x35\xa7\x2f\x06\xef\xdc\x51\xaa", - .rlen = 32, - }, { - .key = "\x11\x11\x11\x11\x11\x11\x11\x11" - "\x11\x11\x11\x11\x11\x11\x11\x11" - "\x22\x22\x22\x22\x22\x22\x22\x22", - .klen = 24, - .iv = "\x33\x33\x33\x33\x33\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00", - .input = "\x44\x44\x44\x44\x44\x44\x44\x44" - "\x44\x44\x44\x44\x44\x44\x44\x44" - "\x44\x44\x44\x44\x44\x44\x44\x44" - "\x44\x44\x44\x44\x44\x44\x44\x44", - .ilen = 32, - .result = "\x12\x56\x73\xcd\x15\x87\xa8\x59" - "\xcf\x84\xae\xd9\x1c\x66\xd6\x9f" - "\xb3\x12\x69\x7e\x36\xeb\x52\xff" - "\x62\xdd\xba\x90\xb3\xe1\xee\x99", - .rlen = 32, - }, { - .key = "\xff\xfe\xfd\xfc\xfb\xfa\xf9\xf8" - "\xf7\xf6\xf5\xf4\xf3\xf2\xf1\xf0" - "\x22\x22\x22\x22\x22\x22\x22\x22", - .klen = 24, - .iv = "\x33\x33\x33\x33\x33\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00", - .input = "\x44\x44\x44\x44\x44\x44\x44\x44" - "\x44\x44\x44\x44\x44\x44\x44\x44" - "\x44\x44\x44\x44\x44\x44\x44\x44" - "\x44\x44\x44\x44\x44\x44\x44\x44", - .ilen = 32, - .result = "\x15\x1b\xe4\x2c\xa2\x5a\x2d\x2c" - "\x27\x36\xc0\xbf\x5d\xea\x36\x37" - "\x2d\x1a\x88\xbc\x66\xb5\xd0\x0b" - "\xa1\xbc\x19\xb2\x0f\x3b\x75\x34", - .rlen = 32, - }, { - .key = "\x27\x18\x28\x18\x28\x45\x90\x45" - "\x23\x53\x60\x28\x74\x71\x35\x26" - "\x31\x41\x59\x26\x53\x58\x97\x93", - .klen = 24, - .iv = "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00", - .input = "\x00\x01\x02\x03\x04\x05\x06\x07" - "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" - "\x10\x11\x12\x13\x14\x15\x16\x17" - "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" - "\x20\x21\x22\x23\x24\x25\x26\x27" - "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f" - "\x30\x31\x32\x33\x34\x35\x36\x37" - "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f" - "\x40\x41\x42\x43\x44\x45\x46\x47" - "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f" - "\x50\x51\x52\x53\x54\x55\x56\x57" - "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f" - "\x60\x61\x62\x63\x64\x65\x66\x67" - "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f" - "\x70\x71\x72\x73\x74\x75\x76\x77" - "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f" - "\x80\x81\x82\x83\x84\x85\x86\x87" - "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" - "\x90\x91\x92\x93\x94\x95\x96\x97" - "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" - "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7" - "\xa8\xa9\xaa\xab\xac\xad\xae\xaf" - "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7" - "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf" - "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7" - "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" - "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7" - "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf" - "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7" - "\xe8\xe9\xea\xeb\xec\xed\xee\xef" - "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7" - "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff" - "\x00\x01\x02\x03\x04\x05\x06\x07" - "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" - "\x10\x11\x12\x13\x14\x15\x16\x17" - "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" - "\x20\x21\x22\x23\x24\x25\x26\x27" - "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f" - "\x30\x31\x32\x33\x34\x35\x36\x37" - "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f" - "\x40\x41\x42\x43\x44\x45\x46\x47" - "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f" - "\x50\x51\x52\x53\x54\x55\x56\x57" - "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f" - "\x60\x61\x62\x63\x64\x65\x66\x67" - "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f" - "\x70\x71\x72\x73\x74\x75\x76\x77" - "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f" - "\x80\x81\x82\x83\x84\x85\x86\x87" - "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" - "\x90\x91\x92\x93\x94\x95\x96\x97" - "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" - "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7" - "\xa8\xa9\xaa\xab\xac\xad\xae\xaf" - "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7" - "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf" - "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7" - "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" - "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7" - "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf" - "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7" - "\xe8\xe9\xea\xeb\xec\xed\xee\xef" - "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7" - "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff", - .ilen = 512, - .result = "\xaf\xa1\x81\xa6\x32\xbb\x15\x8e" - "\xf8\x95\x2e\xd3\xe6\xee\x7e\x09" - "\x0c\x1a\xf5\x02\x97\x8b\xe3\xb3" - "\x11\xc7\x39\x96\xd0\x95\xf4\x56" - "\xf4\xdd\x03\x38\x01\x44\x2c\xcf" - "\x88\xae\x8e\x3c\xcd\xe7\xaa\x66" - "\xfe\x3d\xc6\xfb\x01\x23\x51\x43" - "\xd5\xd2\x13\x86\x94\x34\xe9\x62" - "\xf9\x89\xe3\xd1\x7b\xbe\xf8\xef" - "\x76\x35\x04\x3f\xdb\x23\x9d\x0b" - "\x85\x42\xb9\x02\xd6\xcc\xdb\x96" - "\xa7\x6b\x27\xb6\xd4\x45\x8f\x7d" - "\xae\xd2\x04\xd5\xda\xc1\x7e\x24" - "\x8c\x73\xbe\x48\x7e\xcf\x65\x28" - "\x29\xe5\xbe\x54\x30\xcb\x46\x95" - "\x4f\x2e\x8a\x36\xc8\x27\xc5\xbe" - "\xd0\x1a\xaf\xab\x26\xcd\x9e\x69" - "\xa1\x09\x95\x71\x26\xe9\xc4\xdf" - "\xe6\x31\xc3\x46\xda\xaf\x0b\x41" - "\x1f\xab\xb1\x8e\xd6\xfc\x0b\xb3" - "\x82\xc0\x37\x27\xfc\x91\xa7\x05" - "\xfb\xc5\xdc\x2b\x74\x96\x48\x43" - "\x5d\x9c\x19\x0f\x60\x63\x3a\x1f" - "\x6f\xf0\x03\xbe\x4d\xfd\xc8\x4a" - "\xc6\xa4\x81\x6d\xc3\x12\x2a\x5c" - "\x07\xff\xf3\x72\x74\x48\xb5\x40" - "\x50\xb5\xdd\x90\x43\x31\x18\x15" - "\x7b\xf2\xa6\xdb\x83\xc8\x4b\x4a" - "\x29\x93\x90\x8b\xda\x07\xf0\x35" - "\x6d\x90\x88\x09\x4e\x83\xf5\x5b" - "\x94\x12\xbb\x33\x27\x1d\x3f\x23" - "\x51\xa8\x7c\x07\xa2\xae\x77\xa6" - "\x50\xfd\xcc\xc0\x4f\x80\x7a\x9f" - "\x66\xdd\xcd\x75\x24\x8b\x33\xf7" - "\x20\xdb\x83\x9b\x4f\x11\x63\x6e" - "\xcf\x37\xef\xc9\x11\x01\x5c\x45" - "\x32\x99\x7c\x3c\x9e\x42\x89\xe3" - "\x70\x6d\x15\x9f\xb1\xe6\xb6\x05" - "\xfe\x0c\xb9\x49\x2d\x90\x6d\xcc" - "\x5d\x3f\xc1\xfe\x89\x0a\x2e\x2d" - "\xa0\xa8\x89\x3b\x73\x39\xa5\x94" - "\x4c\xa4\xa6\xbb\xa7\x14\x46\x89" - "\x10\xff\xaf\xef\xca\xdd\x4f\x80" - "\xb3\xdf\x3b\xab\xd4\xe5\x5a\xc7" - "\x33\xca\x00\x8b\x8b\x3f\xea\xec" - "\x68\x8a\xc2\x6d\xfd\xd4\x67\x0f" - "\x22\x31\xe1\x0e\xfe\x5a\x04\xd5" - "\x64\xa3\xf1\x1a\x76\x28\xcc\x35" - "\x36\xa7\x0a\x74\xf7\x1c\x44\x9b" - "\xc7\x1b\x53\x17\x02\xea\xd1\xad" - "\x13\x51\x73\xc0\xa0\xb2\x05\x32" - "\xa8\xa2\x37\x2e\xe1\x7a\x3a\x19" - "\x26\xb4\x6c\x62\x5d\xb3\x1a\x1d" - "\x59\xda\xee\x1a\x22\x18\xda\x0d" - "\x88\x0f\x55\x8b\x72\x62\xfd\xc1" - "\x69\x13\xcd\x0d\x5f\xc1\x09\x52" - "\xee\xd6\xe3\x84\x4d\xee\xf6\x88" - "\xaf\x83\xdc\x76\xf4\xc0\x93\x3f" - "\x4a\x75\x2f\xb0\x0b\x3e\xc4\x54" - "\x7d\x69\x8d\x00\x62\x77\x0d\x14" - "\xbe\x7c\xa6\x7d\xc5\x24\x4f\xf3" - "\x50\xf7\x5f\xf4\xc2\xca\x41\x97" - "\x37\xbe\x75\x74\xcd\xf0\x75\x6e" - "\x25\x23\x94\xbd\xda\x8d\xb0\xd4", - .rlen = 512, - }, { - .key = "\x27\x18\x28\x18\x28\x45\x90\x45" - "\x23\x53\x60\x28\x74\x71\x35\x26" - "\x62\x49\x77\x57\x24\x70\x93\x69" - "\x99\x59\x57\x49\x66\x96\x76\x27", - .klen = 32, - .iv = "\xff\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00", - .input = "\x00\x01\x02\x03\x04\x05\x06\x07" - "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" - "\x10\x11\x12\x13\x14\x15\x16\x17" - "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" - "\x20\x21\x22\x23\x24\x25\x26\x27" - "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f" - "\x30\x31\x32\x33\x34\x35\x36\x37" - "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f" - "\x40\x41\x42\x43\x44\x45\x46\x47" - "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f" - "\x50\x51\x52\x53\x54\x55\x56\x57" - "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f" - "\x60\x61\x62\x63\x64\x65\x66\x67" - "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f" - "\x70\x71\x72\x73\x74\x75\x76\x77" - "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f" - "\x80\x81\x82\x83\x84\x85\x86\x87" - "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" - "\x90\x91\x92\x93\x94\x95\x96\x97" - "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" - "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7" - "\xa8\xa9\xaa\xab\xac\xad\xae\xaf" - "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7" - "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf" - "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7" - "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" - "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7" - "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf" - "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7" - "\xe8\xe9\xea\xeb\xec\xed\xee\xef" - "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7" - "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff" - "\x00\x01\x02\x03\x04\x05\x06\x07" - "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" - "\x10\x11\x12\x13\x14\x15\x16\x17" - "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" - "\x20\x21\x22\x23\x24\x25\x26\x27" - "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f" - "\x30\x31\x32\x33\x34\x35\x36\x37" - "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f" - "\x40\x41\x42\x43\x44\x45\x46\x47" - "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f" - "\x50\x51\x52\x53\x54\x55\x56\x57" - "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f" - "\x60\x61\x62\x63\x64\x65\x66\x67" - "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f" - "\x70\x71\x72\x73\x74\x75\x76\x77" - "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f" - "\x80\x81\x82\x83\x84\x85\x86\x87" - "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" - "\x90\x91\x92\x93\x94\x95\x96\x97" - "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" - "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7" - "\xa8\xa9\xaa\xab\xac\xad\xae\xaf" - "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7" - "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf" - "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7" - "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" - "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7" - "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf" - "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7" - "\xe8\xe9\xea\xeb\xec\xed\xee\xef" - "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7" - "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff", - .ilen = 512, - .result = "\x55\xed\x71\xd3\x02\x8e\x15\x3b" - "\xc6\x71\x29\x2d\x3e\x89\x9f\x59" - "\x68\x6a\xcc\x8a\x56\x97\xf3\x95" - "\x4e\x51\x08\xda\x2a\xf8\x6f\x3c" - "\x78\x16\xea\x80\xdb\x33\x75\x94" - "\xf9\x29\xc4\x2b\x76\x75\x97\xc7" - "\xf2\x98\x2c\xf9\xff\xc8\xd5\x2b" - "\x18\xf1\xaf\xcf\x7c\xc5\x0b\xee" - "\xad\x3c\x76\x7c\xe6\x27\xa2\x2a" - "\xe4\x66\xe1\xab\xa2\x39\xfc\x7c" - "\xf5\xec\x32\x74\xa3\xb8\x03\x88" - "\x52\xfc\x2e\x56\x3f\xa1\xf0\x9f" - "\x84\x5e\x46\xed\x20\x89\xb6\x44" - "\x8d\xd0\xed\x54\x47\x16\xbe\x95" - "\x8a\xb3\x6b\x72\xc4\x32\x52\x13" - "\x1b\xb0\x82\xbe\xac\xf9\x70\xa6" - "\x44\x18\xdd\x8c\x6e\xca\x6e\x45" - "\x8f\x1e\x10\x07\x57\x25\x98\x7b" - "\x17\x8c\x78\xdd\x80\xa7\xd9\xd8" - "\x63\xaf\xb9\x67\x57\xfd\xbc\xdb" - "\x44\xe9\xc5\x65\xd1\xc7\x3b\xff" - "\x20\xa0\x80\x1a\xc3\x9a\xad\x5e" - "\x5d\x3b\xd3\x07\xd9\xf5\xfd\x3d" - "\x4a\x8b\xa8\xd2\x6e\x7a\x51\x65" - "\x6c\x8e\x95\xe0\x45\xc9\x5f\x4a" - "\x09\x3c\x3d\x71\x7f\x0c\x84\x2a" - "\xc8\x48\x52\x1a\xc2\xd5\xd6\x78" - "\x92\x1e\xa0\x90\x2e\xea\xf0\xf3" - "\xdc\x0f\xb1\xaf\x0d\x9b\x06\x2e" - "\x35\x10\x30\x82\x0d\xe7\xc5\x9b" - "\xde\x44\x18\xbd\x9f\xd1\x45\xa9" - "\x7b\x7a\x4a\xad\x35\x65\x27\xca" - "\xb2\xc3\xd4\x9b\x71\x86\x70\xee" - "\xf1\x89\x3b\x85\x4b\x5b\xaa\xaf" - "\xfc\x42\xc8\x31\x59\xbe\x16\x60" - "\x4f\xf9\xfa\x12\xea\xd0\xa7\x14" - "\xf0\x7a\xf3\xd5\x8d\xbd\x81\xef" - "\x52\x7f\x29\x51\x94\x20\x67\x3c" - "\xd1\xaf\x77\x9f\x22\x5a\x4e\x63" - "\xe7\xff\x73\x25\xd1\xdd\x96\x8a" - "\x98\x52\x6d\xf3\xac\x3e\xf2\x18" - "\x6d\xf6\x0a\x29\xa6\x34\x3d\xed" - "\xe3\x27\x0d\x9d\x0a\x02\x44\x7e" - "\x5a\x7e\x67\x0f\x0a\x9e\xd6\xad" - "\x91\xe6\x4d\x81\x8c\x5c\x59\xaa" - "\xfb\xeb\x56\x53\xd2\x7d\x4c\x81" - "\x65\x53\x0f\x41\x11\xbd\x98\x99" - "\xf9\xc6\xfa\x51\x2e\xa3\xdd\x8d" - "\x84\x98\xf9\x34\xed\x33\x2a\x1f" - "\x82\xed\xc1\x73\x98\xd3\x02\xdc" - "\xe6\xc2\x33\x1d\xa2\xb4\xca\x76" - "\x63\x51\x34\x9d\x96\x12\xae\xce" - "\x83\xc9\x76\x5e\xa4\x1b\x53\x37" - "\x17\xd5\xc0\x80\x1d\x62\xf8\x3d" - "\x54\x27\x74\xbb\x10\x86\x57\x46" - "\x68\xe1\xed\x14\xe7\x9d\xfc\x84" - "\x47\xbc\xc2\xf8\x19\x4b\x99\xcf" - "\x7a\xe9\xc4\xb8\x8c\x82\x72\x4d" - "\x7b\x4f\x38\x55\x36\x71\x64\xc1" - "\xfc\x5c\x75\x52\x33\x02\x18\xf8" - "\x17\xe1\x2b\xc2\x43\x39\xbd\x76" - "\x9b\x63\x76\x32\x2f\x19\x72\x10" - "\x9f\x21\x0c\xf1\x66\x50\x7f\xa5" - "\x0d\x1f\x46\xe0\xba\xd3\x2f\x3c", - .rlen = 512, - .also_non_np = 1, - .np = 3, - .tap = { 512 - 20, 4, 16 }, - } -}; - -static const struct cipher_testvec speck64_xts_dec_tv_template[] = { - { - .key = "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00", - .klen = 24, - .iv = "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00", - .input = "\x84\xaf\x54\x07\x19\xd4\x7c\xa6" - "\xe4\xfe\xdf\xc4\x1f\x34\xc3\xc2" - "\x80\xf5\x72\xe7\xcd\xf0\x99\x22" - "\x35\xa7\x2f\x06\xef\xdc\x51\xaa", - .ilen = 32, - .result = "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00", - .rlen = 32, - }, { - .key = "\x11\x11\x11\x11\x11\x11\x11\x11" - "\x11\x11\x11\x11\x11\x11\x11\x11" - "\x22\x22\x22\x22\x22\x22\x22\x22", - .klen = 24, - .iv = "\x33\x33\x33\x33\x33\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00", - .input = "\x12\x56\x73\xcd\x15\x87\xa8\x59" - "\xcf\x84\xae\xd9\x1c\x66\xd6\x9f" - "\xb3\x12\x69\x7e\x36\xeb\x52\xff" - "\x62\xdd\xba\x90\xb3\xe1\xee\x99", - .ilen = 32, - .result = "\x44\x44\x44\x44\x44\x44\x44\x44" - "\x44\x44\x44\x44\x44\x44\x44\x44" - "\x44\x44\x44\x44\x44\x44\x44\x44" - "\x44\x44\x44\x44\x44\x44\x44\x44", - .rlen = 32, - }, { - .key = "\xff\xfe\xfd\xfc\xfb\xfa\xf9\xf8" - "\xf7\xf6\xf5\xf4\xf3\xf2\xf1\xf0" - "\x22\x22\x22\x22\x22\x22\x22\x22", - .klen = 24, - .iv = "\x33\x33\x33\x33\x33\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00", - .input = "\x15\x1b\xe4\x2c\xa2\x5a\x2d\x2c" - "\x27\x36\xc0\xbf\x5d\xea\x36\x37" - "\x2d\x1a\x88\xbc\x66\xb5\xd0\x0b" - "\xa1\xbc\x19\xb2\x0f\x3b\x75\x34", - .ilen = 32, - .result = "\x44\x44\x44\x44\x44\x44\x44\x44" - "\x44\x44\x44\x44\x44\x44\x44\x44" - "\x44\x44\x44\x44\x44\x44\x44\x44" - "\x44\x44\x44\x44\x44\x44\x44\x44", - .rlen = 32, - }, { - .key = "\x27\x18\x28\x18\x28\x45\x90\x45" - "\x23\x53\x60\x28\x74\x71\x35\x26" - "\x31\x41\x59\x26\x53\x58\x97\x93", - .klen = 24, - .iv = "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00", - .input = "\xaf\xa1\x81\xa6\x32\xbb\x15\x8e" - "\xf8\x95\x2e\xd3\xe6\xee\x7e\x09" - "\x0c\x1a\xf5\x02\x97\x8b\xe3\xb3" - "\x11\xc7\x39\x96\xd0\x95\xf4\x56" - "\xf4\xdd\x03\x38\x01\x44\x2c\xcf" - "\x88\xae\x8e\x3c\xcd\xe7\xaa\x66" - "\xfe\x3d\xc6\xfb\x01\x23\x51\x43" - "\xd5\xd2\x13\x86\x94\x34\xe9\x62" - "\xf9\x89\xe3\xd1\x7b\xbe\xf8\xef" - "\x76\x35\x04\x3f\xdb\x23\x9d\x0b" - "\x85\x42\xb9\x02\xd6\xcc\xdb\x96" - "\xa7\x6b\x27\xb6\xd4\x45\x8f\x7d" - "\xae\xd2\x04\xd5\xda\xc1\x7e\x24" - "\x8c\x73\xbe\x48\x7e\xcf\x65\x28" - "\x29\xe5\xbe\x54\x30\xcb\x46\x95" - "\x4f\x2e\x8a\x36\xc8\x27\xc5\xbe" - "\xd0\x1a\xaf\xab\x26\xcd\x9e\x69" - "\xa1\x09\x95\x71\x26\xe9\xc4\xdf" - "\xe6\x31\xc3\x46\xda\xaf\x0b\x41" - "\x1f\xab\xb1\x8e\xd6\xfc\x0b\xb3" - "\x82\xc0\x37\x27\xfc\x91\xa7\x05" - "\xfb\xc5\xdc\x2b\x74\x96\x48\x43" - "\x5d\x9c\x19\x0f\x60\x63\x3a\x1f" - "\x6f\xf0\x03\xbe\x4d\xfd\xc8\x4a" - "\xc6\xa4\x81\x6d\xc3\x12\x2a\x5c" - "\x07\xff\xf3\x72\x74\x48\xb5\x40" - "\x50\xb5\xdd\x90\x43\x31\x18\x15" - "\x7b\xf2\xa6\xdb\x83\xc8\x4b\x4a" - "\x29\x93\x90\x8b\xda\x07\xf0\x35" - "\x6d\x90\x88\x09\x4e\x83\xf5\x5b" - "\x94\x12\xbb\x33\x27\x1d\x3f\x23" - "\x51\xa8\x7c\x07\xa2\xae\x77\xa6" - "\x50\xfd\xcc\xc0\x4f\x80\x7a\x9f" - "\x66\xdd\xcd\x75\x24\x8b\x33\xf7" - "\x20\xdb\x83\x9b\x4f\x11\x63\x6e" - "\xcf\x37\xef\xc9\x11\x01\x5c\x45" - "\x32\x99\x7c\x3c\x9e\x42\x89\xe3" - "\x70\x6d\x15\x9f\xb1\xe6\xb6\x05" - "\xfe\x0c\xb9\x49\x2d\x90\x6d\xcc" - "\x5d\x3f\xc1\xfe\x89\x0a\x2e\x2d" - "\xa0\xa8\x89\x3b\x73\x39\xa5\x94" - "\x4c\xa4\xa6\xbb\xa7\x14\x46\x89" - "\x10\xff\xaf\xef\xca\xdd\x4f\x80" - "\xb3\xdf\x3b\xab\xd4\xe5\x5a\xc7" - "\x33\xca\x00\x8b\x8b\x3f\xea\xec" - "\x68\x8a\xc2\x6d\xfd\xd4\x67\x0f" - "\x22\x31\xe1\x0e\xfe\x5a\x04\xd5" - "\x64\xa3\xf1\x1a\x76\x28\xcc\x35" - "\x36\xa7\x0a\x74\xf7\x1c\x44\x9b" - "\xc7\x1b\x53\x17\x02\xea\xd1\xad" - "\x13\x51\x73\xc0\xa0\xb2\x05\x32" - "\xa8\xa2\x37\x2e\xe1\x7a\x3a\x19" - "\x26\xb4\x6c\x62\x5d\xb3\x1a\x1d" - "\x59\xda\xee\x1a\x22\x18\xda\x0d" - "\x88\x0f\x55\x8b\x72\x62\xfd\xc1" - "\x69\x13\xcd\x0d\x5f\xc1\x09\x52" - "\xee\xd6\xe3\x84\x4d\xee\xf6\x88" - "\xaf\x83\xdc\x76\xf4\xc0\x93\x3f" - "\x4a\x75\x2f\xb0\x0b\x3e\xc4\x54" - "\x7d\x69\x8d\x00\x62\x77\x0d\x14" - "\xbe\x7c\xa6\x7d\xc5\x24\x4f\xf3" - "\x50\xf7\x5f\xf4\xc2\xca\x41\x97" - "\x37\xbe\x75\x74\xcd\xf0\x75\x6e" - "\x25\x23\x94\xbd\xda\x8d\xb0\xd4", - .ilen = 512, - .result = "\x00\x01\x02\x03\x04\x05\x06\x07" - "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" - "\x10\x11\x12\x13\x14\x15\x16\x17" - "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" - "\x20\x21\x22\x23\x24\x25\x26\x27" - "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f" - "\x30\x31\x32\x33\x34\x35\x36\x37" - "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f" - "\x40\x41\x42\x43\x44\x45\x46\x47" - "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f" - "\x50\x51\x52\x53\x54\x55\x56\x57" - "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f" - "\x60\x61\x62\x63\x64\x65\x66\x67" - "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f" - "\x70\x71\x72\x73\x74\x75\x76\x77" - "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f" - "\x80\x81\x82\x83\x84\x85\x86\x87" - "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" - "\x90\x91\x92\x93\x94\x95\x96\x97" - "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" - "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7" - "\xa8\xa9\xaa\xab\xac\xad\xae\xaf" - "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7" - "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf" - "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7" - "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" - "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7" - "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf" - "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7" - "\xe8\xe9\xea\xeb\xec\xed\xee\xef" - "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7" - "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff" - "\x00\x01\x02\x03\x04\x05\x06\x07" - "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" - "\x10\x11\x12\x13\x14\x15\x16\x17" - "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" - "\x20\x21\x22\x23\x24\x25\x26\x27" - "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f" - "\x30\x31\x32\x33\x34\x35\x36\x37" - "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f" - "\x40\x41\x42\x43\x44\x45\x46\x47" - "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f" - "\x50\x51\x52\x53\x54\x55\x56\x57" - "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f" - "\x60\x61\x62\x63\x64\x65\x66\x67" - "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f" - "\x70\x71\x72\x73\x74\x75\x76\x77" - "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f" - "\x80\x81\x82\x83\x84\x85\x86\x87" - "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" - "\x90\x91\x92\x93\x94\x95\x96\x97" - "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" - "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7" - "\xa8\xa9\xaa\xab\xac\xad\xae\xaf" - "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7" - "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf" - "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7" - "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" - "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7" - "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf" - "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7" - "\xe8\xe9\xea\xeb\xec\xed\xee\xef" - "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7" - "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff", - .rlen = 512, - }, { - .key = "\x27\x18\x28\x18\x28\x45\x90\x45" - "\x23\x53\x60\x28\x74\x71\x35\x26" - "\x62\x49\x77\x57\x24\x70\x93\x69" - "\x99\x59\x57\x49\x66\x96\x76\x27", - .klen = 32, - .iv = "\xff\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00", - .input = "\x55\xed\x71\xd3\x02\x8e\x15\x3b" - "\xc6\x71\x29\x2d\x3e\x89\x9f\x59" - "\x68\x6a\xcc\x8a\x56\x97\xf3\x95" - "\x4e\x51\x08\xda\x2a\xf8\x6f\x3c" - "\x78\x16\xea\x80\xdb\x33\x75\x94" - "\xf9\x29\xc4\x2b\x76\x75\x97\xc7" - "\xf2\x98\x2c\xf9\xff\xc8\xd5\x2b" - "\x18\xf1\xaf\xcf\x7c\xc5\x0b\xee" - "\xad\x3c\x76\x7c\xe6\x27\xa2\x2a" - "\xe4\x66\xe1\xab\xa2\x39\xfc\x7c" - "\xf5\xec\x32\x74\xa3\xb8\x03\x88" - "\x52\xfc\x2e\x56\x3f\xa1\xf0\x9f" - "\x84\x5e\x46\xed\x20\x89\xb6\x44" - "\x8d\xd0\xed\x54\x47\x16\xbe\x95" - "\x8a\xb3\x6b\x72\xc4\x32\x52\x13" - "\x1b\xb0\x82\xbe\xac\xf9\x70\xa6" - "\x44\x18\xdd\x8c\x6e\xca\x6e\x45" - "\x8f\x1e\x10\x07\x57\x25\x98\x7b" - "\x17\x8c\x78\xdd\x80\xa7\xd9\xd8" - "\x63\xaf\xb9\x67\x57\xfd\xbc\xdb" - "\x44\xe9\xc5\x65\xd1\xc7\x3b\xff" - "\x20\xa0\x80\x1a\xc3\x9a\xad\x5e" - "\x5d\x3b\xd3\x07\xd9\xf5\xfd\x3d" - "\x4a\x8b\xa8\xd2\x6e\x7a\x51\x65" - "\x6c\x8e\x95\xe0\x45\xc9\x5f\x4a" - "\x09\x3c\x3d\x71\x7f\x0c\x84\x2a" - "\xc8\x48\x52\x1a\xc2\xd5\xd6\x78" - "\x92\x1e\xa0\x90\x2e\xea\xf0\xf3" - "\xdc\x0f\xb1\xaf\x0d\x9b\x06\x2e" - "\x35\x10\x30\x82\x0d\xe7\xc5\x9b" - "\xde\x44\x18\xbd\x9f\xd1\x45\xa9" - "\x7b\x7a\x4a\xad\x35\x65\x27\xca" - "\xb2\xc3\xd4\x9b\x71\x86\x70\xee" - "\xf1\x89\x3b\x85\x4b\x5b\xaa\xaf" - "\xfc\x42\xc8\x31\x59\xbe\x16\x60" - "\x4f\xf9\xfa\x12\xea\xd0\xa7\x14" - "\xf0\x7a\xf3\xd5\x8d\xbd\x81\xef" - "\x52\x7f\x29\x51\x94\x20\x67\x3c" - "\xd1\xaf\x77\x9f\x22\x5a\x4e\x63" - "\xe7\xff\x73\x25\xd1\xdd\x96\x8a" - "\x98\x52\x6d\xf3\xac\x3e\xf2\x18" - "\x6d\xf6\x0a\x29\xa6\x34\x3d\xed" - "\xe3\x27\x0d\x9d\x0a\x02\x44\x7e" - "\x5a\x7e\x67\x0f\x0a\x9e\xd6\xad" - "\x91\xe6\x4d\x81\x8c\x5c\x59\xaa" - "\xfb\xeb\x56\x53\xd2\x7d\x4c\x81" - "\x65\x53\x0f\x41\x11\xbd\x98\x99" - "\xf9\xc6\xfa\x51\x2e\xa3\xdd\x8d" - "\x84\x98\xf9\x34\xed\x33\x2a\x1f" - "\x82\xed\xc1\x73\x98\xd3\x02\xdc" - "\xe6\xc2\x33\x1d\xa2\xb4\xca\x76" - "\x63\x51\x34\x9d\x96\x12\xae\xce" - "\x83\xc9\x76\x5e\xa4\x1b\x53\x37" - "\x17\xd5\xc0\x80\x1d\x62\xf8\x3d" - "\x54\x27\x74\xbb\x10\x86\x57\x46" - "\x68\xe1\xed\x14\xe7\x9d\xfc\x84" - "\x47\xbc\xc2\xf8\x19\x4b\x99\xcf" - "\x7a\xe9\xc4\xb8\x8c\x82\x72\x4d" - "\x7b\x4f\x38\x55\x36\x71\x64\xc1" - "\xfc\x5c\x75\x52\x33\x02\x18\xf8" - "\x17\xe1\x2b\xc2\x43\x39\xbd\x76" - "\x9b\x63\x76\x32\x2f\x19\x72\x10" - "\x9f\x21\x0c\xf1\x66\x50\x7f\xa5" - "\x0d\x1f\x46\xe0\xba\xd3\x2f\x3c", - .ilen = 512, - .result = "\x00\x01\x02\x03\x04\x05\x06\x07" - "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" - "\x10\x11\x12\x13\x14\x15\x16\x17" - "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" - "\x20\x21\x22\x23\x24\x25\x26\x27" - "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f" - "\x30\x31\x32\x33\x34\x35\x36\x37" - "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f" - "\x40\x41\x42\x43\x44\x45\x46\x47" - "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f" - "\x50\x51\x52\x53\x54\x55\x56\x57" - "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f" - "\x60\x61\x62\x63\x64\x65\x66\x67" - "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f" - "\x70\x71\x72\x73\x74\x75\x76\x77" - "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f" - "\x80\x81\x82\x83\x84\x85\x86\x87" - "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" - "\x90\x91\x92\x93\x94\x95\x96\x97" - "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" - "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7" - "\xa8\xa9\xaa\xab\xac\xad\xae\xaf" - "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7" - "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf" - "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7" - "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" - "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7" - "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf" - "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7" - "\xe8\xe9\xea\xeb\xec\xed\xee\xef" - "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7" - "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff" - "\x00\x01\x02\x03\x04\x05\x06\x07" - "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" - "\x10\x11\x12\x13\x14\x15\x16\x17" - "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" - "\x20\x21\x22\x23\x24\x25\x26\x27" - "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f" - "\x30\x31\x32\x33\x34\x35\x36\x37" - "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f" - "\x40\x41\x42\x43\x44\x45\x46\x47" - "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f" - "\x50\x51\x52\x53\x54\x55\x56\x57" - "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f" - "\x60\x61\x62\x63\x64\x65\x66\x67" - "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f" - "\x70\x71\x72\x73\x74\x75\x76\x77" - "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f" - "\x80\x81\x82\x83\x84\x85\x86\x87" - "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" - "\x90\x91\x92\x93\x94\x95\x96\x97" - "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" - "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7" - "\xa8\xa9\xaa\xab\xac\xad\xae\xaf" - "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7" - "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf" - "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7" - "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" - "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7" - "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf" - "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7" - "\xe8\xe9\xea\xeb\xec\xed\xee\xef" - "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7" - "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff", - .rlen = 512, - .also_non_np = 1, - .np = 3, - .tap = { 512 - 20, 4, 16 }, - } -}; - /* Cast6 test vectors from RFC 2612 */ static const struct cipher_testvec cast6_enc_tv_template[] = { { -- GitLab From 12513b3c7ec6437891df476369650a4283664efc Mon Sep 17 00:00:00 2001 From: Alistair Strachan Date: Mon, 15 Oct 2018 14:48:49 -0700 Subject: [PATCH 1010/2269] Revert "FROMGIT: crypto: speck - add test vectors for Speck128-XTS" This reverts commit dfd5e0277af3249994cbf23508727322ad293503. Bug: 116008047 Change-Id: Id68b333c66c89bc69e61bdd96d21fa6bc6dbf3b8 Signed-off-by: Alistair Strachan --- crypto/testmgr.c | 9 - crypto/testmgr.h | 687 ----------------------------------------------- 2 files changed, 696 deletions(-) diff --git a/crypto/testmgr.c b/crypto/testmgr.c index 4fc1c990690b2..5e3168a8aad21 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -3602,15 +3602,6 @@ static const struct alg_test_desc alg_test_descs[] = { .dec = __VECS(serpent_xts_dec_tv_template) } } - }, { - .alg = "xts(speck128)", - .test = alg_test_skcipher, - .suite = { - .cipher = { - .enc = __VECS(speck128_xts_enc_tv_template), - .dec = __VECS(speck128_xts_dec_tv_template) - } - } }, { .alg = "xts(twofish)", .test = alg_test_skcipher, diff --git a/crypto/testmgr.h b/crypto/testmgr.h index f165574ef5587..1b6dfd1e522d3 100644 --- a/crypto/testmgr.h +++ b/crypto/testmgr.h @@ -13794,693 +13794,6 @@ static const struct cipher_testvec speck128_dec_tv_template[] = { }, }; -/* - * Speck128-XTS test vectors, taken from the AES-XTS test vectors with the - * result recomputed with Speck128 as the cipher - */ - -static const struct cipher_testvec speck128_xts_enc_tv_template[] = { - { - .key = "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00", - .klen = 32, - .iv = "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00", - .input = "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00", - .ilen = 32, - .result = "\xbe\xa0\xe7\x03\xd7\xfe\xab\x62" - "\x3b\x99\x4a\x64\x74\x77\xac\xed" - "\xd8\xf4\xa6\xcf\xae\xb9\x07\x42" - "\x51\xd9\xb6\x1d\xe0\x5e\xbc\x54", - .rlen = 32, - }, { - .key = "\x11\x11\x11\x11\x11\x11\x11\x11" - "\x11\x11\x11\x11\x11\x11\x11\x11" - "\x22\x22\x22\x22\x22\x22\x22\x22" - "\x22\x22\x22\x22\x22\x22\x22\x22", - .klen = 32, - .iv = "\x33\x33\x33\x33\x33\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00", - .input = "\x44\x44\x44\x44\x44\x44\x44\x44" - "\x44\x44\x44\x44\x44\x44\x44\x44" - "\x44\x44\x44\x44\x44\x44\x44\x44" - "\x44\x44\x44\x44\x44\x44\x44\x44", - .ilen = 32, - .result = "\xfb\x53\x81\x75\x6f\x9f\x34\xad" - "\x7e\x01\xed\x7b\xcc\xda\x4e\x4a" - "\xd4\x84\xa4\x53\xd5\x88\x73\x1b" - "\xfd\xcb\xae\x0d\xf3\x04\xee\xe6", - .rlen = 32, - }, { - .key = "\xff\xfe\xfd\xfc\xfb\xfa\xf9\xf8" - "\xf7\xf6\xf5\xf4\xf3\xf2\xf1\xf0" - "\x22\x22\x22\x22\x22\x22\x22\x22" - "\x22\x22\x22\x22\x22\x22\x22\x22", - .klen = 32, - .iv = "\x33\x33\x33\x33\x33\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00", - .input = "\x44\x44\x44\x44\x44\x44\x44\x44" - "\x44\x44\x44\x44\x44\x44\x44\x44" - "\x44\x44\x44\x44\x44\x44\x44\x44" - "\x44\x44\x44\x44\x44\x44\x44\x44", - .ilen = 32, - .result = "\x21\x52\x84\x15\xd1\xf7\x21\x55" - "\xd9\x75\x4a\xd3\xc5\xdb\x9f\x7d" - "\xda\x63\xb2\xf1\x82\xb0\x89\x59" - "\x86\xd4\xaa\xaa\xdd\xff\x4f\x92", - .rlen = 32, - }, { - .key = "\x27\x18\x28\x18\x28\x45\x90\x45" - "\x23\x53\x60\x28\x74\x71\x35\x26" - "\x31\x41\x59\x26\x53\x58\x97\x93" - "\x23\x84\x62\x64\x33\x83\x27\x95", - .klen = 32, - .iv = "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00", - .input = "\x00\x01\x02\x03\x04\x05\x06\x07" - "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" - "\x10\x11\x12\x13\x14\x15\x16\x17" - "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" - "\x20\x21\x22\x23\x24\x25\x26\x27" - "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f" - "\x30\x31\x32\x33\x34\x35\x36\x37" - "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f" - "\x40\x41\x42\x43\x44\x45\x46\x47" - "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f" - "\x50\x51\x52\x53\x54\x55\x56\x57" - "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f" - "\x60\x61\x62\x63\x64\x65\x66\x67" - "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f" - "\x70\x71\x72\x73\x74\x75\x76\x77" - "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f" - "\x80\x81\x82\x83\x84\x85\x86\x87" - "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" - "\x90\x91\x92\x93\x94\x95\x96\x97" - "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" - "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7" - "\xa8\xa9\xaa\xab\xac\xad\xae\xaf" - "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7" - "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf" - "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7" - "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" - "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7" - "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf" - "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7" - "\xe8\xe9\xea\xeb\xec\xed\xee\xef" - "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7" - "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff" - "\x00\x01\x02\x03\x04\x05\x06\x07" - "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" - "\x10\x11\x12\x13\x14\x15\x16\x17" - "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" - "\x20\x21\x22\x23\x24\x25\x26\x27" - "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f" - "\x30\x31\x32\x33\x34\x35\x36\x37" - "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f" - "\x40\x41\x42\x43\x44\x45\x46\x47" - "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f" - "\x50\x51\x52\x53\x54\x55\x56\x57" - "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f" - "\x60\x61\x62\x63\x64\x65\x66\x67" - "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f" - "\x70\x71\x72\x73\x74\x75\x76\x77" - "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f" - "\x80\x81\x82\x83\x84\x85\x86\x87" - "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" - "\x90\x91\x92\x93\x94\x95\x96\x97" - "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" - "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7" - "\xa8\xa9\xaa\xab\xac\xad\xae\xaf" - "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7" - "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf" - "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7" - "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" - "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7" - "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf" - "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7" - "\xe8\xe9\xea\xeb\xec\xed\xee\xef" - "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7" - "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff", - .ilen = 512, - .result = "\x57\xb5\xf8\x71\x6e\x6d\xdd\x82" - "\x53\xd0\xed\x2d\x30\xc1\x20\xef" - "\x70\x67\x5e\xff\x09\x70\xbb\xc1" - "\x3a\x7b\x48\x26\xd9\x0b\xf4\x48" - "\xbe\xce\xb1\xc7\xb2\x67\xc4\xa7" - "\x76\xf8\x36\x30\xb7\xb4\x9a\xd9" - "\xf5\x9d\xd0\x7b\xc1\x06\x96\x44" - "\x19\xc5\x58\x84\x63\xb9\x12\x68" - "\x68\xc7\xaa\x18\x98\xf2\x1f\x5c" - "\x39\xa6\xd8\x32\x2b\xc3\x51\xfd" - "\x74\x79\x2e\xb4\x44\xd7\x69\xc4" - "\xfc\x29\xe6\xed\x26\x1e\xa6\x9d" - "\x1c\xbe\x00\x0e\x7f\x3a\xca\xfb" - "\x6d\x13\x65\xa0\xf9\x31\x12\xe2" - "\x26\xd1\xec\x2b\x0a\x8b\x59\x99" - "\xa7\x49\xa0\x0e\x09\x33\x85\x50" - "\xc3\x23\xca\x7a\xdd\x13\x45\x5f" - "\xde\x4c\xa7\xcb\x00\x8a\x66\x6f" - "\xa2\xb6\xb1\x2e\xe1\xa0\x18\xf6" - "\xad\xf3\xbd\xeb\xc7\xef\x55\x4f" - "\x79\x91\x8d\x36\x13\x7b\xd0\x4a" - "\x6c\x39\xfb\x53\xb8\x6f\x02\x51" - "\xa5\x20\xac\x24\x1c\x73\x59\x73" - "\x58\x61\x3a\x87\x58\xb3\x20\x56" - "\x39\x06\x2b\x4d\xd3\x20\x2b\x89" - "\x3f\xa2\xf0\x96\xeb\x7f\xa4\xcd" - "\x11\xae\xbd\xcb\x3a\xb4\xd9\x91" - "\x09\x35\x71\x50\x65\xac\x92\xe3" - "\x7b\x32\xc0\x7a\xdd\xd4\xc3\x92" - "\x6f\xeb\x79\xde\x6f\xd3\x25\xc9" - "\xcd\x63\xf5\x1e\x7a\x3b\x26\x9d" - "\x77\x04\x80\xa9\xbf\x38\xb5\xbd" - "\xb8\x05\x07\xbd\xfd\xab\x7b\xf8" - "\x2a\x26\xcc\x49\x14\x6d\x55\x01" - "\x06\x94\xd8\xb2\x2d\x53\x83\x1b" - "\x8f\xd4\xdd\x57\x12\x7e\x18\xba" - "\x8e\xe2\x4d\x80\xef\x7e\x6b\x9d" - "\x24\xa9\x60\xa4\x97\x85\x86\x2a" - "\x01\x00\x09\xf1\xcb\x4a\x24\x1c" - "\xd8\xf6\xe6\x5b\xe7\x5d\xf2\xc4" - "\x97\x1c\x10\xc6\x4d\x66\x4f\x98" - "\x87\x30\xac\xd5\xea\x73\x49\x10" - "\x80\xea\xe5\x5f\x4d\x5f\x03\x33" - "\x66\x02\x35\x3d\x60\x06\x36\x4f" - "\x14\x1c\xd8\x07\x1f\x78\xd0\xf8" - "\x4f\x6c\x62\x7c\x15\xa5\x7c\x28" - "\x7c\xcc\xeb\x1f\xd1\x07\x90\x93" - "\x7e\xc2\xa8\x3a\x80\xc0\xf5\x30" - "\xcc\x75\xcf\x16\x26\xa9\x26\x3b" - "\xe7\x68\x2f\x15\x21\x5b\xe4\x00" - "\xbd\x48\x50\xcd\x75\x70\xc4\x62" - "\xbb\x41\xfb\x89\x4a\x88\x3b\x3b" - "\x51\x66\x02\x69\x04\x97\x36\xd4" - "\x75\xae\x0b\xa3\x42\xf8\xca\x79" - "\x8f\x93\xe9\xcc\x38\xbd\xd6\xd2" - "\xf9\x70\x4e\xc3\x6a\x8e\x25\xbd" - "\xea\x15\x5a\xa0\x85\x7e\x81\x0d" - "\x03\xe7\x05\x39\xf5\x05\x26\xee" - "\xec\xaa\x1f\x3d\xc9\x98\x76\x01" - "\x2c\xf4\xfc\xa3\x88\x77\x38\xc4" - "\x50\x65\x50\x6d\x04\x1f\xdf\x5a" - "\xaa\xf2\x01\xa9\xc1\x8d\xee\xca" - "\x47\x26\xef\x39\xb8\xb4\xf2\xd1" - "\xd6\xbb\x1b\x2a\xc1\x34\x14\xcf", - .rlen = 512, - }, { - .key = "\x27\x18\x28\x18\x28\x45\x90\x45" - "\x23\x53\x60\x28\x74\x71\x35\x26" - "\x62\x49\x77\x57\x24\x70\x93\x69" - "\x99\x59\x57\x49\x66\x96\x76\x27" - "\x31\x41\x59\x26\x53\x58\x97\x93" - "\x23\x84\x62\x64\x33\x83\x27\x95" - "\x02\x88\x41\x97\x16\x93\x99\x37" - "\x51\x05\x82\x09\x74\x94\x45\x92", - .klen = 64, - .iv = "\xff\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00", - .input = "\x00\x01\x02\x03\x04\x05\x06\x07" - "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" - "\x10\x11\x12\x13\x14\x15\x16\x17" - "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" - "\x20\x21\x22\x23\x24\x25\x26\x27" - "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f" - "\x30\x31\x32\x33\x34\x35\x36\x37" - "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f" - "\x40\x41\x42\x43\x44\x45\x46\x47" - "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f" - "\x50\x51\x52\x53\x54\x55\x56\x57" - "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f" - "\x60\x61\x62\x63\x64\x65\x66\x67" - "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f" - "\x70\x71\x72\x73\x74\x75\x76\x77" - "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f" - "\x80\x81\x82\x83\x84\x85\x86\x87" - "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" - "\x90\x91\x92\x93\x94\x95\x96\x97" - "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" - "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7" - "\xa8\xa9\xaa\xab\xac\xad\xae\xaf" - "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7" - "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf" - "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7" - "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" - "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7" - "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf" - "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7" - "\xe8\xe9\xea\xeb\xec\xed\xee\xef" - "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7" - "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff" - "\x00\x01\x02\x03\x04\x05\x06\x07" - "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" - "\x10\x11\x12\x13\x14\x15\x16\x17" - "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" - "\x20\x21\x22\x23\x24\x25\x26\x27" - "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f" - "\x30\x31\x32\x33\x34\x35\x36\x37" - "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f" - "\x40\x41\x42\x43\x44\x45\x46\x47" - "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f" - "\x50\x51\x52\x53\x54\x55\x56\x57" - "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f" - "\x60\x61\x62\x63\x64\x65\x66\x67" - "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f" - "\x70\x71\x72\x73\x74\x75\x76\x77" - "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f" - "\x80\x81\x82\x83\x84\x85\x86\x87" - "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" - "\x90\x91\x92\x93\x94\x95\x96\x97" - "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" - "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7" - "\xa8\xa9\xaa\xab\xac\xad\xae\xaf" - "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7" - "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf" - "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7" - "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" - "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7" - "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf" - "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7" - "\xe8\xe9\xea\xeb\xec\xed\xee\xef" - "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7" - "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff", - .ilen = 512, - .result = "\xc5\x85\x2a\x4b\x73\xe4\xf6\xf1" - "\x7e\xf9\xf6\xe9\xa3\x73\x36\xcb" - "\xaa\xb6\x22\xb0\x24\x6e\x3d\x73" - "\x92\x99\xde\xd3\x76\xed\xcd\x63" - "\x64\x3a\x22\x57\xc1\x43\x49\xd4" - "\x79\x36\x31\x19\x62\xae\x10\x7e" - "\x7d\xcf\x7a\xe2\x6b\xce\x27\xfa" - "\xdc\x3d\xd9\x83\xd3\x42\x4c\xe0" - "\x1b\xd6\x1d\x1a\x6f\xd2\x03\x00" - "\xfc\x81\x99\x8a\x14\x62\xf5\x7e" - "\x0d\xe7\x12\xe8\x17\x9d\x0b\xec" - "\xe2\xf7\xc9\xa7\x63\xd1\x79\xb6" - "\x62\x62\x37\xfe\x0a\x4c\x4a\x37" - "\x70\xc7\x5e\x96\x5f\xbc\x8e\x9e" - "\x85\x3c\x4f\x26\x64\x85\xbc\x68" - "\xb0\xe0\x86\x5e\x26\x41\xce\x11" - "\x50\xda\x97\x14\xe9\x9e\xc7\x6d" - "\x3b\xdc\x43\xde\x2b\x27\x69\x7d" - "\xfc\xb0\x28\xbd\x8f\xb1\xc6\x31" - "\x14\x4d\xf0\x74\x37\xfd\x07\x25" - "\x96\x55\xe5\xfc\x9e\x27\x2a\x74" - "\x1b\x83\x4d\x15\x83\xac\x57\xa0" - "\xac\xa5\xd0\x38\xef\x19\x56\x53" - "\x25\x4b\xfc\xce\x04\x23\xe5\x6b" - "\xf6\xc6\x6c\x32\x0b\xb3\x12\xc5" - "\xed\x22\x34\x1c\x5d\xed\x17\x06" - "\x36\xa3\xe6\x77\xb9\x97\x46\xb8" - "\xe9\x3f\x7e\xc7\xbc\x13\x5c\xdc" - "\x6e\x3f\x04\x5e\xd1\x59\xa5\x82" - "\x35\x91\x3d\x1b\xe4\x97\x9f\x92" - "\x1c\x5e\x5f\x6f\x41\xd4\x62\xa1" - "\x8d\x39\xfc\x42\xfb\x38\x80\xb9" - "\x0a\xe3\xcc\x6a\x93\xd9\x7a\xb1" - "\xe9\x69\xaf\x0a\x6b\x75\x38\xa7" - "\xa1\xbf\xf7\xda\x95\x93\x4b\x78" - "\x19\xf5\x94\xf9\xd2\x00\x33\x37" - "\xcf\xf5\x9e\x9c\xf3\xcc\xa6\xee" - "\x42\xb2\x9e\x2c\x5f\x48\x23\x26" - "\x15\x25\x17\x03\x3d\xfe\x2c\xfc" - "\xeb\xba\xda\xe0\x00\x05\xb6\xa6" - "\x07\xb3\xe8\x36\x5b\xec\x5b\xbf" - "\xd6\x5b\x00\x74\xc6\x97\xf1\x6a" - "\x49\xa1\xc3\xfa\x10\x52\xb9\x14" - "\xad\xb7\x73\xf8\x78\x12\xc8\x59" - "\x17\x80\x4c\x57\x39\xf1\x6d\x80" - "\x25\x77\x0f\x5e\x7d\xf0\xaf\x21" - "\xec\xce\xb7\xc8\x02\x8a\xed\x53" - "\x2c\x25\x68\x2e\x1f\x85\x5e\x67" - "\xd1\x07\x7a\x3a\x89\x08\xe0\x34" - "\xdc\xdb\x26\xb4\x6b\x77\xfc\x40" - "\x31\x15\x72\xa0\xf0\x73\xd9\x3b" - "\xd5\xdb\xfe\xfc\x8f\xa9\x44\xa2" - "\x09\x9f\xc6\x33\xe5\xe2\x88\xe8" - "\xf3\xf0\x1a\xf4\xce\x12\x0f\xd6" - "\xf7\x36\xe6\xa4\xf4\x7a\x10\x58" - "\xcc\x1f\x48\x49\x65\x47\x75\xe9" - "\x28\xe1\x65\x7b\xf2\xc4\xb5\x07" - "\xf2\xec\x76\xd8\x8f\x09\xf3\x16" - "\xa1\x51\x89\x3b\xeb\x96\x42\xac" - "\x65\xe0\x67\x63\x29\xdc\xb4\x7d" - "\xf2\x41\x51\x6a\xcb\xde\x3c\xfb" - "\x66\x8d\x13\xca\xe0\x59\x2a\x00" - "\xc9\x53\x4c\xe6\x9e\xe2\x73\xd5" - "\x67\x19\xb2\xbd\x9a\x63\xd7\x5c", - .rlen = 512, - .also_non_np = 1, - .np = 3, - .tap = { 512 - 20, 4, 16 }, - } -}; - -static const struct cipher_testvec speck128_xts_dec_tv_template[] = { - { - .key = "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00", - .klen = 32, - .iv = "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00", - .input = "\xbe\xa0\xe7\x03\xd7\xfe\xab\x62" - "\x3b\x99\x4a\x64\x74\x77\xac\xed" - "\xd8\xf4\xa6\xcf\xae\xb9\x07\x42" - "\x51\xd9\xb6\x1d\xe0\x5e\xbc\x54", - .ilen = 32, - .result = "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00", - .rlen = 32, - }, { - .key = "\x11\x11\x11\x11\x11\x11\x11\x11" - "\x11\x11\x11\x11\x11\x11\x11\x11" - "\x22\x22\x22\x22\x22\x22\x22\x22" - "\x22\x22\x22\x22\x22\x22\x22\x22", - .klen = 32, - .iv = "\x33\x33\x33\x33\x33\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00", - .input = "\xfb\x53\x81\x75\x6f\x9f\x34\xad" - "\x7e\x01\xed\x7b\xcc\xda\x4e\x4a" - "\xd4\x84\xa4\x53\xd5\x88\x73\x1b" - "\xfd\xcb\xae\x0d\xf3\x04\xee\xe6", - .ilen = 32, - .result = "\x44\x44\x44\x44\x44\x44\x44\x44" - "\x44\x44\x44\x44\x44\x44\x44\x44" - "\x44\x44\x44\x44\x44\x44\x44\x44" - "\x44\x44\x44\x44\x44\x44\x44\x44", - .rlen = 32, - }, { - .key = "\xff\xfe\xfd\xfc\xfb\xfa\xf9\xf8" - "\xf7\xf6\xf5\xf4\xf3\xf2\xf1\xf0" - "\x22\x22\x22\x22\x22\x22\x22\x22" - "\x22\x22\x22\x22\x22\x22\x22\x22", - .klen = 32, - .iv = "\x33\x33\x33\x33\x33\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00", - .input = "\x21\x52\x84\x15\xd1\xf7\x21\x55" - "\xd9\x75\x4a\xd3\xc5\xdb\x9f\x7d" - "\xda\x63\xb2\xf1\x82\xb0\x89\x59" - "\x86\xd4\xaa\xaa\xdd\xff\x4f\x92", - .ilen = 32, - .result = "\x44\x44\x44\x44\x44\x44\x44\x44" - "\x44\x44\x44\x44\x44\x44\x44\x44" - "\x44\x44\x44\x44\x44\x44\x44\x44" - "\x44\x44\x44\x44\x44\x44\x44\x44", - .rlen = 32, - }, { - .key = "\x27\x18\x28\x18\x28\x45\x90\x45" - "\x23\x53\x60\x28\x74\x71\x35\x26" - "\x31\x41\x59\x26\x53\x58\x97\x93" - "\x23\x84\x62\x64\x33\x83\x27\x95", - .klen = 32, - .iv = "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00", - .input = "\x57\xb5\xf8\x71\x6e\x6d\xdd\x82" - "\x53\xd0\xed\x2d\x30\xc1\x20\xef" - "\x70\x67\x5e\xff\x09\x70\xbb\xc1" - "\x3a\x7b\x48\x26\xd9\x0b\xf4\x48" - "\xbe\xce\xb1\xc7\xb2\x67\xc4\xa7" - "\x76\xf8\x36\x30\xb7\xb4\x9a\xd9" - "\xf5\x9d\xd0\x7b\xc1\x06\x96\x44" - "\x19\xc5\x58\x84\x63\xb9\x12\x68" - "\x68\xc7\xaa\x18\x98\xf2\x1f\x5c" - "\x39\xa6\xd8\x32\x2b\xc3\x51\xfd" - "\x74\x79\x2e\xb4\x44\xd7\x69\xc4" - "\xfc\x29\xe6\xed\x26\x1e\xa6\x9d" - "\x1c\xbe\x00\x0e\x7f\x3a\xca\xfb" - "\x6d\x13\x65\xa0\xf9\x31\x12\xe2" - "\x26\xd1\xec\x2b\x0a\x8b\x59\x99" - "\xa7\x49\xa0\x0e\x09\x33\x85\x50" - "\xc3\x23\xca\x7a\xdd\x13\x45\x5f" - "\xde\x4c\xa7\xcb\x00\x8a\x66\x6f" - "\xa2\xb6\xb1\x2e\xe1\xa0\x18\xf6" - "\xad\xf3\xbd\xeb\xc7\xef\x55\x4f" - "\x79\x91\x8d\x36\x13\x7b\xd0\x4a" - "\x6c\x39\xfb\x53\xb8\x6f\x02\x51" - "\xa5\x20\xac\x24\x1c\x73\x59\x73" - "\x58\x61\x3a\x87\x58\xb3\x20\x56" - "\x39\x06\x2b\x4d\xd3\x20\x2b\x89" - "\x3f\xa2\xf0\x96\xeb\x7f\xa4\xcd" - "\x11\xae\xbd\xcb\x3a\xb4\xd9\x91" - "\x09\x35\x71\x50\x65\xac\x92\xe3" - "\x7b\x32\xc0\x7a\xdd\xd4\xc3\x92" - "\x6f\xeb\x79\xde\x6f\xd3\x25\xc9" - "\xcd\x63\xf5\x1e\x7a\x3b\x26\x9d" - "\x77\x04\x80\xa9\xbf\x38\xb5\xbd" - "\xb8\x05\x07\xbd\xfd\xab\x7b\xf8" - "\x2a\x26\xcc\x49\x14\x6d\x55\x01" - "\x06\x94\xd8\xb2\x2d\x53\x83\x1b" - "\x8f\xd4\xdd\x57\x12\x7e\x18\xba" - "\x8e\xe2\x4d\x80\xef\x7e\x6b\x9d" - "\x24\xa9\x60\xa4\x97\x85\x86\x2a" - "\x01\x00\x09\xf1\xcb\x4a\x24\x1c" - "\xd8\xf6\xe6\x5b\xe7\x5d\xf2\xc4" - "\x97\x1c\x10\xc6\x4d\x66\x4f\x98" - "\x87\x30\xac\xd5\xea\x73\x49\x10" - "\x80\xea\xe5\x5f\x4d\x5f\x03\x33" - "\x66\x02\x35\x3d\x60\x06\x36\x4f" - "\x14\x1c\xd8\x07\x1f\x78\xd0\xf8" - "\x4f\x6c\x62\x7c\x15\xa5\x7c\x28" - "\x7c\xcc\xeb\x1f\xd1\x07\x90\x93" - "\x7e\xc2\xa8\x3a\x80\xc0\xf5\x30" - "\xcc\x75\xcf\x16\x26\xa9\x26\x3b" - "\xe7\x68\x2f\x15\x21\x5b\xe4\x00" - "\xbd\x48\x50\xcd\x75\x70\xc4\x62" - "\xbb\x41\xfb\x89\x4a\x88\x3b\x3b" - "\x51\x66\x02\x69\x04\x97\x36\xd4" - "\x75\xae\x0b\xa3\x42\xf8\xca\x79" - "\x8f\x93\xe9\xcc\x38\xbd\xd6\xd2" - "\xf9\x70\x4e\xc3\x6a\x8e\x25\xbd" - "\xea\x15\x5a\xa0\x85\x7e\x81\x0d" - "\x03\xe7\x05\x39\xf5\x05\x26\xee" - "\xec\xaa\x1f\x3d\xc9\x98\x76\x01" - "\x2c\xf4\xfc\xa3\x88\x77\x38\xc4" - "\x50\x65\x50\x6d\x04\x1f\xdf\x5a" - "\xaa\xf2\x01\xa9\xc1\x8d\xee\xca" - "\x47\x26\xef\x39\xb8\xb4\xf2\xd1" - "\xd6\xbb\x1b\x2a\xc1\x34\x14\xcf", - .ilen = 512, - .result = "\x00\x01\x02\x03\x04\x05\x06\x07" - "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" - "\x10\x11\x12\x13\x14\x15\x16\x17" - "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" - "\x20\x21\x22\x23\x24\x25\x26\x27" - "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f" - "\x30\x31\x32\x33\x34\x35\x36\x37" - "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f" - "\x40\x41\x42\x43\x44\x45\x46\x47" - "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f" - "\x50\x51\x52\x53\x54\x55\x56\x57" - "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f" - "\x60\x61\x62\x63\x64\x65\x66\x67" - "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f" - "\x70\x71\x72\x73\x74\x75\x76\x77" - "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f" - "\x80\x81\x82\x83\x84\x85\x86\x87" - "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" - "\x90\x91\x92\x93\x94\x95\x96\x97" - "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" - "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7" - "\xa8\xa9\xaa\xab\xac\xad\xae\xaf" - "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7" - "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf" - "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7" - "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" - "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7" - "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf" - "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7" - "\xe8\xe9\xea\xeb\xec\xed\xee\xef" - "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7" - "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff" - "\x00\x01\x02\x03\x04\x05\x06\x07" - "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" - "\x10\x11\x12\x13\x14\x15\x16\x17" - "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" - "\x20\x21\x22\x23\x24\x25\x26\x27" - "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f" - "\x30\x31\x32\x33\x34\x35\x36\x37" - "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f" - "\x40\x41\x42\x43\x44\x45\x46\x47" - "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f" - "\x50\x51\x52\x53\x54\x55\x56\x57" - "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f" - "\x60\x61\x62\x63\x64\x65\x66\x67" - "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f" - "\x70\x71\x72\x73\x74\x75\x76\x77" - "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f" - "\x80\x81\x82\x83\x84\x85\x86\x87" - "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" - "\x90\x91\x92\x93\x94\x95\x96\x97" - "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" - "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7" - "\xa8\xa9\xaa\xab\xac\xad\xae\xaf" - "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7" - "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf" - "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7" - "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" - "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7" - "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf" - "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7" - "\xe8\xe9\xea\xeb\xec\xed\xee\xef" - "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7" - "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff", - .rlen = 512, - }, { - .key = "\x27\x18\x28\x18\x28\x45\x90\x45" - "\x23\x53\x60\x28\x74\x71\x35\x26" - "\x62\x49\x77\x57\x24\x70\x93\x69" - "\x99\x59\x57\x49\x66\x96\x76\x27" - "\x31\x41\x59\x26\x53\x58\x97\x93" - "\x23\x84\x62\x64\x33\x83\x27\x95" - "\x02\x88\x41\x97\x16\x93\x99\x37" - "\x51\x05\x82\x09\x74\x94\x45\x92", - .klen = 64, - .iv = "\xff\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00", - .input = "\xc5\x85\x2a\x4b\x73\xe4\xf6\xf1" - "\x7e\xf9\xf6\xe9\xa3\x73\x36\xcb" - "\xaa\xb6\x22\xb0\x24\x6e\x3d\x73" - "\x92\x99\xde\xd3\x76\xed\xcd\x63" - "\x64\x3a\x22\x57\xc1\x43\x49\xd4" - "\x79\x36\x31\x19\x62\xae\x10\x7e" - "\x7d\xcf\x7a\xe2\x6b\xce\x27\xfa" - "\xdc\x3d\xd9\x83\xd3\x42\x4c\xe0" - "\x1b\xd6\x1d\x1a\x6f\xd2\x03\x00" - "\xfc\x81\x99\x8a\x14\x62\xf5\x7e" - "\x0d\xe7\x12\xe8\x17\x9d\x0b\xec" - "\xe2\xf7\xc9\xa7\x63\xd1\x79\xb6" - "\x62\x62\x37\xfe\x0a\x4c\x4a\x37" - "\x70\xc7\x5e\x96\x5f\xbc\x8e\x9e" - "\x85\x3c\x4f\x26\x64\x85\xbc\x68" - "\xb0\xe0\x86\x5e\x26\x41\xce\x11" - "\x50\xda\x97\x14\xe9\x9e\xc7\x6d" - "\x3b\xdc\x43\xde\x2b\x27\x69\x7d" - "\xfc\xb0\x28\xbd\x8f\xb1\xc6\x31" - "\x14\x4d\xf0\x74\x37\xfd\x07\x25" - "\x96\x55\xe5\xfc\x9e\x27\x2a\x74" - "\x1b\x83\x4d\x15\x83\xac\x57\xa0" - "\xac\xa5\xd0\x38\xef\x19\x56\x53" - "\x25\x4b\xfc\xce\x04\x23\xe5\x6b" - "\xf6\xc6\x6c\x32\x0b\xb3\x12\xc5" - "\xed\x22\x34\x1c\x5d\xed\x17\x06" - "\x36\xa3\xe6\x77\xb9\x97\x46\xb8" - "\xe9\x3f\x7e\xc7\xbc\x13\x5c\xdc" - "\x6e\x3f\x04\x5e\xd1\x59\xa5\x82" - "\x35\x91\x3d\x1b\xe4\x97\x9f\x92" - "\x1c\x5e\x5f\x6f\x41\xd4\x62\xa1" - "\x8d\x39\xfc\x42\xfb\x38\x80\xb9" - "\x0a\xe3\xcc\x6a\x93\xd9\x7a\xb1" - "\xe9\x69\xaf\x0a\x6b\x75\x38\xa7" - "\xa1\xbf\xf7\xda\x95\x93\x4b\x78" - "\x19\xf5\x94\xf9\xd2\x00\x33\x37" - "\xcf\xf5\x9e\x9c\xf3\xcc\xa6\xee" - "\x42\xb2\x9e\x2c\x5f\x48\x23\x26" - "\x15\x25\x17\x03\x3d\xfe\x2c\xfc" - "\xeb\xba\xda\xe0\x00\x05\xb6\xa6" - "\x07\xb3\xe8\x36\x5b\xec\x5b\xbf" - "\xd6\x5b\x00\x74\xc6\x97\xf1\x6a" - "\x49\xa1\xc3\xfa\x10\x52\xb9\x14" - "\xad\xb7\x73\xf8\x78\x12\xc8\x59" - "\x17\x80\x4c\x57\x39\xf1\x6d\x80" - "\x25\x77\x0f\x5e\x7d\xf0\xaf\x21" - "\xec\xce\xb7\xc8\x02\x8a\xed\x53" - "\x2c\x25\x68\x2e\x1f\x85\x5e\x67" - "\xd1\x07\x7a\x3a\x89\x08\xe0\x34" - "\xdc\xdb\x26\xb4\x6b\x77\xfc\x40" - "\x31\x15\x72\xa0\xf0\x73\xd9\x3b" - "\xd5\xdb\xfe\xfc\x8f\xa9\x44\xa2" - "\x09\x9f\xc6\x33\xe5\xe2\x88\xe8" - "\xf3\xf0\x1a\xf4\xce\x12\x0f\xd6" - "\xf7\x36\xe6\xa4\xf4\x7a\x10\x58" - "\xcc\x1f\x48\x49\x65\x47\x75\xe9" - "\x28\xe1\x65\x7b\xf2\xc4\xb5\x07" - "\xf2\xec\x76\xd8\x8f\x09\xf3\x16" - "\xa1\x51\x89\x3b\xeb\x96\x42\xac" - "\x65\xe0\x67\x63\x29\xdc\xb4\x7d" - "\xf2\x41\x51\x6a\xcb\xde\x3c\xfb" - "\x66\x8d\x13\xca\xe0\x59\x2a\x00" - "\xc9\x53\x4c\xe6\x9e\xe2\x73\xd5" - "\x67\x19\xb2\xbd\x9a\x63\xd7\x5c", - .ilen = 512, - .result = "\x00\x01\x02\x03\x04\x05\x06\x07" - "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" - "\x10\x11\x12\x13\x14\x15\x16\x17" - "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" - "\x20\x21\x22\x23\x24\x25\x26\x27" - "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f" - "\x30\x31\x32\x33\x34\x35\x36\x37" - "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f" - "\x40\x41\x42\x43\x44\x45\x46\x47" - "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f" - "\x50\x51\x52\x53\x54\x55\x56\x57" - "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f" - "\x60\x61\x62\x63\x64\x65\x66\x67" - "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f" - "\x70\x71\x72\x73\x74\x75\x76\x77" - "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f" - "\x80\x81\x82\x83\x84\x85\x86\x87" - "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" - "\x90\x91\x92\x93\x94\x95\x96\x97" - "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" - "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7" - "\xa8\xa9\xaa\xab\xac\xad\xae\xaf" - "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7" - "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf" - "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7" - "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" - "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7" - "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf" - "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7" - "\xe8\xe9\xea\xeb\xec\xed\xee\xef" - "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7" - "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff" - "\x00\x01\x02\x03\x04\x05\x06\x07" - "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" - "\x10\x11\x12\x13\x14\x15\x16\x17" - "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" - "\x20\x21\x22\x23\x24\x25\x26\x27" - "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f" - "\x30\x31\x32\x33\x34\x35\x36\x37" - "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f" - "\x40\x41\x42\x43\x44\x45\x46\x47" - "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f" - "\x50\x51\x52\x53\x54\x55\x56\x57" - "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f" - "\x60\x61\x62\x63\x64\x65\x66\x67" - "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f" - "\x70\x71\x72\x73\x74\x75\x76\x77" - "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f" - "\x80\x81\x82\x83\x84\x85\x86\x87" - "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" - "\x90\x91\x92\x93\x94\x95\x96\x97" - "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" - "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7" - "\xa8\xa9\xaa\xab\xac\xad\xae\xaf" - "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7" - "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf" - "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7" - "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" - "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7" - "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf" - "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7" - "\xe8\xe9\xea\xeb\xec\xed\xee\xef" - "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7" - "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff", - .rlen = 512, - .also_non_np = 1, - .np = 3, - .tap = { 512 - 20, 4, 16 }, - } -}; - static const struct cipher_testvec speck64_enc_tv_template[] = { { /* Speck64/96 */ .key = "\x00\x01\x02\x03\x08\x09\x0a\x0b" -- GitLab From b8bf62f9197c2b307d3fd2e334ca63228816427b Mon Sep 17 00:00:00 2001 From: Alistair Strachan Date: Mon, 15 Oct 2018 14:48:57 -0700 Subject: [PATCH 1011/2269] Revert "FROMGIT: crypto: arm/speck - add NEON-accelerated implementation of Speck-XTS" This reverts commit 7931411663e85917eca8e26423fd28d3cacd1544. Bug: 116008047 Change-Id: I3c76a77dfae5894b46e39266f9f8d7c7294ab615 Signed-off-by: Alistair Strachan --- arch/arm/crypto/Kconfig | 6 - arch/arm/crypto/Makefile | 2 - arch/arm/crypto/speck-neon-core.S | 432 ------------------------------ arch/arm/crypto/speck-neon-glue.c | 288 -------------------- 4 files changed, 728 deletions(-) delete mode 100644 arch/arm/crypto/speck-neon-core.S delete mode 100644 arch/arm/crypto/speck-neon-glue.c diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig index 925d1364727a5..b8e69fe282b8d 100644 --- a/arch/arm/crypto/Kconfig +++ b/arch/arm/crypto/Kconfig @@ -121,10 +121,4 @@ config CRYPTO_CHACHA20_NEON select CRYPTO_BLKCIPHER select CRYPTO_CHACHA20 -config CRYPTO_SPECK_NEON - tristate "NEON accelerated Speck cipher algorithms" - depends on KERNEL_MODE_NEON - select CRYPTO_BLKCIPHER - select CRYPTO_SPECK - endif diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile index 3304e671918d6..c9919c2b7ad11 100644 --- a/arch/arm/crypto/Makefile +++ b/arch/arm/crypto/Makefile @@ -10,7 +10,6 @@ obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sha1-arm-neon.o obj-$(CONFIG_CRYPTO_SHA256_ARM) += sha256-arm.o obj-$(CONFIG_CRYPTO_SHA512_ARM) += sha512-arm.o obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha20-neon.o -obj-$(CONFIG_CRYPTO_SPECK_NEON) += speck-neon.o ce-obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o ce-obj-$(CONFIG_CRYPTO_SHA1_ARM_CE) += sha1-arm-ce.o @@ -54,7 +53,6 @@ ghash-arm-ce-y := ghash-ce-core.o ghash-ce-glue.o crct10dif-arm-ce-y := crct10dif-ce-core.o crct10dif-ce-glue.o crc32-arm-ce-y:= crc32-ce-core.o crc32-ce-glue.o chacha20-neon-y := chacha20-neon-core.o chacha20-neon-glue.o -speck-neon-y := speck-neon-core.o speck-neon-glue.o ifdef REGENERATE_ARM_CRYPTO quiet_cmd_perl = PERL $@ diff --git a/arch/arm/crypto/speck-neon-core.S b/arch/arm/crypto/speck-neon-core.S deleted file mode 100644 index 3c1e203e53b9c..0000000000000 --- a/arch/arm/crypto/speck-neon-core.S +++ /dev/null @@ -1,432 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * NEON-accelerated implementation of Speck128-XTS and Speck64-XTS - * - * Copyright (c) 2018 Google, Inc - * - * Author: Eric Biggers - */ - -#include - - .text - .fpu neon - - // arguments - ROUND_KEYS .req r0 // const {u64,u32} *round_keys - NROUNDS .req r1 // int nrounds - DST .req r2 // void *dst - SRC .req r3 // const void *src - NBYTES .req r4 // unsigned int nbytes - TWEAK .req r5 // void *tweak - - // registers which hold the data being encrypted/decrypted - X0 .req q0 - X0_L .req d0 - X0_H .req d1 - Y0 .req q1 - Y0_H .req d3 - X1 .req q2 - X1_L .req d4 - X1_H .req d5 - Y1 .req q3 - Y1_H .req d7 - X2 .req q4 - X2_L .req d8 - X2_H .req d9 - Y2 .req q5 - Y2_H .req d11 - X3 .req q6 - X3_L .req d12 - X3_H .req d13 - Y3 .req q7 - Y3_H .req d15 - - // the round key, duplicated in all lanes - ROUND_KEY .req q8 - ROUND_KEY_L .req d16 - ROUND_KEY_H .req d17 - - // index vector for vtbl-based 8-bit rotates - ROTATE_TABLE .req d18 - - // multiplication table for updating XTS tweaks - GF128MUL_TABLE .req d19 - GF64MUL_TABLE .req d19 - - // current XTS tweak value(s) - TWEAKV .req q10 - TWEAKV_L .req d20 - TWEAKV_H .req d21 - - TMP0 .req q12 - TMP0_L .req d24 - TMP0_H .req d25 - TMP1 .req q13 - TMP2 .req q14 - TMP3 .req q15 - - .align 4 -.Lror64_8_table: - .byte 1, 2, 3, 4, 5, 6, 7, 0 -.Lror32_8_table: - .byte 1, 2, 3, 0, 5, 6, 7, 4 -.Lrol64_8_table: - .byte 7, 0, 1, 2, 3, 4, 5, 6 -.Lrol32_8_table: - .byte 3, 0, 1, 2, 7, 4, 5, 6 -.Lgf128mul_table: - .byte 0, 0x87 - .fill 14 -.Lgf64mul_table: - .byte 0, 0x1b, (0x1b << 1), (0x1b << 1) ^ 0x1b - .fill 12 - -/* - * _speck_round_128bytes() - Speck encryption round on 128 bytes at a time - * - * Do one Speck encryption round on the 128 bytes (8 blocks for Speck128, 16 for - * Speck64) stored in X0-X3 and Y0-Y3, using the round key stored in all lanes - * of ROUND_KEY. 'n' is the lane size: 64 for Speck128, or 32 for Speck64. - * - * The 8-bit rotates are implemented using vtbl instead of vshr + vsli because - * the vtbl approach is faster on some processors and the same speed on others. - */ -.macro _speck_round_128bytes n - - // x = ror(x, 8) - vtbl.8 X0_L, {X0_L}, ROTATE_TABLE - vtbl.8 X0_H, {X0_H}, ROTATE_TABLE - vtbl.8 X1_L, {X1_L}, ROTATE_TABLE - vtbl.8 X1_H, {X1_H}, ROTATE_TABLE - vtbl.8 X2_L, {X2_L}, ROTATE_TABLE - vtbl.8 X2_H, {X2_H}, ROTATE_TABLE - vtbl.8 X3_L, {X3_L}, ROTATE_TABLE - vtbl.8 X3_H, {X3_H}, ROTATE_TABLE - - // x += y - vadd.u\n X0, Y0 - vadd.u\n X1, Y1 - vadd.u\n X2, Y2 - vadd.u\n X3, Y3 - - // x ^= k - veor X0, ROUND_KEY - veor X1, ROUND_KEY - veor X2, ROUND_KEY - veor X3, ROUND_KEY - - // y = rol(y, 3) - vshl.u\n TMP0, Y0, #3 - vshl.u\n TMP1, Y1, #3 - vshl.u\n TMP2, Y2, #3 - vshl.u\n TMP3, Y3, #3 - vsri.u\n TMP0, Y0, #(\n - 3) - vsri.u\n TMP1, Y1, #(\n - 3) - vsri.u\n TMP2, Y2, #(\n - 3) - vsri.u\n TMP3, Y3, #(\n - 3) - - // y ^= x - veor Y0, TMP0, X0 - veor Y1, TMP1, X1 - veor Y2, TMP2, X2 - veor Y3, TMP3, X3 -.endm - -/* - * _speck_unround_128bytes() - Speck decryption round on 128 bytes at a time - * - * This is the inverse of _speck_round_128bytes(). - */ -.macro _speck_unround_128bytes n - - // y ^= x - veor TMP0, Y0, X0 - veor TMP1, Y1, X1 - veor TMP2, Y2, X2 - veor TMP3, Y3, X3 - - // y = ror(y, 3) - vshr.u\n Y0, TMP0, #3 - vshr.u\n Y1, TMP1, #3 - vshr.u\n Y2, TMP2, #3 - vshr.u\n Y3, TMP3, #3 - vsli.u\n Y0, TMP0, #(\n - 3) - vsli.u\n Y1, TMP1, #(\n - 3) - vsli.u\n Y2, TMP2, #(\n - 3) - vsli.u\n Y3, TMP3, #(\n - 3) - - // x ^= k - veor X0, ROUND_KEY - veor X1, ROUND_KEY - veor X2, ROUND_KEY - veor X3, ROUND_KEY - - // x -= y - vsub.u\n X0, Y0 - vsub.u\n X1, Y1 - vsub.u\n X2, Y2 - vsub.u\n X3, Y3 - - // x = rol(x, 8); - vtbl.8 X0_L, {X0_L}, ROTATE_TABLE - vtbl.8 X0_H, {X0_H}, ROTATE_TABLE - vtbl.8 X1_L, {X1_L}, ROTATE_TABLE - vtbl.8 X1_H, {X1_H}, ROTATE_TABLE - vtbl.8 X2_L, {X2_L}, ROTATE_TABLE - vtbl.8 X2_H, {X2_H}, ROTATE_TABLE - vtbl.8 X3_L, {X3_L}, ROTATE_TABLE - vtbl.8 X3_H, {X3_H}, ROTATE_TABLE -.endm - -.macro _xts128_precrypt_one dst_reg, tweak_buf, tmp - - // Load the next source block - vld1.8 {\dst_reg}, [SRC]! - - // Save the current tweak in the tweak buffer - vst1.8 {TWEAKV}, [\tweak_buf:128]! - - // XOR the next source block with the current tweak - veor \dst_reg, TWEAKV - - /* - * Calculate the next tweak by multiplying the current one by x, - * modulo p(x) = x^128 + x^7 + x^2 + x + 1. - */ - vshr.u64 \tmp, TWEAKV, #63 - vshl.u64 TWEAKV, #1 - veor TWEAKV_H, \tmp\()_L - vtbl.8 \tmp\()_H, {GF128MUL_TABLE}, \tmp\()_H - veor TWEAKV_L, \tmp\()_H -.endm - -.macro _xts64_precrypt_two dst_reg, tweak_buf, tmp - - // Load the next two source blocks - vld1.8 {\dst_reg}, [SRC]! - - // Save the current two tweaks in the tweak buffer - vst1.8 {TWEAKV}, [\tweak_buf:128]! - - // XOR the next two source blocks with the current two tweaks - veor \dst_reg, TWEAKV - - /* - * Calculate the next two tweaks by multiplying the current ones by x^2, - * modulo p(x) = x^64 + x^4 + x^3 + x + 1. - */ - vshr.u64 \tmp, TWEAKV, #62 - vshl.u64 TWEAKV, #2 - vtbl.8 \tmp\()_L, {GF64MUL_TABLE}, \tmp\()_L - vtbl.8 \tmp\()_H, {GF64MUL_TABLE}, \tmp\()_H - veor TWEAKV, \tmp -.endm - -/* - * _speck_xts_crypt() - Speck-XTS encryption/decryption - * - * Encrypt or decrypt NBYTES bytes of data from the SRC buffer to the DST buffer - * using Speck-XTS, specifically the variant with a block size of '2n' and round - * count given by NROUNDS. The expanded round keys are given in ROUND_KEYS, and - * the current XTS tweak value is given in TWEAK. It's assumed that NBYTES is a - * nonzero multiple of 128. - */ -.macro _speck_xts_crypt n, decrypting - push {r4-r7} - mov r7, sp - - /* - * The first four parameters were passed in registers r0-r3. Load the - * additional parameters, which were passed on the stack. - */ - ldr NBYTES, [sp, #16] - ldr TWEAK, [sp, #20] - - /* - * If decrypting, modify the ROUND_KEYS parameter to point to the last - * round key rather than the first, since for decryption the round keys - * are used in reverse order. - */ -.if \decrypting -.if \n == 64 - add ROUND_KEYS, ROUND_KEYS, NROUNDS, lsl #3 - sub ROUND_KEYS, #8 -.else - add ROUND_KEYS, ROUND_KEYS, NROUNDS, lsl #2 - sub ROUND_KEYS, #4 -.endif -.endif - - // Load the index vector for vtbl-based 8-bit rotates -.if \decrypting - ldr r12, =.Lrol\n\()_8_table -.else - ldr r12, =.Lror\n\()_8_table -.endif - vld1.8 {ROTATE_TABLE}, [r12:64] - - // One-time XTS preparation - - /* - * Allocate stack space to store 128 bytes worth of tweaks. For - * performance, this space is aligned to a 16-byte boundary so that we - * can use the load/store instructions that declare 16-byte alignment. - */ - sub sp, #128 - bic sp, #0xf - -.if \n == 64 - // Load first tweak - vld1.8 {TWEAKV}, [TWEAK] - - // Load GF(2^128) multiplication table - ldr r12, =.Lgf128mul_table - vld1.8 {GF128MUL_TABLE}, [r12:64] -.else - // Load first tweak - vld1.8 {TWEAKV_L}, [TWEAK] - - // Load GF(2^64) multiplication table - ldr r12, =.Lgf64mul_table - vld1.8 {GF64MUL_TABLE}, [r12:64] - - // Calculate second tweak, packing it together with the first - vshr.u64 TMP0_L, TWEAKV_L, #63 - vtbl.u8 TMP0_L, {GF64MUL_TABLE}, TMP0_L - vshl.u64 TWEAKV_H, TWEAKV_L, #1 - veor TWEAKV_H, TMP0_L -.endif - -.Lnext_128bytes_\@: - - /* - * Load the source blocks into {X,Y}[0-3], XOR them with their XTS tweak - * values, and save the tweaks on the stack for later. Then - * de-interleave the 'x' and 'y' elements of each block, i.e. make it so - * that the X[0-3] registers contain only the second halves of blocks, - * and the Y[0-3] registers contain only the first halves of blocks. - * (Speck uses the order (y, x) rather than the more intuitive (x, y).) - */ - mov r12, sp -.if \n == 64 - _xts128_precrypt_one X0, r12, TMP0 - _xts128_precrypt_one Y0, r12, TMP0 - _xts128_precrypt_one X1, r12, TMP0 - _xts128_precrypt_one Y1, r12, TMP0 - _xts128_precrypt_one X2, r12, TMP0 - _xts128_precrypt_one Y2, r12, TMP0 - _xts128_precrypt_one X3, r12, TMP0 - _xts128_precrypt_one Y3, r12, TMP0 - vswp X0_L, Y0_H - vswp X1_L, Y1_H - vswp X2_L, Y2_H - vswp X3_L, Y3_H -.else - _xts64_precrypt_two X0, r12, TMP0 - _xts64_precrypt_two Y0, r12, TMP0 - _xts64_precrypt_two X1, r12, TMP0 - _xts64_precrypt_two Y1, r12, TMP0 - _xts64_precrypt_two X2, r12, TMP0 - _xts64_precrypt_two Y2, r12, TMP0 - _xts64_precrypt_two X3, r12, TMP0 - _xts64_precrypt_two Y3, r12, TMP0 - vuzp.32 Y0, X0 - vuzp.32 Y1, X1 - vuzp.32 Y2, X2 - vuzp.32 Y3, X3 -.endif - - // Do the cipher rounds - - mov r12, ROUND_KEYS - mov r6, NROUNDS - -.Lnext_round_\@: -.if \decrypting -.if \n == 64 - vld1.64 ROUND_KEY_L, [r12] - sub r12, #8 - vmov ROUND_KEY_H, ROUND_KEY_L -.else - vld1.32 {ROUND_KEY_L[],ROUND_KEY_H[]}, [r12] - sub r12, #4 -.endif - _speck_unround_128bytes \n -.else -.if \n == 64 - vld1.64 ROUND_KEY_L, [r12]! - vmov ROUND_KEY_H, ROUND_KEY_L -.else - vld1.32 {ROUND_KEY_L[],ROUND_KEY_H[]}, [r12]! -.endif - _speck_round_128bytes \n -.endif - subs r6, r6, #1 - bne .Lnext_round_\@ - - // Re-interleave the 'x' and 'y' elements of each block -.if \n == 64 - vswp X0_L, Y0_H - vswp X1_L, Y1_H - vswp X2_L, Y2_H - vswp X3_L, Y3_H -.else - vzip.32 Y0, X0 - vzip.32 Y1, X1 - vzip.32 Y2, X2 - vzip.32 Y3, X3 -.endif - - // XOR the encrypted/decrypted blocks with the tweaks we saved earlier - mov r12, sp - vld1.8 {TMP0, TMP1}, [r12:128]! - vld1.8 {TMP2, TMP3}, [r12:128]! - veor X0, TMP0 - veor Y0, TMP1 - veor X1, TMP2 - veor Y1, TMP3 - vld1.8 {TMP0, TMP1}, [r12:128]! - vld1.8 {TMP2, TMP3}, [r12:128]! - veor X2, TMP0 - veor Y2, TMP1 - veor X3, TMP2 - veor Y3, TMP3 - - // Store the ciphertext in the destination buffer - vst1.8 {X0, Y0}, [DST]! - vst1.8 {X1, Y1}, [DST]! - vst1.8 {X2, Y2}, [DST]! - vst1.8 {X3, Y3}, [DST]! - - // Continue if there are more 128-byte chunks remaining, else return - subs NBYTES, #128 - bne .Lnext_128bytes_\@ - - // Store the next tweak -.if \n == 64 - vst1.8 {TWEAKV}, [TWEAK] -.else - vst1.8 {TWEAKV_L}, [TWEAK] -.endif - - mov sp, r7 - pop {r4-r7} - bx lr -.endm - -ENTRY(speck128_xts_encrypt_neon) - _speck_xts_crypt n=64, decrypting=0 -ENDPROC(speck128_xts_encrypt_neon) - -ENTRY(speck128_xts_decrypt_neon) - _speck_xts_crypt n=64, decrypting=1 -ENDPROC(speck128_xts_decrypt_neon) - -ENTRY(speck64_xts_encrypt_neon) - _speck_xts_crypt n=32, decrypting=0 -ENDPROC(speck64_xts_encrypt_neon) - -ENTRY(speck64_xts_decrypt_neon) - _speck_xts_crypt n=32, decrypting=1 -ENDPROC(speck64_xts_decrypt_neon) diff --git a/arch/arm/crypto/speck-neon-glue.c b/arch/arm/crypto/speck-neon-glue.c deleted file mode 100644 index f012c3ea998fb..0000000000000 --- a/arch/arm/crypto/speck-neon-glue.c +++ /dev/null @@ -1,288 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * NEON-accelerated implementation of Speck128-XTS and Speck64-XTS - * - * Copyright (c) 2018 Google, Inc - * - * Note: the NIST recommendation for XTS only specifies a 128-bit block size, - * but a 64-bit version (needed for Speck64) is fairly straightforward; the math - * is just done in GF(2^64) instead of GF(2^128), with the reducing polynomial - * x^64 + x^4 + x^3 + x + 1 from the original XEX paper (Rogaway, 2004: - * "Efficient Instantiations of Tweakable Blockciphers and Refinements to Modes - * OCB and PMAC"), represented as 0x1B. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* The assembly functions only handle multiples of 128 bytes */ -#define SPECK_NEON_CHUNK_SIZE 128 - -/* Speck128 */ - -struct speck128_xts_tfm_ctx { - struct speck128_tfm_ctx main_key; - struct speck128_tfm_ctx tweak_key; -}; - -asmlinkage void speck128_xts_encrypt_neon(const u64 *round_keys, int nrounds, - void *dst, const void *src, - unsigned int nbytes, void *tweak); - -asmlinkage void speck128_xts_decrypt_neon(const u64 *round_keys, int nrounds, - void *dst, const void *src, - unsigned int nbytes, void *tweak); - -typedef void (*speck128_crypt_one_t)(const struct speck128_tfm_ctx *, - u8 *, const u8 *); -typedef void (*speck128_xts_crypt_many_t)(const u64 *, int, void *, - const void *, unsigned int, void *); - -static __always_inline int -__speck128_xts_crypt(struct skcipher_request *req, - speck128_crypt_one_t crypt_one, - speck128_xts_crypt_many_t crypt_many) -{ - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - const struct speck128_xts_tfm_ctx *ctx = crypto_skcipher_ctx(tfm); - struct skcipher_walk walk; - le128 tweak; - int err; - - err = skcipher_walk_virt(&walk, req, true); - - crypto_speck128_encrypt(&ctx->tweak_key, (u8 *)&tweak, walk.iv); - - while (walk.nbytes > 0) { - unsigned int nbytes = walk.nbytes; - u8 *dst = walk.dst.virt.addr; - const u8 *src = walk.src.virt.addr; - - if (nbytes >= SPECK_NEON_CHUNK_SIZE && may_use_simd()) { - unsigned int count; - - count = round_down(nbytes, SPECK_NEON_CHUNK_SIZE); - kernel_neon_begin(); - (*crypt_many)(ctx->main_key.round_keys, - ctx->main_key.nrounds, - dst, src, count, &tweak); - kernel_neon_end(); - dst += count; - src += count; - nbytes -= count; - } - - /* Handle any remainder with generic code */ - while (nbytes >= sizeof(tweak)) { - le128_xor((le128 *)dst, (const le128 *)src, &tweak); - (*crypt_one)(&ctx->main_key, dst, dst); - le128_xor((le128 *)dst, (const le128 *)dst, &tweak); - gf128mul_x_ble(&tweak, &tweak); - - dst += sizeof(tweak); - src += sizeof(tweak); - nbytes -= sizeof(tweak); - } - err = skcipher_walk_done(&walk, nbytes); - } - - return err; -} - -static int speck128_xts_encrypt(struct skcipher_request *req) -{ - return __speck128_xts_crypt(req, crypto_speck128_encrypt, - speck128_xts_encrypt_neon); -} - -static int speck128_xts_decrypt(struct skcipher_request *req) -{ - return __speck128_xts_crypt(req, crypto_speck128_decrypt, - speck128_xts_decrypt_neon); -} - -static int speck128_xts_setkey(struct crypto_skcipher *tfm, const u8 *key, - unsigned int keylen) -{ - struct speck128_xts_tfm_ctx *ctx = crypto_skcipher_ctx(tfm); - int err; - - err = xts_verify_key(tfm, key, keylen); - if (err) - return err; - - keylen /= 2; - - err = crypto_speck128_setkey(&ctx->main_key, key, keylen); - if (err) - return err; - - return crypto_speck128_setkey(&ctx->tweak_key, key + keylen, keylen); -} - -/* Speck64 */ - -struct speck64_xts_tfm_ctx { - struct speck64_tfm_ctx main_key; - struct speck64_tfm_ctx tweak_key; -}; - -asmlinkage void speck64_xts_encrypt_neon(const u32 *round_keys, int nrounds, - void *dst, const void *src, - unsigned int nbytes, void *tweak); - -asmlinkage void speck64_xts_decrypt_neon(const u32 *round_keys, int nrounds, - void *dst, const void *src, - unsigned int nbytes, void *tweak); - -typedef void (*speck64_crypt_one_t)(const struct speck64_tfm_ctx *, - u8 *, const u8 *); -typedef void (*speck64_xts_crypt_many_t)(const u32 *, int, void *, - const void *, unsigned int, void *); - -static __always_inline int -__speck64_xts_crypt(struct skcipher_request *req, speck64_crypt_one_t crypt_one, - speck64_xts_crypt_many_t crypt_many) -{ - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - const struct speck64_xts_tfm_ctx *ctx = crypto_skcipher_ctx(tfm); - struct skcipher_walk walk; - __le64 tweak; - int err; - - err = skcipher_walk_virt(&walk, req, true); - - crypto_speck64_encrypt(&ctx->tweak_key, (u8 *)&tweak, walk.iv); - - while (walk.nbytes > 0) { - unsigned int nbytes = walk.nbytes; - u8 *dst = walk.dst.virt.addr; - const u8 *src = walk.src.virt.addr; - - if (nbytes >= SPECK_NEON_CHUNK_SIZE && may_use_simd()) { - unsigned int count; - - count = round_down(nbytes, SPECK_NEON_CHUNK_SIZE); - kernel_neon_begin(); - (*crypt_many)(ctx->main_key.round_keys, - ctx->main_key.nrounds, - dst, src, count, &tweak); - kernel_neon_end(); - dst += count; - src += count; - nbytes -= count; - } - - /* Handle any remainder with generic code */ - while (nbytes >= sizeof(tweak)) { - *(__le64 *)dst = *(__le64 *)src ^ tweak; - (*crypt_one)(&ctx->main_key, dst, dst); - *(__le64 *)dst ^= tweak; - tweak = cpu_to_le64((le64_to_cpu(tweak) << 1) ^ - ((tweak & cpu_to_le64(1ULL << 63)) ? - 0x1B : 0)); - dst += sizeof(tweak); - src += sizeof(tweak); - nbytes -= sizeof(tweak); - } - err = skcipher_walk_done(&walk, nbytes); - } - - return err; -} - -static int speck64_xts_encrypt(struct skcipher_request *req) -{ - return __speck64_xts_crypt(req, crypto_speck64_encrypt, - speck64_xts_encrypt_neon); -} - -static int speck64_xts_decrypt(struct skcipher_request *req) -{ - return __speck64_xts_crypt(req, crypto_speck64_decrypt, - speck64_xts_decrypt_neon); -} - -static int speck64_xts_setkey(struct crypto_skcipher *tfm, const u8 *key, - unsigned int keylen) -{ - struct speck64_xts_tfm_ctx *ctx = crypto_skcipher_ctx(tfm); - int err; - - err = xts_verify_key(tfm, key, keylen); - if (err) - return err; - - keylen /= 2; - - err = crypto_speck64_setkey(&ctx->main_key, key, keylen); - if (err) - return err; - - return crypto_speck64_setkey(&ctx->tweak_key, key + keylen, keylen); -} - -static struct skcipher_alg speck_algs[] = { - { - .base.cra_name = "xts(speck128)", - .base.cra_driver_name = "xts-speck128-neon", - .base.cra_priority = 300, - .base.cra_blocksize = SPECK128_BLOCK_SIZE, - .base.cra_ctxsize = sizeof(struct speck128_xts_tfm_ctx), - .base.cra_alignmask = 7, - .base.cra_module = THIS_MODULE, - .min_keysize = 2 * SPECK128_128_KEY_SIZE, - .max_keysize = 2 * SPECK128_256_KEY_SIZE, - .ivsize = SPECK128_BLOCK_SIZE, - .walksize = SPECK_NEON_CHUNK_SIZE, - .setkey = speck128_xts_setkey, - .encrypt = speck128_xts_encrypt, - .decrypt = speck128_xts_decrypt, - }, { - .base.cra_name = "xts(speck64)", - .base.cra_driver_name = "xts-speck64-neon", - .base.cra_priority = 300, - .base.cra_blocksize = SPECK64_BLOCK_SIZE, - .base.cra_ctxsize = sizeof(struct speck64_xts_tfm_ctx), - .base.cra_alignmask = 7, - .base.cra_module = THIS_MODULE, - .min_keysize = 2 * SPECK64_96_KEY_SIZE, - .max_keysize = 2 * SPECK64_128_KEY_SIZE, - .ivsize = SPECK64_BLOCK_SIZE, - .walksize = SPECK_NEON_CHUNK_SIZE, - .setkey = speck64_xts_setkey, - .encrypt = speck64_xts_encrypt, - .decrypt = speck64_xts_decrypt, - } -}; - -static int __init speck_neon_module_init(void) -{ - if (!(elf_hwcap & HWCAP_NEON)) - return -ENODEV; - return crypto_register_skciphers(speck_algs, ARRAY_SIZE(speck_algs)); -} - -static void __exit speck_neon_module_exit(void) -{ - crypto_unregister_skciphers(speck_algs, ARRAY_SIZE(speck_algs)); -} - -module_init(speck_neon_module_init); -module_exit(speck_neon_module_exit); - -MODULE_DESCRIPTION("Speck block cipher (NEON-accelerated)"); -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Eric Biggers "); -MODULE_ALIAS_CRYPTO("xts(speck128)"); -MODULE_ALIAS_CRYPTO("xts-speck128-neon"); -MODULE_ALIAS_CRYPTO("xts(speck64)"); -MODULE_ALIAS_CRYPTO("xts-speck64-neon"); -- GitLab From fef6a1c04a3fdf35cb54805033ed35c6e19fdb40 Mon Sep 17 00:00:00 2001 From: Alistair Strachan Date: Mon, 15 Oct 2018 14:49:52 -0700 Subject: [PATCH 1012/2269] Revert "FROMGIT: crypto: speck - export common helpers" This reverts commit b456daecc73ca80d8e0e2c1afdf5b1fd2ed695b2. Bug: 116008047 Change-Id: I9d0a8357be1ab090a793646716771015299fb7fe Signed-off-by: Alistair Strachan --- crypto/speck.c | 90 +++++++++++++++++++----------------------- include/crypto/speck.h | 62 ----------------------------- 2 files changed, 41 insertions(+), 111 deletions(-) delete mode 100644 include/crypto/speck.h diff --git a/crypto/speck.c b/crypto/speck.c index 58aa9f7f91f79..4e80ad76bcd77 100644 --- a/crypto/speck.c +++ b/crypto/speck.c @@ -24,7 +24,6 @@ */ #include -#include #include #include #include @@ -32,6 +31,22 @@ /* Speck128 */ +#define SPECK128_BLOCK_SIZE 16 + +#define SPECK128_128_KEY_SIZE 16 +#define SPECK128_128_NROUNDS 32 + +#define SPECK128_192_KEY_SIZE 24 +#define SPECK128_192_NROUNDS 33 + +#define SPECK128_256_KEY_SIZE 32 +#define SPECK128_256_NROUNDS 34 + +struct speck128_tfm_ctx { + u64 round_keys[SPECK128_256_NROUNDS]; + int nrounds; +}; + static __always_inline void speck128_round(u64 *x, u64 *y, u64 k) { *x = ror64(*x, 8); @@ -50,9 +65,9 @@ static __always_inline void speck128_unround(u64 *x, u64 *y, u64 k) *x = rol64(*x, 8); } -void crypto_speck128_encrypt(const struct speck128_tfm_ctx *ctx, - u8 *out, const u8 *in) +static void speck128_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) { + const struct speck128_tfm_ctx *ctx = crypto_tfm_ctx(tfm); u64 y = get_unaligned_le64(in); u64 x = get_unaligned_le64(in + 8); int i; @@ -63,16 +78,10 @@ void crypto_speck128_encrypt(const struct speck128_tfm_ctx *ctx, put_unaligned_le64(y, out); put_unaligned_le64(x, out + 8); } -EXPORT_SYMBOL_GPL(crypto_speck128_encrypt); - -static void speck128_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) -{ - crypto_speck128_encrypt(crypto_tfm_ctx(tfm), out, in); -} -void crypto_speck128_decrypt(const struct speck128_tfm_ctx *ctx, - u8 *out, const u8 *in) +static void speck128_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) { + const struct speck128_tfm_ctx *ctx = crypto_tfm_ctx(tfm); u64 y = get_unaligned_le64(in); u64 x = get_unaligned_le64(in + 8); int i; @@ -83,16 +92,11 @@ void crypto_speck128_decrypt(const struct speck128_tfm_ctx *ctx, put_unaligned_le64(y, out); put_unaligned_le64(x, out + 8); } -EXPORT_SYMBOL_GPL(crypto_speck128_decrypt); - -static void speck128_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) -{ - crypto_speck128_decrypt(crypto_tfm_ctx(tfm), out, in); -} -int crypto_speck128_setkey(struct speck128_tfm_ctx *ctx, const u8 *key, +static int speck128_setkey(struct crypto_tfm *tfm, const u8 *key, unsigned int keylen) { + struct speck128_tfm_ctx *ctx = crypto_tfm_ctx(tfm); u64 l[3]; u64 k; int i; @@ -134,16 +138,22 @@ int crypto_speck128_setkey(struct speck128_tfm_ctx *ctx, const u8 *key, return 0; } -EXPORT_SYMBOL_GPL(crypto_speck128_setkey); - -static int speck128_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen) -{ - return crypto_speck128_setkey(crypto_tfm_ctx(tfm), key, keylen); -} /* Speck64 */ +#define SPECK64_BLOCK_SIZE 8 + +#define SPECK64_96_KEY_SIZE 12 +#define SPECK64_96_NROUNDS 26 + +#define SPECK64_128_KEY_SIZE 16 +#define SPECK64_128_NROUNDS 27 + +struct speck64_tfm_ctx { + u32 round_keys[SPECK64_128_NROUNDS]; + int nrounds; +}; + static __always_inline void speck64_round(u32 *x, u32 *y, u32 k) { *x = ror32(*x, 8); @@ -162,9 +172,9 @@ static __always_inline void speck64_unround(u32 *x, u32 *y, u32 k) *x = rol32(*x, 8); } -void crypto_speck64_encrypt(const struct speck64_tfm_ctx *ctx, - u8 *out, const u8 *in) +static void speck64_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) { + const struct speck64_tfm_ctx *ctx = crypto_tfm_ctx(tfm); u32 y = get_unaligned_le32(in); u32 x = get_unaligned_le32(in + 4); int i; @@ -175,16 +185,10 @@ void crypto_speck64_encrypt(const struct speck64_tfm_ctx *ctx, put_unaligned_le32(y, out); put_unaligned_le32(x, out + 4); } -EXPORT_SYMBOL_GPL(crypto_speck64_encrypt); -static void speck64_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) -{ - crypto_speck64_encrypt(crypto_tfm_ctx(tfm), out, in); -} - -void crypto_speck64_decrypt(const struct speck64_tfm_ctx *ctx, - u8 *out, const u8 *in) +static void speck64_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) { + const struct speck64_tfm_ctx *ctx = crypto_tfm_ctx(tfm); u32 y = get_unaligned_le32(in); u32 x = get_unaligned_le32(in + 4); int i; @@ -195,16 +199,11 @@ void crypto_speck64_decrypt(const struct speck64_tfm_ctx *ctx, put_unaligned_le32(y, out); put_unaligned_le32(x, out + 4); } -EXPORT_SYMBOL_GPL(crypto_speck64_decrypt); -static void speck64_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) -{ - crypto_speck64_decrypt(crypto_tfm_ctx(tfm), out, in); -} - -int crypto_speck64_setkey(struct speck64_tfm_ctx *ctx, const u8 *key, +static int speck64_setkey(struct crypto_tfm *tfm, const u8 *key, unsigned int keylen) { + struct speck64_tfm_ctx *ctx = crypto_tfm_ctx(tfm); u32 l[3]; u32 k; int i; @@ -237,13 +236,6 @@ int crypto_speck64_setkey(struct speck64_tfm_ctx *ctx, const u8 *key, return 0; } -EXPORT_SYMBOL_GPL(crypto_speck64_setkey); - -static int speck64_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen) -{ - return crypto_speck64_setkey(crypto_tfm_ctx(tfm), key, keylen); -} /* Algorithm definitions */ diff --git a/include/crypto/speck.h b/include/crypto/speck.h deleted file mode 100644 index 73cfc952d4055..0000000000000 --- a/include/crypto/speck.h +++ /dev/null @@ -1,62 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Common values for the Speck algorithm - */ - -#ifndef _CRYPTO_SPECK_H -#define _CRYPTO_SPECK_H - -#include - -/* Speck128 */ - -#define SPECK128_BLOCK_SIZE 16 - -#define SPECK128_128_KEY_SIZE 16 -#define SPECK128_128_NROUNDS 32 - -#define SPECK128_192_KEY_SIZE 24 -#define SPECK128_192_NROUNDS 33 - -#define SPECK128_256_KEY_SIZE 32 -#define SPECK128_256_NROUNDS 34 - -struct speck128_tfm_ctx { - u64 round_keys[SPECK128_256_NROUNDS]; - int nrounds; -}; - -void crypto_speck128_encrypt(const struct speck128_tfm_ctx *ctx, - u8 *out, const u8 *in); - -void crypto_speck128_decrypt(const struct speck128_tfm_ctx *ctx, - u8 *out, const u8 *in); - -int crypto_speck128_setkey(struct speck128_tfm_ctx *ctx, const u8 *key, - unsigned int keysize); - -/* Speck64 */ - -#define SPECK64_BLOCK_SIZE 8 - -#define SPECK64_96_KEY_SIZE 12 -#define SPECK64_96_NROUNDS 26 - -#define SPECK64_128_KEY_SIZE 16 -#define SPECK64_128_NROUNDS 27 - -struct speck64_tfm_ctx { - u32 round_keys[SPECK64_128_NROUNDS]; - int nrounds; -}; - -void crypto_speck64_encrypt(const struct speck64_tfm_ctx *ctx, - u8 *out, const u8 *in); - -void crypto_speck64_decrypt(const struct speck64_tfm_ctx *ctx, - u8 *out, const u8 *in); - -int crypto_speck64_setkey(struct speck64_tfm_ctx *ctx, const u8 *key, - unsigned int keysize); - -#endif /* _CRYPTO_SPECK_H */ -- GitLab From a3ac63b1887338cd9d60b9290648b8cac4102fe9 Mon Sep 17 00:00:00 2001 From: Alistair Strachan Date: Mon, 15 Oct 2018 14:49:56 -0700 Subject: [PATCH 1013/2269] Revert "FROMGIT: crypto: speck - add support for the Speck block cipher" This reverts commit 1b5dd7104e24af451c311c6d1222a0d4ed5f05ba. Bug: 116008047 Change-Id: If9192b30cdb4212fb6c8111d70c532a109695fbd Signed-off-by: Alistair Strachan --- crypto/Kconfig | 14 --- crypto/Makefile | 1 - crypto/speck.c | 299 ----------------------------------------------- crypto/testmgr.c | 18 --- crypto/testmgr.h | 128 -------------------- 5 files changed, 460 deletions(-) delete mode 100644 crypto/speck.c diff --git a/crypto/Kconfig b/crypto/Kconfig index f7bc7e373a46d..e3ab31af413b4 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1468,20 +1468,6 @@ config CRYPTO_SERPENT_AVX2_X86_64 See also: -config CRYPTO_SPECK - tristate "Speck cipher algorithm" - select CRYPTO_ALGAPI - help - Speck is a lightweight block cipher that is tuned for optimal - performance in software (rather than hardware). - - Speck may not be as secure as AES, and should only be used on systems - where AES is not fast enough. - - See also: - - If unsure, say N. - config CRYPTO_TEA tristate "TEA, XTEA and XETA cipher algorithms" select CRYPTO_ALGAPI diff --git a/crypto/Makefile b/crypto/Makefile index 76d889a59d4cc..e24b837d786bc 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -109,7 +109,6 @@ obj-$(CONFIG_CRYPTO_TEA) += tea.o obj-$(CONFIG_CRYPTO_KHAZAD) += khazad.o obj-$(CONFIG_CRYPTO_ANUBIS) += anubis.o obj-$(CONFIG_CRYPTO_SEED) += seed.o -obj-$(CONFIG_CRYPTO_SPECK) += speck.o obj-$(CONFIG_CRYPTO_SALSA20) += salsa20_generic.o obj-$(CONFIG_CRYPTO_CHACHA20) += chacha20_generic.o obj-$(CONFIG_CRYPTO_POLY1305) += poly1305_generic.o diff --git a/crypto/speck.c b/crypto/speck.c deleted file mode 100644 index 4e80ad76bcd77..0000000000000 --- a/crypto/speck.c +++ /dev/null @@ -1,299 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Speck: a lightweight block cipher - * - * Copyright (c) 2018 Google, Inc - * - * Speck has 10 variants, including 5 block sizes. For now we only implement - * the variants Speck128/128, Speck128/192, Speck128/256, Speck64/96, and - * Speck64/128. Speck${B}/${K} denotes the variant with a block size of B bits - * and a key size of K bits. The Speck128 variants are believed to be the most - * secure variants, and they use the same block size and key sizes as AES. The - * Speck64 variants are less secure, but on 32-bit processors are usually - * faster. The remaining variants (Speck32, Speck48, and Speck96) are even less - * secure and/or not as well suited for implementation on either 32-bit or - * 64-bit processors, so are omitted. - * - * Reference: "The Simon and Speck Families of Lightweight Block Ciphers" - * https://eprint.iacr.org/2013/404.pdf - * - * In a correspondence, the Speck designers have also clarified that the words - * should be interpreted in little-endian format, and the words should be - * ordered such that the first word of each block is 'y' rather than 'x', and - * the first key word (rather than the last) becomes the first round key. - */ - -#include -#include -#include -#include -#include - -/* Speck128 */ - -#define SPECK128_BLOCK_SIZE 16 - -#define SPECK128_128_KEY_SIZE 16 -#define SPECK128_128_NROUNDS 32 - -#define SPECK128_192_KEY_SIZE 24 -#define SPECK128_192_NROUNDS 33 - -#define SPECK128_256_KEY_SIZE 32 -#define SPECK128_256_NROUNDS 34 - -struct speck128_tfm_ctx { - u64 round_keys[SPECK128_256_NROUNDS]; - int nrounds; -}; - -static __always_inline void speck128_round(u64 *x, u64 *y, u64 k) -{ - *x = ror64(*x, 8); - *x += *y; - *x ^= k; - *y = rol64(*y, 3); - *y ^= *x; -} - -static __always_inline void speck128_unround(u64 *x, u64 *y, u64 k) -{ - *y ^= *x; - *y = ror64(*y, 3); - *x ^= k; - *x -= *y; - *x = rol64(*x, 8); -} - -static void speck128_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) -{ - const struct speck128_tfm_ctx *ctx = crypto_tfm_ctx(tfm); - u64 y = get_unaligned_le64(in); - u64 x = get_unaligned_le64(in + 8); - int i; - - for (i = 0; i < ctx->nrounds; i++) - speck128_round(&x, &y, ctx->round_keys[i]); - - put_unaligned_le64(y, out); - put_unaligned_le64(x, out + 8); -} - -static void speck128_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) -{ - const struct speck128_tfm_ctx *ctx = crypto_tfm_ctx(tfm); - u64 y = get_unaligned_le64(in); - u64 x = get_unaligned_le64(in + 8); - int i; - - for (i = ctx->nrounds - 1; i >= 0; i--) - speck128_unround(&x, &y, ctx->round_keys[i]); - - put_unaligned_le64(y, out); - put_unaligned_le64(x, out + 8); -} - -static int speck128_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen) -{ - struct speck128_tfm_ctx *ctx = crypto_tfm_ctx(tfm); - u64 l[3]; - u64 k; - int i; - - switch (keylen) { - case SPECK128_128_KEY_SIZE: - k = get_unaligned_le64(key); - l[0] = get_unaligned_le64(key + 8); - ctx->nrounds = SPECK128_128_NROUNDS; - for (i = 0; i < ctx->nrounds; i++) { - ctx->round_keys[i] = k; - speck128_round(&l[0], &k, i); - } - break; - case SPECK128_192_KEY_SIZE: - k = get_unaligned_le64(key); - l[0] = get_unaligned_le64(key + 8); - l[1] = get_unaligned_le64(key + 16); - ctx->nrounds = SPECK128_192_NROUNDS; - for (i = 0; i < ctx->nrounds; i++) { - ctx->round_keys[i] = k; - speck128_round(&l[i % 2], &k, i); - } - break; - case SPECK128_256_KEY_SIZE: - k = get_unaligned_le64(key); - l[0] = get_unaligned_le64(key + 8); - l[1] = get_unaligned_le64(key + 16); - l[2] = get_unaligned_le64(key + 24); - ctx->nrounds = SPECK128_256_NROUNDS; - for (i = 0; i < ctx->nrounds; i++) { - ctx->round_keys[i] = k; - speck128_round(&l[i % 3], &k, i); - } - break; - default: - return -EINVAL; - } - - return 0; -} - -/* Speck64 */ - -#define SPECK64_BLOCK_SIZE 8 - -#define SPECK64_96_KEY_SIZE 12 -#define SPECK64_96_NROUNDS 26 - -#define SPECK64_128_KEY_SIZE 16 -#define SPECK64_128_NROUNDS 27 - -struct speck64_tfm_ctx { - u32 round_keys[SPECK64_128_NROUNDS]; - int nrounds; -}; - -static __always_inline void speck64_round(u32 *x, u32 *y, u32 k) -{ - *x = ror32(*x, 8); - *x += *y; - *x ^= k; - *y = rol32(*y, 3); - *y ^= *x; -} - -static __always_inline void speck64_unround(u32 *x, u32 *y, u32 k) -{ - *y ^= *x; - *y = ror32(*y, 3); - *x ^= k; - *x -= *y; - *x = rol32(*x, 8); -} - -static void speck64_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) -{ - const struct speck64_tfm_ctx *ctx = crypto_tfm_ctx(tfm); - u32 y = get_unaligned_le32(in); - u32 x = get_unaligned_le32(in + 4); - int i; - - for (i = 0; i < ctx->nrounds; i++) - speck64_round(&x, &y, ctx->round_keys[i]); - - put_unaligned_le32(y, out); - put_unaligned_le32(x, out + 4); -} - -static void speck64_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) -{ - const struct speck64_tfm_ctx *ctx = crypto_tfm_ctx(tfm); - u32 y = get_unaligned_le32(in); - u32 x = get_unaligned_le32(in + 4); - int i; - - for (i = ctx->nrounds - 1; i >= 0; i--) - speck64_unround(&x, &y, ctx->round_keys[i]); - - put_unaligned_le32(y, out); - put_unaligned_le32(x, out + 4); -} - -static int speck64_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen) -{ - struct speck64_tfm_ctx *ctx = crypto_tfm_ctx(tfm); - u32 l[3]; - u32 k; - int i; - - switch (keylen) { - case SPECK64_96_KEY_SIZE: - k = get_unaligned_le32(key); - l[0] = get_unaligned_le32(key + 4); - l[1] = get_unaligned_le32(key + 8); - ctx->nrounds = SPECK64_96_NROUNDS; - for (i = 0; i < ctx->nrounds; i++) { - ctx->round_keys[i] = k; - speck64_round(&l[i % 2], &k, i); - } - break; - case SPECK64_128_KEY_SIZE: - k = get_unaligned_le32(key); - l[0] = get_unaligned_le32(key + 4); - l[1] = get_unaligned_le32(key + 8); - l[2] = get_unaligned_le32(key + 12); - ctx->nrounds = SPECK64_128_NROUNDS; - for (i = 0; i < ctx->nrounds; i++) { - ctx->round_keys[i] = k; - speck64_round(&l[i % 3], &k, i); - } - break; - default: - return -EINVAL; - } - - return 0; -} - -/* Algorithm definitions */ - -static struct crypto_alg speck_algs[] = { - { - .cra_name = "speck128", - .cra_driver_name = "speck128-generic", - .cra_priority = 100, - .cra_flags = CRYPTO_ALG_TYPE_CIPHER, - .cra_blocksize = SPECK128_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct speck128_tfm_ctx), - .cra_module = THIS_MODULE, - .cra_u = { - .cipher = { - .cia_min_keysize = SPECK128_128_KEY_SIZE, - .cia_max_keysize = SPECK128_256_KEY_SIZE, - .cia_setkey = speck128_setkey, - .cia_encrypt = speck128_encrypt, - .cia_decrypt = speck128_decrypt - } - } - }, { - .cra_name = "speck64", - .cra_driver_name = "speck64-generic", - .cra_priority = 100, - .cra_flags = CRYPTO_ALG_TYPE_CIPHER, - .cra_blocksize = SPECK64_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct speck64_tfm_ctx), - .cra_module = THIS_MODULE, - .cra_u = { - .cipher = { - .cia_min_keysize = SPECK64_96_KEY_SIZE, - .cia_max_keysize = SPECK64_128_KEY_SIZE, - .cia_setkey = speck64_setkey, - .cia_encrypt = speck64_encrypt, - .cia_decrypt = speck64_decrypt - } - } - } -}; - -static int __init speck_module_init(void) -{ - return crypto_register_algs(speck_algs, ARRAY_SIZE(speck_algs)); -} - -static void __exit speck_module_exit(void) -{ - crypto_unregister_algs(speck_algs, ARRAY_SIZE(speck_algs)); -} - -module_init(speck_module_init); -module_exit(speck_module_exit); - -MODULE_DESCRIPTION("Speck block cipher (generic)"); -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Eric Biggers "); -MODULE_ALIAS_CRYPTO("speck128"); -MODULE_ALIAS_CRYPTO("speck128-generic"); -MODULE_ALIAS_CRYPTO("speck64"); -MODULE_ALIAS_CRYPTO("speck64-generic"); diff --git a/crypto/testmgr.c b/crypto/testmgr.c index 5e3168a8aad21..8a124d3e91131 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -3033,24 +3033,6 @@ static const struct alg_test_desc alg_test_descs[] = { .dec = __VECS(serpent_dec_tv_template) } } - }, { - .alg = "ecb(speck128)", - .test = alg_test_skcipher, - .suite = { - .cipher = { - .enc = __VECS(speck128_enc_tv_template), - .dec = __VECS(speck128_dec_tv_template) - } - } - }, { - .alg = "ecb(speck64)", - .test = alg_test_skcipher, - .suite = { - .cipher = { - .enc = __VECS(speck64_enc_tv_template), - .dec = __VECS(speck64_dec_tv_template) - } - } }, { .alg = "ecb(tea)", .test = alg_test_skcipher, diff --git a/crypto/testmgr.h b/crypto/testmgr.h index 1b6dfd1e522d3..d39431877e81f 100644 --- a/crypto/testmgr.h +++ b/crypto/testmgr.h @@ -13706,134 +13706,6 @@ static const struct cipher_testvec serpent_xts_dec_tv_template[] = { }, }; -/* - * Speck test vectors taken from the original paper: - * "The Simon and Speck Families of Lightweight Block Ciphers" - * https://eprint.iacr.org/2013/404.pdf - * - * Note that the paper does not make byte and word order clear. But it was - * confirmed with the authors that the intended orders are little endian byte - * order and (y, x) word order. Equivalently, the printed test vectors, when - * looking at only the bytes (ignoring the whitespace that divides them into - * words), are backwards: the left-most byte is actually the one with the - * highest memory address, while the right-most byte is actually the one with - * the lowest memory address. - */ - -static const struct cipher_testvec speck128_enc_tv_template[] = { - { /* Speck128/128 */ - .key = "\x00\x01\x02\x03\x04\x05\x06\x07" - "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f", - .klen = 16, - .input = "\x20\x6d\x61\x64\x65\x20\x69\x74" - "\x20\x65\x71\x75\x69\x76\x61\x6c", - .ilen = 16, - .result = "\x18\x0d\x57\x5c\xdf\xfe\x60\x78" - "\x65\x32\x78\x79\x51\x98\x5d\xa6", - .rlen = 16, - }, { /* Speck128/192 */ - .key = "\x00\x01\x02\x03\x04\x05\x06\x07" - "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" - "\x10\x11\x12\x13\x14\x15\x16\x17", - .klen = 24, - .input = "\x65\x6e\x74\x20\x74\x6f\x20\x43" - "\x68\x69\x65\x66\x20\x48\x61\x72", - .ilen = 16, - .result = "\x86\x18\x3c\xe0\x5d\x18\xbc\xf9" - "\x66\x55\x13\x13\x3a\xcf\xe4\x1b", - .rlen = 16, - }, { /* Speck128/256 */ - .key = "\x00\x01\x02\x03\x04\x05\x06\x07" - "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" - "\x10\x11\x12\x13\x14\x15\x16\x17" - "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f", - .klen = 32, - .input = "\x70\x6f\x6f\x6e\x65\x72\x2e\x20" - "\x49\x6e\x20\x74\x68\x6f\x73\x65", - .ilen = 16, - .result = "\x43\x8f\x18\x9c\x8d\xb4\xee\x4e" - "\x3e\xf5\xc0\x05\x04\x01\x09\x41", - .rlen = 16, - }, -}; - -static const struct cipher_testvec speck128_dec_tv_template[] = { - { /* Speck128/128 */ - .key = "\x00\x01\x02\x03\x04\x05\x06\x07" - "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f", - .klen = 16, - .input = "\x18\x0d\x57\x5c\xdf\xfe\x60\x78" - "\x65\x32\x78\x79\x51\x98\x5d\xa6", - .ilen = 16, - .result = "\x20\x6d\x61\x64\x65\x20\x69\x74" - "\x20\x65\x71\x75\x69\x76\x61\x6c", - .rlen = 16, - }, { /* Speck128/192 */ - .key = "\x00\x01\x02\x03\x04\x05\x06\x07" - "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" - "\x10\x11\x12\x13\x14\x15\x16\x17", - .klen = 24, - .input = "\x86\x18\x3c\xe0\x5d\x18\xbc\xf9" - "\x66\x55\x13\x13\x3a\xcf\xe4\x1b", - .ilen = 16, - .result = "\x65\x6e\x74\x20\x74\x6f\x20\x43" - "\x68\x69\x65\x66\x20\x48\x61\x72", - .rlen = 16, - }, { /* Speck128/256 */ - .key = "\x00\x01\x02\x03\x04\x05\x06\x07" - "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" - "\x10\x11\x12\x13\x14\x15\x16\x17" - "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f", - .klen = 32, - .input = "\x43\x8f\x18\x9c\x8d\xb4\xee\x4e" - "\x3e\xf5\xc0\x05\x04\x01\x09\x41", - .ilen = 16, - .result = "\x70\x6f\x6f\x6e\x65\x72\x2e\x20" - "\x49\x6e\x20\x74\x68\x6f\x73\x65", - .rlen = 16, - }, -}; - -static const struct cipher_testvec speck64_enc_tv_template[] = { - { /* Speck64/96 */ - .key = "\x00\x01\x02\x03\x08\x09\x0a\x0b" - "\x10\x11\x12\x13", - .klen = 12, - .input = "\x65\x61\x6e\x73\x20\x46\x61\x74", - .ilen = 8, - .result = "\x6c\x94\x75\x41\xec\x52\x79\x9f", - .rlen = 8, - }, { /* Speck64/128 */ - .key = "\x00\x01\x02\x03\x08\x09\x0a\x0b" - "\x10\x11\x12\x13\x18\x19\x1a\x1b", - .klen = 16, - .input = "\x2d\x43\x75\x74\x74\x65\x72\x3b", - .ilen = 8, - .result = "\x8b\x02\x4e\x45\x48\xa5\x6f\x8c", - .rlen = 8, - }, -}; - -static const struct cipher_testvec speck64_dec_tv_template[] = { - { /* Speck64/96 */ - .key = "\x00\x01\x02\x03\x08\x09\x0a\x0b" - "\x10\x11\x12\x13", - .klen = 12, - .input = "\x6c\x94\x75\x41\xec\x52\x79\x9f", - .ilen = 8, - .result = "\x65\x61\x6e\x73\x20\x46\x61\x74", - .rlen = 8, - }, { /* Speck64/128 */ - .key = "\x00\x01\x02\x03\x08\x09\x0a\x0b" - "\x10\x11\x12\x13\x18\x19\x1a\x1b", - .klen = 16, - .input = "\x8b\x02\x4e\x45\x48\xa5\x6f\x8c", - .ilen = 8, - .result = "\x2d\x43\x75\x74\x74\x65\x72\x3b", - .rlen = 8, - }, -}; - /* Cast6 test vectors from RFC 2612 */ static const struct cipher_testvec cast6_enc_tv_template[] = { { -- GitLab From ff26b00b484bd75ac1988c1ffc0b710ccf25d7ac Mon Sep 17 00:00:00 2001 From: Chris Redpath Date: Tue, 23 Oct 2018 17:43:34 +0100 Subject: [PATCH 1014/2269] ANDROID: sched/fair: initialise util_est values to 0 on fork Since "sched/fair: Align PELT windows between cfs_rq and its se" the upstream kernel has initialised the whole content of sched_avg to zero on fork. When util_est was backported, we missed this and so ended up with util_est values copied from the parent task. Add the zero initialisation which is present upstream and ensure that util_est values always start from a known point. Fixes: 700f1172f7a7 ("BACKPORT: sched/fair: Add util_est on top of PELT") Reported-by: Puja Gupta Cc: Dietmar Eggemann Cc: Abhijeet Dharmapurikar Cc: Patrick Bellasi Cc: Todd Kjos Cc: Saravana Kannan Change-Id: I06995e4320d606a52761d0e773baf28fcd1e2680 Signed-off-by: Chris Redpath --- kernel/sched/fair.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 74ba97b51fa8c..d87193ef09125 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -736,8 +736,10 @@ void init_entity_runnable_average(struct sched_entity *se) { struct sched_avg *sa = &se->avg; - sa->last_update_time = 0; + memset(sa, 0, sizeof(*sa)); /* + * util_avg is initialized in post_init_entity_util_avg. + * util_est should start from zero. * sched_avg's period_contrib should be strictly less then 1024, so * we give it 1023 to make sure it is almost a period (1024us), and * will definitely be update (after enqueue). @@ -752,11 +754,6 @@ void init_entity_runnable_average(struct sched_entity *se) if (entity_is_task(se)) sa->load_avg = scale_load_down(se->load.weight); sa->load_sum = sa->load_avg * LOAD_AVG_MAX; - /* - * At this point, util_avg won't be used in select_task_rq_fair anyway - */ - sa->util_avg = 0; - sa->util_sum = 0; /* when this task enqueue'ed, it will contribute to its cfs_rq's load_avg */ } -- GitLab From 35a066ea5bf97bc12577bf4f8f3f5841ec07038f Mon Sep 17 00:00:00 2001 From: Evgenii Stepanov Date: Mon, 22 Oct 2018 17:48:13 -0700 Subject: [PATCH 1015/2269] Revert "ANDROID: Revert "arm: move ELF_ET_DYN_BASE to 4MB"" This reverts commit 040c05c6e4f823a86d49fe87a480d32b884d7bce. ASan can handle the new memory layout in Android P and later, which means this change can be reverted in 4.14. Bug: 67425063 Signed-off-by: Evgenii Stepanov Change-Id: I128de2797bca0bc17fe991e1cb48788ecf5f25de --- arch/arm/include/asm/elf.h | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/arch/arm/include/asm/elf.h b/arch/arm/include/asm/elf.h index 51f0d2417fa99..8c5ca92a87a99 100644 --- a/arch/arm/include/asm/elf.h +++ b/arch/arm/include/asm/elf.h @@ -113,12 +113,8 @@ int dump_task_regs(struct task_struct *t, elf_gregset_t *elfregs); #define CORE_DUMP_USE_REGSET #define ELF_EXEC_PAGESIZE 4096 -/* This is the location that an ET_DYN program is loaded if exec'ed. Typical - use of this is to invoke "./ld.so someprog" to test out a new version of - the loader. We need to make sure that it is out of the way of the program - that it will "exec", and that there is sufficient room for the brk. */ - -#define ELF_ET_DYN_BASE (TASK_SIZE / 3 * 2) +/* This is the base location for PIE (ET_DYN with INTERP) loads. */ +#define ELF_ET_DYN_BASE 0x400000UL /* When the program starts, a1 contains a pointer to a function to be registered with atexit, as per the SVR4 ABI. A value of 0 means we -- GitLab From 4ed22187defde44f5c49a4e09e478867c84892e5 Mon Sep 17 00:00:00 2001 From: Evgenii Stepanov Date: Wed, 24 Oct 2018 13:37:42 -0700 Subject: [PATCH 1016/2269] Revert "ANDROID: Revert "arm64: move ELF_ET_DYN_BASE to 4GB / 4MB"" This reverts commit 031af365d5498e5d108463dabfe7f4bbf3814497. ASan can handle the new memory layout in Android P and later, which means this change can be reverted in 4.14. Bug: 67425063 Signed-off-by: Evgenii Stepanov Change-Id: I1b09210c521ce65bbdc1e9cdaca2b5e66e097789 --- arch/arm64/include/asm/elf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h index 36d9863cb3cb6..33be513ef24c8 100644 --- a/arch/arm64/include/asm/elf.h +++ b/arch/arm64/include/asm/elf.h @@ -173,7 +173,7 @@ extern int arch_setup_additional_pages(struct linux_binprm *bprm, #ifdef CONFIG_COMPAT /* PIE load location for compat arm. Must match ARM ELF_ET_DYN_BASE. */ -#define COMPAT_ELF_ET_DYN_BASE (2 * TASK_SIZE_32 / 3) +#define COMPAT_ELF_ET_DYN_BASE 0x000400000UL /* AArch32 registers. */ #define COMPAT_ELF_NGREG 18 -- GitLab From e34e1361b682f96dd8e599f657d51733e9359af3 Mon Sep 17 00:00:00 2001 From: Chengguang Xu Date: Thu, 30 Aug 2018 21:33:31 +0800 Subject: [PATCH 1017/2269] f2fs: add additional sanity check in f2fs_acl_from_disk() Add additinal sanity check for irregular case(e.g. corruption). If size of extended attribution is smaller than size of acl header, then return -EINVAL. Signed-off-by: Chengguang Xu Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/acl.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c index 111824199a886..20caf341701d3 100644 --- a/fs/f2fs/acl.c +++ b/fs/f2fs/acl.c @@ -53,6 +53,9 @@ static struct posix_acl *f2fs_acl_from_disk(const char *value, size_t size) struct f2fs_acl_entry *entry = (struct f2fs_acl_entry *)(hdr + 1); const char *end = value + size; + if (size < sizeof(struct f2fs_acl_header)) + return ERR_PTR(-EINVAL); + if (hdr->a_version != cpu_to_le32(F2FS_ACL_VERSION)) return ERR_PTR(-EINVAL); -- GitLab From 85a15dccc53b444cf992684fd8dec0f87e21ba6b Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 4 Sep 2018 03:52:17 +0800 Subject: [PATCH 1018/2269] f2fs: fix to avoid NULL pointer dereference on se->discard_map MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit https://bugzilla.kernel.org/show_bug.cgi?id=200951 These is a NULL pointer dereference issue reported in bugzilla: Hi, in the setup there is a SATA SSD connected to a SATA-to-USB bridge. The disc is "Samsung SSD 850 PRO 256G" which supports TRIM. There are four partitions: sda1: FAT /boot sda2: F2FS / sda3: F2FS /home sda4: F2FS The bridge is ASMT1153e which uses the "uas" driver. There is no TRIM pass-through, so, when mounting it reports: mounting with "discard" option, but the device does not support discard The USB host is USB3.0 and UASP capable. It is the one on RK3399. Given this everything works fine, except there is no TRIM support. In order to enable TRIM a new UDEV rule is added [1]: /etc/udev/rules.d/10-sata-bridge-trim.rules: ACTION=="add|change", ATTRS{idVendor}=="174c", ATTRS{idProduct}=="55aa", SUBSYSTEM=="scsi_disk", ATTR{provisioning_mode}="unmap" After reboot any F2FS write hangs forever and dmesg reports: Unable to handle kernel NULL pointer dereference Also tested on a x86_64 system: works fine even with TRIM enabled. same disc same bridge different usb host controller different cpu architecture not root filesystem Regards, Vicenç. [1] Post #5 in https://bbs.archlinux.org/viewtopic.php?id=236280 Unable to handle kernel NULL pointer dereference at virtual address 000000000000003e Mem abort info: ESR = 0x96000004 Exception class = DABT (current EL), IL = 32 bits SET = 0, FnV = 0 EA = 0, S1PTW = 0 Data abort info: ISV = 0, ISS = 0x00000004 CM = 0, WnR = 0 user pgtable: 4k pages, 48-bit VAs, pgdp = 00000000626e3122 [000000000000003e] pgd=0000000000000000 Internal error: Oops: 96000004 [#1] SMP Modules linked in: overlay snd_soc_hdmi_codec rc_cec dw_hdmi_i2s_audio dw_hdmi_cec snd_soc_simple_card snd_soc_simple_card_utils snd_soc_rockchip_i2s rockchip_rga snd_soc_rockchip_pcm rockchipdrm videobuf2_dma_sg v4l2_mem2mem rtc_rk808 videobuf2_memops analogix_dp videobuf2_v4l2 videobuf2_common dw_hdmi dw_wdt cec rc_core videodev drm_kms_helper media drm rockchip_thermal rockchip_saradc realtek drm_panel_orientation_quirks syscopyarea sysfillrect sysimgblt fb_sys_fops dwmac_rk stmmac_platform stmmac pwm_bl squashfs loop crypto_user gpio_keys hid_kensington CPU: 5 PID: 957 Comm: nvim Not tainted 4.19.0-rc1-1-ARCH #1 Hardware name: Sapphire-RK3399 Board (DT) pstate: 00000005 (nzcv daif -PAN -UAO) pc : update_sit_entry+0x304/0x4b0 lr : update_sit_entry+0x108/0x4b0 sp : ffff00000ca13bd0 x29: ffff00000ca13bd0 x28: 000000000000003e x27: 0000000000000020 x26: 0000000000080000 x25: 0000000000000048 x24: ffff8000ebb85cf8 x23: 0000000000000253 x22: 00000000ffffffff x21: 00000000000535f2 x20: 00000000ffffffdf x19: ffff8000eb9e6800 x18: ffff8000eb9e6be8 x17: 0000000007ce6926 x16: 000000001c83ffa8 x15: 0000000000000000 x14: ffff8000f602df90 x13: 0000000000000006 x12: 0000000000000040 x11: 0000000000000228 x10: 0000000000000000 x9 : 0000000000000000 x8 : 0000000000000000 x7 : 00000000000535f2 x6 : ffff8000ebff3440 x5 : ffff8000ebff3440 x4 : ffff8000ebe3a6c8 x3 : 00000000ffffffff x2 : 0000000000000020 x1 : 0000000000000000 x0 : ffff8000eb9e5800 Process nvim (pid: 957, stack limit = 0x0000000063a78320) Call trace: update_sit_entry+0x304/0x4b0 f2fs_invalidate_blocks+0x98/0x140 truncate_node+0x90/0x400 f2fs_remove_inode_page+0xe8/0x340 f2fs_evict_inode+0x2b0/0x408 evict+0xe0/0x1e0 iput+0x160/0x260 do_unlinkat+0x214/0x298 __arm64_sys_unlinkat+0x3c/0x68 el0_svc_handler+0x94/0x118 el0_svc+0x8/0xc Code: f9400800 b9488400 36080140 f9400f01 (387c4820) ---[ end trace a0f21a307118c477 ]--- The reason is it is possible to enable discard flag on block queue via UDEV, but during mount, f2fs will initialize se->discard_map only if this flag is set, once the flag is set after mount, f2fs may dereference NULL pointer on se->discard_map. So this patch does below changes to fix this issue: - initialize and update se->discard_map all the time. - don't clear DISCARD option if device has no QUEUE_FLAG_DISCARD flag during mount. - don't issue small discard on zoned block device. - introduce some functions to enhance the readability. Signed-off-by: Chao Yu Tested-by: Vicente Bergas Signed-off-by: Jaegeuk Kim --- fs/f2fs/debug.c | 3 +-- fs/f2fs/f2fs.h | 15 ++++++++--- fs/f2fs/file.c | 2 +- fs/f2fs/segment.c | 65 ++++++++++++++++++++--------------------------- fs/f2fs/super.c | 16 +++--------- 5 files changed, 46 insertions(+), 55 deletions(-) diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 214a968962a1d..ebe649d9793cb 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -190,8 +190,7 @@ static void update_mem_info(struct f2fs_sb_info *sbi) si->base_mem += MAIN_SEGS(sbi) * sizeof(struct seg_entry); si->base_mem += f2fs_bitmap_size(MAIN_SEGS(sbi)); si->base_mem += 2 * SIT_VBLOCK_MAP_SIZE * MAIN_SEGS(sbi); - if (f2fs_discard_en(sbi)) - si->base_mem += SIT_VBLOCK_MAP_SIZE * MAIN_SEGS(sbi); + si->base_mem += SIT_VBLOCK_MAP_SIZE * MAIN_SEGS(sbi); si->base_mem += SIT_VBLOCK_MAP_SIZE; if (sbi->segs_per_sec > 1) si->base_mem += MAIN_SECS(sbi) * sizeof(struct sec_entry); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 08f2389c52bee..dd89ff5a0c9bf 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3400,11 +3400,20 @@ static inline int get_blkz_type(struct f2fs_sb_info *sbi, } #endif -static inline bool f2fs_discard_en(struct f2fs_sb_info *sbi) +static inline bool f2fs_hw_should_discard(struct f2fs_sb_info *sbi) { - struct request_queue *q = bdev_get_queue(sbi->sb->s_bdev); + return f2fs_sb_has_blkzoned(sbi->sb); +} - return blk_queue_discard(q) || f2fs_sb_has_blkzoned(sbi->sb); +static inline bool f2fs_hw_support_discard(struct f2fs_sb_info *sbi) +{ + return blk_queue_discard(bdev_get_queue(sbi->sb->s_bdev)); +} + +static inline bool f2fs_realtime_discard_enable(struct f2fs_sb_info *sbi) +{ + return (test_opt(sbi, DISCARD) && f2fs_hw_support_discard(sbi)) || + f2fs_hw_should_discard(sbi); } static inline void set_opt_mode(struct f2fs_sb_info *sbi, unsigned int mt) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 1d46de58f9d4b..6222c881baed9 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1978,7 +1978,7 @@ static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg) if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (!blk_queue_discard(q)) + if (!f2fs_hw_support_discard(F2FS_SB(sb))) return -EOPNOTSUPP; if (copy_from_user(&range, (struct fstrim_range __user *)arg, diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 30779aaa9dbae..c372ff755818f 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1725,11 +1725,11 @@ static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc, struct list_head *head = &SM_I(sbi)->dcc_info->entry_list; int i; - if (se->valid_blocks == max_blocks || !f2fs_discard_en(sbi)) + if (se->valid_blocks == max_blocks || !f2fs_hw_support_discard(sbi)) return false; if (!force) { - if (!test_opt(sbi, DISCARD) || !se->valid_blocks || + if (!f2fs_realtime_discard_enable(sbi) || !se->valid_blocks || SM_I(sbi)->dcc_info->nr_discards >= SM_I(sbi)->dcc_info->max_discards) return false; @@ -1835,7 +1835,7 @@ void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi, dirty_i->nr_dirty[PRE]--; } - if (!test_opt(sbi, DISCARD)) + if (!f2fs_realtime_discard_enable(sbi)) continue; if (force && start >= cpc->trim_start && @@ -2025,8 +2025,7 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) del = 0; } - if (f2fs_discard_en(sbi) && - !f2fs_test_and_set_bit(offset, se->discard_map)) + if (!f2fs_test_and_set_bit(offset, se->discard_map)) sbi->discard_blks--; /* don't overwrite by SSR to keep node chain */ @@ -2054,8 +2053,7 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) del = 0; } - if (f2fs_discard_en(sbi) && - f2fs_test_and_clear_bit(offset, se->discard_map)) + if (f2fs_test_and_clear_bit(offset, se->discard_map)) sbi->discard_blks++; } if (!f2fs_test_bit(offset, se->ckpt_valid_map)) @@ -2671,7 +2669,7 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) * discard option. User configuration looks like using runtime discard * or periodic fstrim instead of it. */ - if (test_opt(sbi, DISCARD)) + if (f2fs_realtime_discard_enable(sbi)) goto out; start_block = START_BLOCK(sbi, start_segno); @@ -3762,13 +3760,11 @@ static int build_sit_info(struct f2fs_sb_info *sbi) return -ENOMEM; #endif - if (f2fs_discard_en(sbi)) { - sit_i->sentries[start].discard_map - = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, - GFP_KERNEL); - if (!sit_i->sentries[start].discard_map) - return -ENOMEM; - } + sit_i->sentries[start].discard_map + = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, + GFP_KERNEL); + if (!sit_i->sentries[start].discard_map) + return -ENOMEM; } sit_i->tmp_map = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, GFP_KERNEL); @@ -3916,18 +3912,16 @@ static int build_sit_entries(struct f2fs_sb_info *sbi) total_node_blocks += se->valid_blocks; /* build discard map only one time */ - if (f2fs_discard_en(sbi)) { - if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) { - memset(se->discard_map, 0xff, - SIT_VBLOCK_MAP_SIZE); - } else { - memcpy(se->discard_map, - se->cur_valid_map, - SIT_VBLOCK_MAP_SIZE); - sbi->discard_blks += - sbi->blocks_per_seg - - se->valid_blocks; - } + if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) { + memset(se->discard_map, 0xff, + SIT_VBLOCK_MAP_SIZE); + } else { + memcpy(se->discard_map, + se->cur_valid_map, + SIT_VBLOCK_MAP_SIZE); + sbi->discard_blks += + sbi->blocks_per_seg - + se->valid_blocks; } if (sbi->segs_per_sec > 1) @@ -3965,16 +3959,13 @@ static int build_sit_entries(struct f2fs_sb_info *sbi) if (IS_NODESEG(se->type)) total_node_blocks += se->valid_blocks; - if (f2fs_discard_en(sbi)) { - if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) { - memset(se->discard_map, 0xff, - SIT_VBLOCK_MAP_SIZE); - } else { - memcpy(se->discard_map, se->cur_valid_map, - SIT_VBLOCK_MAP_SIZE); - sbi->discard_blks += old_valid_blocks; - sbi->discard_blks -= se->valid_blocks; - } + if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) { + memset(se->discard_map, 0xff, SIT_VBLOCK_MAP_SIZE); + } else { + memcpy(se->discard_map, se->cur_valid_map, + SIT_VBLOCK_MAP_SIZE); + sbi->discard_blks += old_valid_blocks; + sbi->discard_blks -= se->valid_blocks; } if (sbi->segs_per_sec > 1) { diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 1c8226dfd4f3f..ad01f64dfb677 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -360,7 +360,6 @@ static int f2fs_check_quota_options(struct f2fs_sb_info *sbi) static int parse_options(struct super_block *sb, char *options) { struct f2fs_sb_info *sbi = F2FS_SB(sb); - struct request_queue *q; substring_t args[MAX_OPT_ARGS]; char *p, *name; int arg = 0; @@ -415,14 +414,7 @@ static int parse_options(struct super_block *sb, char *options) return -EINVAL; break; case Opt_discard: - q = bdev_get_queue(sb->s_bdev); - if (blk_queue_discard(q)) { - set_opt(sbi, DISCARD); - } else if (!f2fs_sb_has_blkzoned(sb)) { - f2fs_msg(sb, KERN_WARNING, - "mounting with \"discard\" option, but " - "the device does not support discard"); - } + set_opt(sbi, DISCARD); break; case Opt_nodiscard: if (f2fs_sb_has_blkzoned(sb)) { @@ -1032,7 +1024,8 @@ static void f2fs_put_super(struct super_block *sb) /* be sure to wait for any on-going discard commands */ dropped = f2fs_wait_discard_bios(sbi); - if (f2fs_discard_en(sbi) && !sbi->discard_blks && !dropped) { + if ((f2fs_hw_support_discard(sbi) || f2fs_hw_should_discard(sbi)) && + !sbi->discard_blks && !dropped) { struct cp_control cpc = { .reason = CP_UMOUNT | CP_TRIMMED, }; @@ -1399,8 +1392,7 @@ static void default_options(struct f2fs_sb_info *sbi) set_opt(sbi, NOHEAP); sbi->sb->s_flags |= MS_LAZYTIME; set_opt(sbi, FLUSH_MERGE); - if (blk_queue_discard(bdev_get_queue(sbi->sb->s_bdev))) - set_opt(sbi, DISCARD); + set_opt(sbi, DISCARD); if (f2fs_sb_has_blkzoned(sbi->sb)) set_opt_mode(sbi, F2FS_MOUNT_LFS); else -- GitLab From df5b29fb1f28db1da46298cb23df55df0cf70d03 Mon Sep 17 00:00:00 2001 From: Sahitya Tummala Date: Fri, 31 Aug 2018 15:09:26 +0530 Subject: [PATCH 1019/2269] f2fs: fix unnecessary periodic wakeup of discard thread when dev is busy When dev is busy, discard thread wake up timeout can be aligned with the exact time that it needs to wait for dev to come out of busy. This helps to avoid unnecessary periodic wakeups and thus save some power. Signed-off-by: Sahitya Tummala Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index c372ff755818f..f36a4b71595fc 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1564,6 +1564,8 @@ static int issue_discard_thread(void *data) struct discard_policy dpolicy; unsigned int wait_ms = DEF_MIN_DISCARD_ISSUE_TIME; int issued; + unsigned long interval = sbi->interval_time[REQ_TIME] * HZ; + long delta; set_freezable(); @@ -1600,7 +1602,11 @@ static int issue_discard_thread(void *data) __wait_all_discard_cmd(sbi, &dpolicy); wait_ms = dpolicy.min_interval; } else if (issued == -1){ - wait_ms = dpolicy.mid_interval; + delta = (sbi->last_time[REQ_TIME] + interval) - jiffies; + if (delta > 0) + wait_ms = jiffies_to_msecs(delta); + else + wait_ms = dpolicy.mid_interval; } else { wait_ms = dpolicy.max_interval; } -- GitLab From 07ba9407e8d0a276e72e9f7016d87d3673bb745a Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 22 Aug 2018 17:17:47 +0800 Subject: [PATCH 1020/2269] Revert "f2fs: use printk_ratelimited for f2fs_msg" Don't limit printing log, so that we will not miss any key messages. This reverts commit a36c106dffb616250117efb1cab271c19a8f94ff. In addition, we use printk_ratelimited to avoid too many log prints. - error injection - discard submission failure Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 6 +++--- fs/f2fs/segment.c | 6 +++--- fs/f2fs/super.c | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index dd89ff5a0c9bf..3fe4ae0bb10fc 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1307,9 +1307,9 @@ struct f2fs_sb_info { }; #ifdef CONFIG_F2FS_FAULT_INJECTION -#define f2fs_show_injection_info(type) \ - printk("%sF2FS-fs : inject %s in %s of %pF\n", \ - KERN_INFO, f2fs_fault_name[type], \ +#define f2fs_show_injection_info(type) \ + printk_ratelimited("%sF2FS-fs : inject %s in %s of %pF\n", \ + KERN_INFO, f2fs_fault_name[type], \ __func__, __builtin_return_address(0)) static inline bool time_to_inject(struct f2fs_sb_info *sbi, int type) { diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index f36a4b71595fc..34e05777941ed 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -905,9 +905,9 @@ static void __remove_discard_cmd(struct f2fs_sb_info *sbi, dc->error = 0; if (dc->error) - f2fs_msg(sbi->sb, KERN_INFO, - "Issue discard(%u, %u, %u) failed, ret: %d", - dc->lstart, dc->start, dc->len, dc->error); + printk_ratelimited( + "%sF2FS-fs: Issue discard(%u, %u, %u) failed, ret: %d", + KERN_INFO, dc->lstart, dc->start, dc->len, dc->error); __detach_discard_cmd(dcc, dc); } diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index ad01f64dfb677..25b6082485a5d 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -207,7 +207,7 @@ void f2fs_msg(struct super_block *sb, const char *level, const char *fmt, ...) va_start(args, fmt); vaf.fmt = fmt; vaf.va = &args; - printk_ratelimited("%sF2FS-fs (%s): %pV\n", level, sb->s_id, &vaf); + printk("%sF2FS-fs (%s): %pV\n", level, sb->s_id, &vaf); va_end(args); } -- GitLab From c7dd0d17dced3117362ccd320180b747f478dbd5 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 22 Aug 2018 21:18:00 -0700 Subject: [PATCH 1021/2269] f2fs: avoid wrong decrypted data from disk 1. Create a file in an encrypted directory 2. Do GC & drop caches 3. Read stale data before its bio for metapage was not issued yet Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 18 ++++++++++-------- fs/f2fs/f2fs.h | 2 +- fs/f2fs/file.c | 3 +-- fs/f2fs/segment.c | 6 +++++- 4 files changed, 17 insertions(+), 12 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 1a3414514cff5..0edc500ea089d 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -565,9 +565,6 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr, ctx->bio = bio; ctx->enabled_steps = post_read_steps; bio->bi_private = ctx; - - /* wait the page to be moved by cleaning */ - f2fs_wait_on_block_writeback(sbi, blkaddr); } return bio; @@ -582,6 +579,9 @@ static int f2fs_submit_page_read(struct inode *inode, struct page *page, if (IS_ERR(bio)) return PTR_ERR(bio); + /* wait for GCed page writeback via META_MAPPING */ + f2fs_wait_on_block_writeback(inode, blkaddr); + if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) { bio_put(bio); return -EFAULT; @@ -1558,6 +1558,12 @@ submit_and_realloc: } } + /* + * If the page is under writeback, we need to wait for + * its completion to see the correct decrypted data. + */ + f2fs_wait_on_block_writeback(inode, block_nr); + if (bio_add_page(bio, page, blocksize, 0) < blocksize) goto submit_and_realloc; @@ -1625,7 +1631,7 @@ static int encrypt_one_page(struct f2fs_io_info *fio) return 0; /* wait for GCed page writeback via META_MAPPING */ - f2fs_wait_on_block_writeback(fio->sbi, fio->old_blkaddr); + f2fs_wait_on_block_writeback(inode, fio->old_blkaddr); retry_encrypt: fio->encrypted_page = fscrypt_encrypt_page(inode, fio->page, @@ -2382,10 +2388,6 @@ repeat: f2fs_wait_on_page_writeback(page, DATA, false); - /* wait for GCed page writeback via META_MAPPING */ - if (f2fs_post_read_required(inode)) - f2fs_wait_on_block_writeback(sbi, blkaddr); - if (len == PAGE_SIZE || PageUptodate(page)) return 0; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 3fe4ae0bb10fc..c30b17a6ee52e 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2943,7 +2943,7 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, struct f2fs_io_info *fio, bool add_list); void f2fs_wait_on_page_writeback(struct page *page, enum page_type type, bool ordered); -void f2fs_wait_on_block_writeback(struct f2fs_sb_info *sbi, block_t blkaddr); +void f2fs_wait_on_block_writeback(struct inode *inode, block_t blkaddr); void f2fs_write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk); void f2fs_write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk); int f2fs_lookup_journal_in_cursum(struct f2fs_journal *journal, int type, diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 6222c881baed9..571454f6602df 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -112,8 +112,7 @@ mapped: f2fs_wait_on_page_writeback(page, DATA, false); /* wait for GCed page writeback via META_MAPPING */ - if (f2fs_post_read_required(inode)) - f2fs_wait_on_block_writeback(sbi, dn.data_blkaddr); + f2fs_wait_on_block_writeback(inode, dn.data_blkaddr); out_sem: up_read(&F2FS_I(inode)->i_mmap_sem); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 34e05777941ed..187c848a65b82 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -3195,10 +3195,14 @@ void f2fs_wait_on_page_writeback(struct page *page, } } -void f2fs_wait_on_block_writeback(struct f2fs_sb_info *sbi, block_t blkaddr) +void f2fs_wait_on_block_writeback(struct inode *inode, block_t blkaddr) { + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct page *cpage; + if (!f2fs_post_read_required(inode)) + return; + if (!is_valid_data_blkaddr(sbi, blkaddr)) return; -- GitLab From 3e876842654afa440c400a5725763244a55e92b5 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 6 Sep 2018 11:40:12 -0700 Subject: [PATCH 1022/2269] f2fs: submit bio after shutdown Sometimes, some merged IOs could get a chance to be submitted, resulting in system hang in shutdown test. This issues IOs all the time after shutdown. Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 0edc500ea089d..919974220298a 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -533,6 +533,8 @@ skip: if (fio->in_list) goto next; out: + if (is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN)) + __submit_merged_bio(io); up_write(&io->io_rwsem); } -- GitLab From 187aa152fe96ba68b9cdbb22c220c2c360e982bf Mon Sep 17 00:00:00 2001 From: Yunlei He Date: Tue, 26 Jun 2018 13:12:43 +0800 Subject: [PATCH 1023/2269] f2fs: report error if quota off error during umount Now, we depend on fsck to ensure quota file data is ok, so we scan whole partition if checkpoint without umount flag. It's same for quota off error case, which may make quota file data inconsistent. generic/019 reports below error: __quota_error: 1160 callbacks suppressed Quota error (device zram1): write_blk: dquota write failed Quota error (device zram1): qtree_write_dquot: Error -28 occurred while creating quota Quota error (device zram1): write_blk: dquota write failed Quota error (device zram1): qtree_write_dquot: Error -28 occurred while creating quota Quota error (device zram1): write_blk: dquota write failed Quota error (device zram1): qtree_write_dquot: Error -28 occurred while creating quota Quota error (device zram1): write_blk: dquota write failed Quota error (device zram1): qtree_write_dquot: Error -28 occurred while creating quota Quota error (device zram1): write_blk: dquota write failed Quota error (device zram1): qtree_write_dquot: Error -28 occurred while creating quota VFS: Busy inodes after unmount of zram1. Self-destruct in 5 seconds. Have a nice day... If we failed in below path due to fail to write dquot block, we will miss to release quota inode, fix it. - f2fs_put_super - f2fs_quota_off_umount - f2fs_quota_off - f2fs_quota_sync <-- failed - dquot_quota_off <-- missed to call Signed-off-by: Yunlei He Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 25b6082485a5d..6aa571bb0baa5 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1844,7 +1844,9 @@ static int f2fs_quota_off(struct super_block *sb, int type) if (!inode || !igrab(inode)) return dquot_quota_off(sb, type); - f2fs_quota_sync(sb, type); + err = f2fs_quota_sync(sb, type); + if (err) + goto out_put; err = dquot_quota_off(sb, type); if (err || f2fs_sb_has_quota_ino(sb)) @@ -1863,9 +1865,20 @@ out_put: void f2fs_quota_off_umount(struct super_block *sb) { int type; + int err; + + for (type = 0; type < MAXQUOTAS; type++) { + err = f2fs_quota_off(sb, type); + if (err) { + int ret = dquot_quota_off(sb, type); - for (type = 0; type < MAXQUOTAS; type++) - f2fs_quota_off(sb, type); + f2fs_msg(sb, KERN_ERR, + "Fail to turn off disk quota " + "(type: %d, err: %d, ret:%d), Please " + "run fsck to fix it.", type, err, ret); + set_sbi_flag(F2FS_SB(sb), SBI_NEED_FSCK); + } + } } static int f2fs_get_projid(struct inode *inode, kprojid_t *projid) -- GitLab From 8dee4cc0babf061b97a4c219b96348a6674cad78 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 22 Aug 2018 17:11:05 +0800 Subject: [PATCH 1024/2269] f2fs: fix to flush all dirty inodes recovered in readonly fs generic/417 reported as blow: ------------[ cut here ]------------ kernel BUG at /home/yuchao/git/devf2fs/inode.c:695! invalid opcode: 0000 [#1] PREEMPT SMP CPU: 1 PID: 21697 Comm: umount Tainted: G W O 4.18.0-rc2+ #39 Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 EIP: f2fs_evict_inode+0x556/0x580 [f2fs] Call Trace: ? _raw_spin_unlock+0x2c/0x50 evict+0xa8/0x170 dispose_list+0x34/0x40 evict_inodes+0x118/0x120 generic_shutdown_super+0x41/0x100 ? rcu_read_lock_sched_held+0x97/0xa0 kill_block_super+0x22/0x50 kill_f2fs_super+0x6f/0x80 [f2fs] deactivate_locked_super+0x3d/0x70 deactivate_super+0x40/0x60 cleanup_mnt+0x39/0x70 __cleanup_mnt+0x10/0x20 task_work_run+0x81/0xa0 exit_to_usermode_loop+0x59/0xa7 do_fast_syscall_32+0x1f5/0x22c entry_SYSENTER_32+0x53/0x86 EIP: f2fs_evict_inode+0x556/0x580 [f2fs] It can simply reproduced with scripts: Enable quota feature during mkfs. Testcase1: 1. mkfs.f2fs /dev/zram0 2. mount -t f2fs /dev/zram0 /mnt/f2fs 3. xfs_io -f /mnt/f2fs/file -c "pwrite 0 4k" -c "fsync" 4. godown /mnt/f2fs 5. umount /mnt/f2fs 6. mount -t f2fs -o ro /dev/zram0 /mnt/f2fs 7. umount /mnt/f2fs Testcase2: 1. mkfs.f2fs /dev/zram0 2. mount -t f2fs /dev/zram0 /mnt/f2fs 3. touch /mnt/f2fs/file 4. create process[pid = x] do: a) open /mnt/f2fs/file; b) unlink /mnt/f2fs/file 5. godown -f /mnt/f2fs 6. kill process[pid = x] 7. umount /mnt/f2fs 8. mount -t f2fs -o ro /dev/zram0 /mnt/f2fs 9. umount /mnt/f2fs The reason is: during recovery, i_{c,m}time of inode will be updated, then the inode can be set dirty w/o being tracked in sbi->inode_list[DIRTY_META] global list, so later write_checkpoint will not flush such dirty inode into node page. Once umount is called, sync_filesystem() in generic_shutdown_super() will skip syncng dirty inodes due to sb_rdonly check, leaving dirty inodes there. To solve this issue, during umount, add remove SB_RDONLY flag in sb->s_flags, to make sure sync_filesystem() will not be skipped. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 2 ++ fs/f2fs/f2fs.h | 1 + fs/f2fs/recovery.c | 14 +++++++++----- fs/f2fs/super.c | 3 +++ 4 files changed, 15 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 6b24eb4814780..504b6eff7a6d0 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -696,6 +696,8 @@ int f2fs_recover_orphan_inodes(struct f2fs_sb_info *sbi) /* clear Orphan Flag */ clear_ckpt_flags(sbi, CP_ORPHAN_PRESENT_FLAG); out: + set_sbi_flag(sbi, SBI_IS_RECOVERED); + #ifdef CONFIG_QUOTA /* Turn quotas off */ if (quota_enabled) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index c30b17a6ee52e..2ede7083ffc03 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1089,6 +1089,7 @@ enum { SBI_NEED_SB_WRITE, /* need to recover superblock */ SBI_NEED_CP, /* need to checkpoint */ SBI_IS_SHUTDOWN, /* shutdown by ioctl */ + SBI_IS_RECOVERED, /* recovered orphan/data */ }; enum { diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 501bb0fdda1b3..e34ca1686e3b5 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -697,11 +697,15 @@ skip: /* let's drop all the directory inodes for clean checkpoint */ destroy_fsync_dnodes(&dir_list); - if (!err && need_writecp) { - struct cp_control cpc = { - .reason = CP_RECOVERY, - }; - err = f2fs_write_checkpoint(sbi, &cpc); + if (need_writecp) { + set_sbi_flag(sbi, SBI_IS_RECOVERED); + + if (!err) { + struct cp_control cpc = { + .reason = CP_RECOVERY, + }; + err = f2fs_write_checkpoint(sbi, &cpc); + } } kmem_cache_destroy(fsync_entry_slab); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 6aa571bb0baa5..f650e51c10609 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -3179,6 +3179,9 @@ static void kill_f2fs_super(struct super_block *sb) }; f2fs_write_checkpoint(sbi, &cpc); } + + if (is_sbi_flag_set(sbi, SBI_IS_RECOVERED) && f2fs_readonly(sb)) + sb->s_flags &= ~SB_RDONLY; } kill_block_super(sb); } -- GitLab From 110427278005c13e489a432049ba28e67fc2dca4 Mon Sep 17 00:00:00 2001 From: Chengguang Xu Date: Fri, 31 Aug 2018 22:33:50 +0800 Subject: [PATCH 1025/2269] f2fs: cache NULL when both default_acl and acl are NULL default_acl and acl of newly created inode will be initiated as ACL_NOT_CACHED in vfs function inode_init_always() and later will be updated by calling xxx_init_acl() in specific filesystems. Howerver, when default_acl and acl are NULL then they keep the value of ACL_NOT_CACHED, this patch tries to cache NULL for acl/default_acl in this case. Signed-off-by: Chengguang Xu Acked-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/acl.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c index 20caf341701d3..cd82e1ce5d678 100644 --- a/fs/f2fs/acl.c +++ b/fs/f2fs/acl.c @@ -397,12 +397,16 @@ int f2fs_init_acl(struct inode *inode, struct inode *dir, struct page *ipage, error = __f2fs_set_acl(inode, ACL_TYPE_DEFAULT, default_acl, ipage); posix_acl_release(default_acl); + } else { + inode->i_default_acl = NULL; } if (acl) { if (!error) error = __f2fs_set_acl(inode, ACL_TYPE_ACCESS, acl, ipage); posix_acl_release(acl); + } else { + inode->i_acl = NULL; } return error; -- GitLab From bced25a088e747f4022986f078fc97769a817b5a Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 5 Sep 2018 14:54:01 +0800 Subject: [PATCH 1026/2269] f2fs: fix memory leak of write_io in fill_super() It needs to release memory allocated for sbi->write_io in error path, otherwise, it will cause memory leak. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index f650e51c10609..bd8956692e12a 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -2871,7 +2871,7 @@ try_onemore: GFP_KERNEL); if (!sbi->write_io[i]) { err = -ENOMEM; - goto free_options; + goto free_bio_info; } for (j = HOT; j < n; j++) { -- GitLab From 407d6a66cbcc266114eefcc7bcc56d9b07878130 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 5 Sep 2018 14:54:02 +0800 Subject: [PATCH 1027/2269] f2fs: fix memory leak of percpu counter in fill_super() In fill_super -> init_percpu_info, we should destroy percpu counter in error path, otherwise memory allcoated for percpu counter will leak. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index bd8956692e12a..f321b99a72a77 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -2458,8 +2458,12 @@ static int init_percpu_info(struct f2fs_sb_info *sbi) if (err) return err; - return percpu_counter_init(&sbi->total_valid_inode_count, 0, + err = percpu_counter_init(&sbi->total_valid_inode_count, 0, GFP_KERNEL); + if (err) + percpu_counter_destroy(&sbi->alloc_valid_block_count); + + return err; } #ifdef CONFIG_BLK_DEV_ZONED -- GitLab From 2312b6134cb85e57e2a5f35ca8a4cdf83352a7b8 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 6 Sep 2018 20:34:12 +0800 Subject: [PATCH 1028/2269] f2fs: fix to do sanity check with current segment number https://bugzilla.kernel.org/show_bug.cgi?id=200219 Reproduction way: - mount image - run poc code - umount image F2FS-fs (loop1): Bitmap was wrongly set, blk:15364 ------------[ cut here ]------------ kernel BUG at /home/yuchao/git/devf2fs/segment.c:2061! invalid opcode: 0000 [#1] PREEMPT SMP CPU: 2 PID: 17686 Comm: umount Tainted: G W O 4.18.0-rc2+ #39 Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 EIP: update_sit_entry+0x459/0x4e0 [f2fs] Code: e8 1c b5 fd ff 0f 0b 0f 0b 8b 45 e4 c7 44 24 08 9c 7a 6c f8 c7 44 24 04 bc 4a 6c f8 89 44 24 0c 8b 06 89 04 24 e8 f7 b4 fd ff <0f> 0b 8b 45 e4 0f b6 d2 89 54 24 10 c7 44 24 08 60 7a 6c f8 c7 44 EAX: 00000032 EBX: 000000f8 ECX: 00000002 EDX: 00000001 ESI: d7177000 EDI: f520fe68 EBP: d6477c6c ESP: d6477c34 DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068 EFLAGS: 00010282 CR0: 80050033 CR2: b7fbe000 CR3: 2a99b3c0 CR4: 000406f0 Call Trace: f2fs_allocate_data_block+0x124/0x580 [f2fs] do_write_page+0x78/0x150 [f2fs] f2fs_do_write_node_page+0x25/0xa0 [f2fs] __write_node_page+0x2bf/0x550 [f2fs] f2fs_sync_node_pages+0x60e/0x6d0 [f2fs] ? sync_inode_metadata+0x2f/0x40 ? f2fs_write_checkpoint+0x28f/0x7d0 [f2fs] ? up_write+0x1e/0x80 f2fs_write_checkpoint+0x2a9/0x7d0 [f2fs] ? mark_held_locks+0x5d/0x80 ? _raw_spin_unlock_irq+0x27/0x50 kill_f2fs_super+0x68/0x90 [f2fs] deactivate_locked_super+0x3d/0x70 deactivate_super+0x40/0x60 cleanup_mnt+0x39/0x70 __cleanup_mnt+0x10/0x20 task_work_run+0x81/0xa0 exit_to_usermode_loop+0x59/0xa7 do_fast_syscall_32+0x1f5/0x22c entry_SYSENTER_32+0x53/0x86 EIP: 0xb7f95c51 Code: c1 1e f7 ff ff 89 e5 8b 55 08 85 d2 8b 81 64 cd ff ff 74 02 89 02 5d c3 8b 0c 24 c3 8b 1c 24 c3 90 51 52 55 89 e5 0f 34 cd 80 <5d> 5a 59 c3 90 90 90 90 8d 76 00 58 b8 77 00 00 00 cd 80 90 8d 76 EAX: 00000000 EBX: 0871ab90 ECX: bfb2cd00 EDX: 00000000 ESI: 00000000 EDI: 0871ab90 EBP: 0871ab90 ESP: bfb2cd7c DS: 007b ES: 007b FS: 0000 GS: 0033 SS: 007b EFLAGS: 00000246 Modules linked in: f2fs(O) crc32_generic bnep rfcomm bluetooth ecdh_generic snd_intel8x0 snd_ac97_codec ac97_bus snd_pcm snd_seq_midi snd_seq_midi_event snd_rawmidi snd_seq pcbc joydev aesni_intel snd_seq_device aes_i586 snd_timer crypto_simd snd cryptd soundcore mac_hid serio_raw video i2c_piix4 parport_pc ppdev lp parport hid_generic psmouse usbhid hid e1000 [last unloaded: f2fs] ---[ end trace d423f83982cfcdc5 ]--- The reason is, different log headers using the same segment, once one log's next block address is used by another log, it will cause panic as above. Main area: 24 segs, 24 secs 24 zones - COLD data: 0, 0, 0 - WARM data: 1, 1, 1 - HOT data: 20, 20, 20 - Dir dnode: 22, 22, 22 - File dnode: 22, 22, 22 - Indir nodes: 21, 21, 21 So this patch adds sanity check to detect such condition to avoid this issue. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 34 +++++++++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index f321b99a72a77..5d4fcab97061a 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -2325,7 +2325,7 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi) unsigned int segment_count_main; unsigned int cp_pack_start_sum, cp_payload; block_t user_block_count; - int i; + int i, j; total = le32_to_cpu(raw_super->segment_count); fsmeta = le32_to_cpu(raw_super->segment_count_ckpt); @@ -2366,11 +2366,43 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi) if (le32_to_cpu(ckpt->cur_node_segno[i]) >= main_segs || le16_to_cpu(ckpt->cur_node_blkoff[i]) >= blocks_per_seg) return 1; + for (j = i + 1; j < NR_CURSEG_NODE_TYPE; j++) { + if (le32_to_cpu(ckpt->cur_node_segno[i]) == + le32_to_cpu(ckpt->cur_node_segno[j])) { + f2fs_msg(sbi->sb, KERN_ERR, + "Node segment (%u, %u) has the same " + "segno: %u", i, j, + le32_to_cpu(ckpt->cur_node_segno[i])); + return 1; + } + } } for (i = 0; i < NR_CURSEG_DATA_TYPE; i++) { if (le32_to_cpu(ckpt->cur_data_segno[i]) >= main_segs || le16_to_cpu(ckpt->cur_data_blkoff[i]) >= blocks_per_seg) return 1; + for (j = i + 1; j < NR_CURSEG_DATA_TYPE; j++) { + if (le32_to_cpu(ckpt->cur_data_segno[i]) == + le32_to_cpu(ckpt->cur_data_segno[j])) { + f2fs_msg(sbi->sb, KERN_ERR, + "Data segment (%u, %u) has the same " + "segno: %u", i, j, + le32_to_cpu(ckpt->cur_data_segno[i])); + return 1; + } + } + } + for (i = 0; i < NR_CURSEG_NODE_TYPE; i++) { + for (j = i; j < NR_CURSEG_DATA_TYPE; j++) { + if (le32_to_cpu(ckpt->cur_node_segno[i]) == + le32_to_cpu(ckpt->cur_data_segno[j])) { + f2fs_msg(sbi->sb, KERN_ERR, + "Data segment (%u) and Data segment (%u)" + " has the same segno: %u", i, j, + le32_to_cpu(ckpt->cur_node_segno[i])); + return 1; + } + } } sit_bitmap_size = le32_to_cpu(ckpt->sit_ver_bitmap_bytesize); -- GitLab From 212dc9c48d297204aa3513a42845a6da2a8e9f0c Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 7 Sep 2018 19:49:07 +0800 Subject: [PATCH 1029/2269] f2fs: plug readahead IO in readdir() Add a plug to merge readahead IO in readdir(), expecting it can reduce bio count before submitting to block layer. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/dir.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index ecc3a4e2be96d..bd0348cc860fa 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -784,9 +784,15 @@ int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d, struct f2fs_dir_entry *de = NULL; struct fscrypt_str de_name = FSTR_INIT(NULL, 0); struct f2fs_sb_info *sbi = F2FS_I_SB(d->inode); + struct blk_plug plug; + bool readdir_ra = sbi->readdir_ra == 1; + int err = 0; bit_pos = ((unsigned long)ctx->pos % d->max); + if (readdir_ra) + blk_start_plug(&plug); + while (bit_pos < d->max) { bit_pos = find_next_bit_le(d->bitmap, d->max, bit_pos); if (bit_pos >= d->max) @@ -806,29 +812,33 @@ int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d, if (f2fs_encrypted_inode(d->inode)) { int save_len = fstr->len; - int err; err = fscrypt_fname_disk_to_usr(d->inode, (u32)de->hash_code, 0, &de_name, fstr); if (err) - return err; + goto out; de_name = *fstr; fstr->len = save_len; } if (!dir_emit(ctx, de_name.name, de_name.len, - le32_to_cpu(de->ino), d_type)) - return 1; + le32_to_cpu(de->ino), d_type)) { + err = 1; + goto out; + } - if (sbi->readdir_ra == 1) + if (readdir_ra) f2fs_ra_node_page(sbi, le32_to_cpu(de->ino)); bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len)); ctx->pos = start_pos + bit_pos; } - return 0; +out: + if (readdir_ra) + blk_finish_plug(&plug); + return err; } static int f2fs_readdir(struct file *file, struct dir_context *ctx) -- GitLab From 27fb7396f1d97004cf9597c6c6fcbec1f678fa97 Mon Sep 17 00:00:00 2001 From: Zhikang Zhang Date: Mon, 10 Sep 2018 16:18:25 +0800 Subject: [PATCH 1030/2269] f2fs: avoid sleeping under spin_lock In the call trace below, we might sleep in function dput(). So in order to avoid sleeping under spin_lock, we remove f2fs_mark_inode_dirty_sync from __try_update_largest_extent && __drop_largest_extent. BUG: sleeping function called from invalid context at fs/dcache.c:796 Call trace: dump_backtrace+0x0/0x3f4 show_stack+0x24/0x30 dump_stack+0xe0/0x138 ___might_sleep+0x2a8/0x2c8 __might_sleep+0x78/0x10c dput+0x7c/0x750 block_dump___mark_inode_dirty+0x120/0x17c __mark_inode_dirty+0x344/0x11f0 f2fs_mark_inode_dirty_sync+0x40/0x50 __insert_extent_tree+0x2e0/0x2f4 f2fs_update_extent_tree_range+0xcf4/0xde8 f2fs_update_extent_cache+0x114/0x12c f2fs_update_data_blkaddr+0x40/0x50 write_data_page+0x150/0x314 do_write_data_page+0x648/0x2318 __write_data_page+0xdb4/0x1640 f2fs_write_cache_pages+0x768/0xafc __f2fs_write_data_pages+0x590/0x1218 f2fs_write_data_pages+0x64/0x74 do_writepages+0x74/0xe4 __writeback_single_inode+0xdc/0x15f0 writeback_sb_inodes+0x574/0xc98 __writeback_inodes_wb+0x190/0x204 wb_writeback+0x730/0xf14 wb_check_old_data_flush+0x1bc/0x1c8 wb_workfn+0x554/0xf74 process_one_work+0x440/0x118c worker_thread+0xac/0x974 kthread+0x1a0/0x1c8 ret_from_fork+0x10/0x1c Signed-off-by: Zhikang Zhang Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/extent_cache.c | 51 ++++++++++++++++++++++++++---------------- fs/f2fs/f2fs.h | 7 +++--- 2 files changed, 36 insertions(+), 22 deletions(-) diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index 231b77ef5a53b..a70cd2580eae1 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -308,14 +308,13 @@ static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi, return count - atomic_read(&et->node_cnt); } -static void __drop_largest_extent(struct inode *inode, +static void __drop_largest_extent(struct extent_tree *et, pgoff_t fofs, unsigned int len) { - struct extent_info *largest = &F2FS_I(inode)->extent_tree->largest; - - if (fofs < largest->fofs + largest->len && fofs + len > largest->fofs) { - largest->len = 0; - f2fs_mark_inode_dirty_sync(inode, true); + if (fofs < et->largest.fofs + et->largest.len && + fofs + len > et->largest.fofs) { + et->largest.len = 0; + et->largest_updated = true; } } @@ -416,12 +415,11 @@ out: return ret; } -static struct extent_node *__try_merge_extent_node(struct inode *inode, +static struct extent_node *__try_merge_extent_node(struct f2fs_sb_info *sbi, struct extent_tree *et, struct extent_info *ei, struct extent_node *prev_ex, struct extent_node *next_ex) { - struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct extent_node *en = NULL; if (prev_ex && __is_back_mergeable(ei, &prev_ex->ei)) { @@ -443,7 +441,7 @@ static struct extent_node *__try_merge_extent_node(struct inode *inode, if (!en) return NULL; - __try_update_largest_extent(inode, et, en); + __try_update_largest_extent(et, en); spin_lock(&sbi->extent_lock); if (!list_empty(&en->list)) { @@ -454,12 +452,11 @@ static struct extent_node *__try_merge_extent_node(struct inode *inode, return en; } -static struct extent_node *__insert_extent_tree(struct inode *inode, +static struct extent_node *__insert_extent_tree(struct f2fs_sb_info *sbi, struct extent_tree *et, struct extent_info *ei, struct rb_node **insert_p, struct rb_node *insert_parent) { - struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct rb_node **p; struct rb_node *parent = NULL; struct extent_node *en = NULL; @@ -476,7 +473,7 @@ do_insert: if (!en) return NULL; - __try_update_largest_extent(inode, et, en); + __try_update_largest_extent(et, en); /* update in global extent list */ spin_lock(&sbi->extent_lock); @@ -497,6 +494,7 @@ static void f2fs_update_extent_tree_range(struct inode *inode, struct rb_node **insert_p = NULL, *insert_parent = NULL; unsigned int end = fofs + len; unsigned int pos = (unsigned int)fofs; + bool updated = false; if (!et) return; @@ -517,7 +515,7 @@ static void f2fs_update_extent_tree_range(struct inode *inode, * drop largest extent before lookup, in case it's already * been shrunk from extent tree */ - __drop_largest_extent(inode, fofs, len); + __drop_largest_extent(et, fofs, len); /* 1. lookup first extent node in range [fofs, fofs + len - 1] */ en = (struct extent_node *)f2fs_lookup_rb_tree_ret(&et->root, @@ -550,7 +548,7 @@ static void f2fs_update_extent_tree_range(struct inode *inode, set_extent_info(&ei, end, end - dei.fofs + dei.blk, org_end - end); - en1 = __insert_extent_tree(inode, et, &ei, + en1 = __insert_extent_tree(sbi, et, &ei, NULL, NULL); next_en = en1; } else { @@ -570,7 +568,7 @@ static void f2fs_update_extent_tree_range(struct inode *inode, } if (parts) - __try_update_largest_extent(inode, et, en); + __try_update_largest_extent(et, en); else __release_extent_node(sbi, et, en); @@ -590,15 +588,16 @@ static void f2fs_update_extent_tree_range(struct inode *inode, if (blkaddr) { set_extent_info(&ei, fofs, blkaddr, len); - if (!__try_merge_extent_node(inode, et, &ei, prev_en, next_en)) - __insert_extent_tree(inode, et, &ei, + if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en)) + __insert_extent_tree(sbi, et, &ei, insert_p, insert_parent); /* give up extent_cache, if split and small updates happen */ if (dei.len >= 1 && prev.len < F2FS_MIN_EXTENT_LEN && et->largest.len < F2FS_MIN_EXTENT_LEN) { - __drop_largest_extent(inode, 0, UINT_MAX); + et->largest.len = 0; + et->largest_updated = true; set_inode_flag(inode, FI_NO_EXTENT); } } @@ -606,7 +605,15 @@ static void f2fs_update_extent_tree_range(struct inode *inode, if (is_inode_flag_set(inode, FI_NO_EXTENT)) __free_extent_tree(sbi, et); + if (et->largest_updated) { + et->largest_updated = false; + updated = true; + } + write_unlock(&et->lock); + + if (updated) + f2fs_mark_inode_dirty_sync(inode, true); } unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink) @@ -705,6 +712,7 @@ void f2fs_drop_extent_tree(struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct extent_tree *et = F2FS_I(inode)->extent_tree; + bool updated = false; if (!f2fs_may_extent_tree(inode)) return; @@ -713,8 +721,13 @@ void f2fs_drop_extent_tree(struct inode *inode) write_lock(&et->lock); __free_extent_tree(sbi, et); - __drop_largest_extent(inode, 0, UINT_MAX); + if (et->largest.len) { + et->largest.len = 0; + updated = true; + } write_unlock(&et->lock); + if (updated) + f2fs_mark_inode_dirty_sync(inode, true); } void f2fs_destroy_extent_tree(struct inode *inode) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 2ede7083ffc03..a31abd711aa07 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -573,6 +573,7 @@ struct extent_tree { struct list_head list; /* to be used by sbi->zombie_list */ rwlock_t lock; /* protect extent info rb-tree */ atomic_t node_cnt; /* # of extent node in rb-tree*/ + bool largest_updated; /* largest extent updated */ }; /* @@ -755,12 +756,12 @@ static inline bool __is_front_mergeable(struct extent_info *cur, } extern void f2fs_mark_inode_dirty_sync(struct inode *inode, bool sync); -static inline void __try_update_largest_extent(struct inode *inode, - struct extent_tree *et, struct extent_node *en) +static inline void __try_update_largest_extent(struct extent_tree *et, + struct extent_node *en) { if (en->ei.len > et->largest.len) { et->largest = en->ei; - f2fs_mark_inode_dirty_sync(inode, true); + et->largest_updated = true; } } -- GitLab From 5f170bdd7b9a0f935864112ef6aaea543a9d100e Mon Sep 17 00:00:00 2001 From: Wang Shilong Date: Tue, 11 Sep 2018 08:54:21 +0900 Subject: [PATCH 1031/2269] f2fs: fix setattr project check upon fssetxattr ioctl Currently, project quota could be changed by fssetxattr ioctl, and existed permission check inode_owner_or_capable() is obviously not enough, just think that common users could change project id of file, that could make users to break project quota easily. This patch try to follow same regular of xfs project quota: "Project Quota ID state is only allowed to change from within the init namespace. Enforce that restriction only if we are trying to change the quota ID state. Everything else is allowed in user namespaces." Besides that, check and set project id'state should be an atomic operation, protect whole operation with inode lock. Signed-off-by: Wang Shilong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 60 +++++++++++++++++++++++++++++++------------------- 1 file changed, 37 insertions(+), 23 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 571454f6602df..1f03a36299fe4 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -2616,34 +2616,26 @@ static int f2fs_ioc_setproject(struct file *filp, __u32 projid) if (projid_eq(kprojid, F2FS_I(inode)->i_projid)) return 0; - err = mnt_want_write_file(filp); - if (err) - return err; - err = -EPERM; - inode_lock(inode); - /* Is it quota file? Do not allow user to mess with it */ if (IS_NOQUOTA(inode)) - goto out_unlock; + return err; ipage = f2fs_get_node_page(sbi, inode->i_ino); - if (IS_ERR(ipage)) { - err = PTR_ERR(ipage); - goto out_unlock; - } + if (IS_ERR(ipage)) + return PTR_ERR(ipage); if (!F2FS_FITS_IN_INODE(F2FS_INODE(ipage), fi->i_extra_isize, i_projid)) { err = -EOVERFLOW; f2fs_put_page(ipage, 1); - goto out_unlock; + return err; } f2fs_put_page(ipage, 1); err = dquot_initialize(inode); if (err) - goto out_unlock; + return err; transfer_to[PRJQUOTA] = dqget(sb, make_kqid_projid(kprojid)); if (!IS_ERR(transfer_to[PRJQUOTA])) { @@ -2657,9 +2649,6 @@ static int f2fs_ioc_setproject(struct file *filp, __u32 projid) inode->i_ctime = current_time(inode); out_dirty: f2fs_mark_inode_dirty_sync(inode, true); -out_unlock: - inode_unlock(inode); - mnt_drop_write_file(filp); return err; } #else @@ -2735,6 +2724,30 @@ static int f2fs_ioc_fsgetxattr(struct file *filp, unsigned long arg) return 0; } +static int f2fs_ioctl_check_project(struct inode *inode, struct fsxattr *fa) +{ + /* + * Project Quota ID state is only allowed to change from within the init + * namespace. Enforce that restriction only if we are trying to change + * the quota ID state. Everything else is allowed in user namespaces. + */ + if (current_user_ns() == &init_user_ns) + return 0; + + if (__kprojid_val(F2FS_I(inode)->i_projid) != fa->fsx_projid) + return -EINVAL; + + if (F2FS_I(inode)->i_flags & F2FS_PROJINHERIT_FL) { + if (!(fa->fsx_xflags & FS_XFLAG_PROJINHERIT)) + return -EINVAL; + } else { + if (fa->fsx_xflags & FS_XFLAG_PROJINHERIT) + return -EINVAL; + } + + return 0; +} + static int f2fs_ioc_fssetxattr(struct file *filp, unsigned long arg) { struct inode *inode = file_inode(filp); @@ -2762,19 +2775,20 @@ static int f2fs_ioc_fssetxattr(struct file *filp, unsigned long arg) return err; inode_lock(inode); + err = f2fs_ioctl_check_project(inode, &fa); + if (err) + goto out; flags = (fi->i_flags & ~F2FS_FL_XFLAG_VISIBLE) | (flags & F2FS_FL_XFLAG_VISIBLE); err = __f2fs_ioc_setflags(inode, flags); - inode_unlock(inode); - mnt_drop_write_file(filp); if (err) - return err; + goto out; err = f2fs_ioc_setproject(filp, fa.fsx_projid); - if (err) - return err; - - return 0; +out: + inode_unlock(inode); + mnt_drop_write_file(filp); + return err; } int f2fs_pin_file_control(struct inode *inode, bool inc) -- GitLab From 84c1b87e6a4dbd09b02bbc2c1eb395f5c51c2e4e Mon Sep 17 00:00:00 2001 From: Chengguang Xu Date: Wed, 12 Sep 2018 13:32:52 +0800 Subject: [PATCH 1032/2269] f2fs: surround fault_injection related option parsing using CONFIG_F2FS_FAULT_INJECTION It's a little bit strange when fault_injection related options fail with -EINVAL which were already disabled from config, so surround all fault_injection related option parsing code using CONFIG_F2FS_FAULT_INJECTION. Meanwhile, slightly change warning message to keep consistency with option POSIX_ACL and FS_XATTR. Signed-off-by: Chengguang Xu Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 5d4fcab97061a..cd33e09c82a14 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -594,28 +594,31 @@ static int parse_options(struct super_block *sb, char *options) } F2FS_OPTION(sbi).write_io_size_bits = arg; break; +#ifdef CONFIG_F2FS_FAULT_INJECTION case Opt_fault_injection: if (args->from && match_int(args, &arg)) return -EINVAL; -#ifdef CONFIG_F2FS_FAULT_INJECTION f2fs_build_fault_attr(sbi, arg, F2FS_ALL_FAULT_TYPE); set_opt(sbi, FAULT_INJECTION); -#else - f2fs_msg(sb, KERN_INFO, - "FAULT_INJECTION was not selected"); -#endif break; + case Opt_fault_type: if (args->from && match_int(args, &arg)) return -EINVAL; -#ifdef CONFIG_F2FS_FAULT_INJECTION f2fs_build_fault_attr(sbi, 0, arg); set_opt(sbi, FAULT_INJECTION); + break; #else + case Opt_fault_injection: f2fs_msg(sb, KERN_INFO, - "FAULT_INJECTION was not selected"); -#endif + "fault_injection options not supported"); break; + + case Opt_fault_type: + f2fs_msg(sb, KERN_INFO, + "fault_type options not supported"); + break; +#endif case Opt_lazytime: sb->s_flags |= MS_LAZYTIME; break; -- GitLab From a123f4bab9b2cdd0e94aa7996dea2b7f06e60cb3 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 12 Sep 2018 09:16:07 +0800 Subject: [PATCH 1033/2269] f2fs: add SPDX license identifiers Remove the verbose license text from f2fs files and replace them with SPDX tags. This does not change the license of any of the code. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/acl.c | 5 +---- fs/f2fs/acl.h | 5 +---- fs/f2fs/checkpoint.c | 5 +---- fs/f2fs/data.c | 5 +---- fs/f2fs/debug.c | 5 +---- fs/f2fs/dir.c | 5 +---- fs/f2fs/extent_cache.c | 5 +---- fs/f2fs/f2fs.h | 5 +---- fs/f2fs/file.c | 5 +---- fs/f2fs/gc.c | 5 +---- fs/f2fs/gc.h | 5 +---- fs/f2fs/hash.c | 5 +---- fs/f2fs/inline.c | 4 +--- fs/f2fs/inode.c | 5 +---- fs/f2fs/namei.c | 5 +---- fs/f2fs/node.c | 5 +---- fs/f2fs/node.h | 5 +---- fs/f2fs/recovery.c | 5 +---- fs/f2fs/segment.c | 5 +---- fs/f2fs/segment.h | 5 +---- fs/f2fs/shrinker.c | 5 +---- fs/f2fs/super.c | 5 +---- fs/f2fs/sysfs.c | 5 +---- fs/f2fs/trace.c | 5 +---- fs/f2fs/trace.h | 5 +---- fs/f2fs/xattr.c | 5 +---- fs/f2fs/xattr.h | 5 +---- include/linux/f2fs_fs.h | 5 +---- 28 files changed, 28 insertions(+), 111 deletions(-) diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c index cd82e1ce5d678..fa707cdd4120d 100644 --- a/fs/f2fs/acl.c +++ b/fs/f2fs/acl.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/f2fs/acl.c * @@ -7,10 +8,6 @@ * Portions of this code from linux/fs/ext2/acl.c * * Copyright (C) 2001-2003 Andreas Gruenbacher, - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include #include "f2fs.h" diff --git a/fs/f2fs/acl.h b/fs/f2fs/acl.h index 2c685185c24db..b96823c59b15a 100644 --- a/fs/f2fs/acl.h +++ b/fs/f2fs/acl.h @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/f2fs/acl.h * @@ -7,10 +8,6 @@ * Portions of this code from linux/fs/ext2/acl.h * * Copyright (C) 2001-2003 Andreas Gruenbacher, - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #ifndef __F2FS_ACL_H__ #define __F2FS_ACL_H__ diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 504b6eff7a6d0..86db0004774af 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/f2fs/checkpoint.c * * Copyright (c) 2012 Samsung Electronics Co., Ltd. * http://www.samsung.com/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include #include diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 919974220298a..a695b14c74775 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/f2fs/data.c * * Copyright (c) 2012 Samsung Electronics Co., Ltd. * http://www.samsung.com/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include #include diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index ebe649d9793cb..d3c402183e3c4 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * f2fs debugging statistics * @@ -5,10 +6,6 @@ * http://www.samsung.com/ * Copyright (c) 2012 Linux Foundation * Copyright (c) 2012 Greg Kroah-Hartman - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index bd0348cc860fa..c77a58038709d 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/f2fs/dir.c * * Copyright (c) 2012 Samsung Electronics Co., Ltd. * http://www.samsung.com/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include #include diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index a70cd2580eae1..904ad7ba5a453 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * f2fs extent cache support * @@ -5,10 +6,6 @@ * Copyright (c) 2015 Samsung Electronics * Authors: Jaegeuk Kim * Chao Yu - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index a31abd711aa07..2d52b1d85f348 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/f2fs/f2fs.h * * Copyright (c) 2012 Samsung Electronics Co., Ltd. * http://www.samsung.com/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #ifndef _LINUX_F2FS_H #define _LINUX_F2FS_H diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 1f03a36299fe4..ff0655773de03 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/f2fs/file.c * * Copyright (c) 2012 Samsung Electronics Co., Ltd. * http://www.samsung.com/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include #include diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index c6322ef9b5fc9..c7b208f77f4f5 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/f2fs/gc.c * * Copyright (c) 2012 Samsung Electronics Co., Ltd. * http://www.samsung.com/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include #include diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h index c8619e4080090..bbac9d3787bd3 100644 --- a/fs/f2fs/gc.h +++ b/fs/f2fs/gc.h @@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/f2fs/gc.h * * Copyright (c) 2012 Samsung Electronics Co., Ltd. * http://www.samsung.com/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #define GC_THREAD_MIN_WB_PAGES 1 /* * a threshold to determine diff --git a/fs/f2fs/hash.c b/fs/f2fs/hash.c index eb2e031ea887b..cc82f142f811f 100644 --- a/fs/f2fs/hash.c +++ b/fs/f2fs/hash.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/f2fs/hash.c * @@ -7,10 +8,6 @@ * Portions of this code from linux/fs/ext3/hash.c * * Copyright (C) 2002 by Theodore Ts'o - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include #include diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 115dc219344b1..425d740f87fdc 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -1,11 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/f2fs/inline.c * Copyright (c) 2013, Intel Corporation * Authors: Huajun Li * Haicheng Li - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 959df2249875c..86e7333d60c14 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/f2fs/inode.c * * Copyright (c) 2012 Samsung Electronics Co., Ltd. * http://www.samsung.com/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include #include diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 6ba092f635e3f..cc8c2ef448301 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/f2fs/namei.c * * Copyright (c) 2012 Samsung Electronics Co., Ltd. * http://www.samsung.com/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include #include diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index f213a53526aaf..3bfd89534ff2f 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/f2fs/node.c * * Copyright (c) 2012 Samsung Electronics Co., Ltd. * http://www.samsung.com/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include #include diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index 0f4db7a612547..1c73d879a9bc9 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/f2fs/node.h * * Copyright (c) 2012 Samsung Electronics Co., Ltd. * http://www.samsung.com/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ /* start node id of a node block dedicated to the given node id */ #define START_NID(nid) (((nid) / NAT_ENTRY_PER_BLOCK) * NAT_ENTRY_PER_BLOCK) diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index e34ca1686e3b5..4e9bcdda38dff 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/f2fs/recovery.c * * Copyright (c) 2012 Samsung Electronics Co., Ltd. * http://www.samsung.com/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include #include diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 187c848a65b82..9a8d7d415a749 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/f2fs/segment.c * * Copyright (c) 2012 Samsung Electronics Co., Ltd. * http://www.samsung.com/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include #include diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index b3d9e317ff0c1..086150028c6d1 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/f2fs/segment.h * * Copyright (c) 2012 Samsung Electronics Co., Ltd. * http://www.samsung.com/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include #include diff --git a/fs/f2fs/shrinker.c b/fs/f2fs/shrinker.c index 36cfd816c1608..9e13db994fdf4 100644 --- a/fs/f2fs/shrinker.c +++ b/fs/f2fs/shrinker.c @@ -1,13 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0 /* * f2fs shrinker support * the basic infra was copied from fs/ubifs/shrinker.c * * Copyright (c) 2015 Motorola Mobility * Copyright (c) 2015 Jaegeuk Kim - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include #include diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index cd33e09c82a14..03b21293f4650 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/f2fs/super.c * * Copyright (c) 2012 Samsung Electronics Co., Ltd. * http://www.samsung.com/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include #include diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 30fd016afeb37..4c88b0720cdd4 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -1,13 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0 /* * f2fs sysfs interface * * Copyright (c) 2012 Samsung Electronics Co., Ltd. * http://www.samsung.com/ * Copyright (c) 2017 Chao Yu - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include #include diff --git a/fs/f2fs/trace.c b/fs/f2fs/trace.c index a1fcd00bbb2bd..ce2a5eb210b66 100644 --- a/fs/f2fs/trace.c +++ b/fs/f2fs/trace.c @@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * f2fs IO tracer * * Copyright (c) 2014 Motorola Mobility * Copyright (c) 2014 Jaegeuk Kim - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include #include diff --git a/fs/f2fs/trace.h b/fs/f2fs/trace.h index 67db24ac1e85a..e8075fc5b2284 100644 --- a/fs/f2fs/trace.h +++ b/fs/f2fs/trace.h @@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * f2fs IO tracer * * Copyright (c) 2014 Motorola Mobility * Copyright (c) 2014 Jaegeuk Kim - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #ifndef __F2FS_TRACE_H__ #define __F2FS_TRACE_H__ diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index 77a010e625f50..7261245c208dc 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/f2fs/xattr.c * @@ -13,10 +14,6 @@ * suggestion of Luka Renko . * xattr consolidation Copyright (c) 2004 James Morris , * Red Hat Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include #include diff --git a/fs/f2fs/xattr.h b/fs/f2fs/xattr.h index dbcd1d16e6698..67db134da0f5f 100644 --- a/fs/f2fs/xattr.h +++ b/fs/f2fs/xattr.h @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/f2fs/xattr.h * @@ -9,10 +10,6 @@ * On-disk format of extended attributes for the ext2 filesystem. * * (C) 2001 Andreas Gruenbacher, - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #ifndef __F2FS_XATTR_H__ #define __F2FS_XATTR_H__ diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index f70f8ac9c4f44..1d4b196291d69 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /** * include/linux/f2fs_fs.h * * Copyright (c) 2012 Samsung Electronics Co., Ltd. * http://www.samsung.com/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #ifndef _LINUX_F2FS_FS_H #define _LINUX_F2FS_FS_H -- GitLab From 3048a2c866e736ec9a78fa485f3b2cf24da1e065 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 12 Sep 2018 09:22:29 +0800 Subject: [PATCH 1034/2269] f2fs: split IO error injection according to RW This patch adds to support injecting error for write IO, this can simulate IO error like fail_make_request or dm_flakey does. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- Documentation/filesystems/f2fs.txt | 3 ++- fs/f2fs/data.c | 9 +++++++-- fs/f2fs/f2fs.h | 3 ++- fs/f2fs/super.c | 3 ++- 4 files changed, 13 insertions(+), 5 deletions(-) diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt index 68a8d2688c7a3..432351f7ace11 100644 --- a/Documentation/filesystems/f2fs.txt +++ b/Documentation/filesystems/f2fs.txt @@ -172,9 +172,10 @@ fault_type=%d Support configuring fault injection type, should be FAULT_DIR_DEPTH 0x000000100 FAULT_EVICT_INODE 0x000000200 FAULT_TRUNCATE 0x000000400 - FAULT_IO 0x000000800 + FAULT_READ_IO 0x000000800 FAULT_CHECKPOINT 0x000001000 FAULT_DISCARD 0x000002000 + FAULT_WRITE_IO 0x000004000 mode=%s Control block allocation mode which supports "adaptive" and "lfs". In "lfs" mode, there should be no random writes towards main area. diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index a695b14c74775..6196c558219f7 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -123,8 +123,8 @@ static bool f2fs_bio_post_read_required(struct bio *bio) static void f2fs_read_end_io(struct bio *bio) { - if (time_to_inject(F2FS_P_SB(bio->bi_io_vec->bv_page), FAULT_IO)) { - f2fs_show_injection_info(FAULT_IO); + if (time_to_inject(F2FS_P_SB(bio->bi_io_vec->bv_page), FAULT_READ_IO)) { + f2fs_show_injection_info(FAULT_READ_IO); bio->bi_status = BLK_STS_IOERR; } @@ -145,6 +145,11 @@ static void f2fs_write_end_io(struct bio *bio) struct bio_vec *bvec; int i; + if (time_to_inject(sbi, FAULT_WRITE_IO)) { + f2fs_show_injection_info(FAULT_WRITE_IO); + bio->bi_status = BLK_STS_IOERR; + } + bio_for_each_segment_all(bvec, bio, i) { struct page *page = bvec->bv_page; enum count_type type = WB_DATA_TYPE(page); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 2d52b1d85f348..dbf2ffada14d0 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -51,9 +51,10 @@ enum { FAULT_DIR_DEPTH, FAULT_EVICT_INODE, FAULT_TRUNCATE, - FAULT_IO, + FAULT_READ_IO, FAULT_CHECKPOINT, FAULT_DISCARD, + FAULT_WRITE_IO, FAULT_MAX, }; diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 03b21293f4650..bc65ab52d8323 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -50,9 +50,10 @@ char *f2fs_fault_name[FAULT_MAX] = { [FAULT_DIR_DEPTH] = "too big dir depth", [FAULT_EVICT_INODE] = "evict_inode fail", [FAULT_TRUNCATE] = "truncate fail", - [FAULT_IO] = "IO error", + [FAULT_READ_IO] = "read IO error", [FAULT_CHECKPOINT] = "checkpoint error", [FAULT_DISCARD] = "discard error", + [FAULT_WRITE_IO] = "write IO error", }; void f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned int rate, -- GitLab From 9fa27d0dc1a96ad47dffafc0a91769889dc4a074 Mon Sep 17 00:00:00 2001 From: Sahitya Tummala Date: Wed, 19 Sep 2018 14:18:47 +0530 Subject: [PATCH 1035/2269] f2fs: add new idle interval timing for discard and gc paths This helps to control the frequency of submission of discard and GC requests independently, based on the need. Suggested-by: Chao Yu Signed-off-by: Sahitya Tummala Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- Documentation/ABI/testing/sysfs-fs-f2fs | 17 +++++++++++++- fs/f2fs/f2fs.h | 30 ++++++++++++++++++++++--- fs/f2fs/gc.c | 2 +- fs/f2fs/segment.c | 14 +++++------- fs/f2fs/super.c | 2 ++ fs/f2fs/sysfs.c | 5 +++++ 6 files changed, 56 insertions(+), 14 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index 41b8cb3b45256..9cd71f6e0c93a 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -121,7 +121,22 @@ What: /sys/fs/f2fs//idle_interval Date: January 2016 Contact: "Jaegeuk Kim" Description: - Controls the idle timing. + Controls the idle timing for all paths other than + discard and gc path. + +What: /sys/fs/f2fs//discard_idle_interval +Date: September 2018 +Contact: "Chao Yu" +Contact: "Sahitya Tummala" +Description: + Controls the idle timing for discard path. + +What: /sys/fs/f2fs//gc_idle_interval +Date: September 2018 +Contact: "Chao Yu" +Contact: "Sahitya Tummala" +Description: + Controls the idle timing for gc path. What: /sys/fs/f2fs//iostat_enable Date: August 2017 diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index dbf2ffada14d0..821d5acf7d165 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1094,6 +1094,8 @@ enum { enum { CP_TIME, REQ_TIME, + DISCARD_TIME, + GC_TIME, MAX_TIME, }; @@ -1345,7 +1347,15 @@ static inline bool time_to_inject(struct f2fs_sb_info *sbi, int type) static inline void f2fs_update_time(struct f2fs_sb_info *sbi, int type) { - sbi->last_time[type] = jiffies; + unsigned long now = jiffies; + + sbi->last_time[type] = now; + + /* DISCARD_TIME and GC_TIME are based on REQ_TIME */ + if (type == REQ_TIME) { + sbi->last_time[DISCARD_TIME] = now; + sbi->last_time[GC_TIME] = now; + } } static inline bool f2fs_time_over(struct f2fs_sb_info *sbi, int type) @@ -1355,7 +1365,21 @@ static inline bool f2fs_time_over(struct f2fs_sb_info *sbi, int type) return time_after(jiffies, sbi->last_time[type] + interval); } -static inline bool is_idle(struct f2fs_sb_info *sbi) +static inline unsigned int f2fs_time_to_wait(struct f2fs_sb_info *sbi, + int type) +{ + unsigned long interval = sbi->interval_time[type] * HZ; + unsigned int wait_ms = 0; + long delta; + + delta = (sbi->last_time[type] + interval) - jiffies; + if (delta > 0) + wait_ms = jiffies_to_msecs(delta); + + return wait_ms; +} + +static inline bool is_idle(struct f2fs_sb_info *sbi, int type) { struct block_device *bdev = sbi->sb->s_bdev; struct request_queue *q = bdev_get_queue(bdev); @@ -1364,7 +1388,7 @@ static inline bool is_idle(struct f2fs_sb_info *sbi) if (rl->count[BLK_RW_SYNC] || rl->count[BLK_RW_ASYNC]) return false; - return f2fs_time_over(sbi, REQ_TIME); + return f2fs_time_over(sbi, type); } /* diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index c7b208f77f4f5..7a41f97fae688 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -80,7 +80,7 @@ static int gc_thread_func(void *data) if (!mutex_trylock(&sbi->gc_mutex)) goto next; - if (!is_idle(sbi)) { + if (!is_idle(sbi, GC_TIME)) { increase_sleep_time(gc_th, &wait_ms); mutex_unlock(&sbi->gc_mutex); goto next; diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 9a8d7d415a749..97a4fae75651e 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -508,7 +508,7 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi) else f2fs_build_free_nids(sbi, false, false); - if (!is_idle(sbi) && + if (!is_idle(sbi, REQ_TIME) && (!excess_dirty_nats(sbi) && !excess_dirty_nodes(sbi))) return; @@ -1308,7 +1308,7 @@ static unsigned int __issue_discard_cmd_orderly(struct f2fs_sb_info *sbi, if (dc->state != D_PREP) goto next; - if (dpolicy->io_aware && !is_idle(sbi)) { + if (dpolicy->io_aware && !is_idle(sbi, DISCARD_TIME)) { io_interrupted = true; break; } @@ -1368,7 +1368,7 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi, f2fs_bug_on(sbi, dc->state != D_PREP); if (dpolicy->io_aware && i < dpolicy->io_aware_gran && - !is_idle(sbi)) { + !is_idle(sbi, DISCARD_TIME)) { io_interrupted = true; break; } @@ -1561,8 +1561,6 @@ static int issue_discard_thread(void *data) struct discard_policy dpolicy; unsigned int wait_ms = DEF_MIN_DISCARD_ISSUE_TIME; int issued; - unsigned long interval = sbi->interval_time[REQ_TIME] * HZ; - long delta; set_freezable(); @@ -1599,10 +1597,8 @@ static int issue_discard_thread(void *data) __wait_all_discard_cmd(sbi, &dpolicy); wait_ms = dpolicy.min_interval; } else if (issued == -1){ - delta = (sbi->last_time[REQ_TIME] + interval) - jiffies; - if (delta > 0) - wait_ms = jiffies_to_msecs(delta); - else + wait_ms = f2fs_time_to_wait(sbi, DISCARD_TIME); + if (!wait_ms) wait_ms = dpolicy.mid_interval; } else { wait_ms = dpolicy.max_interval; diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index bc65ab52d8323..020e1f16ecc1d 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -2461,6 +2461,8 @@ static void init_sb_info(struct f2fs_sb_info *sbi) sbi->dir_level = DEF_DIR_LEVEL; sbi->interval_time[CP_TIME] = DEF_CP_INTERVAL; sbi->interval_time[REQ_TIME] = DEF_IDLE_INTERVAL; + sbi->interval_time[DISCARD_TIME] = DEF_IDLE_INTERVAL; + sbi->interval_time[GC_TIME] = DEF_IDLE_INTERVAL; clear_sbi_flag(sbi, SBI_NEED_FSCK); for (i = 0; i < NR_COUNT_TYPE; i++) diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 4c88b0720cdd4..5a252d78d1787 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -404,6 +404,9 @@ F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_victim_search, max_victim_search); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, dir_level, dir_level); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, cp_interval, interval_time[CP_TIME]); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, idle_interval, interval_time[REQ_TIME]); +F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, discard_idle_interval, + interval_time[DISCARD_TIME]); +F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_idle_interval, interval_time[GC_TIME]); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, iostat_enable, iostat_enable); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, readdir_ra, readdir_ra); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_pin_file_thresh, gc_pin_file_threshold); @@ -457,6 +460,8 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(dirty_nats_ratio), ATTR_LIST(cp_interval), ATTR_LIST(idle_interval), + ATTR_LIST(discard_idle_interval), + ATTR_LIST(gc_idle_interval), ATTR_LIST(iostat_enable), ATTR_LIST(readdir_ra), ATTR_LIST(gc_pin_file_thresh), -- GitLab From bb6b18cd823b6761a37ef1b866d7c30bbbd9fc46 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 19 Sep 2018 15:45:19 -0700 Subject: [PATCH 1036/2269] f2fs: avoid infinite loop in f2fs_alloc_nid If we have an error in f2fs_build_free_nids, we're able to fall into a loop to find free nids. Suggested-by: Chao Yu Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 3bfd89534ff2f..645e287770c0d 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -2350,8 +2350,9 @@ retry: spin_unlock(&nm_i->nid_list_lock); /* Let's scan nat pages and its caches to get free nids */ - f2fs_build_free_nids(sbi, true, false); - goto retry; + if (!f2fs_build_free_nids(sbi, true, false)) + goto retry; + return false; } /* -- GitLab From 4129edf3df144900227e008af3a8da51d52e76c4 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 20 Sep 2018 17:41:30 +0800 Subject: [PATCH 1037/2269] f2fs: fix to recover inode's uid/gid during POR Step to reproduce this bug: 1. logon as root 2. mount -t f2fs /dev/sdd /mnt; 3. touch /mnt/file; 4. chown system /mnt/file; chgrp system /mnt/file; 5. xfs_io -f /mnt/file -c "fsync"; 6. godown /mnt; 7. umount /mnt; 8. mount -t f2fs /dev/sdd /mnt; After step 8) we will expect file's uid/gid are all system, but during recovery, these two fields were not been recovered, fix it. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/recovery.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 4e9bcdda38dff..a990a49a07239 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -209,6 +209,8 @@ static void recover_inode(struct inode *inode, struct page *page) char *name; inode->i_mode = le16_to_cpu(raw->i_mode); + i_uid_write(inode, le32_to_cpu(raw->i_uid)); + i_gid_write(inode, le32_to_cpu(raw->i_gid)); f2fs_i_size_write(inode, le64_to_cpu(raw->i_size)); inode->i_atime.tv_sec = le64_to_cpu(raw->i_atime); inode->i_ctime.tv_sec = le64_to_cpu(raw->i_ctime); -- GitLab From faef6340ed40afe11298183e4925a545aeb1acc0 Mon Sep 17 00:00:00 2001 From: Chengguang Xu Date: Sat, 22 Sep 2018 22:43:09 +0800 Subject: [PATCH 1038/2269] f2fs: fix remount problem of option io_bits Currently we show mount option "io_bits=%u" as "io_size=%uKB", it will cause option parsing problem(unrecognized mount option) in remount. Signed-off-by: Chengguang Xu Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 020e1f16ecc1d..83509f8e640d9 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1330,7 +1330,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) from_kgid_munged(&init_user_ns, F2FS_OPTION(sbi).s_resgid)); if (F2FS_IO_SIZE_BITS(sbi)) - seq_printf(seq, ",io_size=%uKB", F2FS_IO_SIZE_KB(sbi)); + seq_printf(seq, ",io_bits=%u", + F2FS_OPTION(sbi).write_io_size_bits); #ifdef CONFIG_F2FS_FAULT_INJECTION if (test_opt(sbi, FAULT_INJECTION)) { seq_printf(seq, ",fault_injection=%u", -- GitLab From 765809eabe30c9c3b491c1ec2ec1caddfacb0116 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 17 Sep 2018 13:25:04 -0700 Subject: [PATCH 1039/2269] f2fs: report ENOENT correctly in f2fs_rename This fixes wrong error report in f2fs_rename. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/namei.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index cc8c2ef448301..79ed2039647b8 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -824,7 +824,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, struct f2fs_dir_entry *old_entry; struct f2fs_dir_entry *new_entry; bool is_old_inline = f2fs_has_inline_dentry(old_dir); - int err = -ENOENT; + int err; if (unlikely(f2fs_cp_error(sbi))) return -EIO; @@ -848,6 +848,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, goto out; } + err = -ENOENT; old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page); if (!old_entry) { if (IS_ERR(old_page)) @@ -1013,7 +1014,7 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, struct f2fs_dir_entry *old_dir_entry = NULL, *new_dir_entry = NULL; struct f2fs_dir_entry *old_entry, *new_entry; int old_nlink = 0, new_nlink = 0; - int err = -ENOENT; + int err; if (unlikely(f2fs_cp_error(sbi))) return -EIO; @@ -1034,6 +1035,7 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, if (err) goto out; + err = -ENOENT; old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page); if (!old_entry) { if (IS_ERR(old_page)) -- GitLab From b02a8e785014c7bf5222bd9f18961d022701d148 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 19 Sep 2018 15:28:40 -0700 Subject: [PATCH 1040/2269] f2fs: update i_size after DIO completion This is related to ee70daaba82d ("xfs: update i_size after unwritten conversion in dio completion") If we update i_size during dio_write, dio_read can read out stale data, which breaks xfstests/465. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 15 +++++++-------- fs/f2fs/f2fs.h | 1 + 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 6196c558219f7..425abcd9290e8 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -880,7 +880,6 @@ static int __allocate_data_block(struct dnode_of_data *dn, int seg_type) struct f2fs_summary sum; struct node_info ni; block_t old_blkaddr; - pgoff_t fofs; blkcnt_t count = 1; int err; @@ -909,12 +908,10 @@ alloc: old_blkaddr, old_blkaddr); f2fs_set_data_blkaddr(dn); - /* update i_size */ - fofs = f2fs_start_bidx_of_node(ofs_of_node(dn->node_page), dn->inode) + - dn->ofs_in_node; - if (i_size_read(dn->inode) < ((loff_t)(fofs + 1) << PAGE_SHIFT)) - f2fs_i_size_write(dn->inode, - ((loff_t)(fofs + 1) << PAGE_SHIFT)); + /* + * i_size will be updated by direct_IO. Otherwise, we'll get stale + * data from unwritten block via dio_read. + */ return 0; } @@ -1080,6 +1077,8 @@ next_block: last_ofs_in_node = dn.ofs_in_node; } } else { + WARN_ON(flag != F2FS_GET_BLOCK_PRE_DIO && + flag != F2FS_GET_BLOCK_DIO); err = __allocate_data_block(&dn, map->m_seg_type); if (!err) @@ -1259,7 +1258,7 @@ static int get_data_block_dio(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) { return __get_data_block(inode, iblock, bh_result, create, - F2FS_GET_BLOCK_DEFAULT, NULL, + F2FS_GET_BLOCK_DIO, NULL, f2fs_rw_hint_to_seg_type( inode->i_write_hint)); } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 821d5acf7d165..45add593d55c2 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -600,6 +600,7 @@ enum { F2FS_GET_BLOCK_DEFAULT, F2FS_GET_BLOCK_FIEMAP, F2FS_GET_BLOCK_BMAP, + F2FS_GET_BLOCK_DIO, F2FS_GET_BLOCK_PRE_DIO, F2FS_GET_BLOCK_PRE_AIO, F2FS_GET_BLOCK_PRECACHE, -- GitLab From e07d0f650dc8a9249341a7b6c429f7c88c112237 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 25 Sep 2018 15:35:58 +0800 Subject: [PATCH 1041/2269] f2fs: fix to recover inode's project id during POR Testcase to reproduce this bug: 1. mkfs.f2fs -O extra_attr -O project_quota /dev/sdd 2. mount -t f2fs /dev/sdd /mnt/f2fs 3. touch /mnt/f2fs/file 4. sync 5. chattr -p 1 /mnt/f2fs/file 6. xfs_io -f /mnt/f2fs/file -c "fsync" 7. godown /mnt/f2fs 8. umount /mnt/f2fs 9. mount -t f2fs /dev/sdd /mnt/f2fs 10. lsattr -p /mnt/f2fs/file 0 -----------------N- /mnt/f2fs/file But actually, we expect the correct result is: 1 -----------------N- /mnt/f2fs/file The reason is we didn't recover inode.i_projid field during mount, fix it. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/recovery.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index a990a49a07239..7bb2549561106 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -211,6 +211,19 @@ static void recover_inode(struct inode *inode, struct page *page) inode->i_mode = le16_to_cpu(raw->i_mode); i_uid_write(inode, le32_to_cpu(raw->i_uid)); i_gid_write(inode, le32_to_cpu(raw->i_gid)); + + if (raw->i_inline & F2FS_EXTRA_ATTR) { + if (f2fs_sb_has_project_quota(F2FS_I_SB(inode)->sb) && + F2FS_FITS_IN_INODE(raw, le16_to_cpu(raw->i_extra_isize), + i_projid)) { + projid_t i_projid; + + i_projid = (projid_t)le32_to_cpu(raw->i_projid); + F2FS_I(inode)->i_projid = + make_kprojid(&init_user_ns, i_projid); + } + } + f2fs_i_size_write(inode, le64_to_cpu(raw->i_size)); inode->i_atime.tv_sec = le64_to_cpu(raw->i_atime); inode->i_ctime.tv_sec = le64_to_cpu(raw->i_ctime); -- GitLab From 880bb02b244e60f03edcd24c7c9ade0bcc7e6219 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 25 Sep 2018 15:35:59 +0800 Subject: [PATCH 1042/2269] f2fs: fix to recover inode's i_flags during POR Testcase to reproduce this bug: 1. mkfs.f2fs /dev/sdd 2. mount -t f2fs /dev/sdd /mnt/f2fs 3. touch /mnt/f2fs/file 4. sync 5. chattr +A /mnt/f2fs/file 6. xfs_io -f /mnt/f2fs/file -c "fsync" 7. godown /mnt/f2fs 8. umount /mnt/f2fs 9. mount -t f2fs /dev/sdd /mnt/f2fs 10. lsattr /mnt/f2fs/file -----------------N- /mnt/f2fs/file But actually, we expect the corrct result is: -------A---------N- /mnt/f2fs/file The reason is we didn't recover inode.i_flags field during mount, fix it. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/recovery.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 7bb2549561106..c0d110d266564 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -233,6 +233,7 @@ static void recover_inode(struct inode *inode, struct page *page) inode->i_mtime.tv_nsec = le32_to_cpu(raw->i_mtime_nsec); F2FS_I(inode)->i_advise = raw->i_advise; + F2FS_I(inode)->i_flags = le32_to_cpu(raw->i_flags); recover_inline_flags(inode, raw); -- GitLab From 446682469c34166fcfc90d6d730fcc7e381145aa Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 25 Sep 2018 15:36:00 +0800 Subject: [PATCH 1043/2269] f2fs: fix to recover inode's i_gc_failures during POR inode.i_gc_failures is used to indicate that skip count of migrating on blocks of inode, we should guarantee it can be recovered in sudden power-off case. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/recovery.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index c0d110d266564..14c156a256ca3 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -234,6 +234,8 @@ static void recover_inode(struct inode *inode, struct page *page) F2FS_I(inode)->i_advise = raw->i_advise; F2FS_I(inode)->i_flags = le32_to_cpu(raw->i_flags); + F2FS_I(inode)->i_gc_failures[GC_FAILURE_PIN] = + le16_to_cpu(raw->i_gc_failures); recover_inline_flags(inode, raw); -- GitLab From 473418a311598478b62a2504d6cad6104d52a561 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 25 Sep 2018 15:36:01 +0800 Subject: [PATCH 1044/2269] f2fs: fix to recover inode's crtime during POR Testcase to reproduce this bug: 1. mkfs.f2fs -O extra_attr -O inode_crtime /dev/sdd 2. mount -t f2fs /dev/sdd /mnt/f2fs 3. touch /mnt/f2fs/file 4. xfs_io -f /mnt/f2fs/file -c "fsync" 5. godown /mnt/f2fs 6. umount /mnt/f2fs 7. mount -t f2fs /dev/sdd /mnt/f2fs 8. xfs_io -f /mnt/f2fs/file -c "statx -r" stat.btime.tv_sec = 0 stat.btime.tv_nsec = 0 This patch fixes to recover inode creation time fields during mount. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 645e287770c0d..245ea6c0fcbb4 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -2558,6 +2558,13 @@ retry: F2FS_FITS_IN_INODE(src, le16_to_cpu(src->i_extra_isize), i_projid)) dst->i_projid = src->i_projid; + + if (f2fs_sb_has_inode_crtime(sbi->sb) && + F2FS_FITS_IN_INODE(src, le16_to_cpu(src->i_extra_isize), + i_crtime_nsec)) { + dst->i_crtime = src->i_crtime; + dst->i_crtime_nsec = src->i_crtime_nsec; + } } new_ni = old_ni; -- GitLab From 96cc73248e6e9e176553ce816b3ee2f75940db16 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 25 Sep 2018 15:36:03 +0800 Subject: [PATCH 1045/2269] f2fs: mark inode dirty explicitly in recover_inode() Mark inode dirty explicitly in the end of recover_inode() to make sure that all recoverable fields can be persisted later. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/recovery.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 14c156a256ca3..6682c8d86a511 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -239,6 +239,8 @@ static void recover_inode(struct inode *inode, struct page *page) recover_inline_flags(inode, raw); + f2fs_mark_inode_dirty_sync(inode, true); + if (file_enc_name(inode)) name = ""; else -- GitLab From 9603f1525564a349514255a65cc7439a37088428 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 17 Sep 2018 17:36:06 -0700 Subject: [PATCH 1046/2269] f2fs: avoid f2fs_bug_on if f2fs_get_meta_page_nofail got EIO This patch avoids BUG_ON when f2fs_get_meta_page_nofail got EIO during xfstests/generic/475. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 10 +++++----- fs/f2fs/f2fs.h | 2 +- fs/f2fs/gc.c | 12 ++++++++++++ fs/f2fs/node.c | 32 ++++++++++++++++++++++++-------- fs/f2fs/recovery.c | 2 ++ fs/f2fs/segment.c | 3 +++ 6 files changed, 47 insertions(+), 14 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 86db0004774af..49b6c9c44fa16 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -119,11 +119,8 @@ retry: if (PTR_ERR(page) == -EIO && ++count <= DEFAULT_RETRY_IO_COUNT) goto retry; - f2fs_stop_checkpoint(sbi, false); - f2fs_bug_on(sbi, 1); } - return page; } @@ -1496,7 +1493,10 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) ckpt->checkpoint_ver = cpu_to_le64(++ckpt_ver); /* write cached NAT/SIT entries to NAT/SIT area */ - f2fs_flush_nat_entries(sbi, cpc); + err = f2fs_flush_nat_entries(sbi, cpc); + if (err) + goto stop; + f2fs_flush_sit_entries(sbi, cpc); /* unlock all the fs_lock[] in do_checkpoint() */ @@ -1505,7 +1505,7 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) f2fs_release_discard_addrs(sbi); else f2fs_clear_prefree_segments(sbi, cpc); - +stop: unblock_operations(sbi); stat_inc_cp_count(sbi->stat_info); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 45add593d55c2..140aafaddcc91 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2912,7 +2912,7 @@ int f2fs_recover_xattr_data(struct inode *inode, struct page *page); int f2fs_recover_inode_page(struct f2fs_sb_info *sbi, struct page *page); int f2fs_restore_node_summary(struct f2fs_sb_info *sbi, unsigned int segno, struct f2fs_summary_block *sum); -void f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc); +int f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc); int f2fs_build_node_manager(struct f2fs_sb_info *sbi); void f2fs_destroy_node_manager(struct f2fs_sb_info *sbi); int __init f2fs_create_node_manager_caches(void); diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 7a41f97fae688..55ef8a8885452 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -1066,6 +1066,18 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, /* reference all summary page */ while (segno < end_segno) { sum_page = f2fs_get_sum_page(sbi, segno++); + if (IS_ERR(sum_page)) { + int err = PTR_ERR(sum_page); + + end_segno = segno - 1; + for (segno = start_segno; segno < end_segno; segno++) { + sum_page = find_get_page(META_MAPPING(sbi), + GET_SUM_BLOCK(sbi, segno)); + f2fs_put_page(sum_page, 0); + f2fs_put_page(sum_page, 0); + } + return err; + } unlock_page(sum_page); } diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 245ea6c0fcbb4..4d78da6b2c957 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -126,6 +126,8 @@ static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid) /* get current nat block page with lock */ src_page = get_current_nat_page(sbi, nid); + if (IS_ERR(src_page)) + return src_page; dst_page = f2fs_grab_meta_page(sbi, dst_off); f2fs_bug_on(sbi, PageDirty(src_page)); @@ -2265,15 +2267,19 @@ static int __f2fs_build_free_nids(struct f2fs_sb_info *sbi, nm_i->nat_block_bitmap)) { struct page *page = get_current_nat_page(sbi, nid); - ret = scan_nat_page(sbi, page, nid); - f2fs_put_page(page, 1); + if (IS_ERR(page)) { + ret = PTR_ERR(page); + } else { + ret = scan_nat_page(sbi, page, nid); + f2fs_put_page(page, 1); + } if (ret) { up_read(&nm_i->nat_tree_lock); f2fs_bug_on(sbi, !mount); f2fs_msg(sbi->sb, KERN_ERR, "NAT is corrupt, run fsck to fix it"); - return -EINVAL; + return ret; } } @@ -2708,7 +2714,7 @@ static void __update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid, __clear_bit_le(nat_index, nm_i->full_nat_bits); } -static void __flush_nat_entry_set(struct f2fs_sb_info *sbi, +static int __flush_nat_entry_set(struct f2fs_sb_info *sbi, struct nat_entry_set *set, struct cp_control *cpc) { struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); @@ -2732,6 +2738,9 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi, down_write(&curseg->journal_rwsem); } else { page = get_next_nat_page(sbi, start_nid); + if (IS_ERR(page)) + return PTR_ERR(page); + nat_blk = page_address(page); f2fs_bug_on(sbi, !nat_blk); } @@ -2777,12 +2786,13 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi, radix_tree_delete(&NM_I(sbi)->nat_set_root, set->set); kmem_cache_free(nat_entry_set_slab, set); } + return 0; } /* * This function is called during the checkpointing process. */ -void f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) +int f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) { struct f2fs_nm_info *nm_i = NM_I(sbi); struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); @@ -2792,6 +2802,7 @@ void f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) unsigned int found; nid_t set_idx = 0; LIST_HEAD(sets); + int err = 0; /* during unmount, let's flush nat_bits before checking dirty_nat_cnt */ if (enabled_nat_bits(sbi, cpc)) { @@ -2801,7 +2812,7 @@ void f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) } if (!nm_i->dirty_nat_cnt) - return; + return 0; down_write(&nm_i->nat_tree_lock); @@ -2824,11 +2835,16 @@ void f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) } /* flush dirty nats in nat entry set */ - list_for_each_entry_safe(set, tmp, &sets, set_list) - __flush_nat_entry_set(sbi, set, cpc); + list_for_each_entry_safe(set, tmp, &sets, set_list) { + err = __flush_nat_entry_set(sbi, set, cpc); + if (err) + break; + } up_write(&nm_i->nat_tree_lock); /* Allow dirty nats by node block allocation in write_begin */ + + return err; } static int __get_nat_bitmaps(struct f2fs_sb_info *sbi) diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 6682c8d86a511..b069b7d7403db 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -375,6 +375,8 @@ static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi, } sum_page = f2fs_get_sum_page(sbi, segno); + if (IS_ERR(sum_page)) + return PTR_ERR(sum_page); sum_node = (struct f2fs_summary_block *)page_address(sum_page); sum = sum_node->entries[blkoff]; f2fs_put_page(sum_page, 1); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 97a4fae75651e..d33e9d0d5d47a 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2429,6 +2429,7 @@ static void change_curseg(struct f2fs_sb_info *sbi, int type) __next_free_blkoff(sbi, curseg, 0); sum_page = f2fs_get_sum_page(sbi, new_segno); + f2fs_bug_on(sbi, IS_ERR(sum_page)); sum_node = (struct f2fs_summary_block *)page_address(sum_page); memcpy(curseg->sum_blk, sum_node, SUM_ENTRY_SIZE); f2fs_put_page(sum_page, 1); @@ -3903,6 +3904,8 @@ static int build_sit_entries(struct f2fs_sb_info *sbi) se = &sit_i->sentries[start]; page = get_current_sit_page(sbi, start); + if (IS_ERR(page)) + return PTR_ERR(page); sit_blk = (struct f2fs_sit_block *)page_address(page); sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)]; f2fs_put_page(page, 1); -- GitLab From 81869861f2057317c31aa64e8bc4d4ee3b8f9b07 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 25 Sep 2018 15:25:21 -0700 Subject: [PATCH 1047/2269] f2fs: return correct errno in f2fs_gc This fixes overriding error number in f2fs_gc. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 55ef8a8885452..23841f62a0b0b 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -1253,7 +1253,7 @@ stop: put_gc_inode(&gc_list); - if (sync) + if (sync && !ret) ret = sec_freed ? 0 : -EAGAIN; return ret; } -- GitLab From aad0bea98f4a7d09a524c6fb4b79c28db6c8e089 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 27 Sep 2018 22:15:31 -0700 Subject: [PATCH 1048/2269] f2fs: fix missing up_read This patch fixes missing up_read call. Fixes: c9b60788fc76 ("f2fs: fix to do sanity check with block address in main area") Cc: # 4.19+ Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 4d78da6b2c957..73ad6437ea770 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1541,8 +1541,10 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted, } if (__is_valid_data_blkaddr(ni.blk_addr) && - !f2fs_is_valid_blkaddr(sbi, ni.blk_addr, DATA_GENERIC)) + !f2fs_is_valid_blkaddr(sbi, ni.blk_addr, DATA_GENERIC)) { + up_read(&sbi->node_write); goto redirty_out; + } if (atomic && !test_opt(sbi, NOBARRIER)) fio.op_flags |= REQ_PREFLUSH | REQ_FUA; -- GitLab From fab89490f36b3f65f66566aeb58858ce6a827c1b Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 28 Sep 2018 00:24:39 -0700 Subject: [PATCH 1049/2269] f2fs: keep lazytime on remount This patch fixes losing lazytime when remounting f2fs. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 83509f8e640d9..fe840745461bd 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1550,6 +1550,7 @@ skip: (test_opt(sbi, POSIX_ACL) ? MS_POSIXACL : 0); limit_reserve_root(sbi); + *flags = (*flags & ~SB_LAZYTIME) | (sb->s_flags & SB_LAZYTIME); return 0; restore_gc: if (need_restart_gc) { -- GitLab From 1186481226ee93f9a0e824fedd255fa072fdadf1 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 29 Sep 2018 18:31:27 +0800 Subject: [PATCH 1050/2269] f2fs: add to account meta IO This patch supports to account meta IO, it enables to show write IO from f2fs more comprehensively via 'status' debugfs entry. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/debug.c | 13 +++++++++++++ fs/f2fs/f2fs.h | 15 +++++++++++++++ fs/f2fs/segment.c | 1 + 3 files changed, 29 insertions(+) diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index d3c402183e3c4..da1cabbc4973f 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -118,6 +118,9 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->curzone[i] = GET_ZONE_FROM_SEC(sbi, si->cursec[i]); } + for (i = META_CP; i < META_MAX; i++) + si->meta_count[i] = atomic_read(&sbi->meta_count[i]); + for (i = 0; i < 2; i++) { si->segment_count[i] = sbi->segment_count[i]; si->block_count[i] = sbi->block_count[i]; @@ -329,6 +332,13 @@ static int stat_show(struct seq_file *s, void *v) si->prefree_count, si->free_segs, si->free_secs); seq_printf(s, "CP calls: %d (BG: %d)\n", si->cp_count, si->bg_cp_count); + seq_printf(s, " - cp blocks : %u\n", si->meta_count[META_CP]); + seq_printf(s, " - sit blocks : %u\n", + si->meta_count[META_SIT]); + seq_printf(s, " - nat blocks : %u\n", + si->meta_count[META_NAT]); + seq_printf(s, " - ssa blocks : %u\n", + si->meta_count[META_SSA]); seq_printf(s, "GC calls: %d (BG: %d)\n", si->call_count, si->bg_gc); seq_printf(s, " - data segments : %d (%d)\n", @@ -441,6 +451,7 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi) { struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi); struct f2fs_stat_info *si; + int i; si = f2fs_kzalloc(sbi, sizeof(struct f2fs_stat_info), GFP_KERNEL); if (!si) @@ -466,6 +477,8 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi) atomic_set(&sbi->inline_inode, 0); atomic_set(&sbi->inline_dir, 0); atomic_set(&sbi->inplace_count, 0); + for (i = META_CP; i < META_MAX; i++) + atomic_set(&sbi->meta_count[i], 0); atomic_set(&sbi->aw_cnt, 0); atomic_set(&sbi->vw_cnt, 0); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 140aafaddcc91..e1dbc4ad1479a 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -202,6 +202,7 @@ enum { META_NAT, META_SIT, META_SSA, + META_MAX, META_POR, DATA_GENERIC, META_GENERIC, @@ -1261,6 +1262,7 @@ struct f2fs_sb_info { */ #ifdef CONFIG_F2FS_STAT_FS struct f2fs_stat_info *stat_info; /* FS status information */ + atomic_t meta_count[META_MAX]; /* # of meta blocks */ unsigned int segment_count[2]; /* # of allocated segments */ unsigned int block_count[2]; /* # of allocated blocks */ atomic_t inplace_count; /* # of inplace update */ @@ -3124,6 +3126,7 @@ struct f2fs_stat_info { int cursec[NR_CURSEG_TYPE]; int curzone[NR_CURSEG_TYPE]; + unsigned int meta_count[META_MAX]; unsigned int segment_count[2]; unsigned int block_count[2]; unsigned int inplace_count; @@ -3175,6 +3178,17 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi) if (f2fs_has_inline_dentry(inode)) \ (atomic_dec(&F2FS_I_SB(inode)->inline_dir)); \ } while (0) +#define stat_inc_meta_count(sbi, blkaddr) \ + do { \ + if (blkaddr < SIT_I(sbi)->sit_base_addr) \ + atomic_inc(&(sbi)->meta_count[META_CP]); \ + else if (blkaddr < NM_I(sbi)->nat_blkaddr) \ + atomic_inc(&(sbi)->meta_count[META_SIT]); \ + else if (blkaddr < SM_I(sbi)->ssa_blkaddr) \ + atomic_inc(&(sbi)->meta_count[META_NAT]); \ + else if (blkaddr < SM_I(sbi)->main_blkaddr) \ + atomic_inc(&(sbi)->meta_count[META_SSA]); \ + } while (0) #define stat_inc_seg_type(sbi, curseg) \ ((sbi)->segment_count[(curseg)->alloc_type]++) #define stat_inc_block_count(sbi, curseg) \ @@ -3262,6 +3276,7 @@ void f2fs_destroy_root_stats(void); #define stat_inc_volatile_write(inode) do { } while (0) #define stat_dec_volatile_write(inode) do { } while (0) #define stat_update_max_volatile_write(inode) do { } while (0) +#define stat_inc_meta_count(sbi, blkaddr) do { } while (0) #define stat_inc_seg_type(sbi, curseg) do { } while (0) #define stat_inc_block_count(sbi, curseg) do { } while (0) #define stat_inc_inplace_blocks(sbi) do { } while (0) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index d33e9d0d5d47a..3fa8870569ccb 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -3018,6 +3018,7 @@ void f2fs_do_write_meta_page(struct f2fs_sb_info *sbi, struct page *page, ClearPageError(page); f2fs_submit_page_write(&fio); + stat_inc_meta_count(sbi, page->index); f2fs_update_iostat(sbi, io_type, F2FS_BLKSIZE); } -- GitLab From b2b90587d2abed04e426671a7c17224b50164dcc Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 29 Sep 2018 18:31:28 +0800 Subject: [PATCH 1051/2269] f2fs: add to account skip count of background GC This patch adds to account skip count of background GC, and show stat info via 'status' debugfs entry. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/debug.c | 4 ++++ fs/f2fs/f2fs.h | 7 +++++++ fs/f2fs/gc.c | 14 +++++++++++--- 3 files changed, 22 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index da1cabbc4973f..75bc62edc4c1f 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -101,6 +101,8 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->avail_nids = NM_I(sbi)->available_nids; si->alloc_nids = NM_I(sbi)->nid_cnt[PREALLOC_NID]; si->bg_gc = sbi->bg_gc; + si->io_skip_bggc = sbi->io_skip_bggc; + si->other_skip_bggc = sbi->other_skip_bggc; si->skipped_atomic_files[BG_GC] = sbi->skipped_atomic_files[BG_GC]; si->skipped_atomic_files[FG_GC] = sbi->skipped_atomic_files[FG_GC]; si->util_free = (int)(free_user_blocks(sbi) >> sbi->log_blocks_per_seg) @@ -355,6 +357,8 @@ static int stat_show(struct seq_file *s, void *v) si->skipped_atomic_files[BG_GC] + si->skipped_atomic_files[FG_GC], si->skipped_atomic_files[BG_GC]); + seq_printf(s, "BG skip : IO: %u, Other: %u\n", + si->io_skip_bggc, si->other_skip_bggc); seq_puts(s, "\nExtent Cache:\n"); seq_printf(s, " - Hit Count: L1-1:%llu L1-2:%llu L2:%llu\n", si->hit_largest, si->hit_cached, diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index e1dbc4ad1479a..4825b3028dfe3 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1278,6 +1278,8 @@ struct f2fs_sb_info { atomic_t max_aw_cnt; /* max # of atomic writes */ atomic_t max_vw_cnt; /* max # of volatile writes */ int bg_gc; /* background gc calls */ + unsigned int io_skip_bggc; /* skip background gc for in-flight IO */ + unsigned int other_skip_bggc; /* skip background gc for other reasons */ unsigned int ndirty_inode[NR_INODE_TYPE]; /* # of dirty inodes */ #endif spinlock_t stat_lock; /* lock for stat operations */ @@ -3105,6 +3107,7 @@ struct f2fs_stat_info { int free_nids, avail_nids, alloc_nids; int total_count, utilization; int bg_gc, nr_wb_cp_data, nr_wb_data; + unsigned int io_skip_bggc, other_skip_bggc; int nr_flushing, nr_flushed, flush_list_empty; int nr_discarding, nr_discarded; int nr_discard_cmd; @@ -3142,6 +3145,8 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi) #define stat_inc_bg_cp_count(si) ((si)->bg_cp_count++) #define stat_inc_call_count(si) ((si)->call_count++) #define stat_inc_bggc_count(sbi) ((sbi)->bg_gc++) +#define stat_io_skip_bggc_count(sbi) ((sbi)->io_skip_bggc++) +#define stat_other_skip_bggc_count(sbi) ((sbi)->other_skip_bggc++) #define stat_inc_dirty_inode(sbi, type) ((sbi)->ndirty_inode[type]++) #define stat_dec_dirty_inode(sbi, type) ((sbi)->ndirty_inode[type]--) #define stat_inc_total_hit(sbi) (atomic64_inc(&(sbi)->total_hit_ext)) @@ -3258,6 +3263,8 @@ void f2fs_destroy_root_stats(void); #define stat_inc_bg_cp_count(si) do { } while (0) #define stat_inc_call_count(si) do { } while (0) #define stat_inc_bggc_count(si) do { } while (0) +#define stat_io_skip_bggc_count(sbi) do { } while (0) +#define stat_other_skip_bggc_count(sbi) do { } while (0) #define stat_inc_dirty_inode(sbi, type) do { } while (0) #define stat_dec_dirty_inode(sbi, type) do { } while (0) #define stat_inc_total_hit(sb) do { } while (0) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 23841f62a0b0b..4ce7ba181e0fc 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -40,13 +40,16 @@ static int gc_thread_func(void *data) if (gc_th->gc_wake) gc_th->gc_wake = 0; - if (try_to_freeze()) + if (try_to_freeze()) { + stat_other_skip_bggc_count(sbi); continue; + } if (kthread_should_stop()) break; if (sbi->sb->s_writers.frozen >= SB_FREEZE_WRITE) { increase_sleep_time(gc_th, &wait_ms); + stat_other_skip_bggc_count(sbi); continue; } @@ -55,8 +58,10 @@ static int gc_thread_func(void *data) f2fs_stop_checkpoint(sbi, false); } - if (!sb_start_write_trylock(sbi->sb)) + if (!sb_start_write_trylock(sbi->sb)) { + stat_other_skip_bggc_count(sbi); continue; + } /* * [GC triggering condition] @@ -77,12 +82,15 @@ static int gc_thread_func(void *data) goto do_gc; } - if (!mutex_trylock(&sbi->gc_mutex)) + if (!mutex_trylock(&sbi->gc_mutex)) { + stat_other_skip_bggc_count(sbi); goto next; + } if (!is_idle(sbi, GC_TIME)) { increase_sleep_time(gc_th, &wait_ms); mutex_unlock(&sbi->gc_mutex); + stat_io_skip_bggc_count(sbi); goto next; } -- GitLab From 58107a9c62f640e520610ef955826a58aa6c9edf Mon Sep 17 00:00:00 2001 From: Junling Zheng Date: Fri, 28 Sep 2018 20:25:56 +0800 Subject: [PATCH 1052/2269] f2fs: support superblock checksum Now we support crc32 checksum for superblock. Reviewed-by: Chao Yu Signed-off-by: Junling Zheng Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 2 ++ fs/f2fs/super.c | 28 ++++++++++++++++++++++++++++ fs/f2fs/sysfs.c | 7 +++++++ include/linux/f2fs_fs.h | 3 ++- 4 files changed, 39 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 4825b3028dfe3..b90e71a5f21a0 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -149,6 +149,7 @@ struct f2fs_mount_info { #define F2FS_FEATURE_INODE_CRTIME 0x0100 #define F2FS_FEATURE_LOST_FOUND 0x0200 #define F2FS_FEATURE_VERITY 0x0400 /* reserved */ +#define F2FS_FEATURE_SB_CHKSUM 0x0800 #define F2FS_HAS_FEATURE(sb, mask) \ ((F2FS_SB(sb)->raw_super->feature & cpu_to_le32(mask)) != 0) @@ -3432,6 +3433,7 @@ F2FS_FEATURE_FUNCS(flexible_inline_xattr, FLEXIBLE_INLINE_XATTR); F2FS_FEATURE_FUNCS(quota_ino, QUOTA_INO); F2FS_FEATURE_FUNCS(inode_crtime, INODE_CRTIME); F2FS_FEATURE_FUNCS(lost_found, LOST_FOUND); +F2FS_FEATURE_FUNCS(sb_chksum, SB_CHKSUM); #ifdef CONFIG_BLK_DEV_ZONED static inline int get_blkz_type(struct f2fs_sb_info *sbi, diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index fe840745461bd..6919f91c7a14a 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -2178,6 +2178,26 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi, (bh->b_data + F2FS_SUPER_OFFSET); struct super_block *sb = sbi->sb; unsigned int blocksize; + size_t crc_offset = 0; + __u32 crc = 0; + + /* Check checksum_offset and crc in superblock */ + if (le32_to_cpu(raw_super->feature) & F2FS_FEATURE_SB_CHKSUM) { + crc_offset = le32_to_cpu(raw_super->checksum_offset); + if (crc_offset != + offsetof(struct f2fs_super_block, crc)) { + f2fs_msg(sb, KERN_INFO, + "Invalid SB checksum offset: %zu", + crc_offset); + return 1; + } + crc = le32_to_cpu(raw_super->crc); + if (!f2fs_crc_valid(sbi, crc, raw_super, crc_offset)) { + f2fs_msg(sb, KERN_INFO, + "Invalid SB checksum value: %u", crc); + return 1; + } + } if (F2FS_SUPER_MAGIC != le32_to_cpu(raw_super->magic)) { f2fs_msg(sb, KERN_INFO, @@ -2635,6 +2655,7 @@ static int read_raw_super_block(struct f2fs_sb_info *sbi, int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover) { struct buffer_head *bh; + __u32 crc = 0; int err; if ((recover && f2fs_readonly(sbi->sb)) || @@ -2643,6 +2664,13 @@ int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover) return -EROFS; } + /* we should update superblock crc here */ + if (!recover && f2fs_sb_has_sb_chksum(sbi->sb)) { + crc = f2fs_crc32(sbi, F2FS_RAW_SUPER(sbi), + offsetof(struct f2fs_super_block, crc)); + F2FS_RAW_SUPER(sbi)->crc = cpu_to_le32(crc); + } + /* write back-up superblock first */ bh = sb_bread(sbi->sb, sbi->valid_super_block ? 0 : 1); if (!bh) diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 5a252d78d1787..db89741b219b1 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -117,6 +117,9 @@ static ssize_t features_show(struct f2fs_attr *a, if (f2fs_sb_has_lost_found(sb)) len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", len ? ", " : "", "lost_found"); + if (f2fs_sb_has_sb_chksum(sb)) + len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", + len ? ", " : "", "sb_checksum"); len += snprintf(buf + len, PAGE_SIZE - len, "\n"); return len; } @@ -334,6 +337,7 @@ enum feat_id { FEAT_QUOTA_INO, FEAT_INODE_CRTIME, FEAT_LOST_FOUND, + FEAT_SB_CHECKSUM, }; static ssize_t f2fs_feature_show(struct f2fs_attr *a, @@ -350,6 +354,7 @@ static ssize_t f2fs_feature_show(struct f2fs_attr *a, case FEAT_QUOTA_INO: case FEAT_INODE_CRTIME: case FEAT_LOST_FOUND: + case FEAT_SB_CHECKSUM: return snprintf(buf, PAGE_SIZE, "supported\n"); } return 0; @@ -434,6 +439,7 @@ F2FS_FEATURE_RO_ATTR(flexible_inline_xattr, FEAT_FLEXIBLE_INLINE_XATTR); F2FS_FEATURE_RO_ATTR(quota_ino, FEAT_QUOTA_INO); F2FS_FEATURE_RO_ATTR(inode_crtime, FEAT_INODE_CRTIME); F2FS_FEATURE_RO_ATTR(lost_found, FEAT_LOST_FOUND); +F2FS_FEATURE_RO_ATTR(sb_checksum, FEAT_SB_CHECKSUM); #define ATTR_LIST(name) (&f2fs_attr_##name.attr) static struct attribute *f2fs_attrs[] = { @@ -493,6 +499,7 @@ static struct attribute *f2fs_feat_attrs[] = { ATTR_LIST(quota_ino), ATTR_LIST(inode_crtime), ATTR_LIST(lost_found), + ATTR_LIST(sb_checksum), NULL, }; diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 1d4b196291d69..1db13ff9a3f48 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -109,7 +109,8 @@ struct f2fs_super_block { struct f2fs_device devs[MAX_DEVICES]; /* device list */ __le32 qf_ino[F2FS_MAX_QUOTAS]; /* quota inode numbers */ __u8 hot_ext_count; /* # of hot file extension */ - __u8 reserved[314]; /* valid reserved region */ + __u8 reserved[310]; /* valid reserved region */ + __le32 crc; /* checksum of superblock */ } __packed; /* -- GitLab From 08edfe4a51b530f2bc88f8ea59dd0248622d6b04 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 27 Sep 2018 23:41:16 +0800 Subject: [PATCH 1053/2269] Revert: "f2fs: check last page index in cached bio to decide submission" There is one case that we can leave bio in f2fs, result in hanging page writeback waiter. Thread A Thread B - f2fs_write_cache_pages - f2fs_submit_page_write page #0 cached in bio #0 of cold log - f2fs_submit_page_write page #1 cached in bio #1 of warm log - f2fs_write_cache_pages - f2fs_submit_page_write bio is full, submit bio #1 contain page #1 - f2fs_submit_merged_write_cond(, page #1) fail to submit bio #0 due to page #1 is not in any cached bios. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 3 +-- fs/f2fs/data.c | 38 +++++++++++++++++++------------------- fs/f2fs/f2fs.h | 4 ++-- fs/f2fs/node.c | 11 +++++------ fs/f2fs/segment.c | 11 +++++------ 5 files changed, 32 insertions(+), 35 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 49b6c9c44fa16..72004e2965ba2 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -276,8 +276,7 @@ static int __f2fs_write_meta_page(struct page *page, dec_page_count(sbi, F2FS_DIRTY_META); if (wbc->for_reclaim) - f2fs_submit_merged_write_cond(sbi, page->mapping->host, - 0, page->index, META); + f2fs_submit_merged_write_cond(sbi, NULL, page, 0, META); unlock_page(page); diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 425abcd9290e8..7bb15b360a78f 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -321,8 +321,8 @@ static void __submit_merged_bio(struct f2fs_bio_info *io) io->bio = NULL; } -static bool __has_merged_page(struct f2fs_bio_info *io, - struct inode *inode, nid_t ino, pgoff_t idx) +static bool __has_merged_page(struct f2fs_bio_info *io, struct inode *inode, + struct page *page, nid_t ino) { struct bio_vec *bvec; struct page *target; @@ -331,7 +331,7 @@ static bool __has_merged_page(struct f2fs_bio_info *io, if (!io->bio) return false; - if (!inode && !ino) + if (!inode && !page && !ino) return true; bio_for_each_segment_all(bvec, io->bio, i) { @@ -341,11 +341,10 @@ static bool __has_merged_page(struct f2fs_bio_info *io, else target = fscrypt_control_page(bvec->bv_page); - if (idx != target->index) - continue; - if (inode && inode == target->mapping->host) return true; + if (page && page == target) + return true; if (ino && ino == ino_of_node(target)) return true; } @@ -354,7 +353,8 @@ static bool __has_merged_page(struct f2fs_bio_info *io, } static bool has_merged_page(struct f2fs_sb_info *sbi, struct inode *inode, - nid_t ino, pgoff_t idx, enum page_type type) + struct page *page, nid_t ino, + enum page_type type) { enum page_type btype = PAGE_TYPE_OF_BIO(type); enum temp_type temp; @@ -365,7 +365,7 @@ static bool has_merged_page(struct f2fs_sb_info *sbi, struct inode *inode, io = sbi->write_io[btype] + temp; down_read(&io->io_rwsem); - ret = __has_merged_page(io, inode, ino, idx); + ret = __has_merged_page(io, inode, page, ino); up_read(&io->io_rwsem); /* TODO: use HOT temp only for meta pages now. */ @@ -396,12 +396,12 @@ static void __f2fs_submit_merged_write(struct f2fs_sb_info *sbi, } static void __submit_merged_write_cond(struct f2fs_sb_info *sbi, - struct inode *inode, nid_t ino, pgoff_t idx, - enum page_type type, bool force) + struct inode *inode, struct page *page, + nid_t ino, enum page_type type, bool force) { enum temp_type temp; - if (!force && !has_merged_page(sbi, inode, ino, idx, type)) + if (!force && !has_merged_page(sbi, inode, page, ino, type)) return; for (temp = HOT; temp < NR_TEMP_TYPE; temp++) { @@ -420,10 +420,10 @@ void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type) } void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi, - struct inode *inode, nid_t ino, pgoff_t idx, - enum page_type type) + struct inode *inode, struct page *page, + nid_t ino, enum page_type type) { - __submit_merged_write_cond(sbi, inode, ino, idx, type, false); + __submit_merged_write_cond(sbi, inode, page, ino, type, false); } void f2fs_flush_merged_writes(struct f2fs_sb_info *sbi) @@ -1951,7 +1951,7 @@ out: ClearPageUptodate(page); if (wbc->for_reclaim) { - f2fs_submit_merged_write_cond(sbi, inode, 0, page->index, DATA); + f2fs_submit_merged_write_cond(sbi, NULL, page, 0, DATA); clear_inode_flag(inode, FI_HOT_DATA); f2fs_remove_dirty_inode(inode); submitted = NULL; @@ -2009,10 +2009,10 @@ static int f2fs_write_cache_pages(struct address_space *mapping, pgoff_t index; pgoff_t end; /* Inclusive */ pgoff_t done_index; - pgoff_t last_idx = ULONG_MAX; int cycled; int range_whole = 0; int tag; + int nwritten = 0; pagevec_init(&pvec, 0); @@ -2115,7 +2115,7 @@ continue_unlock: done = 1; break; } else if (submitted) { - last_idx = page->index; + nwritten++; } if (--wbc->nr_to_write <= 0 && @@ -2137,9 +2137,9 @@ continue_unlock: if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) mapping->writeback_index = done_index; - if (last_idx != ULONG_MAX) + if (nwritten) f2fs_submit_merged_write_cond(F2FS_M_SB(mapping), mapping->host, - 0, last_idx, DATA); + NULL, 0, DATA); return ret; } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index b90e71a5f21a0..08fcda83298b0 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3033,8 +3033,8 @@ int f2fs_init_post_read_processing(void); void f2fs_destroy_post_read_processing(void); void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type); void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi, - struct inode *inode, nid_t ino, pgoff_t idx, - enum page_type type); + struct inode *inode, struct page *page, + nid_t ino, enum page_type type); void f2fs_flush_merged_writes(struct f2fs_sb_info *sbi); int f2fs_submit_page_bio(struct f2fs_io_info *fio); void f2fs_submit_page_write(struct f2fs_io_info *fio); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 73ad6437ea770..4ad214c270503 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1565,8 +1565,7 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted, up_read(&sbi->node_write); if (wbc->for_reclaim) { - f2fs_submit_merged_write_cond(sbi, page->mapping->host, 0, - page->index, NODE); + f2fs_submit_merged_write_cond(sbi, NULL, page, 0, NODE); submitted = NULL; } @@ -1631,13 +1630,13 @@ int f2fs_fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode, unsigned int *seq_id) { pgoff_t index; - pgoff_t last_idx = ULONG_MAX; struct pagevec pvec; int ret = 0; struct page *last_page = NULL; bool marked = false; nid_t ino = inode->i_ino; int nr_pages; + int nwritten = 0; if (atomic) { last_page = last_fsync_dnode(sbi, ino); @@ -1715,7 +1714,7 @@ continue_unlock: f2fs_put_page(last_page, 0); break; } else if (submitted) { - last_idx = page->index; + nwritten++; } if (page == last_page) { @@ -1741,8 +1740,8 @@ continue_unlock: goto retry; } out: - if (last_idx != ULONG_MAX) - f2fs_submit_merged_write_cond(sbi, NULL, ino, last_idx, NODE); + if (nwritten) + f2fs_submit_merged_write_cond(sbi, NULL, NULL, ino, NODE); return ret ? -EIO: 0; } diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 3fa8870569ccb..e16dae0f0a5b3 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -371,7 +371,7 @@ static int __f2fs_commit_inmem_pages(struct inode *inode) .io_type = FS_DATA_IO, }; struct list_head revoke_list; - pgoff_t last_idx = ULONG_MAX; + bool submit_bio = false; int err = 0; INIT_LIST_HEAD(&revoke_list); @@ -406,14 +406,14 @@ retry: } /* record old blkaddr for revoking */ cur->old_addr = fio.old_blkaddr; - last_idx = page->index; + submit_bio = true; } unlock_page(page); list_move_tail(&cur->list, &revoke_list); } - if (last_idx != ULONG_MAX) - f2fs_submit_merged_write_cond(sbi, inode, 0, last_idx, DATA); + if (submit_bio) + f2fs_submit_merged_write_cond(sbi, inode, NULL, 0, DATA); if (err) { /* @@ -3181,8 +3181,7 @@ void f2fs_wait_on_page_writeback(struct page *page, if (PageWriteback(page)) { struct f2fs_sb_info *sbi = F2FS_P_SB(page); - f2fs_submit_merged_write_cond(sbi, page->mapping->host, - 0, page->index, type); + f2fs_submit_merged_write_cond(sbi, NULL, page, 0, type); if (ordered) wait_on_page_writeback(page); else -- GitLab From e2c2e182ee35da9e3303db681813b26f137a03ce Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 27 Sep 2018 18:33:18 +0800 Subject: [PATCH 1054/2269] f2fs: refactor ->page_mkwrite() flow Thread A Thread B - f2fs_vm_page_mkwrite - f2fs_setattr - down_write(i_mmap_sem) - truncate_setsize - f2fs_truncate - up_write(i_mmap_sem) - f2fs_reserve_block reserve NEW_ADDR - skip dirty page due to truncation 1. we don't need to rserve new block address for a truncated page. 2. dn.data_blkaddr is used out of node page lock coverage. Refactor ->page_mkwrite() flow to fix above issues: - use __do_map_lock() to avoid racing checkpoint() - lock data page in prior to dnode page - cover f2fs_reserve_block with i_mmap_sem lock - wait page writeback before zeroing page Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 2 +- fs/f2fs/f2fs.h | 1 + fs/f2fs/file.c | 45 ++++++++++++++++++++++----------------------- 3 files changed, 24 insertions(+), 24 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 7bb15b360a78f..00b18ec0bbbdd 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -971,7 +971,7 @@ map_blocks: return err; } -static inline void __do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock) +void __do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock) { if (flag == F2FS_GET_BLOCK_PRE_AIO) { if (lock) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 08fcda83298b0..8860bb05135b5 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3056,6 +3056,7 @@ struct page *f2fs_get_lock_data_page(struct inode *inode, pgoff_t index, struct page *f2fs_get_new_data_page(struct inode *inode, struct page *ipage, pgoff_t index, bool new_i_size); int f2fs_do_write_data_page(struct f2fs_io_info *fio); +void __do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock); int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, int create, int flag); int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index ff0655773de03..2f5d6690113d8 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -47,7 +47,7 @@ static int f2fs_vm_page_mkwrite(struct vm_fault *vmf) struct page *page = vmf->page; struct inode *inode = file_inode(vmf->vma->vm_file); struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct dnode_of_data dn; + struct dnode_of_data dn = { .node_changed = false }; int err; if (unlikely(f2fs_cp_error(sbi))) { @@ -59,19 +59,6 @@ static int f2fs_vm_page_mkwrite(struct vm_fault *vmf) f2fs_bug_on(sbi, f2fs_has_inline_data(inode)); - /* block allocation */ - f2fs_lock_op(sbi); - set_new_dnode(&dn, inode, NULL, NULL, 0); - err = f2fs_reserve_block(&dn, page->index); - if (err) { - f2fs_unlock_op(sbi); - goto out; - } - f2fs_put_dnode(&dn); - f2fs_unlock_op(sbi); - - f2fs_balance_fs(sbi, dn.node_changed); - file_update_time(vmf->vma->vm_file); down_read(&F2FS_I(inode)->i_mmap_sem); lock_page(page); @@ -83,11 +70,28 @@ static int f2fs_vm_page_mkwrite(struct vm_fault *vmf) goto out_sem; } + /* block allocation */ + __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, true); + set_new_dnode(&dn, inode, NULL, NULL, 0); + err = f2fs_get_block(&dn, page->index); + f2fs_put_dnode(&dn); + __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, false); + if (err) { + unlock_page(page); + goto out_sem; + } + + /* fill the page */ + f2fs_wait_on_page_writeback(page, DATA, false); + + /* wait for GCed page writeback via META_MAPPING */ + f2fs_wait_on_block_writeback(inode, dn.data_blkaddr); + /* * check to see if the page is mapped already (no holes) */ if (PageMappedToDisk(page)) - goto mapped; + goto out_sem; /* page is wholly or partially inside EOF */ if (((loff_t)(page->index + 1) << PAGE_SHIFT) > @@ -104,16 +108,11 @@ static int f2fs_vm_page_mkwrite(struct vm_fault *vmf) f2fs_update_iostat(sbi, APP_MAPPED_IO, F2FS_BLKSIZE); trace_f2fs_vm_page_mkwrite(page, DATA); -mapped: - /* fill the page */ - f2fs_wait_on_page_writeback(page, DATA, false); - - /* wait for GCed page writeback via META_MAPPING */ - f2fs_wait_on_block_writeback(inode, dn.data_blkaddr); - out_sem: up_read(&F2FS_I(inode)->i_mmap_sem); -out: + + f2fs_balance_fs(sbi, dn.node_changed); + sb_end_pagefault(inode->i_sb); f2fs_update_time(sbi, REQ_TIME); err: -- GitLab From 4d28ef1c541531fe590c487a395377cc55589d2c Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 27 Sep 2018 18:34:52 +0800 Subject: [PATCH 1055/2269] f2fs: allow out-place-update for direct IO in LFS mode Normally, DIO uses in-pllace-update, but in LFS mode, f2fs doesn't allow triggering any in-place-update writes, so we fallback direct write to buffered write, result in bad performance of large size write. This patch adds to support triggering out-place-update for direct IO to enhance its performance. Note that it needs to exclude direct read IO during direct write, since new data writing to new block address will no be valid until write finished. storage: zram time xfs_io -f -d /mnt/f2fs/file -c "pwrite 0 1073741824" -c "fsync" Before: real 0m13.061s user 0m0.327s sys 0m12.486s After: real 0m6.448s user 0m0.228s sys 0m6.212s Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 44 +++++++++++++++++++++++++++++++++++--------- fs/f2fs/f2fs.h | 45 +++++++++++++++++++++++++++++++++++++++++---- fs/f2fs/file.c | 3 ++- 3 files changed, 78 insertions(+), 14 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 00b18ec0bbbdd..b692945a4be6e 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -892,7 +892,7 @@ static int __allocate_data_block(struct dnode_of_data *dn, int seg_type) dn->data_blkaddr = datablock_addr(dn->inode, dn->node_page, dn->ofs_in_node); - if (dn->data_blkaddr == NEW_ADDR) + if (dn->data_blkaddr != NULL_ADDR) goto alloc; if (unlikely((err = inc_valid_block_count(sbi, dn->inode, &count)))) @@ -946,7 +946,7 @@ int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from) if (direct_io) { map.m_seg_type = f2fs_rw_hint_to_seg_type(iocb->ki_hint); - flag = f2fs_force_buffered_io(inode, WRITE) ? + flag = f2fs_force_buffered_io(inode, iocb, from) ? F2FS_GET_BLOCK_PRE_AIO : F2FS_GET_BLOCK_PRE_DIO; goto map_blocks; @@ -1065,7 +1065,15 @@ next_block: goto sync_out; } - if (!is_valid_data_blkaddr(sbi, blkaddr)) { + if (is_valid_data_blkaddr(sbi, blkaddr)) { + /* use out-place-update for driect IO under LFS mode */ + if (test_opt(sbi, LFS) && create && + flag == F2FS_GET_BLOCK_DIO) { + err = __allocate_data_block(&dn, map->m_seg_type); + if (!err) + set_inode_flag(inode, FI_APPEND_WRITE); + } + } else { if (create) { if (unlikely(f2fs_cp_error(sbi))) { err = -EIO; @@ -2485,36 +2493,53 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) struct address_space *mapping = iocb->ki_filp->f_mapping; struct inode *inode = mapping->host; struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct f2fs_inode_info *fi = F2FS_I(inode); size_t count = iov_iter_count(iter); loff_t offset = iocb->ki_pos; int rw = iov_iter_rw(iter); int err; enum rw_hint hint = iocb->ki_hint; int whint_mode = F2FS_OPTION(sbi).whint_mode; + bool do_opu; err = check_direct_IO(inode, iter, offset); if (err) return err < 0 ? err : 0; - if (f2fs_force_buffered_io(inode, rw)) + if (f2fs_force_buffered_io(inode, iocb, iter)) return 0; + do_opu = allow_outplace_dio(inode, iocb, iter); + trace_f2fs_direct_IO_enter(inode, offset, count, rw); if (rw == WRITE && whint_mode == WHINT_MODE_OFF) iocb->ki_hint = WRITE_LIFE_NOT_SET; - if (!down_read_trylock(&F2FS_I(inode)->i_gc_rwsem[rw])) { - if (iocb->ki_flags & IOCB_NOWAIT) { + if (iocb->ki_flags & IOCB_NOWAIT) { + if (!down_read_trylock(&fi->i_gc_rwsem[rw])) { + iocb->ki_hint = hint; + err = -EAGAIN; + goto out; + } + if (do_opu && !down_read_trylock(&fi->i_gc_rwsem[READ])) { + up_read(&fi->i_gc_rwsem[rw]); iocb->ki_hint = hint; err = -EAGAIN; goto out; } - down_read(&F2FS_I(inode)->i_gc_rwsem[rw]); + } else { + down_read(&fi->i_gc_rwsem[rw]); + if (do_opu) + down_read(&fi->i_gc_rwsem[READ]); } err = blockdev_direct_IO(iocb, inode, iter, get_data_block_dio); - up_read(&F2FS_I(inode)->i_gc_rwsem[rw]); + + if (do_opu) + up_read(&fi->i_gc_rwsem[READ]); + + up_read(&fi->i_gc_rwsem[rw]); if (rw == WRITE) { if (whint_mode == WHINT_MODE_OFF) @@ -2522,7 +2547,8 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) if (err > 0) { f2fs_update_iostat(F2FS_I_SB(inode), APP_DIRECT_IO, err); - set_inode_flag(inode, FI_UPDATE_WRITE); + if (!do_opu) + set_inode_flag(inode, FI_UPDATE_WRITE); } else if (err < 0) { f2fs_write_failed(mapping, offset + count); } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 8860bb05135b5..8723ceaa6953c 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -8,6 +8,7 @@ #ifndef _LINUX_F2FS_H #define _LINUX_F2FS_H +#include #include #include #include @@ -3492,11 +3493,47 @@ static inline bool f2fs_may_encrypt(struct inode *inode) #endif } -static inline bool f2fs_force_buffered_io(struct inode *inode, int rw) +static inline int block_unaligned_IO(struct inode *inode, + struct kiocb *iocb, struct iov_iter *iter) { - return (f2fs_post_read_required(inode) || - (rw == WRITE && test_opt(F2FS_I_SB(inode), LFS)) || - F2FS_I_SB(inode)->s_ndevs); + unsigned int i_blkbits = READ_ONCE(inode->i_blkbits); + unsigned int blocksize_mask = (1 << i_blkbits) - 1; + loff_t offset = iocb->ki_pos; + unsigned long align = offset | iov_iter_alignment(iter); + + return align & blocksize_mask; +} + +static inline int allow_outplace_dio(struct inode *inode, + struct kiocb *iocb, struct iov_iter *iter) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + int rw = iov_iter_rw(iter); + + return (test_opt(sbi, LFS) && (rw == WRITE) && + !block_unaligned_IO(inode, iocb, iter)); +} + +static inline bool f2fs_force_buffered_io(struct inode *inode, + struct kiocb *iocb, struct iov_iter *iter) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + int rw = iov_iter_rw(iter); + + if (f2fs_post_read_required(inode)) + return true; + if (sbi->s_ndevs) + return true; + /* + * for blkzoned device, fallback direct IO to buffered IO, so + * all IOs can be serialized by log-structured write. + */ + if (f2fs_sb_has_blkzoned(sbi->sb)) + return true; + if (test_opt(sbi, LFS) && (rw == WRITE) && + block_unaligned_IO(inode, iocb, iter)) + return true; + return false; } #ifdef CONFIG_F2FS_FAULT_INJECTION diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 2f5d6690113d8..d6ba94db28087 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -3001,7 +3001,8 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) if (!f2fs_overwrite_io(inode, iocb->ki_pos, iov_iter_count(from)) || f2fs_has_inline_data(inode) || - f2fs_force_buffered_io(inode, WRITE)) { + f2fs_force_buffered_io(inode, + iocb, from)) { clear_inode_flag(inode, FI_NO_PREALLOC); inode_unlock(inode); -- GitLab From 919eb288c4e35eb133b6da762556d0ab1255e8fb Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 25 Sep 2018 13:54:33 -0700 Subject: [PATCH 1056/2269] f2fs: clear PageError on the read path When running fault injection test, I hit somewhat wrong behavior in f2fs_gc -> gc_data_segment(): 0. fault injection generated some PageError'ed pages 1. gc_data_segment -> f2fs_get_read_data_page(REQ_RAHEAD) 2. move_data_page -> f2fs_get_lock_data_page() -> f2f_get_read_data_page() -> f2fs_submit_page_read() -> submit_bio(READ) -> return EIO due to PageError -> fail to move data Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index b692945a4be6e..57a9f44ea78af 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -77,7 +77,8 @@ static void __read_end_io(struct bio *bio) /* PG_error was set if any post_read step failed */ if (bio->bi_status || PageError(page)) { ClearPageUptodate(page); - SetPageError(page); + /* will re-read again later */ + ClearPageError(page); } else { SetPageUptodate(page); } @@ -590,6 +591,7 @@ static int f2fs_submit_page_read(struct inode *inode, struct page *page, bio_put(bio); return -EFAULT; } + ClearPageError(page); __submit_bio(F2FS_I_SB(inode), bio, DATA); return 0; } @@ -1578,6 +1580,7 @@ submit_and_realloc: if (bio_add_page(bio, page, blocksize, 0) < blocksize) goto submit_and_realloc; + ClearPageError(page); last_block_in_bio = block_nr; goto next_page; set_error_page: -- GitLab From 7ea0d208d6597df11f7ad965ffd2632c18aa461e Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Mon, 20 Aug 2018 19:21:43 -0700 Subject: [PATCH 1057/2269] f2fs: checkpoint disabling Note that, it requires "f2fs: return correct errno in f2fs_gc". This adds a lightweight non-persistent snapshotting scheme to f2fs. To use, mount with the option checkpoint=disable, and to return to normal operation, remount with checkpoint=enable. If the filesystem is shut down before remounting with checkpoint=enable, it will revert back to its apparent state when it was first mounted with checkpoint=disable. This is useful for situations where you wish to be able to roll back the state of the disk in case of some critical failure. Signed-off-by: Daniel Rosenberg [Jaegeuk Kim: use SB_RDONLY instead of MS_RDONLY] Signed-off-by: Jaegeuk Kim --- Documentation/filesystems/f2fs.txt | 5 ++ fs/f2fs/checkpoint.c | 12 +++ fs/f2fs/data.c | 14 +++- fs/f2fs/debug.c | 3 +- fs/f2fs/f2fs.h | 18 ++++- fs/f2fs/file.c | 12 ++- fs/f2fs/gc.c | 9 ++- fs/f2fs/inode.c | 6 +- fs/f2fs/namei.c | 19 +++++ fs/f2fs/segment.c | 98 +++++++++++++++++++++- fs/f2fs/segment.h | 15 ++++ fs/f2fs/super.c | 126 ++++++++++++++++++++++++++++- include/linux/f2fs_fs.h | 1 + 13 files changed, 324 insertions(+), 14 deletions(-) diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt index 432351f7ace11..fedbea893ee73 100644 --- a/Documentation/filesystems/f2fs.txt +++ b/Documentation/filesystems/f2fs.txt @@ -212,6 +212,11 @@ fsync_mode=%s Control the policy of fsync. Currently supports "posix", non-atomic files likewise "nobarrier" mount option. test_dummy_encryption Enable dummy encryption, which provides a fake fscrypt context. The fake fscrypt context is used by xfstests. +checkpoint=%s Set to "disable" to turn off checkpointing. Set to "enable" + to reenable checkpointing. Is enabled by default. While + disabled, any unmounting or unexpected shutdowns will cause + the filesystem contents to appear as they did when the + filesystem was mounted with that option. ================================================================================ DEBUGFS ENTRIES diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 72004e2965ba2..e4c4bef78e13a 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -1210,6 +1210,11 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc) if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) __set_ckpt_flags(ckpt, CP_FSCK_FLAG); + if (is_sbi_flag_set(sbi, SBI_CP_DISABLED)) + __set_ckpt_flags(ckpt, CP_DISABLED_FLAG); + else + __clear_ckpt_flags(ckpt, CP_DISABLED_FLAG); + /* set this flag to activate crc|cp_ver for recovery */ __set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG); __clear_ckpt_flags(ckpt, CP_NOCRC_RECOVERY_FLAG); @@ -1417,6 +1422,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) clear_sbi_flag(sbi, SBI_IS_DIRTY); clear_sbi_flag(sbi, SBI_NEED_CP); + sbi->unusable_block_count = 0; __set_cp_next_pack(sbi); /* @@ -1441,6 +1447,12 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) unsigned long long ckpt_ver; int err = 0; + if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) { + if (cpc->reason != CP_PAUSE) + return 0; + f2fs_msg(sbi->sb, KERN_WARNING, + "Start checkpoint disabled!"); + } mutex_lock(&sbi->cp_mutex); if (!is_sbi_flag_set(sbi, SBI_IS_DIRTY) && diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 57a9f44ea78af..606563867e221 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -536,7 +536,8 @@ skip: if (fio->in_list) goto next; out: - if (is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN)) + if (is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN) || + f2fs_is_checkpoint_ready(sbi)) __submit_merged_bio(io); up_write(&io->io_rwsem); } @@ -1702,6 +1703,10 @@ static inline bool check_inplace_update_policy(struct inode *inode, is_inode_flag_set(inode, FI_NEED_IPU)) return true; + if (unlikely(fio && is_sbi_flag_set(sbi, SBI_CP_DISABLED) && + !f2fs_is_checkpointed_data(sbi, fio->old_blkaddr))) + return true; + return false; } @@ -1732,6 +1737,9 @@ bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio) return true; if (IS_ATOMIC_WRITTEN_PAGE(fio->page)) return true; + if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED) && + f2fs_is_checkpointed_data(sbi, fio->old_blkaddr))) + return true; } return false; } @@ -2352,6 +2360,10 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, trace_f2fs_write_begin(inode, pos, len, flags); + err = f2fs_is_checkpoint_ready(sbi); + if (err) + goto fail; + if ((f2fs_is_atomic_file(inode) && !f2fs_available_free_memory(sbi, INMEM_PAGES)) || is_inode_flag_set(inode, FI_ATOMIC_REVOKE_REQUEST)) { diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 75bc62edc4c1f..026e10f308898 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -272,7 +272,8 @@ static int stat_show(struct seq_file *s, void *v) seq_printf(s, "\n=====[ partition info(%pg). #%d, %s, CP: %s]=====\n", si->sbi->sb->s_bdev, i++, f2fs_readonly(si->sbi->sb) ? "RO": "RW", - f2fs_cp_error(si->sbi) ? "Error": "Good"); + is_set_ckpt_flags(si->sbi, CP_DISABLED_FLAG) ? + "Disabled": (f2fs_cp_error(si->sbi) ? "Error": "Good")); seq_printf(s, "[SB: 1] [CP: 2] [SIT: %d] [NAT: %d] ", si->sit_area_segs, si->nat_area_segs); seq_printf(s, "[SSA: %d] [MAIN: %d", diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 8723ceaa6953c..9f56580fd459d 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -100,6 +100,7 @@ extern char *f2fs_fault_name[FAULT_MAX]; #define F2FS_MOUNT_QUOTA 0x00400000 #define F2FS_MOUNT_INLINE_XATTR_SIZE 0x00800000 #define F2FS_MOUNT_RESERVE_ROOT 0x01000000 +#define F2FS_MOUNT_DISABLE_CHECKPOINT 0x02000000 #define F2FS_OPTION(sbi) ((sbi)->mount_opt) #define clear_opt(sbi, option) (F2FS_OPTION(sbi).opt &= ~F2FS_MOUNT_##option) @@ -179,6 +180,7 @@ enum { #define CP_RECOVERY 0x00000008 #define CP_DISCARD 0x00000010 #define CP_TRIMMED 0x00000020 +#define CP_PAUSE 0x00000040 #define MAX_DISCARD_BLOCKS(sbi) BLKS_PER_SEC(sbi) #define DEF_MAX_DISCARD_REQUEST 8 /* issue 8 discards per round */ @@ -188,6 +190,7 @@ enum { #define DEF_DISCARD_URGENT_UTIL 80 /* do more discard over 80% */ #define DEF_CP_INTERVAL 60 /* 60 secs */ #define DEF_IDLE_INTERVAL 5 /* 5 secs */ +#define DEF_DISABLE_INTERVAL 5 /* 5 secs */ struct cp_control { int reason; @@ -1093,6 +1096,7 @@ enum { SBI_NEED_CP, /* need to checkpoint */ SBI_IS_SHUTDOWN, /* shutdown by ioctl */ SBI_IS_RECOVERED, /* recovered orphan/data */ + SBI_CP_DISABLED, /* CP was disabled last mount */ }; enum { @@ -1100,6 +1104,7 @@ enum { REQ_TIME, DISCARD_TIME, GC_TIME, + DISABLE_TIME, MAX_TIME, }; @@ -1226,6 +1231,9 @@ struct f2fs_sb_info { block_t reserved_blocks; /* configurable reserved blocks */ block_t current_reserved_blocks; /* current reserved blocks */ + /* Additional tracking for no checkpoint mode */ + block_t unusable_block_count; /* # of blocks saved by last cp */ + unsigned int nquota_files; /* # of quota sysfile */ u32 s_next_generation; /* for NFS support */ @@ -1736,7 +1744,8 @@ static inline int inc_valid_block_count(struct f2fs_sb_info *sbi, if (!__allow_reserved_blocks(sbi, inode, true)) avail_user_block_count -= F2FS_OPTION(sbi).root_reserved_blocks; - + if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) + avail_user_block_count -= sbi->unusable_block_count; if (unlikely(sbi->total_valid_block_count > avail_user_block_count)) { diff = sbi->total_valid_block_count - avail_user_block_count; if (diff > *count) @@ -1943,6 +1952,8 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi, if (!__allow_reserved_blocks(sbi, inode, false)) valid_block_count += F2FS_OPTION(sbi).root_reserved_blocks; + if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) + valid_block_count += sbi->unusable_block_count; if (unlikely(valid_block_count > sbi->user_block_count)) { spin_unlock(&sbi->stat_lock); @@ -2946,6 +2957,8 @@ void f2fs_stop_discard_thread(struct f2fs_sb_info *sbi); bool f2fs_wait_discard_bios(struct f2fs_sb_info *sbi); void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc); +void f2fs_dirty_to_prefree(struct f2fs_sb_info *sbi); +int f2fs_disable_cp_again(struct f2fs_sb_info *sbi); void f2fs_release_discard_addrs(struct f2fs_sb_info *sbi); int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra); void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi); @@ -3533,6 +3546,9 @@ static inline bool f2fs_force_buffered_io(struct inode *inode, if (test_opt(sbi, LFS) && (rw == WRITE) && block_unaligned_IO(inode, iocb, iter)) return true; + if (is_sbi_flag_set(F2FS_I_SB(inode), SBI_CP_DISABLED)) + return true; + return false; } diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index d6ba94db28087..44a5e564609e8 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -210,7 +210,8 @@ static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end, }; unsigned int seq_id = 0; - if (unlikely(f2fs_readonly(inode->i_sb))) + if (unlikely(f2fs_readonly(inode->i_sb) || + is_sbi_flag_set(sbi, SBI_CP_DISABLED))) return 0; trace_f2fs_sync_file_enter(inode); @@ -2157,6 +2158,12 @@ static int f2fs_ioc_write_checkpoint(struct file *filp, unsigned long arg) if (f2fs_readonly(sbi->sb)) return -EROFS; + if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) { + f2fs_msg(sbi->sb, KERN_INFO, + "Skipping Checkpoint. Checkpoints currently disabled."); + return -EINVAL; + } + ret = mnt_want_write_file(filp); if (ret) return ret; @@ -2528,6 +2535,9 @@ static int f2fs_ioc_flush_device(struct file *filp, unsigned long arg) if (f2fs_readonly(sbi->sb)) return -EROFS; + if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) + return -EINVAL; + if (copy_from_user(&range, (struct f2fs_flush_device __user *)arg, sizeof(range))) return -EFAULT; diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 4ce7ba181e0fc..b07129b08ec1e 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -370,6 +370,10 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi, if (sec_usage_check(sbi, secno)) goto next; + /* Don't touch checkpointed data */ + if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED) && + get_ckpt_valid_blocks(sbi, segno))) + goto next; if (gc_type == BG_GC && test_bit(secno, dirty_i->victim_secmap)) goto next; @@ -1189,7 +1193,8 @@ gc_more: * threshold, we can make them free by checkpoint. Then, we * secure free segments which doesn't need fggc any more. */ - if (prefree_segments(sbi)) { + if (prefree_segments(sbi) && + !is_sbi_flag_set(sbi, SBI_CP_DISABLED)) { ret = f2fs_write_checkpoint(sbi, &cpc); if (ret) goto stop; @@ -1241,7 +1246,7 @@ gc_more: segno = NULL_SEGNO; goto gc_more; } - if (gc_type == FG_GC) + if (gc_type == FG_GC && !is_sbi_flag_set(sbi, SBI_CP_DISABLED)) ret = f2fs_write_checkpoint(sbi, &cpc); } stop: diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 86e7333d60c14..4ee9d6c4b7191 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -607,6 +607,9 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc) if (!is_inode_flag_set(inode, FI_DIRTY_INODE)) return 0; + if (f2fs_is_checkpoint_ready(sbi)) + return -ENOSPC; + /* * We need to balance fs here to prevent from producing dirty node pages * during the urgent cleaning time when runing out of free sections. @@ -688,7 +691,8 @@ no_delete: stat_dec_inline_dir(inode); stat_dec_inline_inode(inode); - if (likely(!is_set_ckpt_flags(sbi, CP_ERROR_FLAG))) + if (likely(!is_set_ckpt_flags(sbi, CP_ERROR_FLAG) && + !is_sbi_flag_set(sbi, SBI_CP_DISABLED))) f2fs_bug_on(sbi, is_inode_flag_set(inode, FI_DIRTY_INODE)); else f2fs_inode_synced(inode); diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 79ed2039647b8..6d4b11f404ccc 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -16,6 +16,7 @@ #include "f2fs.h" #include "node.h" +#include "segment.h" #include "xattr.h" #include "acl.h" #include @@ -269,6 +270,9 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode, if (unlikely(f2fs_cp_error(sbi))) return -EIO; + err = f2fs_is_checkpoint_ready(sbi); + if (err) + return err; err = dquot_initialize(dir); if (err) @@ -316,6 +320,9 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir, if (unlikely(f2fs_cp_error(sbi))) return -EIO; + err = f2fs_is_checkpoint_ready(sbi); + if (err) + return err; err = fscrypt_prepare_link(old_dentry, dir, dentry); if (err) @@ -562,6 +569,9 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry, if (unlikely(f2fs_cp_error(sbi))) return -EIO; + err = f2fs_is_checkpoint_ready(sbi); + if (err) + return err; err = fscrypt_prepare_symlink(dir, symname, len, dir->i_sb->s_blocksize, &disk_link); @@ -693,6 +703,9 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry, if (unlikely(f2fs_cp_error(sbi))) return -EIO; + err = f2fs_is_checkpoint_ready(sbi); + if (err) + return err; err = dquot_initialize(dir); if (err) @@ -828,6 +841,9 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, if (unlikely(f2fs_cp_error(sbi))) return -EIO; + err = f2fs_is_checkpoint_ready(sbi); + if (err) + return err; if (is_inode_flag_set(new_dir, FI_PROJ_INHERIT) && (!projid_eq(F2FS_I(new_dir)->i_projid, @@ -1018,6 +1034,9 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, if (unlikely(f2fs_cp_error(sbi))) return -EIO; + err = f2fs_is_checkpoint_ready(sbi); + if (err) + return err; if ((is_inode_flag_set(new_dir, FI_PROJ_INHERIT) && !projid_eq(F2FS_I(new_dir)->i_projid, diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index e16dae0f0a5b3..195dc8142bff5 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -176,6 +176,8 @@ bool f2fs_need_SSR(struct f2fs_sb_info *sbi) return false; if (sbi->gc_mode == GC_URGENT) return true; + if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) + return true; return free_sections(sbi) <= (node_secs + 2 * dent_secs + imeta_secs + SM_I(sbi)->min_ssr_sections + reserved_sections(sbi)); @@ -480,6 +482,9 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need) if (need && excess_cached_nats(sbi)) f2fs_balance_fs_bg(sbi); + if (f2fs_is_checkpoint_ready(sbi)) + return; + /* * We should do GC or end up with checkpoint, if there are so many dirty * dir/node pages without enough free segments. @@ -796,7 +801,7 @@ static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno, static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno) { struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); - unsigned short valid_blocks; + unsigned short valid_blocks, ckpt_valid_blocks; if (segno == NULL_SEGNO || IS_CURSEG(sbi, segno)) return; @@ -804,8 +809,10 @@ static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno) mutex_lock(&dirty_i->seglist_lock); valid_blocks = get_valid_blocks(sbi, segno, false); + ckpt_valid_blocks = get_ckpt_valid_blocks(sbi, segno); - if (valid_blocks == 0) { + if (valid_blocks == 0 && (!is_sbi_flag_set(sbi, SBI_CP_DISABLED) || + ckpt_valid_blocks == sbi->blocks_per_seg)) { __locate_dirty_segment(sbi, segno, PRE); __remove_dirty_segment(sbi, segno, DIRTY); } else if (valid_blocks < sbi->blocks_per_seg) { @@ -818,6 +825,66 @@ static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno) mutex_unlock(&dirty_i->seglist_lock); } +/* This moves currently empty dirty blocks to prefree. Must hold seglist_lock */ +void f2fs_dirty_to_prefree(struct f2fs_sb_info *sbi) +{ + struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); + unsigned int segno; + + mutex_lock(&dirty_i->seglist_lock); + for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) { + if (get_valid_blocks(sbi, segno, false)) + continue; + if (IS_CURSEG(sbi, segno)) + continue; + __locate_dirty_segment(sbi, segno, PRE); + __remove_dirty_segment(sbi, segno, DIRTY); + } + mutex_unlock(&dirty_i->seglist_lock); +} + +int f2fs_disable_cp_again(struct f2fs_sb_info *sbi) +{ + struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); + block_t ovp = overprovision_segments(sbi) << sbi->log_blocks_per_seg; + block_t holes[2] = {0, 0}; /* DATA and NODE */ + struct seg_entry *se; + unsigned int segno; + + mutex_lock(&dirty_i->seglist_lock); + for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) { + se = get_seg_entry(sbi, segno); + if (IS_NODESEG(se->type)) + holes[NODE] += sbi->blocks_per_seg - se->valid_blocks; + else + holes[DATA] += sbi->blocks_per_seg - se->valid_blocks; + } + mutex_unlock(&dirty_i->seglist_lock); + + if (holes[DATA] > ovp || holes[NODE] > ovp) + return -EAGAIN; + return 0; +} + +/* This is only used by SBI_CP_DISABLED */ +static unsigned int get_free_segment(struct f2fs_sb_info *sbi) +{ + struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); + unsigned int segno = 0; + + mutex_lock(&dirty_i->seglist_lock); + for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) { + if (get_valid_blocks(sbi, segno, false)) + continue; + if (get_ckpt_valid_blocks(sbi, segno)) + continue; + mutex_unlock(&dirty_i->seglist_lock); + return segno; + } + mutex_unlock(&dirty_i->seglist_lock); + return NULL_SEGNO; +} + static struct discard_cmd *__create_discard_cmd(struct f2fs_sb_info *sbi, struct block_device *bdev, block_t lstart, block_t start, block_t len) @@ -2028,7 +2095,8 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) sbi->discard_blks--; /* don't overwrite by SSR to keep node chain */ - if (IS_NODESEG(se->type)) { + if (IS_NODESEG(se->type) && + !is_sbi_flag_set(sbi, SBI_CP_DISABLED)) { if (!f2fs_test_and_set_bit(offset, se->ckpt_valid_map)) se->ckpt_valid_blocks++; } @@ -2050,6 +2118,15 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) f2fs_bug_on(sbi, 1); se->valid_blocks++; del = 0; + } else if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) { + /* + * If checkpoints are off, we must not reuse data that + * was used in the previous checkpoint. If it was used + * before, we must track that to know how much space we + * really have. + */ + if (f2fs_test_bit(offset, se->ckpt_valid_map)) + sbi->unusable_block_count++; } if (f2fs_test_and_clear_bit(offset, se->discard_map)) @@ -2332,6 +2409,9 @@ static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type) if (sbi->segs_per_sec != 1) return CURSEG_I(sbi, type)->segno; + if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) + return 0; + if (test_opt(sbi, NOHEAP) && (type == CURSEG_HOT_DATA || IS_NODESEG(type))) return 0; @@ -2476,6 +2556,15 @@ static int get_ssr_segment(struct f2fs_sb_info *sbi, int type) return 1; } } + + /* find valid_blocks=0 in dirty list */ + if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) { + segno = get_free_segment(sbi); + if (segno != NULL_SEGNO) { + curseg->next_segno = segno; + return 1; + } + } return 0; } @@ -2493,7 +2582,8 @@ static void allocate_segment_by_default(struct f2fs_sb_info *sbi, else if (!is_set_ckpt_flags(sbi, CP_CRC_RECOVERY_FLAG) && type == CURSEG_WARM_NODE) new_curseg(sbi, type, false); - else if (curseg->alloc_type == LFS && is_next_segment_free(sbi, type)) + else if (curseg->alloc_type == LFS && is_next_segment_free(sbi, type) && + likely(!is_sbi_flag_set(sbi, SBI_CP_DISABLED))) new_curseg(sbi, type, false); else if (f2fs_need_SSR(sbi) && get_ssr_segment(sbi, type)) change_curseg(sbi, type); diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 086150028c6d1..ab3465faddf13 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -339,6 +339,12 @@ static inline unsigned int get_valid_blocks(struct f2fs_sb_info *sbi, return get_seg_entry(sbi, segno)->valid_blocks; } +static inline unsigned int get_ckpt_valid_blocks(struct f2fs_sb_info *sbi, + unsigned int segno) +{ + return get_seg_entry(sbi, segno)->ckpt_valid_blocks; +} + static inline void seg_info_from_raw_sit(struct seg_entry *se, struct f2fs_sit_entry *rs) { @@ -576,6 +582,15 @@ static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi, reserved_sections(sbi) + needed); } +static inline int f2fs_is_checkpoint_ready(struct f2fs_sb_info *sbi) +{ + if (likely(!is_sbi_flag_set(sbi, SBI_CP_DISABLED))) + return 0; + if (likely(!has_not_enough_free_secs(sbi, 0, 0))) + return 0; + return -ENOSPC; +} + static inline bool excess_prefree_segs(struct f2fs_sb_info *sbi) { return prefree_segments(sbi) > SM_I(sbi)->rec_prefree_segments; diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 6919f91c7a14a..e2291474e56bc 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -136,6 +136,7 @@ enum { Opt_alloc, Opt_fsync, Opt_test_dummy_encryption, + Opt_checkpoint, Opt_err, }; @@ -194,6 +195,7 @@ static match_table_t f2fs_tokens = { {Opt_alloc, "alloc_mode=%s"}, {Opt_fsync, "fsync_mode=%s"}, {Opt_test_dummy_encryption, "test_dummy_encryption"}, + {Opt_checkpoint, "checkpoint=%s"}, {Opt_err, NULL}, }; @@ -769,6 +771,23 @@ static int parse_options(struct super_block *sb, char *options) "Test dummy encryption mount option ignored"); #endif break; + case Opt_checkpoint: + name = match_strdup(&args[0]); + if (!name) + return -ENOMEM; + + if (strlen(name) == 6 && + !strncmp(name, "enable", 6)) { + clear_opt(sbi, DISABLE_CHECKPOINT); + } else if (strlen(name) == 7 && + !strncmp(name, "disable", 7)) { + set_opt(sbi, DISABLE_CHECKPOINT); + } else { + kfree(name); + return -EINVAL; + } + kfree(name); + break; default: f2fs_msg(sb, KERN_ERR, "Unrecognized mount option \"%s\" or missing value", @@ -827,6 +846,12 @@ static int parse_options(struct super_block *sb, char *options) } } + if (test_opt(sbi, DISABLE_CHECKPOINT) && test_opt(sbi, LFS)) { + f2fs_msg(sb, KERN_ERR, + "LFS not compatible with checkpoint=disable\n"); + return -EINVAL; + } + /* Not pass down write hints if the number of active logs is lesser * than NR_CURSEG_TYPE. */ @@ -1014,8 +1039,8 @@ static void f2fs_put_super(struct super_block *sb) * But, the previous checkpoint was not done by umount, it needs to do * clean checkpoint again. */ - if (is_sbi_flag_set(sbi, SBI_IS_DIRTY) || - !is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG)) { + if ((is_sbi_flag_set(sbi, SBI_IS_DIRTY) || + !is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG))) { struct cp_control cpc = { .reason = CP_UMOUNT, }; @@ -1087,6 +1112,8 @@ int f2fs_sync_fs(struct super_block *sb, int sync) if (unlikely(f2fs_cp_error(sbi))) return 0; + if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) + return 0; trace_f2fs_sync_fs(sb, sync); @@ -1186,6 +1213,11 @@ static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_blocks = total_count - start_count; buf->f_bfree = user_block_count - valid_user_blocks(sbi) - sbi->current_reserved_blocks; + if (unlikely(buf->f_bfree <= sbi->unusable_block_count)) + buf->f_bfree = 0; + else + buf->f_bfree -= sbi->unusable_block_count; + if (buf->f_bfree > F2FS_OPTION(sbi).root_reserved_blocks) buf->f_bavail = buf->f_bfree - F2FS_OPTION(sbi).root_reserved_blocks; @@ -1365,6 +1397,9 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) else if (F2FS_OPTION(sbi).alloc_mode == ALLOC_MODE_REUSE) seq_printf(seq, ",alloc_mode=%s", "reuse"); + if (test_opt(sbi, DISABLE_CHECKPOINT)) + seq_puts(seq, ",checkpoint=disable"); + if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_POSIX) seq_printf(seq, ",fsync_mode=%s", "posix"); else if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_STRICT) @@ -1393,6 +1428,7 @@ static void default_options(struct f2fs_sb_info *sbi) set_opt(sbi, EXTENT_CACHE); set_opt(sbi, NOHEAP); sbi->sb->s_flags |= MS_LAZYTIME; + clear_opt(sbi, DISABLE_CHECKPOINT); set_opt(sbi, FLUSH_MERGE); set_opt(sbi, DISCARD); if (f2fs_sb_has_blkzoned(sbi->sb)) @@ -1413,6 +1449,57 @@ static void default_options(struct f2fs_sb_info *sbi) #ifdef CONFIG_QUOTA static int f2fs_enable_quotas(struct super_block *sb); #endif + +static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi) +{ + struct cp_control cpc; + int err; + + sbi->sb->s_flags |= SB_ACTIVE; + + mutex_lock(&sbi->gc_mutex); + f2fs_update_time(sbi, DISABLE_TIME); + + while (!f2fs_time_over(sbi, DISABLE_TIME)) { + err = f2fs_gc(sbi, true, false, NULL_SEGNO); + if (err == -ENODATA) + break; + if (err && err != -EAGAIN) { + mutex_unlock(&sbi->gc_mutex); + return err; + } + } + mutex_unlock(&sbi->gc_mutex); + + err = sync_filesystem(sbi->sb); + if (err) + return err; + + if (f2fs_disable_cp_again(sbi)) + return -EAGAIN; + + mutex_lock(&sbi->gc_mutex); + cpc.reason = CP_PAUSE; + set_sbi_flag(sbi, SBI_CP_DISABLED); + f2fs_write_checkpoint(sbi, &cpc); + + sbi->unusable_block_count = 0; + mutex_unlock(&sbi->gc_mutex); + return 0; +} + +static void f2fs_enable_checkpoint(struct f2fs_sb_info *sbi) +{ + mutex_lock(&sbi->gc_mutex); + f2fs_dirty_to_prefree(sbi); + + clear_sbi_flag(sbi, SBI_CP_DISABLED); + set_sbi_flag(sbi, SBI_IS_DIRTY); + mutex_unlock(&sbi->gc_mutex); + + f2fs_sync_fs(sbi->sb, 1); +} + static int f2fs_remount(struct super_block *sb, int *flags, char *data) { struct f2fs_sb_info *sbi = F2FS_SB(sb); @@ -1422,6 +1509,8 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) bool need_restart_gc = false; bool need_stop_gc = false; bool no_extent_cache = !test_opt(sbi, EXTENT_CACHE); + bool disable_checkpoint = test_opt(sbi, DISABLE_CHECKPOINT); + bool checkpoint_changed; #ifdef CONFIG_QUOTA int i, j; #endif @@ -1466,6 +1555,8 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) err = parse_options(sb, data); if (err) goto restore_opts; + checkpoint_changed = + disable_checkpoint != test_opt(sbi, DISABLE_CHECKPOINT); /* * Previous and new state of filesystem is RO, @@ -1479,7 +1570,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) err = dquot_suspend(sb, -1); if (err < 0) goto restore_opts; - } else if (f2fs_readonly(sb) && !(*flags & MS_RDONLY)) { + } else if (f2fs_readonly(sb) && !(*flags & SB_RDONLY)) { /* dquot_resume needs RW */ sb->s_flags &= ~MS_RDONLY; if (sb_any_quota_suspended(sb)) { @@ -1499,6 +1590,13 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) goto restore_opts; } + if ((*flags & SB_RDONLY) && test_opt(sbi, DISABLE_CHECKPOINT)) { + err = -EINVAL; + f2fs_msg(sbi->sb, KERN_WARNING, + "disabling checkpoint not compatible with read-only"); + goto restore_opts; + } + /* * We stop the GC thread if FS is mounted as RO * or if background_gc = off is passed in mount @@ -1527,6 +1625,16 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) clear_sbi_flag(sbi, SBI_IS_CLOSE); } + if (checkpoint_changed) { + if (test_opt(sbi, DISABLE_CHECKPOINT)) { + err = f2fs_disable_checkpoint(sbi); + if (err) + goto restore_gc; + } else { + f2fs_enable_checkpoint(sbi); + } + } + /* * We stop issue flush thread if FS is mounted as RO * or if flush_merge is not passed in mount option. @@ -2485,6 +2593,7 @@ static void init_sb_info(struct f2fs_sb_info *sbi) sbi->interval_time[REQ_TIME] = DEF_IDLE_INTERVAL; sbi->interval_time[DISCARD_TIME] = DEF_IDLE_INTERVAL; sbi->interval_time[GC_TIME] = DEF_IDLE_INTERVAL; + sbi->interval_time[DISABLE_TIME] = DEF_DISABLE_INTERVAL; clear_sbi_flag(sbi, SBI_NEED_FSCK); for (i = 0; i < NR_COUNT_TYPE; i++) @@ -3092,6 +3201,9 @@ try_onemore: if (err) goto free_meta; + if (unlikely(is_set_ckpt_flags(sbi, CP_DISABLED_FLAG))) + goto skip_recovery; + /* recover fsynced data */ if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) { /* @@ -3131,6 +3243,14 @@ skip_recovery: /* f2fs_recover_fsync_data() cleared this already */ clear_sbi_flag(sbi, SBI_POR_DOING); + if (test_opt(sbi, DISABLE_CHECKPOINT)) { + err = f2fs_disable_checkpoint(sbi); + if (err) + goto free_meta; + } else if (is_set_ckpt_flags(sbi, CP_DISABLED_FLAG)) { + f2fs_enable_checkpoint(sbi); + } + /* * If filesystem is not mounted as read-only then * do start the gc_thread. diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 1db13ff9a3f48..8b9c7dc0260c6 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -116,6 +116,7 @@ struct f2fs_super_block { /* * For checkpoint */ +#define CP_DISABLED_FLAG 0x00001000 #define CP_LARGE_NAT_BITMAP_FLAG 0x00000400 #define CP_NOCRC_RECOVERY_FLAG 0x00000200 #define CP_TRIMMED_FLAG 0x00000100 -- GitLab From d5f0cb33ab402526f3483314cecaf0c1ec8bf49b Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 13 Sep 2018 07:40:53 +0800 Subject: [PATCH 1058/2269] f2fs: submit cached bio to avoid endless PageWriteback When migrating encrypted block from background GC thread, we only add them into f2fs inner bio cache, but forget to submit the cached bio, it may cause potential deadlock when we are waiting page writebacked, fix it. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 2 +- fs/f2fs/gc.c | 71 +++++++++++++++++++++++++++++++++++--------------- fs/f2fs/node.c | 13 ++++++--- 3 files changed, 61 insertions(+), 25 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 9f56580fd459d..a11964a1f458a 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2912,7 +2912,7 @@ struct page *f2fs_new_node_page(struct dnode_of_data *dn, unsigned int ofs); void f2fs_ra_node_page(struct f2fs_sb_info *sbi, nid_t nid); struct page *f2fs_get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid); struct page *f2fs_get_node_page_ra(struct page *parent, int start); -void f2fs_move_node_page(struct page *node_page, int gc_type); +int f2fs_move_node_page(struct page *node_page, int gc_type); int f2fs_fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode, struct writeback_control *wbc, bool atomic, unsigned int *seq_id); diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index b07129b08ec1e..12394086b8488 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -473,7 +473,7 @@ static int check_valid_map(struct f2fs_sb_info *sbi, * On validity, copy that node with cold status, otherwise (invalid node) * ignore that. */ -static void gc_node_segment(struct f2fs_sb_info *sbi, +static int gc_node_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, unsigned int segno, int gc_type) { struct f2fs_summary *entry; @@ -481,6 +481,7 @@ static void gc_node_segment(struct f2fs_sb_info *sbi, int off; int phase = 0; bool fggc = (gc_type == FG_GC); + int submitted = 0; start_addr = START_BLOCK(sbi, segno); @@ -494,10 +495,11 @@ next_step: nid_t nid = le32_to_cpu(entry->nid); struct page *node_page; struct node_info ni; + int err; /* stop BG_GC if there is not enough free sections. */ if (gc_type == BG_GC && has_not_enough_free_secs(sbi, 0, 0)) - return; + return submitted; if (check_valid_map(sbi, segno, off) == 0) continue; @@ -534,7 +536,9 @@ next_step: continue; } - f2fs_move_node_page(node_page, gc_type); + err = f2fs_move_node_page(node_page, gc_type); + if (!err && gc_type == FG_GC) + submitted++; stat_inc_node_blk_count(sbi, 1, gc_type); } @@ -543,6 +547,7 @@ next_step: if (fggc) atomic_dec(&sbi->wb_sync_req[NODE]); + return submitted; } /* @@ -678,7 +683,7 @@ put_page: * Move data block via META_MAPPING while keeping locked data page. * This can be used to move blocks, aka LBAs, directly on disk. */ -static void move_data_block(struct inode *inode, block_t bidx, +static int move_data_block(struct inode *inode, block_t bidx, int gc_type, unsigned int segno, int off) { struct f2fs_io_info fio = { @@ -697,25 +702,29 @@ static void move_data_block(struct inode *inode, block_t bidx, struct node_info ni; struct page *page, *mpage; block_t newaddr; - int err; + int err = 0; bool lfs_mode = test_opt(fio.sbi, LFS); /* do not read out */ page = f2fs_grab_cache_page(inode->i_mapping, bidx, false); if (!page) - return; + return -ENOMEM; - if (!check_valid_map(F2FS_I_SB(inode), segno, off)) + if (!check_valid_map(F2FS_I_SB(inode), segno, off)) { + err = -ENOENT; goto out; + } if (f2fs_is_atomic_file(inode)) { F2FS_I(inode)->i_gc_failures[GC_FAILURE_ATOMIC]++; F2FS_I_SB(inode)->skipped_atomic_files[gc_type]++; + err = -EAGAIN; goto out; } if (f2fs_is_pinned_file(inode)) { f2fs_pin_file_control(inode, true); + err = -EAGAIN; goto out; } @@ -726,6 +735,7 @@ static void move_data_block(struct inode *inode, block_t bidx, if (unlikely(dn.data_blkaddr == NULL_ADDR)) { ClearPageUptodate(page); + err = -ENOENT; goto put_out; } @@ -808,6 +818,7 @@ write_page: fio.new_blkaddr = newaddr; f2fs_submit_page_write(&fio); if (fio.retry) { + err = -EAGAIN; if (PageWriteback(fio.encrypted_page)) end_page_writeback(fio.encrypted_page); goto put_page_out; @@ -831,34 +842,42 @@ put_out: f2fs_put_dnode(&dn); out: f2fs_put_page(page, 1); + return err; } -static void move_data_page(struct inode *inode, block_t bidx, int gc_type, +static int move_data_page(struct inode *inode, block_t bidx, int gc_type, unsigned int segno, int off) { struct page *page; + int err = 0; page = f2fs_get_lock_data_page(inode, bidx, true); if (IS_ERR(page)) - return; + return PTR_ERR(page); - if (!check_valid_map(F2FS_I_SB(inode), segno, off)) + if (!check_valid_map(F2FS_I_SB(inode), segno, off)) { + err = -ENOENT; goto out; + } if (f2fs_is_atomic_file(inode)) { F2FS_I(inode)->i_gc_failures[GC_FAILURE_ATOMIC]++; F2FS_I_SB(inode)->skipped_atomic_files[gc_type]++; + err = -EAGAIN; goto out; } if (f2fs_is_pinned_file(inode)) { if (gc_type == FG_GC) f2fs_pin_file_control(inode, true); + err = -EAGAIN; goto out; } if (gc_type == BG_GC) { - if (PageWriteback(page)) + if (PageWriteback(page)) { + err = -EAGAIN; goto out; + } set_page_dirty(page); set_cold_data(page); } else { @@ -876,7 +895,6 @@ static void move_data_page(struct inode *inode, block_t bidx, int gc_type, .io_type = FS_GC_DATA_IO, }; bool is_dirty = PageDirty(page); - int err; retry: set_page_dirty(page); @@ -901,6 +919,7 @@ retry: } out: f2fs_put_page(page, 1); + return err; } /* @@ -910,7 +929,7 @@ out: * If the parent node is not valid or the data block address is different, * the victim data block is ignored. */ -static void gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, +static int gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, struct gc_inode_list *gc_list, unsigned int segno, int gc_type) { struct super_block *sb = sbi->sb; @@ -918,6 +937,7 @@ static void gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, block_t start_addr; int off; int phase = 0; + int submitted = 0; start_addr = START_BLOCK(sbi, segno); @@ -934,7 +954,7 @@ next_step: /* stop BG_GC if there is not enough free sections. */ if (gc_type == BG_GC && has_not_enough_free_secs(sbi, 0, 0)) - return; + return submitted; if (check_valid_map(sbi, segno, off) == 0) continue; @@ -1006,6 +1026,7 @@ next_step: if (inode) { struct f2fs_inode_info *fi = F2FS_I(inode); bool locked = false; + int err; if (S_ISREG(inode->i_mode)) { if (!down_write_trylock(&fi->i_gc_rwsem[READ])) @@ -1025,12 +1046,16 @@ next_step: start_bidx = f2fs_start_bidx_of_node(nofs, inode) + ofs_in_node; if (f2fs_post_read_required(inode)) - move_data_block(inode, start_bidx, gc_type, - segno, off); + err = move_data_block(inode, start_bidx, + gc_type, segno, off); else - move_data_page(inode, start_bidx, gc_type, + err = move_data_page(inode, start_bidx, gc_type, segno, off); + if (!err && (gc_type == FG_GC || + f2fs_post_read_required(inode))) + submitted++; + if (locked) { up_write(&fi->i_gc_rwsem[WRITE]); up_write(&fi->i_gc_rwsem[READ]); @@ -1042,6 +1067,8 @@ next_step: if (++phase < 5) goto next_step; + + return submitted; } static int __get_victim(struct f2fs_sb_info *sbi, unsigned int *victim, @@ -1069,6 +1096,7 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, int seg_freed = 0; unsigned char type = IS_DATASEG(get_seg_entry(sbi, segno)->type) ? SUM_TYPE_DATA : SUM_TYPE_NODE; + int submitted = 0; /* readahead multi ssa blocks those have contiguous address */ if (sbi->segs_per_sec > 1) @@ -1124,10 +1152,11 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, * - lock_page(sum_page) */ if (type == SUM_TYPE_NODE) - gc_node_segment(sbi, sum->entries, segno, gc_type); - else - gc_data_segment(sbi, sum->entries, gc_list, segno, + submitted += gc_node_segment(sbi, sum->entries, segno, gc_type); + else + submitted += gc_data_segment(sbi, sum->entries, gc_list, + segno, gc_type); stat_inc_seg_count(sbi, type, gc_type); @@ -1138,7 +1167,7 @@ next: f2fs_put_page(sum_page, 0); } - if (gc_type == FG_GC) + if (submitted) f2fs_submit_merged_write(sbi, (type == SUM_TYPE_NODE) ? NODE : DATA); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 4ad214c270503..42e9b4158978c 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1587,8 +1587,10 @@ redirty_out: return AOP_WRITEPAGE_ACTIVATE; } -void f2fs_move_node_page(struct page *node_page, int gc_type) +int f2fs_move_node_page(struct page *node_page, int gc_type) { + int err = 0; + if (gc_type == FG_GC) { struct writeback_control wbc = { .sync_mode = WB_SYNC_ALL, @@ -1600,12 +1602,16 @@ void f2fs_move_node_page(struct page *node_page, int gc_type) f2fs_wait_on_page_writeback(node_page, NODE, true); f2fs_bug_on(F2FS_P_SB(node_page), PageWriteback(node_page)); - if (!clear_page_dirty_for_io(node_page)) + if (!clear_page_dirty_for_io(node_page)) { + err = -EAGAIN; goto out_page; + } if (__write_node_page(node_page, false, NULL, - &wbc, false, FS_GC_NODE_IO, NULL)) + &wbc, false, FS_GC_NODE_IO, NULL)) { + err = -EAGAIN; unlock_page(node_page); + } goto release_page; } else { /* set page dirty and write it */ @@ -1616,6 +1622,7 @@ out_page: unlock_page(node_page); release_page: f2fs_put_page(node_page, 0); + return err; } static int f2fs_write_node_page(struct page *page, -- GitLab From 6c69ba3fcbaaf341f713d74e1f8d1f8decef3e49 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 3 Oct 2018 22:32:44 +0800 Subject: [PATCH 1059/2269] f2fs: fix to recover cold bit of inode block during POR Testcase to reproduce this bug: 1. mkfs.f2fs /dev/sdd 2. mount -t f2fs /dev/sdd /mnt/f2fs 3. touch /mnt/f2fs/file 4. sync 5. chattr +A /mnt/f2fs/file 6. xfs_io -f /mnt/f2fs/file -c "fsync" 7. godown /mnt/f2fs 8. umount /mnt/f2fs 9. mount -t f2fs /dev/sdd /mnt/f2fs 10. chattr -A /mnt/f2fs/file 11. xfs_io -f /mnt/f2fs/file -c "fsync" 12. umount /mnt/f2fs 13. mount -t f2fs /dev/sdd /mnt/f2fs 14. lsattr /mnt/f2fs/file -----------------N- /mnt/f2fs/file But actually, we expect the corrct result is: -------A---------N- /mnt/f2fs/file The reason is in step 9) we missed to recover cold bit flag in inode block, so later, in fsync, we will skip write inode block due to below condition check, result in lossing data in another SPOR. f2fs_fsync_node_pages() if (!IS_DNODE(page) || !is_cold_node(page)) continue; Note that, I guess that some non-dir inode has already lost cold bit during POR, so in order to reenable recovery for those inode, let's try to recover cold bit in f2fs_iget() to save more fsynced data. Fixes: c56675750d7c ("f2fs: remove unneeded set_cold_node()") Cc: 4.17+ Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/inode.c | 6 ++++++ fs/f2fs/node.c | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 4ee9d6c4b7191..57a7a15239d69 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -365,6 +365,12 @@ static int do_read_inode(struct inode *inode) if (f2fs_has_inline_data(inode) && !f2fs_exist_data(inode)) __recover_inline_status(inode, node_page); + /* try to recover cold bit for non-dir inode */ + if (!S_ISDIR(inode->i_mode) && !is_cold_node(node_page)) { + set_cold_node(node_page, false); + set_page_dirty(node_page); + } + /* get rdev by using inline_info */ __get_inode_rdev(inode, ri); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 42e9b4158978c..fb8683c9f9477 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -2549,7 +2549,7 @@ retry: if (!PageUptodate(ipage)) SetPageUptodate(ipage); fill_node_footer(ipage, ino, ino, 0, true); - set_cold_node(page, false); + set_cold_node(ipage, false); src = F2FS_INODE(page); dst = F2FS_INODE(ipage); -- GitLab From fab12a0831af60696ca7785dd688ed60fdc2c5d3 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 4 Oct 2018 11:18:30 +0800 Subject: [PATCH 1060/2269] f2fs: use rb_*_cached friends As rbtree supports caching leftmost node natively, update f2fs codes to use rb_*_cached helpers to speed up leftmost node visiting. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/extent_cache.c | 78 +++++++++++++++++++++++++----------------- fs/f2fs/f2fs.h | 17 ++++----- fs/f2fs/segment.c | 22 +++++++----- 3 files changed, 69 insertions(+), 48 deletions(-) diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index 904ad7ba5a453..1cb0fcc67d2df 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -27,10 +27,10 @@ static struct rb_entry *__lookup_rb_tree_fast(struct rb_entry *cached_re, return NULL; } -static struct rb_entry *__lookup_rb_tree_slow(struct rb_root *root, +static struct rb_entry *__lookup_rb_tree_slow(struct rb_root_cached *root, unsigned int ofs) { - struct rb_node *node = root->rb_node; + struct rb_node *node = root->rb_root.rb_node; struct rb_entry *re; while (node) { @@ -46,7 +46,7 @@ static struct rb_entry *__lookup_rb_tree_slow(struct rb_root *root, return NULL; } -struct rb_entry *f2fs_lookup_rb_tree(struct rb_root *root, +struct rb_entry *f2fs_lookup_rb_tree(struct rb_root_cached *root, struct rb_entry *cached_re, unsigned int ofs) { struct rb_entry *re; @@ -59,22 +59,25 @@ struct rb_entry *f2fs_lookup_rb_tree(struct rb_root *root, } struct rb_node **f2fs_lookup_rb_tree_for_insert(struct f2fs_sb_info *sbi, - struct rb_root *root, struct rb_node **parent, - unsigned int ofs) + struct rb_root_cached *root, + struct rb_node **parent, + unsigned int ofs, bool *leftmost) { - struct rb_node **p = &root->rb_node; + struct rb_node **p = &root->rb_root.rb_node; struct rb_entry *re; while (*p) { *parent = *p; re = rb_entry(*parent, struct rb_entry, rb_node); - if (ofs < re->ofs) + if (ofs < re->ofs) { p = &(*p)->rb_left; - else if (ofs >= re->ofs + re->len) + } else if (ofs >= re->ofs + re->len) { p = &(*p)->rb_right; - else + *leftmost = false; + } else { f2fs_bug_on(sbi, 1); + } } return p; @@ -89,16 +92,16 @@ struct rb_node **f2fs_lookup_rb_tree_for_insert(struct f2fs_sb_info *sbi, * in order to simpfy the insertion after. * tree must stay unchanged between lookup and insertion. */ -struct rb_entry *f2fs_lookup_rb_tree_ret(struct rb_root *root, +struct rb_entry *f2fs_lookup_rb_tree_ret(struct rb_root_cached *root, struct rb_entry *cached_re, unsigned int ofs, struct rb_entry **prev_entry, struct rb_entry **next_entry, struct rb_node ***insert_p, struct rb_node **insert_parent, - bool force) + bool force, bool *leftmost) { - struct rb_node **pnode = &root->rb_node; + struct rb_node **pnode = &root->rb_root.rb_node; struct rb_node *parent = NULL, *tmp_node; struct rb_entry *re = cached_re; @@ -107,7 +110,7 @@ struct rb_entry *f2fs_lookup_rb_tree_ret(struct rb_root *root, *prev_entry = NULL; *next_entry = NULL; - if (RB_EMPTY_ROOT(root)) + if (RB_EMPTY_ROOT(&root->rb_root)) return NULL; if (re) { @@ -115,16 +118,22 @@ struct rb_entry *f2fs_lookup_rb_tree_ret(struct rb_root *root, goto lookup_neighbors; } + if (leftmost) + *leftmost = true; + while (*pnode) { parent = *pnode; re = rb_entry(*pnode, struct rb_entry, rb_node); - if (ofs < re->ofs) + if (ofs < re->ofs) { pnode = &(*pnode)->rb_left; - else if (ofs >= re->ofs + re->len) + } else if (ofs >= re->ofs + re->len) { pnode = &(*pnode)->rb_right; - else + if (leftmost) + *leftmost = false; + } else { goto lookup_neighbors; + } } *insert_p = pnode; @@ -157,10 +166,10 @@ lookup_neighbors: } bool f2fs_check_rb_tree_consistence(struct f2fs_sb_info *sbi, - struct rb_root *root) + struct rb_root_cached *root) { #ifdef CONFIG_F2FS_CHECK_FS - struct rb_node *cur = rb_first(root), *next; + struct rb_node *cur = rb_first_cached(root), *next; struct rb_entry *cur_re, *next_re; if (!cur) @@ -193,7 +202,8 @@ static struct kmem_cache *extent_node_slab; static struct extent_node *__attach_extent_node(struct f2fs_sb_info *sbi, struct extent_tree *et, struct extent_info *ei, - struct rb_node *parent, struct rb_node **p) + struct rb_node *parent, struct rb_node **p, + bool leftmost) { struct extent_node *en; @@ -206,7 +216,7 @@ static struct extent_node *__attach_extent_node(struct f2fs_sb_info *sbi, en->et = et; rb_link_node(&en->rb_node, parent, p); - rb_insert_color(&en->rb_node, &et->root); + rb_insert_color_cached(&en->rb_node, &et->root, leftmost); atomic_inc(&et->node_cnt); atomic_inc(&sbi->total_ext_node); return en; @@ -215,7 +225,7 @@ static struct extent_node *__attach_extent_node(struct f2fs_sb_info *sbi, static void __detach_extent_node(struct f2fs_sb_info *sbi, struct extent_tree *et, struct extent_node *en) { - rb_erase(&en->rb_node, &et->root); + rb_erase_cached(&en->rb_node, &et->root); atomic_dec(&et->node_cnt); atomic_dec(&sbi->total_ext_node); @@ -254,7 +264,7 @@ static struct extent_tree *__grab_extent_tree(struct inode *inode) f2fs_radix_tree_insert(&sbi->extent_tree_root, ino, et); memset(et, 0, sizeof(struct extent_tree)); et->ino = ino; - et->root = RB_ROOT; + et->root = RB_ROOT_CACHED; et->cached_en = NULL; rwlock_init(&et->lock); INIT_LIST_HEAD(&et->list); @@ -275,10 +285,10 @@ static struct extent_tree *__grab_extent_tree(struct inode *inode) static struct extent_node *__init_extent_tree(struct f2fs_sb_info *sbi, struct extent_tree *et, struct extent_info *ei) { - struct rb_node **p = &et->root.rb_node; + struct rb_node **p = &et->root.rb_root.rb_node; struct extent_node *en; - en = __attach_extent_node(sbi, et, ei, NULL, p); + en = __attach_extent_node(sbi, et, ei, NULL, p, true); if (!en) return NULL; @@ -294,7 +304,7 @@ static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi, struct extent_node *en; unsigned int count = atomic_read(&et->node_cnt); - node = rb_first(&et->root); + node = rb_first_cached(&et->root); while (node) { next = rb_next(node); en = rb_entry(node, struct extent_node, rb_node); @@ -452,7 +462,8 @@ static struct extent_node *__try_merge_extent_node(struct f2fs_sb_info *sbi, static struct extent_node *__insert_extent_tree(struct f2fs_sb_info *sbi, struct extent_tree *et, struct extent_info *ei, struct rb_node **insert_p, - struct rb_node *insert_parent) + struct rb_node *insert_parent, + bool leftmost) { struct rb_node **p; struct rb_node *parent = NULL; @@ -464,9 +475,12 @@ static struct extent_node *__insert_extent_tree(struct f2fs_sb_info *sbi, goto do_insert; } - p = f2fs_lookup_rb_tree_for_insert(sbi, &et->root, &parent, ei->fofs); + leftmost = true; + + p = f2fs_lookup_rb_tree_for_insert(sbi, &et->root, &parent, + ei->fofs, &leftmost); do_insert: - en = __attach_extent_node(sbi, et, ei, parent, p); + en = __attach_extent_node(sbi, et, ei, parent, p, leftmost); if (!en) return NULL; @@ -492,6 +506,7 @@ static void f2fs_update_extent_tree_range(struct inode *inode, unsigned int end = fofs + len; unsigned int pos = (unsigned int)fofs; bool updated = false; + bool leftmost; if (!et) return; @@ -519,7 +534,8 @@ static void f2fs_update_extent_tree_range(struct inode *inode, (struct rb_entry *)et->cached_en, fofs, (struct rb_entry **)&prev_en, (struct rb_entry **)&next_en, - &insert_p, &insert_parent, false); + &insert_p, &insert_parent, false, + &leftmost); if (!en) en = next_en; @@ -546,7 +562,7 @@ static void f2fs_update_extent_tree_range(struct inode *inode, end - dei.fofs + dei.blk, org_end - end); en1 = __insert_extent_tree(sbi, et, &ei, - NULL, NULL); + NULL, NULL, true); next_en = en1; } else { en->ei.fofs = end; @@ -587,7 +603,7 @@ static void f2fs_update_extent_tree_range(struct inode *inode, set_extent_info(&ei, fofs, blkaddr, len); if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en)) __insert_extent_tree(sbi, et, &ei, - insert_p, insert_parent); + insert_p, insert_parent, leftmost); /* give up extent_cache, if split and small updates happen */ if (dei.len >= 1 && diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index a11964a1f458a..cb4e91df892ee 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -329,7 +329,7 @@ struct discard_cmd_control { atomic_t issued_discard; /* # of issued discard */ atomic_t issing_discard; /* # of issing discard */ atomic_t discard_cmd_cnt; /* # of cached cmd count */ - struct rb_root root; /* root of discard rb-tree */ + struct rb_root_cached root; /* root of discard rb-tree */ bool rbtree_check; /* config for consistence check */ }; @@ -571,7 +571,7 @@ struct extent_node { struct extent_tree { nid_t ino; /* inode number */ - struct rb_root root; /* root of extent info rb-tree */ + struct rb_root_cached root; /* root of extent info rb-tree */ struct extent_node *cached_en; /* recently accessed extent node */ struct extent_info largest; /* largested extent info */ struct list_head list; /* to be used by sbi->zombie_list */ @@ -3369,18 +3369,19 @@ void f2fs_leave_shrinker(struct f2fs_sb_info *sbi); /* * extent_cache.c */ -struct rb_entry *f2fs_lookup_rb_tree(struct rb_root *root, +struct rb_entry *f2fs_lookup_rb_tree(struct rb_root_cached *root, struct rb_entry *cached_re, unsigned int ofs); struct rb_node **f2fs_lookup_rb_tree_for_insert(struct f2fs_sb_info *sbi, - struct rb_root *root, struct rb_node **parent, - unsigned int ofs); -struct rb_entry *f2fs_lookup_rb_tree_ret(struct rb_root *root, + struct rb_root_cached *root, + struct rb_node **parent, + unsigned int ofs, bool *leftmost); +struct rb_entry *f2fs_lookup_rb_tree_ret(struct rb_root_cached *root, struct rb_entry *cached_re, unsigned int ofs, struct rb_entry **prev_entry, struct rb_entry **next_entry, struct rb_node ***insert_p, struct rb_node **insert_parent, - bool force); + bool force, bool *leftmost); bool f2fs_check_rb_tree_consistence(struct f2fs_sb_info *sbi, - struct rb_root *root); + struct rb_root_cached *root); unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink); bool f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext); void f2fs_drop_extent_tree(struct inode *inode); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 195dc8142bff5..805c8310d7b0c 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -920,7 +920,8 @@ static struct discard_cmd *__create_discard_cmd(struct f2fs_sb_info *sbi, static struct discard_cmd *__attach_discard_cmd(struct f2fs_sb_info *sbi, struct block_device *bdev, block_t lstart, block_t start, block_t len, - struct rb_node *parent, struct rb_node **p) + struct rb_node *parent, struct rb_node **p, + bool leftmost) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; struct discard_cmd *dc; @@ -928,7 +929,7 @@ static struct discard_cmd *__attach_discard_cmd(struct f2fs_sb_info *sbi, dc = __create_discard_cmd(sbi, bdev, lstart, start, len); rb_link_node(&dc->rb_node, parent, p); - rb_insert_color(&dc->rb_node, &dcc->root); + rb_insert_color_cached(&dc->rb_node, &dcc->root, leftmost); return dc; } @@ -940,7 +941,7 @@ static void __detach_discard_cmd(struct discard_cmd_control *dcc, atomic_sub(dc->issuing, &dcc->issing_discard); list_del(&dc->list); - rb_erase(&dc->rb_node, &dcc->root); + rb_erase_cached(&dc->rb_node, &dcc->root); dcc->undiscard_blks -= dc->len; kmem_cache_free(discard_cmd_slab, dc); @@ -1177,6 +1178,7 @@ static struct discard_cmd *__insert_discard_tree(struct f2fs_sb_info *sbi, struct rb_node **p; struct rb_node *parent = NULL; struct discard_cmd *dc = NULL; + bool leftmost = true; if (insert_p && insert_parent) { parent = insert_parent; @@ -1184,9 +1186,11 @@ static struct discard_cmd *__insert_discard_tree(struct f2fs_sb_info *sbi, goto do_insert; } - p = f2fs_lookup_rb_tree_for_insert(sbi, &dcc->root, &parent, lstart); + p = f2fs_lookup_rb_tree_for_insert(sbi, &dcc->root, &parent, + lstart, &leftmost); do_insert: - dc = __attach_discard_cmd(sbi, bdev, lstart, start, len, parent, p); + dc = __attach_discard_cmd(sbi, bdev, lstart, start, len, parent, + p, leftmost); if (!dc) return NULL; @@ -1254,7 +1258,7 @@ static void __update_discard_tree_range(struct f2fs_sb_info *sbi, NULL, lstart, (struct rb_entry **)&prev_dc, (struct rb_entry **)&next_dc, - &insert_p, &insert_parent, true); + &insert_p, &insert_parent, true, NULL); if (dc) prev_dc = dc; @@ -1362,7 +1366,7 @@ static unsigned int __issue_discard_cmd_orderly(struct f2fs_sb_info *sbi, NULL, pos, (struct rb_entry **)&prev_dc, (struct rb_entry **)&next_dc, - &insert_p, &insert_parent, true); + &insert_p, &insert_parent, true, NULL); if (!dc) dc = next_dc; @@ -1994,7 +1998,7 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi) dcc->max_discards = MAIN_SEGS(sbi) << sbi->log_blocks_per_seg; dcc->undiscard_blks = 0; dcc->next_pos = 0; - dcc->root = RB_ROOT; + dcc->root = RB_ROOT_CACHED; dcc->rbtree_check = false; init_waitqueue_head(&dcc->discard_wait_queue); @@ -2658,7 +2662,7 @@ next: NULL, start, (struct rb_entry **)&prev_dc, (struct rb_entry **)&next_dc, - &insert_p, &insert_parent, true); + &insert_p, &insert_parent, true, NULL); if (!dc) dc = next_dc; -- GitLab From 1be4c686b588a29bef24dec29444320523229fb1 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 4 Oct 2018 11:15:18 +0800 Subject: [PATCH 1061/2269] f2fs: shrink sbi->sb_lock coverage in set_file_temperature() file_set_{cold,hot} doesn't need holding sbi->sb_lock, so moving them out of the lock. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/namei.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 6d4b11f404ccc..7e1490f4098fe 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -182,16 +182,19 @@ static inline void set_file_temperature(struct f2fs_sb_info *sbi, struct inode * hot_count = sbi->raw_super->hot_ext_count; for (i = 0; i < cold_count + hot_count; i++) { - if (!is_extension_exist(name, extlist[i])) - continue; - if (i < cold_count) - file_set_cold(inode); - else - file_set_hot(inode); - break; + if (is_extension_exist(name, extlist[i])) + break; } up_read(&sbi->sb_lock); + + if (i == cold_count + hot_count) + return; + + if (i < cold_count) + file_set_cold(inode); + else + file_set_hot(inode); } int f2fs_update_extension_list(struct f2fs_sb_info *sbi, const char *name, -- GitLab From cd692c2d1164df4595f8885d15804e9ef776bb00 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 4 Oct 2018 11:15:19 +0800 Subject: [PATCH 1062/2269] f2fs: remove unused sbi->trigger_ssr_threshold Commit a2a12b679f36 ("f2fs: export SSR allocation threshold") introduced two threshold .min_ssr_sections and .trigger_ssr_threshold, but only .min_ssr_sections is used, so just remove redundant one for cleanup. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index cb4e91df892ee..bb028244a5a74 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1221,7 +1221,6 @@ struct f2fs_sb_info { unsigned int total_valid_node_count; /* valid node block count */ loff_t max_file_blocks; /* max block index of file */ int dir_level; /* directory level */ - unsigned int trigger_ssr_threshold; /* threshold to trigger ssr */ int readdir_ra; /* readahead inode in readdir */ block_t user_block_count; /* # of user blocks */ -- GitLab From d2f57f2521a3b3d970db8edfbfbee3b8039f7f9c Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 4 Oct 2018 11:15:20 +0800 Subject: [PATCH 1063/2269] f2fs: remove unneeded disable_nat_bits() Commit 7735730d39d7 ("f2fs: fix to propagate error from __get_meta_page()") added disable_nat_bits() in error path of __get_nat_bitmaps(), but it's unneeded, beause we will fail mount, we won't have chance to change nid usage status w/o nat full/empty bitmaps. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index fb8683c9f9477..bea66bd7fdcb1 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -2879,10 +2879,8 @@ static int __get_nat_bitmaps(struct f2fs_sb_info *sbi) struct page *page; page = f2fs_get_meta_page(sbi, nat_bits_addr++); - if (IS_ERR(page)) { - disable_nat_bits(sbi, true); + if (IS_ERR(page)) return PTR_ERR(page); - } memcpy(nm_i->nat_bits + (i << F2FS_BLKSIZE_BITS), page_address(page), F2FS_BLKSIZE); -- GitLab From ad27a1630499489b6dc6c8682c311ce7a6c9ccea Mon Sep 17 00:00:00 2001 From: Sahitya Tummala Date: Fri, 5 Oct 2018 10:47:39 +0530 Subject: [PATCH 1064/2269] f2fs: do not update REQ_TIME in case of error conditions The REQ_TIME should be updated only in case of success cases as followed at all other places in the file system. Signed-off-by: Sahitya Tummala Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/dir.c | 2 +- fs/f2fs/file.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index c77a58038709d..e02db5db62dc2 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -655,9 +655,9 @@ int f2fs_do_tmpfile(struct inode *inode, struct inode *dir) f2fs_put_page(page, 1); clear_inode_flag(inode, FI_NEW_INODE); + f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); fail: up_write(&F2FS_I(inode)->i_sem); - f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); return err; } diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 44a5e564609e8..be973e6d1a99d 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -106,6 +106,7 @@ static int f2fs_vm_page_mkwrite(struct vm_fault *vmf) SetPageUptodate(page); f2fs_update_iostat(sbi, APP_MAPPED_IO, F2FS_BLKSIZE); + f2fs_update_time(sbi, REQ_TIME); trace_f2fs_vm_page_mkwrite(page, DATA); out_sem: @@ -114,7 +115,6 @@ out_sem: f2fs_balance_fs(sbi, dn.node_changed); sb_end_pagefault(inode->i_sb); - f2fs_update_time(sbi, REQ_TIME); err: return block_page_mkwrite_return(err); } -- GitLab From 9ec0c94534cbb3ae0df30aa99ccae4e54e387922 Mon Sep 17 00:00:00 2001 From: Sahitya Tummala Date: Fri, 5 Oct 2018 10:47:40 +0530 Subject: [PATCH 1065/2269] f2fs: update REQ_TIME in f2fs_cross_rename() Update REQ_TIME in the missing path - f2fs_cross_rename(). Signed-off-by: Sahitya Tummala [Jaegeuk Kim: add it in f2fs_rename()] Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/namei.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 7e1490f4098fe..f21a2278f395d 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -1004,6 +1004,8 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir)) f2fs_sync_fs(sbi->sb, 1); + + f2fs_update_time(sbi, REQ_TIME); return 0; put_out_dir: @@ -1161,6 +1163,8 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir)) f2fs_sync_fs(sbi->sb, 1); + + f2fs_update_time(sbi, REQ_TIME); return 0; out_new_dir: if (new_dir_entry) { -- GitLab From 9816fe54917fc29f9b6c7631f6745fef817ebc62 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 2 Oct 2018 17:20:58 -0700 Subject: [PATCH 1066/2269] f2fs: allow to mount, if quota is failed Since we can use the filesystem without quotas till next boot. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index e2291474e56bc..31c3f279e51c5 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -3189,11 +3189,9 @@ try_onemore: /* Enable quota usage during mount */ if (f2fs_sb_has_quota_ino(sb) && !f2fs_readonly(sb)) { err = f2fs_enable_quotas(sb); - if (err) { + if (err) f2fs_msg(sb, KERN_ERR, "Cannot turn on quotas: error %d", err); - goto free_sysfs; - } } #endif /* if there are nt orphan nodes free them */ @@ -3294,9 +3292,6 @@ free_meta: * falls into an infinite loop in f2fs_sync_meta_pages(). */ truncate_inode_pages_final(META_MAPPING(sbi)); -#ifdef CONFIG_QUOTA -free_sysfs: -#endif f2fs_unregister_sysfs(sbi); free_root_inode: dput(sb->s_root); -- GitLab From 5a088225311085af2d9d458bcf8a86c9d47f385f Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 16 Oct 2018 08:34:50 -0600 Subject: [PATCH 1067/2269] f2fs: remove request_list check in is_idle() This doesn't work on stacked devices, and it doesn't work on blk-mq devices. The request_list is only used on legacy, which we don't have much of anymore, and soon won't have any of. Kill the check. Cc: Jaegeuk Kim Cc: linux-f2fs-devel@lists.sourceforge.net Signed-off-by: Jens Axboe Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 7 ------- 1 file changed, 7 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index bb028244a5a74..6df0da09d5c68 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1395,13 +1395,6 @@ static inline unsigned int f2fs_time_to_wait(struct f2fs_sb_info *sbi, static inline bool is_idle(struct f2fs_sb_info *sbi, int type) { - struct block_device *bdev = sbi->sb->s_bdev; - struct request_queue *q = bdev_get_queue(bdev); - struct request_list *rl = &q->root_rl; - - if (rl->count[BLK_RW_SYNC] || rl->count[BLK_RW_ASYNC]) - return false; - return f2fs_time_over(sbi, type); } -- GitLab From 91ccc89a4812ee178b4259adf778a420cd4d4bb2 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 22 Oct 2018 09:12:51 +0800 Subject: [PATCH 1068/2269] f2fs: fix to account IO correctly Below race can cause reversed reference on dirty count, fix it by relocating __submit_bio() and inc_page_count(). Thread A Thread B - f2fs_inplace_write_data - f2fs_submit_page_bio - __submit_bio - f2fs_write_end_io - dec_page_count - inc_page_count Cc: Fixes: d1b3e72d5490 ("f2fs: submit bio of in-place-update pages") Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 606563867e221..d67c8a5029147 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -461,10 +461,10 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio) } bio_set_op_attrs(bio, fio->op, fio->op_flags); - __submit_bio(fio->sbi, bio, fio->type); - if (!is_read_io(fio->op)) inc_page_count(fio->sbi, WB_DATA_TYPE(fio->page)); + + __submit_bio(fio->sbi, bio, fio->type); return 0; } -- GitLab From c72400308806a3710cb48d4739e4b94c19c086f8 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 22 Oct 2018 23:24:28 +0800 Subject: [PATCH 1069/2269] f2fs: fix to account IO correctly for cgroup writeback Now, we have supported cgroup writeback, it depends on correctly IO account of specified filesystem. But in commit d1b3e72d5490 ("f2fs: submit bio of in-place-update pages"), we split write paths from f2fs_submit_page_mbio() to two: - f2fs_submit_page_bio() for IPU path - f2fs_submit_page_bio() for OPU path But still we account write IO only in f2fs_submit_page_mbio(), result in incorrect IO account, fix it by adding missing IO account in IPU path. Fixes: d1b3e72d5490 ("f2fs: submit bio of in-place-update pages") Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index d67c8a5029147..4b1e02b1d302c 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -459,6 +459,10 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio) bio_put(bio); return -EFAULT; } + + if (fio->io_wbc && !is_read_io(fio->op)) + wbc_account_io(fio->io_wbc, page, PAGE_SIZE); + bio_set_op_attrs(bio, fio->op, fio->op_flags); if (!is_read_io(fio->op)) -- GitLab From 23ee3b76ec9b6e76eb2dc9584cc36cbb800351fe Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 16 Oct 2018 10:20:53 -0700 Subject: [PATCH 1070/2269] f2fs: account read IOs and use IO counts for is_idle This patch adds issued read IO counts which is under block layer. Chao modified a bit, since: Below race can cause reversed reference on F2FS_RD_DATA, there is the same issue in f2fs_submit_page_bio(), fix them by relocate __submit_bio() and inc_page_count. Thread A Thread B - f2fs_write_begin - f2fs_submit_page_read - __submit_bio - f2fs_read_end_io - __read_end_io - dec_page_count(, F2FS_RD_DATA) - inc_page_count(, F2FS_RD_DATA) Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 24 ++++++++++++++++++++++-- fs/f2fs/debug.c | 7 ++++++- fs/f2fs/f2fs.h | 22 ++++++++++++++++------ 3 files changed, 44 insertions(+), 9 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 4b1e02b1d302c..cf25a12d3abe1 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -52,6 +52,23 @@ static bool __is_cp_guaranteed(struct page *page) return false; } +static enum count_type __read_io_type(struct page *page) +{ + struct address_space *mapping = page->mapping; + + if (mapping) { + struct inode *inode = mapping->host; + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + + if (inode->i_ino == F2FS_META_INO(sbi)) + return F2FS_RD_META; + + if (inode->i_ino == F2FS_NODE_INO(sbi)) + return F2FS_RD_NODE; + } + return F2FS_RD_DATA; +} + /* postprocessing steps for read bios */ enum bio_post_read_step { STEP_INITIAL = 0, @@ -82,6 +99,7 @@ static void __read_end_io(struct bio *bio) } else { SetPageUptodate(page); } + dec_page_count(F2FS_P_SB(page), __read_io_type(page)); unlock_page(page); } if (bio->bi_private) @@ -465,8 +483,8 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio) bio_set_op_attrs(bio, fio->op, fio->op_flags); - if (!is_read_io(fio->op)) - inc_page_count(fio->sbi, WB_DATA_TYPE(fio->page)); + inc_page_count(fio->sbi, is_read_io(fio->op) ? + __read_io_type(page): WB_DATA_TYPE(fio->page)); __submit_bio(fio->sbi, bio, fio->type); return 0; @@ -597,6 +615,7 @@ static int f2fs_submit_page_read(struct inode *inode, struct page *page, return -EFAULT; } ClearPageError(page); + inc_page_count(F2FS_I_SB(inode), F2FS_RD_DATA); __submit_bio(F2FS_I_SB(inode), bio, DATA); return 0; } @@ -1585,6 +1604,7 @@ submit_and_realloc: if (bio_add_page(bio, page, blocksize, 0) < blocksize) goto submit_and_realloc; + inc_page_count(F2FS_I_SB(inode), F2FS_RD_DATA); ClearPageError(page); last_block_in_bio = block_nr; goto next_page; diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 026e10f308898..139b4d5c83d5d 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -55,6 +55,9 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->max_vw_cnt = atomic_read(&sbi->max_vw_cnt); si->nr_wb_cp_data = get_pages(sbi, F2FS_WB_CP_DATA); si->nr_wb_data = get_pages(sbi, F2FS_WB_DATA); + si->nr_rd_data = get_pages(sbi, F2FS_RD_DATA); + si->nr_rd_node = get_pages(sbi, F2FS_RD_NODE); + si->nr_rd_meta = get_pages(sbi, F2FS_RD_META); if (SM_I(sbi) && SM_I(sbi)->fcc_info) { si->nr_flushed = atomic_read(&SM_I(sbi)->fcc_info->issued_flush); @@ -371,7 +374,9 @@ static int stat_show(struct seq_file *s, void *v) seq_printf(s, " - Inner Struct Count: tree: %d(%d), node: %d\n", si->ext_tree, si->zombie_tree, si->ext_node); seq_puts(s, "\nBalancing F2FS Async:\n"); - seq_printf(s, " - IO (CP: %4d, Data: %4d, Flush: (%4d %4d %4d), " + seq_printf(s, " - IO_R (Data: %4d, Node: %4d, Meta: %4d\n", + si->nr_rd_data, si->nr_rd_node, si->nr_rd_meta); + seq_printf(s, " - IO_W (CP: %4d, Data: %4d, Flush: (%4d %4d %4d), " "Discard: (%4d %4d)) cmd: %4d undiscard:%4u\n", si->nr_wb_cp_data, si->nr_wb_data, si->nr_flushing, si->nr_flushed, diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 6df0da09d5c68..7796d0ae7eda6 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -951,6 +951,9 @@ enum count_type { F2FS_DIRTY_IMETA, F2FS_WB_CP_DATA, F2FS_WB_DATA, + F2FS_RD_DATA, + F2FS_RD_NODE, + F2FS_RD_META, NR_COUNT_TYPE, }; @@ -1393,11 +1396,6 @@ static inline unsigned int f2fs_time_to_wait(struct f2fs_sb_info *sbi, return wait_ms; } -static inline bool is_idle(struct f2fs_sb_info *sbi, int type) -{ - return f2fs_time_over(sbi, type); -} - /* * Inline functions */ @@ -1788,7 +1786,9 @@ static inline void inc_page_count(struct f2fs_sb_info *sbi, int count_type) atomic_inc(&sbi->nr_pages[count_type]); if (count_type == F2FS_DIRTY_DATA || count_type == F2FS_INMEM_PAGES || - count_type == F2FS_WB_CP_DATA || count_type == F2FS_WB_DATA) + count_type == F2FS_WB_CP_DATA || count_type == F2FS_WB_DATA || + count_type == F2FS_RD_DATA || count_type == F2FS_RD_NODE || + count_type == F2FS_RD_META) return; set_sbi_flag(sbi, SBI_IS_DIRTY); @@ -2125,6 +2125,15 @@ static inline struct bio *f2fs_bio_alloc(struct f2fs_sb_info *sbi, return bio_alloc(GFP_KERNEL, npages); } +static inline bool is_idle(struct f2fs_sb_info *sbi, int type) +{ + if (get_pages(sbi, F2FS_RD_DATA) || get_pages(sbi, F2FS_RD_NODE) || + get_pages(sbi, F2FS_RD_META) || get_pages(sbi, F2FS_WB_DATA) || + get_pages(sbi, F2FS_WB_CP_DATA)) + return false; + return f2fs_time_over(sbi, type); +} + static inline void f2fs_radix_tree_insert(struct radix_tree_root *root, unsigned long index, void *item) { @@ -3115,6 +3124,7 @@ struct f2fs_stat_info { int free_nids, avail_nids, alloc_nids; int total_count, utilization; int bg_gc, nr_wb_cp_data, nr_wb_data; + int nr_rd_data, nr_rd_node, nr_rd_meta; unsigned int io_skip_bggc, other_skip_bggc; int nr_flushing, nr_flushed, flush_list_empty; int nr_discarding, nr_discarded; -- GitLab From 03519c95d8f68c6a41536ef095239497388c047c Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 16 Oct 2018 19:30:13 -0700 Subject: [PATCH 1071/2269] Revert "f2fs: fix to clear PG_checked flag in set_page_dirty()" This reverts commit 66110abc4c931f879d70e83e1281f891699364bf. If we clear the cold data flag out of the writeback flow, we can miscount -1 by end_io, which incurs a deadlock caused by all I/Os being blocked during heavy GC. Balancing F2FS Async: - IO (CP: 1, Data: -1, Flush: ( 0 0 1), Discard: ( ... GC thread: IRQ - move_data_page() - set_page_dirty() - clear_cold_data() - f2fs_write_end_io() - type = WB_DATA_TYPE(page); here, we get wrong type - dec_page_count(sbi, type); - f2fs_wait_on_page_writeback() Cc: Reported-and-Tested-by: Park Ju Hyung Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index cf25a12d3abe1..1e986beec0eca 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -2653,10 +2653,6 @@ static int f2fs_set_data_page_dirty(struct page *page) if (!PageUptodate(page)) SetPageUptodate(page); - /* don't remain PG_checked flag which was set during GC */ - if (is_cold_data(page)) - clear_cold_data(page); - if (f2fs_is_atomic_file(inode) && !f2fs_is_commit_atomic_write(inode)) { if (!IS_ATOMIC_WRITTEN_PAGE(page)) { f2fs_register_inmem_page(inode, page); -- GitLab From 096177aa6d3f8677921055ee2e5eedfe594ce801 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 27 Jul 2018 18:15:16 +0800 Subject: [PATCH 1072/2269] f2fs: fix to spread clear_cold_data() We need to drop PG_checked flag on page as well when we clear PG_uptodate flag, in order to avoid treating the page as GCing one later. Signed-off-by: Weichao Guo Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 8 +++++++- fs/f2fs/dir.c | 1 + fs/f2fs/segment.c | 4 +++- 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 1e986beec0eca..e889520707738 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1815,6 +1815,7 @@ int f2fs_do_write_data_page(struct f2fs_io_info *fio) /* This page is already truncated */ if (fio->old_blkaddr == NULL_ADDR) { ClearPageUptodate(page); + clear_cold_data(page); goto out_writepage; } got_it: @@ -1990,8 +1991,10 @@ done: out: inode_dec_dirty_pages(inode); - if (err) + if (err) { ClearPageUptodate(page); + clear_cold_data(page); + } if (wbc->for_reclaim) { f2fs_submit_merged_write_cond(sbi, NULL, page, 0, DATA); @@ -2620,6 +2623,8 @@ void f2fs_invalidate_page(struct page *page, unsigned int offset, } } + clear_cold_data(page); + /* This is atomic written page, keep Private */ if (IS_ATOMIC_WRITTEN_PAGE(page)) return f2fs_drop_inmem_page(inode, page); @@ -2638,6 +2643,7 @@ int f2fs_release_page(struct page *page, gfp_t wait) if (IS_ATOMIC_WRITTEN_PAGE(page)) return 0; + clear_cold_data(page); set_page_private(page, 0); ClearPagePrivate(page); return 1; diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index e02db5db62dc2..2ef84b4590ead 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -730,6 +730,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, clear_page_dirty_for_io(page); ClearPagePrivate(page); ClearPageUptodate(page); + clear_cold_data(page); inode_dec_dirty_pages(dir); f2fs_remove_dirty_inode(dir); } diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 805c8310d7b0c..e38b2f69b68e0 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -266,8 +266,10 @@ retry: } next: /* we don't need to invalidate this in the sccessful status */ - if (drop || recover) + if (drop || recover) { ClearPageUptodate(page); + clear_cold_data(page); + } set_page_private(page, 0); ClearPagePrivate(page); f2fs_put_page(page, 1); -- GitLab From ea7f111f5aae5e88928d975d210b4c3fa5a52dad Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sun, 7 Oct 2018 19:06:15 +0800 Subject: [PATCH 1073/2269] f2fs: spread f2fs_set_inode_flags() This patch changes codes as below: - use f2fs_set_inode_flags() to update i_flags atomically to avoid potential race. - synchronize F2FS_I(inode)->i_flags to inode->i_flags in f2fs_new_inode(). - use f2fs_set_inode_flags() to simply codes in f2fs_quota_{on,off}. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 2 +- fs/f2fs/namei.c | 2 ++ fs/f2fs/super.c | 5 ++--- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 7796d0ae7eda6..7b676a2a94ad1 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3423,7 +3423,7 @@ static inline void f2fs_set_encrypted_inode(struct inode *inode) { #ifdef CONFIG_F2FS_FS_ENCRYPTION file_set_encrypt(inode); - inode->i_flags |= S_ENCRYPTED; + f2fs_set_inode_flags(inode); #endif } diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index f21a2278f395d..33d36cd9f1912 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -122,6 +122,8 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) if (F2FS_I(inode)->i_flags & F2FS_PROJINHERIT_FL) set_inode_flag(inode, FI_PROJ_INHERIT); + f2fs_set_inode_flags(inode); + trace_f2fs_new_inode(inode, 0); return inode; diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 31c3f279e51c5..70fc498f2447a 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1939,8 +1939,7 @@ static int f2fs_quota_on(struct super_block *sb, int type, int format_id, inode_lock(inode); F2FS_I(inode)->i_flags |= F2FS_NOATIME_FL | F2FS_IMMUTABLE_FL; - inode_set_flags(inode, S_NOATIME | S_IMMUTABLE, - S_NOATIME | S_IMMUTABLE); + f2fs_set_inode_flags(inode); inode_unlock(inode); f2fs_mark_inode_dirty_sync(inode, false); @@ -1965,7 +1964,7 @@ static int f2fs_quota_off(struct super_block *sb, int type) inode_lock(inode); F2FS_I(inode)->i_flags &= ~(F2FS_NOATIME_FL | F2FS_IMMUTABLE_FL); - inode_set_flags(inode, 0, S_NOATIME | S_IMMUTABLE); + f2fs_set_inode_flags(inode); inode_unlock(inode); f2fs_mark_inode_dirty_sync(inode, false); out_put: -- GitLab From 3ea7192afe097fe2c03878ad3f80f44859394d1f Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sun, 7 Oct 2018 03:03:38 +0800 Subject: [PATCH 1074/2269] f2fs: fix to recover inode->i_flags of inode block during POR Testcase to reproduce this bug: 1. mkfs.f2fs /dev/sdd 2. mount -t f2fs /dev/sdd /mnt/f2fs 3. touch /mnt/f2fs/file 4. sync 5. chattr +a /mnt/f2fs/file 6. xfs_io -a /mnt/f2fs/file -c "fsync" 7. godown /mnt/f2fs 8. umount /mnt/f2fs 9. mount -t f2fs /dev/sdd /mnt/f2fs 10. xfs_io /mnt/f2fs/file There is no error when opening this file w/o O_APPEND, but actually, we expect the correct result should be: /mnt/f2fs/file: Operation not permitted The root cause is, in recover_inode(), we recover inode->i_flags more than F2FS_I(inode)->i_flags, so fix it. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/recovery.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index b069b7d7403db..7899a9e8a6e01 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -234,6 +234,7 @@ static void recover_inode(struct inode *inode, struct page *page) F2FS_I(inode)->i_advise = raw->i_advise; F2FS_I(inode)->i_flags = le32_to_cpu(raw->i_flags); + f2fs_set_inode_flags(inode); F2FS_I(inode)->i_gc_failures[GC_FAILURE_PIN] = le16_to_cpu(raw->i_gc_failures); -- GitLab From c315c32ca5a52d1988c8cc22b84fb985ba1dcf76 Mon Sep 17 00:00:00 2001 From: Sahitya Tummala Date: Wed, 10 Oct 2018 10:56:22 +0530 Subject: [PATCH 1075/2269] f2fs: fix data corruption issue with hardware encryption Direct IO can be used in case of hardware encryption. The following scenario results into data corruption issue in this path - Thread A - Thread B- -> write file#1 in direct IO -> GC gets kicked in -> GC submitted bio on meta mapping for file#1, but pending completion -> write file#1 again with new data in direct IO -> GC bio gets completed now -> GC writes old data to the new location and thus file#1 is corrupted. Fix this by submitting and waiting for pending io on meta mapping for direct IO case in f2fs_map_blocks(). Signed-off-by: Sahitya Tummala Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 11 +++++++++++ fs/f2fs/f2fs.h | 2 ++ fs/f2fs/segment.c | 9 +++++++++ 3 files changed, 22 insertions(+) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index e889520707738..c016523f18baa 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1052,6 +1052,11 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, map->m_flags = F2FS_MAP_MAPPED; if (map->m_next_extent) *map->m_next_extent = pgofs + map->m_len; + + /* for hardware encryption, but to avoid potential issue in future */ + if (flag == F2FS_GET_BLOCK_DIO) + f2fs_wait_on_block_writeback_range(inode, + map->m_pblk, map->m_len); goto out; } @@ -1210,6 +1215,12 @@ skip: goto next_dnode; sync_out: + + /* for hardware encryption, but to avoid potential issue in future */ + if (flag == F2FS_GET_BLOCK_DIO && map->m_flags & F2FS_MAP_MAPPED) + f2fs_wait_on_block_writeback_range(inode, + map->m_pblk, map->m_len); + if (flag == F2FS_GET_BLOCK_PRECACHE) { if (map->m_flags & F2FS_MAP_MAPPED) { unsigned int ofs = start_pgofs - map->m_lblk; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 7b676a2a94ad1..2846ad61c4d9b 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2989,6 +2989,8 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, void f2fs_wait_on_page_writeback(struct page *page, enum page_type type, bool ordered); void f2fs_wait_on_block_writeback(struct inode *inode, block_t blkaddr); +void f2fs_wait_on_block_writeback_range(struct inode *inode, block_t blkaddr, + block_t len); void f2fs_write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk); void f2fs_write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk); int f2fs_lookup_journal_in_cursum(struct f2fs_journal *journal, int type, diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index e38b2f69b68e0..6edcf8391dd3d 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -3303,6 +3303,15 @@ void f2fs_wait_on_block_writeback(struct inode *inode, block_t blkaddr) } } +void f2fs_wait_on_block_writeback_range(struct inode *inode, block_t blkaddr, + block_t len) +{ + block_t i; + + for (i = 0; i < len; i++) + f2fs_wait_on_block_writeback(inode, blkaddr + i); +} + static int read_compacted_summaries(struct f2fs_sb_info *sbi) { struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); -- GitLab From dc0088d4743f802479440c97ee1fc573a078df8d Mon Sep 17 00:00:00 2001 From: Sheng Yong Date: Fri, 12 Oct 2018 18:49:26 +0800 Subject: [PATCH 1076/2269] f2fs: cleanup dirty pages if recover failed During recover, we will try to create new dentries for inodes with dentry_mark. But if the parent is missing (e.g. killed by fsck), recover will break. But those recovered dirty pages are not cleanup. This will hit f2fs_bug_on: [ 53.519566] F2FS-fs (loop0): Found nat_bits in checkpoint [ 53.539354] F2FS-fs (loop0): recover_inode: ino = 5, name = file, inline = 3 [ 53.539402] F2FS-fs (loop0): recover_dentry: ino = 5, name = file, dir = 0, err = -2 [ 53.545760] F2FS-fs (loop0): Cannot recover all fsync data errno=-2 [ 53.546105] F2FS-fs (loop0): access invalid blkaddr:4294967295 [ 53.546171] WARNING: CPU: 1 PID: 1798 at fs/f2fs/checkpoint.c:163 f2fs_is_valid_blkaddr+0x26c/0x320 [ 53.546174] Modules linked in: [ 53.546183] CPU: 1 PID: 1798 Comm: mount Not tainted 4.19.0-rc2+ #1 [ 53.546186] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 [ 53.546191] RIP: 0010:f2fs_is_valid_blkaddr+0x26c/0x320 [ 53.546195] Code: 85 bb 00 00 00 48 89 df 88 44 24 07 e8 ad a8 db ff 48 8b 3b 44 89 e1 48 c7 c2 40 03 72 a9 48 c7 c6 e0 01 72 a9 e8 84 3c ff ff <0f> 0b 0f b6 44 24 07 e9 8a 00 00 00 48 8d bf 38 01 00 00 e8 7c a8 [ 53.546201] RSP: 0018:ffff88006c067768 EFLAGS: 00010282 [ 53.546208] RAX: 0000000000000000 RBX: ffff880068844200 RCX: ffffffffa83e1a33 [ 53.546211] RDX: 0000000000000000 RSI: 0000000000000008 RDI: ffff88006d51e590 [ 53.546215] RBP: 0000000000000005 R08: ffffed000daa3cb3 R09: ffffed000daa3cb3 [ 53.546218] R10: 0000000000000001 R11: ffffed000daa3cb2 R12: 00000000ffffffff [ 53.546221] R13: ffff88006a1f8000 R14: 0000000000000200 R15: 0000000000000009 [ 53.546226] FS: 00007fb2f3646840(0000) GS:ffff88006d500000(0000) knlGS:0000000000000000 [ 53.546229] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 53.546234] CR2: 00007f0fd77f0008 CR3: 00000000687e6002 CR4: 00000000000206e0 [ 53.546237] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 53.546240] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 53.546242] Call Trace: [ 53.546248] f2fs_submit_page_bio+0x95/0x740 [ 53.546253] read_node_page+0x161/0x1e0 [ 53.546271] ? truncate_node+0x650/0x650 [ 53.546283] ? add_to_page_cache_lru+0x12c/0x170 [ 53.546288] ? pagecache_get_page+0x262/0x2d0 [ 53.546292] __get_node_page+0x200/0x660 [ 53.546302] f2fs_update_inode_page+0x4a/0x160 [ 53.546306] f2fs_write_inode+0x86/0xb0 [ 53.546317] __writeback_single_inode+0x49c/0x620 [ 53.546322] writeback_single_inode+0xe4/0x1e0 [ 53.546326] sync_inode_metadata+0x93/0xd0 [ 53.546330] ? sync_inode+0x10/0x10 [ 53.546342] ? do_raw_spin_unlock+0xed/0x100 [ 53.546347] f2fs_sync_inode_meta+0xe0/0x130 [ 53.546351] f2fs_fill_super+0x287d/0x2d10 [ 53.546367] ? vsnprintf+0x742/0x7a0 [ 53.546372] ? f2fs_commit_super+0x180/0x180 [ 53.546379] ? up_write+0x20/0x40 [ 53.546385] ? set_blocksize+0x5f/0x140 [ 53.546391] ? f2fs_commit_super+0x180/0x180 [ 53.546402] mount_bdev+0x181/0x200 [ 53.546406] mount_fs+0x94/0x180 [ 53.546411] vfs_kern_mount+0x6c/0x1e0 [ 53.546415] do_mount+0xe5e/0x1510 [ 53.546420] ? fs_reclaim_release+0x9/0x30 [ 53.546424] ? copy_mount_string+0x20/0x20 [ 53.546428] ? fs_reclaim_acquire+0xd/0x30 [ 53.546435] ? __might_sleep+0x2c/0xc0 [ 53.546440] ? ___might_sleep+0x53/0x170 [ 53.546453] ? __might_fault+0x4c/0x60 [ 53.546468] ? _copy_from_user+0x95/0xa0 [ 53.546474] ? memdup_user+0x39/0x60 [ 53.546478] ksys_mount+0x88/0xb0 [ 53.546482] __x64_sys_mount+0x5d/0x70 [ 53.546495] do_syscall_64+0x65/0x130 [ 53.546503] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 53.547639] ---[ end trace b804d1ea2fec893e ]--- So if recover fails, we need to drop all recovered data. Signed-off-by: Sheng Yong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/recovery.c | 32 +++++++++++++++++++++----------- fs/f2fs/super.c | 15 ++++++++++++++- 2 files changed, 35 insertions(+), 12 deletions(-) diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 7899a9e8a6e01..b7173ae30c75f 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -96,8 +96,12 @@ err_out: return ERR_PTR(err); } -static void del_fsync_inode(struct fsync_inode_entry *entry) +static void del_fsync_inode(struct fsync_inode_entry *entry, int drop) { + if (drop) { + /* inode should not be recovered, drop it */ + f2fs_inode_synced(entry->inode); + } iput(entry->inode); list_del(&entry->list); kmem_cache_free(fsync_entry_slab, entry); @@ -338,12 +342,12 @@ next: return err; } -static void destroy_fsync_dnodes(struct list_head *head) +static void destroy_fsync_dnodes(struct list_head *head, int drop) { struct fsync_inode_entry *entry, *tmp; list_for_each_entry_safe(entry, tmp, head, list) - del_fsync_inode(entry); + del_fsync_inode(entry, drop); } static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi, @@ -580,7 +584,7 @@ out: } static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list, - struct list_head *dir_list) + struct list_head *tmp_inode_list, struct list_head *dir_list) { struct curseg_info *curseg; struct page *page = NULL; @@ -634,7 +638,7 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list, } if (entry->blkaddr == blkaddr) - del_fsync_inode(entry); + list_move_tail(&entry->list, tmp_inode_list); next: /* check next segment */ blkaddr = next_blkaddr_of_node(page); @@ -647,7 +651,7 @@ next: int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only) { - struct list_head inode_list; + struct list_head inode_list, tmp_inode_list; struct list_head dir_list; int err; int ret = 0; @@ -678,6 +682,7 @@ int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only) } INIT_LIST_HEAD(&inode_list); + INIT_LIST_HEAD(&tmp_inode_list); INIT_LIST_HEAD(&dir_list); /* prevent checkpoint */ @@ -696,11 +701,16 @@ int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only) need_writecp = true; /* step #2: recover data */ - err = recover_data(sbi, &inode_list, &dir_list); + err = recover_data(sbi, &inode_list, &tmp_inode_list, &dir_list); if (!err) f2fs_bug_on(sbi, !list_empty(&inode_list)); + else { + /* restore s_flags to let iput() trash data */ + sbi->sb->s_flags = s_flags; + } skip: - destroy_fsync_dnodes(&inode_list); + destroy_fsync_dnodes(&inode_list, err); + destroy_fsync_dnodes(&tmp_inode_list, err); /* truncate meta pages to be used by the recovery */ truncate_inode_pages_range(META_MAPPING(sbi), @@ -709,13 +719,13 @@ skip: if (err) { truncate_inode_pages_final(NODE_MAPPING(sbi)); truncate_inode_pages_final(META_MAPPING(sbi)); + } else { + clear_sbi_flag(sbi, SBI_POR_DOING); } - - clear_sbi_flag(sbi, SBI_POR_DOING); mutex_unlock(&sbi->cp_mutex); /* let's drop all the directory inodes for clean checkpoint */ - destroy_fsync_dnodes(&dir_list); + destroy_fsync_dnodes(&dir_list, err); if (need_writecp) { set_sbi_flag(sbi, SBI_IS_RECOVERED); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 70fc498f2447a..89cf48b416bb7 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1991,6 +1991,19 @@ void f2fs_quota_off_umount(struct super_block *sb) } } +static void f2fs_truncate_quota_inode_pages(struct super_block *sb) +{ + struct quota_info *dqopt = sb_dqopt(sb); + int type; + + for (type = 0; type < MAXQUOTAS; type++) { + if (!dqopt->files[type]) + continue; + f2fs_inode_synced(dqopt->files[type]); + } +} + + static int f2fs_get_projid(struct inode *inode, kprojid_t *projid) { *projid = F2FS_I(inode)->i_projid; @@ -3280,10 +3293,10 @@ skip_recovery: free_meta: #ifdef CONFIG_QUOTA + f2fs_truncate_quota_inode_pages(sb); if (f2fs_sb_has_quota_ino(sb) && !f2fs_readonly(sb)) f2fs_quota_off_umount(sbi->sb); #endif - f2fs_sync_inode_meta(sbi); /* * Some dirty meta pages can be produced by f2fs_recover_orphan_inodes() * failed by EIO. Then, iput(node_inode) can trigger balance_fs_bg() -- GitLab From 4eb89a77bc2010d81626e9d16f943538820b4e34 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 20 Sep 2018 20:05:00 +0800 Subject: [PATCH 1077/2269] f2fs: guarantee journalled quota data by checkpoint For journalled quota mode, let checkpoint to flush dquot dirty data and quota file data to guarntee persistence of all quota sysfile in last checkpoint, by this way, we can avoid corrupting quota sysfile when encountering SPO. The implementation is as below: 1. add a global state SBI_QUOTA_NEED_FLUSH to indicate that there is cached dquot metadata changes in quota subsystem, and later checkpoint should: a) flush dquot metadata into quota file. b) flush quota file to storage to keep file usage be consistent. 2. add a global state SBI_QUOTA_NEED_REPAIR to indicate that quota operation failed due to -EIO or -ENOSPC, so later, a) checkpoint will skip syncing dquot metadata. b) CP_QUOTA_NEED_FSCK_FLAG will be set in last cp pack to give a hint for fsck repairing. 3. add a global state SBI_QUOTA_SKIP_FLUSH, in checkpoint, if quota data updating is very heavy, it may cause hungtask in block_operation(). To avoid this, if our retry time exceed threshold, let's just skip flushing and retry in next checkpoint(). Signed-off-by: Weichao Guo Signed-off-by: Chao Yu [Jaegeuk Kim: avoid warnings and set fsck flag] Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 62 ++++++++++++++++++-- fs/f2fs/data.c | 16 ++++-- fs/f2fs/f2fs.h | 49 +++++++++++++--- fs/f2fs/file.c | 31 +++++++--- fs/f2fs/inline.c | 4 +- fs/f2fs/inode.c | 11 +++- fs/f2fs/namei.c | 4 -- fs/f2fs/recovery.c | 43 +++++++++++++- fs/f2fs/super.c | 121 ++++++++++++++++++++++++++++++++++++---- include/linux/f2fs_fs.h | 1 + 10 files changed, 294 insertions(+), 48 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index e4c4bef78e13a..6d72a47162699 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -1079,6 +1079,21 @@ static void __prepare_cp_block(struct f2fs_sb_info *sbi) ckpt->next_free_nid = cpu_to_le32(last_nid); } +static bool __need_flush_quota(struct f2fs_sb_info *sbi) +{ + if (!is_journalled_quota(sbi)) + return false; + if (is_sbi_flag_set(sbi, SBI_QUOTA_SKIP_FLUSH)) + return false; + if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_REPAIR)) + return false; + if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_FLUSH)) + return true; + if (get_pages(sbi, F2FS_DIRTY_QDATA)) + return true; + return false; +} + /* * Freeze all the FS-operations for checkpoint. */ @@ -1090,12 +1105,36 @@ static int block_operations(struct f2fs_sb_info *sbi) .for_reclaim = 0, }; struct blk_plug plug; - int err = 0; + int err = 0, cnt = 0; blk_start_plug(&plug); -retry_flush_dents: +retry_flush_quotas: + if (__need_flush_quota(sbi)) { + int locked; + + if (++cnt > DEFAULT_RETRY_QUOTA_FLUSH_COUNT) { + set_sbi_flag(sbi, SBI_QUOTA_SKIP_FLUSH); + f2fs_lock_all(sbi); + goto retry_flush_dents; + } + clear_sbi_flag(sbi, SBI_QUOTA_NEED_FLUSH); + + /* only failed during mount/umount/freeze/quotactl */ + locked = down_read_trylock(&sbi->sb->s_umount); + f2fs_quota_sync(sbi->sb, -1); + if (locked) + up_read(&sbi->sb->s_umount); + } + f2fs_lock_all(sbi); + if (__need_flush_quota(sbi)) { + f2fs_unlock_all(sbi); + cond_resched(); + goto retry_flush_quotas; + } + +retry_flush_dents: /* write all the dirty dentry pages */ if (get_pages(sbi, F2FS_DIRTY_DENTS)) { f2fs_unlock_all(sbi); @@ -1103,7 +1142,7 @@ retry_flush_dents: if (err) goto out; cond_resched(); - goto retry_flush_dents; + goto retry_flush_quotas; } /* @@ -1112,6 +1151,12 @@ retry_flush_dents: */ down_write(&sbi->node_change); + if (__need_flush_quota(sbi)) { + up_write(&sbi->node_change); + f2fs_unlock_all(sbi); + goto retry_flush_quotas; + } + if (get_pages(sbi, F2FS_DIRTY_IMETA)) { up_write(&sbi->node_change); f2fs_unlock_all(sbi); @@ -1119,7 +1164,7 @@ retry_flush_dents: if (err) goto out; cond_resched(); - goto retry_flush_dents; + goto retry_flush_quotas; } retry_flush_nodes: @@ -1215,6 +1260,14 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc) else __clear_ckpt_flags(ckpt, CP_DISABLED_FLAG); + if (is_sbi_flag_set(sbi, SBI_QUOTA_SKIP_FLUSH)) + __set_ckpt_flags(ckpt, CP_QUOTA_NEED_FSCK_FLAG); + else + __clear_ckpt_flags(ckpt, CP_QUOTA_NEED_FSCK_FLAG); + + if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_REPAIR)) + __set_ckpt_flags(ckpt, CP_QUOTA_NEED_FSCK_FLAG); + /* set this flag to activate crc|cp_ver for recovery */ __set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG); __clear_ckpt_flags(ckpt, CP_NOCRC_RECOVERY_FLAG); @@ -1422,6 +1475,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) clear_sbi_flag(sbi, SBI_IS_DIRTY); clear_sbi_flag(sbi, SBI_NEED_CP); + clear_sbi_flag(sbi, SBI_QUOTA_SKIP_FLUSH); sbi->unusable_block_count = 0; __set_cp_next_pack(sbi); diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index c016523f18baa..15a4a5703a39c 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -46,7 +46,7 @@ static bool __is_cp_guaranteed(struct page *page) inode->i_ino == F2FS_NODE_INO(sbi) || S_ISDIR(inode->i_mode) || (S_ISREG(inode->i_mode) && - is_inode_flag_set(inode, FI_ATOMIC_FILE)) || + (f2fs_is_atomic_file(inode) || IS_NOQUOTA(inode))) || is_cold_data(page)) return true; return false; @@ -1765,6 +1765,8 @@ bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio) return true; if (S_ISDIR(inode->i_mode)) return true; + if (IS_NOQUOTA(inode)) + return true; if (f2fs_is_atomic_file(inode)) return true; if (fio) { @@ -2015,7 +2017,7 @@ out: } unlock_page(page); - if (!S_ISDIR(inode->i_mode)) + if (!S_ISDIR(inode->i_mode) && !IS_NOQUOTA(inode)) f2fs_balance_fs(sbi, need_balance_fs); if (unlikely(f2fs_cp_error(sbi))) { @@ -2206,6 +2208,8 @@ static inline bool __should_serialize_io(struct inode *inode, { if (!S_ISREG(inode->i_mode)) return false; + if (IS_NOQUOTA(inode)) + return false; if (wbc->sync_mode != WB_SYNC_ALL) return true; if (get_dirty_pages(inode) >= SM_I(F2FS_I_SB(inode))->min_seq_blocks) @@ -2235,7 +2239,8 @@ static int __f2fs_write_data_pages(struct address_space *mapping, if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) goto skip_write; - if (S_ISDIR(inode->i_mode) && wbc->sync_mode == WB_SYNC_NONE && + if ((S_ISDIR(inode->i_mode) || IS_NOQUOTA(inode)) && + wbc->sync_mode == WB_SYNC_NONE && get_dirty_pages(inode) < nr_pages_to_skip(sbi, DATA) && f2fs_available_free_memory(sbi, DIRTY_DENTS)) goto skip_write; @@ -2300,7 +2305,7 @@ static void f2fs_write_failed(struct address_space *mapping, loff_t to) down_write(&F2FS_I(inode)->i_mmap_sem); truncate_pagecache(inode, i_size); - f2fs_truncate_blocks(inode, i_size, true); + f2fs_truncate_blocks(inode, i_size, true, true); up_write(&F2FS_I(inode)->i_mmap_sem); up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); @@ -2439,7 +2444,8 @@ repeat: if (err) goto fail; - if (need_balance && has_not_enough_free_secs(sbi, 0, 0)) { + if (need_balance && !IS_NOQUOTA(inode) && + has_not_enough_free_secs(sbi, 0, 0)) { unlock_page(page); f2fs_balance_fs(sbi, true); lock_page(page); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 2846ad61c4d9b..1dc72bd17a32e 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -532,6 +532,9 @@ enum { #define DEFAULT_RETRY_IO_COUNT 8 /* maximum retry read IO count */ +/* maximum retry quota flush count */ +#define DEFAULT_RETRY_QUOTA_FLUSH_COUNT 8 + #define F2FS_LINK_MAX 0xffffffff /* maximum link count per file */ #define MAX_DIR_RA_PAGES 4 /* maximum ra pages of dir */ @@ -1100,6 +1103,9 @@ enum { SBI_IS_SHUTDOWN, /* shutdown by ioctl */ SBI_IS_RECOVERED, /* recovered orphan/data */ SBI_CP_DISABLED, /* CP was disabled last mount */ + SBI_QUOTA_NEED_FLUSH, /* need to flush quota info in CP */ + SBI_QUOTA_SKIP_FLUSH, /* skip flushing quota in current CP */ + SBI_QUOTA_NEED_REPAIR, /* quota file may be corrupted */ }; enum { @@ -1924,12 +1930,18 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi, { block_t valid_block_count; unsigned int valid_node_count; - bool quota = inode && !is_inode; + int err; - if (quota) { - int ret = dquot_reserve_block(inode, 1); - if (ret) - return ret; + if (is_inode) { + if (inode) { + err = dquot_alloc_inode(inode); + if (err) + return err; + } + } else { + err = dquot_reserve_block(inode, 1); + if (err) + return err; } if (time_to_inject(sbi, FAULT_BLOCK)) { @@ -1973,8 +1985,12 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi, return 0; enospc: - if (quota) + if (is_inode) { + if (inode) + dquot_free_inode(inode); + } else { dquot_release_reservation_block(inode, 1); + } return -ENOSPC; } @@ -1995,7 +2011,9 @@ static inline void dec_valid_node_count(struct f2fs_sb_info *sbi, spin_unlock(&sbi->stat_lock); - if (!is_inode) + if (is_inode) + dquot_free_inode(inode); + else f2fs_i_blocks_write(inode, 1, false, true); } @@ -2783,7 +2801,8 @@ static inline bool is_valid_data_blkaddr(struct f2fs_sb_info *sbi, */ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync); void f2fs_truncate_data_blocks(struct dnode_of_data *dn); -int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock); +int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock, + bool buf_write); int f2fs_truncate(struct inode *inode); int f2fs_getattr(const struct path *path, struct kstat *stat, u32 request_mask, unsigned int flags); @@ -2871,6 +2890,7 @@ static inline int f2fs_add_link(struct dentry *dentry, struct inode *inode) int f2fs_inode_dirtied(struct inode *inode, bool sync); void f2fs_inode_synced(struct inode *inode); int f2fs_enable_quota_files(struct f2fs_sb_info *sbi, bool rdonly); +int f2fs_quota_sync(struct super_block *sb, int type); void f2fs_quota_off_umount(struct super_block *sb); int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover); int f2fs_sync_fs(struct super_block *sb, int sync); @@ -3565,3 +3585,16 @@ extern void f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned int rate, #endif #endif + +static inline bool is_journalled_quota(struct f2fs_sb_info *sbi) +{ +#ifdef CONFIG_QUOTA + if (f2fs_sb_has_quota_ino(sbi->sb)) + return true; + if (F2FS_OPTION(sbi).s_qf_names[USRQUOTA] || + F2FS_OPTION(sbi).s_qf_names[GRPQUOTA] || + F2FS_OPTION(sbi).s_qf_names[PRJQUOTA]) + return true; +#endif + return false; +} diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index be973e6d1a99d..126730d6faaf2 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -586,7 +586,8 @@ truncate_out: return 0; } -int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock) +int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock, + bool buf_write) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct dnode_of_data dn; @@ -594,6 +595,7 @@ int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock) int count = 0, err = 0; struct page *ipage; bool truncate_page = false; + int flag = buf_write ? F2FS_GET_BLOCK_PRE_AIO : F2FS_GET_BLOCK_PRE_DIO; trace_f2fs_truncate_blocks_enter(inode, from); @@ -603,7 +605,7 @@ int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock) goto free_partial; if (lock) - f2fs_lock_op(sbi); + __do_map_lock(sbi, flag, true); ipage = f2fs_get_node_page(sbi, inode->i_ino); if (IS_ERR(ipage)) { @@ -641,7 +643,7 @@ free_next: err = f2fs_truncate_inode_blocks(inode, free_from); out: if (lock) - f2fs_unlock_op(sbi); + __do_map_lock(sbi, flag, false); free_partial: /* lastly zero out the first data page */ if (!err) @@ -676,7 +678,7 @@ int f2fs_truncate(struct inode *inode) return err; } - err = f2fs_truncate_blocks(inode, i_size_read(inode), true); + err = f2fs_truncate_blocks(inode, i_size_read(inode), true, false); if (err) return err; @@ -785,9 +787,24 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr) !uid_eq(attr->ia_uid, inode->i_uid)) || (attr->ia_valid & ATTR_GID && !gid_eq(attr->ia_gid, inode->i_gid))) { + f2fs_lock_op(F2FS_I_SB(inode)); err = dquot_transfer(inode, attr); - if (err) + if (err) { + set_sbi_flag(F2FS_I_SB(inode), + SBI_QUOTA_NEED_REPAIR); + f2fs_unlock_op(F2FS_I_SB(inode)); return err; + } + /* + * update uid/gid under lock_op(), so that dquot and inode can + * be updated atomically. + */ + if (attr->ia_valid & ATTR_UID) + inode->i_uid = attr->ia_uid; + if (attr->ia_valid & ATTR_GID) + inode->i_gid = attr->ia_gid; + f2fs_mark_inode_dirty_sync(inode, true); + f2fs_unlock_op(F2FS_I_SB(inode)); } if (attr->ia_valid & ATTR_SIZE) { @@ -1242,7 +1259,7 @@ static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len) new_size = i_size_read(inode) - len; truncate_pagecache(inode, new_size); - ret = f2fs_truncate_blocks(inode, new_size, true); + ret = f2fs_truncate_blocks(inode, new_size, true, false); up_write(&F2FS_I(inode)->i_mmap_sem); if (!ret) f2fs_i_size_write(inode, new_size); @@ -1427,7 +1444,7 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len) f2fs_balance_fs(sbi, true); down_write(&F2FS_I(inode)->i_mmap_sem); - ret = f2fs_truncate_blocks(inode, i_size_read(inode), true); + ret = f2fs_truncate_blocks(inode, i_size_read(inode), true, false); up_write(&F2FS_I(inode)->i_mmap_sem); if (ret) return ret; diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 425d740f87fdc..cb31a719b0488 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -298,7 +298,7 @@ process_inline: clear_inode_flag(inode, FI_INLINE_DATA); f2fs_put_page(ipage, 1); } else if (ri && (ri->i_inline & F2FS_INLINE_DATA)) { - if (f2fs_truncate_blocks(inode, 0, false)) + if (f2fs_truncate_blocks(inode, 0, false, false)) return false; goto process_inline; } @@ -470,7 +470,7 @@ static int f2fs_add_inline_entries(struct inode *dir, void *inline_dentry) return 0; punch_dentry_pages: truncate_inode_pages(&dir->i_data, 0); - f2fs_truncate_blocks(dir, 0, false); + f2fs_truncate_blocks(dir, 0, false, false); f2fs_remove_dirty_inode(dir); return err; } diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 57a7a15239d69..91ceee0ed4c40 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -654,7 +654,11 @@ void f2fs_evict_inode(struct inode *inode) if (inode->i_nlink || is_bad_inode(inode)) goto no_delete; - dquot_initialize(inode); + err = dquot_initialize(inode); + if (err) { + err = 0; + set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR); + } f2fs_remove_ino_entry(sbi, inode->i_ino, APPEND_INO); f2fs_remove_ino_entry(sbi, inode->i_ino, UPDATE_INO); @@ -686,9 +690,10 @@ retry: goto retry; } - if (err) + if (err) { f2fs_update_inode_page(inode); - dquot_free_inode(inode); + set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR); + } sb_end_intwrite(inode->i_sb); no_delete: dquot_drop(inode); diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 33d36cd9f1912..aa53748e426a0 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -72,10 +72,6 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) if (err) goto fail_drop; - err = dquot_alloc_inode(inode); - if (err) - goto fail_drop; - set_inode_flag(inode, FI_NEW_INODE); /* If the directory encrypted, then we should encrypt the inode. */ diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index b7173ae30c75f..58e952d2d3ef7 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -195,6 +195,33 @@ out: return err; } +static int recover_quota_data(struct inode *inode, struct page *page) +{ + struct f2fs_inode *raw = F2FS_INODE(page); + struct iattr attr; + uid_t i_uid = le32_to_cpu(raw->i_uid); + gid_t i_gid = le32_to_cpu(raw->i_gid); + int err; + + memset(&attr, 0, sizeof(attr)); + + attr.ia_uid = make_kuid(inode->i_sb->s_user_ns, i_uid); + attr.ia_gid = make_kgid(inode->i_sb->s_user_ns, i_gid); + + if (!uid_eq(attr.ia_uid, inode->i_uid)) + attr.ia_valid |= ATTR_UID; + if (!gid_eq(attr.ia_gid, inode->i_gid)) + attr.ia_valid |= ATTR_GID; + + if (!attr.ia_valid) + return 0; + + err = dquot_transfer(inode, &attr); + if (err) + set_sbi_flag(F2FS_I_SB(inode), SBI_QUOTA_NEED_REPAIR); + return err; +} + static void recover_inline_flags(struct inode *inode, struct f2fs_inode *ri) { if (ri->i_inline & F2FS_PIN_FILE) @@ -207,12 +234,18 @@ static void recover_inline_flags(struct inode *inode, struct f2fs_inode *ri) clear_inode_flag(inode, FI_DATA_EXIST); } -static void recover_inode(struct inode *inode, struct page *page) +static int recover_inode(struct inode *inode, struct page *page) { struct f2fs_inode *raw = F2FS_INODE(page); char *name; + int err; inode->i_mode = le16_to_cpu(raw->i_mode); + + err = recover_quota_data(inode, page); + if (err) + return err; + i_uid_write(inode, le32_to_cpu(raw->i_uid)); i_gid_write(inode, le32_to_cpu(raw->i_gid)); @@ -254,6 +287,7 @@ static void recover_inode(struct inode *inode, struct page *page) f2fs_msg(inode->i_sb, KERN_NOTICE, "recover_inode: ino = %x, name = %s, inline = %x", ino_of_node(page), name, raw->i_inline); + return 0; } static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head, @@ -622,8 +656,11 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list, * In this case, we can lose the latest inode(x). * So, call recover_inode for the inode update. */ - if (IS_INODE(page)) - recover_inode(entry->inode, page); + if (IS_INODE(page)) { + err = recover_inode(entry->inode, page); + if (err) + break; + } if (entry->last_dentry == blkaddr) { err = recover_dentry(entry->inode, page, dir_list); if (err) { diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 89cf48b416bb7..ec91486957644 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1711,6 +1711,7 @@ repeat: congestion_wait(BLK_RW_ASYNC, HZ/50); goto repeat; } + set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR); return PTR_ERR(page); } @@ -1722,6 +1723,7 @@ repeat: } if (unlikely(!PageUptodate(page))) { f2fs_put_page(page, 1); + set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR); return -EIO; } @@ -1763,6 +1765,7 @@ retry: congestion_wait(BLK_RW_ASYNC, HZ/50); goto retry; } + set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR); break; } @@ -1799,6 +1802,12 @@ static qsize_t *f2fs_get_reserved_space(struct inode *inode) static int f2fs_quota_on_mount(struct f2fs_sb_info *sbi, int type) { + if (is_set_ckpt_flags(sbi, CP_QUOTA_NEED_FSCK_FLAG)) { + f2fs_msg(sbi->sb, KERN_ERR, + "quota sysfile may be corrupted, skip loading it"); + return 0; + } + return dquot_quota_on_mount(sbi->sb, F2FS_OPTION(sbi).s_qf_names[type], F2FS_OPTION(sbi).s_jquota_fmt, type); } @@ -1869,7 +1878,14 @@ static int f2fs_enable_quotas(struct super_block *sb) test_opt(F2FS_SB(sb), PRJQUOTA), }; - sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE | DQUOT_NOLIST_DIRTY; + if (is_set_ckpt_flags(F2FS_SB(sb), CP_QUOTA_NEED_FSCK_FLAG)) { + f2fs_msg(sb, KERN_ERR, + "quota file may be corrupted, skip loading it"); + return 0; + } + + sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE; + for (type = 0; type < MAXQUOTAS; type++) { qf_inum = f2fs_qf_ino(sb, type); if (qf_inum) { @@ -1883,6 +1899,8 @@ static int f2fs_enable_quotas(struct super_block *sb) "fsck to fix.", type, err); for (type--; type >= 0; type--) dquot_quota_off(sb, type); + set_sbi_flag(F2FS_SB(sb), + SBI_QUOTA_NEED_REPAIR); return err; } } @@ -1890,35 +1908,51 @@ static int f2fs_enable_quotas(struct super_block *sb) return 0; } -static int f2fs_quota_sync(struct super_block *sb, int type) +int f2fs_quota_sync(struct super_block *sb, int type) { + struct f2fs_sb_info *sbi = F2FS_SB(sb); struct quota_info *dqopt = sb_dqopt(sb); int cnt; int ret; ret = dquot_writeback_dquots(sb, type); if (ret) - return ret; + goto out; /* * Now when everything is written we can discard the pagecache so * that userspace sees the changes. */ for (cnt = 0; cnt < MAXQUOTAS; cnt++) { + struct address_space *mapping; + if (type != -1 && cnt != type) continue; if (!sb_has_quota_active(sb, cnt)) continue; - ret = filemap_write_and_wait(dqopt->files[cnt]->i_mapping); + mapping = dqopt->files[cnt]->i_mapping; + + ret = filemap_fdatawrite(mapping); if (ret) - return ret; + goto out; + + /* if we are using journalled quota */ + if (is_journalled_quota(sbi)) + continue; + + ret = filemap_fdatawait(mapping); + if (ret) + set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR); inode_lock(dqopt->files[cnt]); truncate_inode_pages(&dqopt->files[cnt]->i_data, 0); inode_unlock(dqopt->files[cnt]); } - return 0; +out: + if (ret) + set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR); + return ret; } static int f2fs_quota_on(struct super_block *sb, int type, int format_id, @@ -1986,7 +2020,7 @@ void f2fs_quota_off_umount(struct super_block *sb) "Fail to turn off disk quota " "(type: %d, err: %d, ret:%d), Please " "run fsck to fix it.", type, err, ret); - set_sbi_flag(F2FS_SB(sb), SBI_NEED_FSCK); + set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR); } } } @@ -2003,6 +2037,61 @@ static void f2fs_truncate_quota_inode_pages(struct super_block *sb) } } +static int f2fs_dquot_commit(struct dquot *dquot) +{ + int ret; + + ret = dquot_commit(dquot); + if (ret < 0) + set_sbi_flag(F2FS_SB(dquot->dq_sb), SBI_QUOTA_NEED_REPAIR); + return ret; +} + +static int f2fs_dquot_acquire(struct dquot *dquot) +{ + int ret; + + ret = dquot_acquire(dquot); + if (ret < 0) + set_sbi_flag(F2FS_SB(dquot->dq_sb), SBI_QUOTA_NEED_REPAIR); + + return ret; +} + +static int f2fs_dquot_release(struct dquot *dquot) +{ + int ret; + + ret = dquot_release(dquot); + if (ret < 0) + set_sbi_flag(F2FS_SB(dquot->dq_sb), SBI_QUOTA_NEED_REPAIR); + return ret; +} + +static int f2fs_dquot_mark_dquot_dirty(struct dquot *dquot) +{ + struct super_block *sb = dquot->dq_sb; + struct f2fs_sb_info *sbi = F2FS_SB(sb); + int ret; + + ret = dquot_mark_dquot_dirty(dquot); + + /* if we are using journalled quota */ + if (is_journalled_quota(sbi)) + set_sbi_flag(sbi, SBI_QUOTA_NEED_FLUSH); + + return ret; +} + +static int f2fs_dquot_commit_info(struct super_block *sb, int type) +{ + int ret; + + ret = dquot_commit_info(sb, type); + if (ret < 0) + set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR); + return ret; +} static int f2fs_get_projid(struct inode *inode, kprojid_t *projid) { @@ -2012,11 +2101,11 @@ static int f2fs_get_projid(struct inode *inode, kprojid_t *projid) static const struct dquot_operations f2fs_quota_operations = { .get_reserved_space = f2fs_get_reserved_space, - .write_dquot = dquot_commit, - .acquire_dquot = dquot_acquire, - .release_dquot = dquot_release, - .mark_dirty = dquot_mark_dquot_dirty, - .write_info = dquot_commit_info, + .write_dquot = f2fs_dquot_commit, + .acquire_dquot = f2fs_dquot_acquire, + .release_dquot = f2fs_dquot_release, + .mark_dirty = f2fs_dquot_mark_dquot_dirty, + .write_info = f2fs_dquot_commit_info, .alloc_dquot = dquot_alloc, .destroy_dquot = dquot_destroy, .get_projid = f2fs_get_projid, @@ -2034,6 +2123,11 @@ static const struct quotactl_ops f2fs_quotactl_ops = { .get_nextdqblk = dquot_get_next_dqblk, }; #else +int f2fs_quota_sync(struct super_block *sb, int type) +{ + return 0; +} + void f2fs_quota_off_umount(struct super_block *sb) { } @@ -3104,6 +3198,9 @@ try_onemore: goto free_meta_inode; } + if (__is_set_ckpt_flags(F2FS_CKPT(sbi), CP_QUOTA_NEED_FSCK_FLAG)) + set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR); + /* Initialize device list */ err = f2fs_scan_devices(sbi); if (err) { diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 8b9c7dc0260c6..d7711048ef93b 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -117,6 +117,7 @@ struct f2fs_super_block { * For checkpoint */ #define CP_DISABLED_FLAG 0x00001000 +#define CP_QUOTA_NEED_FSCK_FLAG 0x00000800 #define CP_LARGE_NAT_BITMAP_FLAG 0x00000400 #define CP_NOCRC_RECOVERY_FLAG 0x00000200 #define CP_TRIMMED_FLAG 0x00000100 -- GitLab From 34983b0428cac0e9ec793022b1998adfb57d5b3c Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 25 Sep 2018 15:36:02 +0800 Subject: [PATCH 1078/2269] f2fs: fix to keep project quota consistent This patch does below changes to keep consistence of project quota data in sudden power-cut case: - update inode.i_projid and project quota atomically under lock_op() in f2fs_ioc_setproject() - recover inode.i_projid and project quota in recover_inode() Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 1 + fs/f2fs/file.c | 37 ++++++++++++++++++++++++++++--------- fs/f2fs/recovery.c | 12 ++++++++++-- 3 files changed, 39 insertions(+), 11 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 1dc72bd17a32e..9c7510c98481e 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2812,6 +2812,7 @@ void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count); int f2fs_precache_extents(struct inode *inode); long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg); +int f2fs_transfer_project_quota(struct inode *inode, kprojid_t kprojid); int f2fs_pin_file_control(struct inode *inode, bool inc); /* diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 126730d6faaf2..ad83382078806 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -2613,13 +2613,29 @@ static int f2fs_ioc_get_features(struct file *filp, unsigned long arg) } #ifdef CONFIG_QUOTA +int f2fs_transfer_project_quota(struct inode *inode, kprojid_t kprojid) +{ + struct dquot *transfer_to[MAXQUOTAS] = {}; + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct super_block *sb = sbi->sb; + int err = 0; + + transfer_to[PRJQUOTA] = dqget(sb, make_kqid_projid(kprojid)); + if (!IS_ERR(transfer_to[PRJQUOTA])) { + err = __dquot_transfer(inode, transfer_to); + if (err) + set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR); + dqput(transfer_to[PRJQUOTA]); + } + return err; +} + static int f2fs_ioc_setproject(struct file *filp, __u32 projid) { struct inode *inode = file_inode(filp); struct f2fs_inode_info *fi = F2FS_I(inode); struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct super_block *sb = sbi->sb; - struct dquot *transfer_to[MAXQUOTAS] = {}; struct page *ipage; kprojid_t kprojid; int err; @@ -2660,21 +2676,24 @@ static int f2fs_ioc_setproject(struct file *filp, __u32 projid) if (err) return err; - transfer_to[PRJQUOTA] = dqget(sb, make_kqid_projid(kprojid)); - if (!IS_ERR(transfer_to[PRJQUOTA])) { - err = __dquot_transfer(inode, transfer_to); - dqput(transfer_to[PRJQUOTA]); - if (err) - goto out_dirty; - } + f2fs_lock_op(sbi); + err = f2fs_transfer_project_quota(inode, kprojid); + if (err) + goto out_unlock; F2FS_I(inode)->i_projid = kprojid; inode->i_ctime = current_time(inode); -out_dirty: f2fs_mark_inode_dirty_sync(inode, true); +out_unlock: + f2fs_unlock_op(sbi); return err; } #else +int f2fs_transfer_project_quota(struct inode *inode, kprojid_t kprojid) +{ + return 0; +} + static int f2fs_ioc_setproject(struct file *filp, __u32 projid) { if (projid != F2FS_DEF_PROJID) diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 58e952d2d3ef7..7e9f0b7a1f406 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -254,10 +254,18 @@ static int recover_inode(struct inode *inode, struct page *page) F2FS_FITS_IN_INODE(raw, le16_to_cpu(raw->i_extra_isize), i_projid)) { projid_t i_projid; + kprojid_t kprojid; i_projid = (projid_t)le32_to_cpu(raw->i_projid); - F2FS_I(inode)->i_projid = - make_kprojid(&init_user_ns, i_projid); + kprojid = make_kprojid(&init_user_ns, i_projid); + + if (!projid_eq(kprojid, F2FS_I(inode)->i_projid)) { + err = f2fs_transfer_project_quota(inode, + kprojid); + if (err) + return err; + F2FS_I(inode)->i_projid = kprojid; + } } } -- GitLab From 12064f3a794e67c81d2bae15606f03f436630e88 Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Fri, 6 Jul 2018 16:24:27 -0700 Subject: [PATCH 1079/2269] ANDROID: sdcardfs: Add option to drop unused dentries This adds the nocache mount option, which will cause sdcardfs to always drop dentries that are not in use, preventing cached entries from holding on to lower dentries, which could cause strange behavior when bypassing the sdcardfs layer and directly changing the lower fs. Change-Id: I70268584a20b989ae8cfdd278a2e4fa1605217fb Signed-off-by: Daniel Rosenberg --- fs/sdcardfs/dentry.c | 7 +++++++ fs/sdcardfs/main.c | 6 ++++++ fs/sdcardfs/sdcardfs.h | 1 + fs/sdcardfs/super.c | 2 ++ 4 files changed, 16 insertions(+) diff --git a/fs/sdcardfs/dentry.c b/fs/sdcardfs/dentry.c index 776d549b397b3..cb573f1efbfc4 100644 --- a/fs/sdcardfs/dentry.c +++ b/fs/sdcardfs/dentry.c @@ -123,6 +123,12 @@ out: return err; } +/* 1 = delete, 0 = cache */ +static int sdcardfs_d_delete(const struct dentry *d) +{ + return SDCARDFS_SB(d->d_sb)->options.nocache ? 1 : 0; +} + static void sdcardfs_d_release(struct dentry *dentry) { if (!dentry || !dentry->d_fsdata) @@ -181,6 +187,7 @@ static void sdcardfs_canonical_path(const struct path *path, const struct dentry_operations sdcardfs_ci_dops = { .d_revalidate = sdcardfs_d_revalidate, + .d_delete = sdcardfs_d_delete, .d_release = sdcardfs_d_release, .d_hash = sdcardfs_hash_ci, .d_compare = sdcardfs_cmp_ci, diff --git a/fs/sdcardfs/main.c b/fs/sdcardfs/main.c index 27ec726e7a464..ba52af8644cc8 100644 --- a/fs/sdcardfs/main.c +++ b/fs/sdcardfs/main.c @@ -34,6 +34,7 @@ enum { Opt_reserved_mb, Opt_gid_derivation, Opt_default_normal, + Opt_nocache, Opt_err, }; @@ -48,6 +49,7 @@ static const match_table_t sdcardfs_tokens = { {Opt_gid_derivation, "derive_gid"}, {Opt_default_normal, "default_normal"}, {Opt_reserved_mb, "reserved_mb=%u"}, + {Opt_nocache, "nocache"}, {Opt_err, NULL} }; @@ -71,6 +73,7 @@ static int parse_options(struct super_block *sb, char *options, int silent, /* by default, gid derivation is off */ opts->gid_derivation = false; opts->default_normal = false; + opts->nocache = false; *debug = 0; @@ -128,6 +131,9 @@ static int parse_options(struct super_block *sb, char *options, int silent, case Opt_default_normal: opts->default_normal = true; break; + case Opt_nocache: + opts->nocache = true; + break; /* unknown option */ default: if (!silent) diff --git a/fs/sdcardfs/sdcardfs.h b/fs/sdcardfs/sdcardfs.h index ec2290a16d3cb..c2a16a023f2cf 100644 --- a/fs/sdcardfs/sdcardfs.h +++ b/fs/sdcardfs/sdcardfs.h @@ -198,6 +198,7 @@ struct sdcardfs_mount_options { bool gid_derivation; bool default_normal; unsigned int reserved_mb; + bool nocache; }; struct sdcardfs_vfsmount_options { diff --git a/fs/sdcardfs/super.c b/fs/sdcardfs/super.c index cffcdb11cb8ac..140696ed3ed3b 100644 --- a/fs/sdcardfs/super.c +++ b/fs/sdcardfs/super.c @@ -311,6 +311,8 @@ static int sdcardfs_show_options(struct vfsmount *mnt, struct seq_file *m, seq_puts(m, ",default_normal"); if (opts->reserved_mb != 0) seq_printf(m, ",reserved=%uMB", opts->reserved_mb); + if (opts->nocache) + seq_printf(m, ",nocache"); return 0; }; -- GitLab From 3b5d512831af3a43303d1c3615a0a7d6c693bce9 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Wed, 1 Aug 2018 13:45:11 +0200 Subject: [PATCH 1080/2269] xfrm: Validate address prefix lengths in the xfrm selector. [ Upstream commit 07bf7908950a8b14e81aa1807e3c667eab39287a ] We don't validate the address prefix lengths in the xfrm selector we got from userspace. This can lead to undefined behaviour in the address matching functions if the prefix is too big for the given address family. Fix this by checking the prefixes and refuse SA/policy insertation when a prefix is invalid. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: Air Icy Signed-off-by: Steffen Klassert Signed-off-by: Sasha Levin --- net/xfrm/xfrm_user.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 5554d28a32eb1..4292347bf45e7 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -151,10 +151,16 @@ static int verify_newsa_info(struct xfrm_usersa_info *p, err = -EINVAL; switch (p->family) { case AF_INET: + if (p->sel.prefixlen_d > 32 || p->sel.prefixlen_s > 32) + goto out; + break; case AF_INET6: #if IS_ENABLED(CONFIG_IPV6) + if (p->sel.prefixlen_d > 128 || p->sel.prefixlen_s > 128) + goto out; + break; #else err = -EAFNOSUPPORT; @@ -1353,10 +1359,16 @@ static int verify_newpolicy_info(struct xfrm_userpolicy_info *p) switch (p->sel.family) { case AF_INET: + if (p->sel.prefixlen_d > 32 || p->sel.prefixlen_s > 32) + return -EINVAL; + break; case AF_INET6: #if IS_ENABLED(CONFIG_IPV6) + if (p->sel.prefixlen_d > 128 || p->sel.prefixlen_s > 128) + return -EINVAL; + break; #else return -EAFNOSUPPORT; -- GitLab From 2a55e64d5c5e98e425f7eafd2c480440b4aa78c2 Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Fri, 31 Aug 2018 08:38:49 -0300 Subject: [PATCH 1081/2269] xfrm6: call kfree_skb when skb is toobig [ Upstream commit 215ab0f021c9fea3c18b75e7d522400ee6a49990 ] After commit d6990976af7c5d8f55903bfb4289b6fb030bf754 ("vti6: fix PMTU caching and reporting on xmit"), some too big skbs might be potentially passed down to __xfrm6_output, causing it to fail to transmit but not free the skb, causing a leak of skb, and consequentially a leak of dst references. After running pmtu.sh, that shows as failure to unregister devices in a namespace: [ 311.397671] unregister_netdevice: waiting for veth_b to become free. Usage count = 1 The fix is to call kfree_skb in case of transmit failures. Fixes: dd767856a36e ("xfrm6: Don't call icmpv6_send on local error") Signed-off-by: Thadeu Lima de Souza Cascardo Reviewed-by: Sabrina Dubroca Signed-off-by: Steffen Klassert Signed-off-by: Sasha Levin --- net/ipv6/xfrm6_output.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index 8ae87d4ec5ff6..29dae7f2ff14d 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -170,9 +170,11 @@ static int __xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb) if (toobig && xfrm6_local_dontfrag(skb)) { xfrm6_local_rxpmtu(skb, mtu); + kfree_skb(skb); return -EMSGSIZE; } else if (!skb->ignore_df && toobig && skb->sk) { xfrm_local_error(skb, mtu); + kfree_skb(skb); return -EMSGSIZE; } -- GitLab From a95d9004fbdedebb2e2550352808f0e8b1b0ae79 Mon Sep 17 00:00:00 2001 From: Sowmini Varadhan Date: Mon, 3 Sep 2018 04:36:52 -0700 Subject: [PATCH 1082/2269] xfrm: reset transport header back to network header after all input transforms ahave been applied [ Upstream commit bfc0698bebcb16d19ecfc89574ad4d696955e5d3 ] A policy may have been set up with multiple transforms (e.g., ESP and ipcomp). In this situation, the ingress IPsec processing iterates in xfrm_input() and applies each transform in turn, processing the nexthdr to find any additional xfrm that may apply. This patch resets the transport header back to network header only after the last transformation so that subsequent xfrms can find the correct transport header. Fixes: 7785bba299a8 ("esp: Add a software GRO codepath") Suggested-by: Steffen Klassert Signed-off-by: Sowmini Varadhan Signed-off-by: Steffen Klassert Signed-off-by: Sasha Levin --- net/ipv4/xfrm4_input.c | 1 + net/ipv4/xfrm4_mode_transport.c | 4 +--- net/ipv6/xfrm6_input.c | 1 + net/ipv6/xfrm6_mode_transport.c | 4 +--- 4 files changed, 4 insertions(+), 6 deletions(-) diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index bcfc00e88756d..f8de2482a5292 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c @@ -67,6 +67,7 @@ int xfrm4_transport_finish(struct sk_buff *skb, int async) if (xo && (xo->flags & XFRM_GRO)) { skb_mac_header_rebuild(skb); + skb_reset_transport_header(skb); return 0; } diff --git a/net/ipv4/xfrm4_mode_transport.c b/net/ipv4/xfrm4_mode_transport.c index 3d36644890bb6..1ad2c2c4e250f 100644 --- a/net/ipv4/xfrm4_mode_transport.c +++ b/net/ipv4/xfrm4_mode_transport.c @@ -46,7 +46,6 @@ static int xfrm4_transport_output(struct xfrm_state *x, struct sk_buff *skb) static int xfrm4_transport_input(struct xfrm_state *x, struct sk_buff *skb) { int ihl = skb->data - skb_transport_header(skb); - struct xfrm_offload *xo = xfrm_offload(skb); if (skb->transport_header != skb->network_header) { memmove(skb_transport_header(skb), @@ -54,8 +53,7 @@ static int xfrm4_transport_input(struct xfrm_state *x, struct sk_buff *skb) skb->network_header = skb->transport_header; } ip_hdr(skb)->tot_len = htons(skb->len + ihl); - if (!xo || !(xo->flags & XFRM_GRO)) - skb_reset_transport_header(skb); + skb_reset_transport_header(skb); return 0; } diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index 841f4a07438e8..9ef490dddcea2 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -59,6 +59,7 @@ int xfrm6_transport_finish(struct sk_buff *skb, int async) if (xo && (xo->flags & XFRM_GRO)) { skb_mac_header_rebuild(skb); + skb_reset_transport_header(skb); return -1; } diff --git a/net/ipv6/xfrm6_mode_transport.c b/net/ipv6/xfrm6_mode_transport.c index 9ad07a91708ef..3c29da5defe6c 100644 --- a/net/ipv6/xfrm6_mode_transport.c +++ b/net/ipv6/xfrm6_mode_transport.c @@ -51,7 +51,6 @@ static int xfrm6_transport_output(struct xfrm_state *x, struct sk_buff *skb) static int xfrm6_transport_input(struct xfrm_state *x, struct sk_buff *skb) { int ihl = skb->data - skb_transport_header(skb); - struct xfrm_offload *xo = xfrm_offload(skb); if (skb->transport_header != skb->network_header) { memmove(skb_transport_header(skb), @@ -60,8 +59,7 @@ static int xfrm6_transport_input(struct xfrm_state *x, struct sk_buff *skb) } ipv6_hdr(skb)->payload_len = htons(skb->len + ihl - sizeof(struct ipv6hdr)); - if (!xo || !(xo->flags & XFRM_GRO)) - skb_reset_transport_header(skb); + skb_reset_transport_header(skb); return 0; } -- GitLab From 64f38286c7be284f4eba70bff165269a65461183 Mon Sep 17 00:00:00 2001 From: Sowmini Varadhan Date: Mon, 3 Sep 2018 04:36:53 -0700 Subject: [PATCH 1083/2269] xfrm: reset crypto_done when iterating over multiple input xfrms [ Upstream commit 782710e333a526780d65918d669cb96646983ba2 ] We only support one offloaded xfrm (we do not have devices that can handle more than one offload), so reset crypto_done in xfrm_input() when iterating over multiple transforms in xfrm_input, so that we can invoke the appropriate x->type->input for the non-offloaded transforms Fixes: d77e38e612a0 ("xfrm: Add an IPsec hardware offloading API") Signed-off-by: Sowmini Varadhan Signed-off-by: Steffen Klassert Signed-off-by: Sasha Levin --- net/xfrm/xfrm_input.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 9f492dc417d59..8e75319dd9c0c 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -453,6 +453,7 @@ resume: XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR); goto drop; } + crypto_done = false; } while (!err); err = xfrm_rcv_cb(skb, family, x->type->proto, 0); -- GitLab From b0be0d83fc6894337b807cbf66ee66e001caf89b Mon Sep 17 00:00:00 2001 From: Andrei Otcheretianski Date: Wed, 5 Sep 2018 08:06:13 +0300 Subject: [PATCH 1084/2269] mac80211: Always report TX status [ Upstream commit 8682250b3c1b75a45feb7452bc413d004cfe3778 ] If a frame is dropped for any reason, mac80211 wouldn't report the TX status back to user space. As the user space may rely on the TX_STATUS to kick its state machines, resends etc, it's better to just report this frame as not acked instead. Signed-off-by: Andrei Otcheretianski Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/mac80211/status.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/net/mac80211/status.c b/net/mac80211/status.c index da7427a415299..ccac205e58533 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -470,11 +470,6 @@ static void ieee80211_report_ack_skb(struct ieee80211_local *local, if (!skb) return; - if (dropped) { - dev_kfree_skb_any(skb); - return; - } - if (info->flags & IEEE80211_TX_INTFL_NL80211_FRAME_TX) { u64 cookie = IEEE80211_SKB_CB(skb)->ack.cookie; struct ieee80211_sub_if_data *sdata; @@ -495,6 +490,8 @@ static void ieee80211_report_ack_skb(struct ieee80211_local *local, } rcu_read_unlock(); + dev_kfree_skb_any(skb); + } else if (dropped) { dev_kfree_skb_any(skb); } else { /* consumes skb */ -- GitLab From d46c334f07f5f67cf0fc56a1e8dace1a97f8b908 Mon Sep 17 00:00:00 2001 From: Andrei Otcheretianski Date: Wed, 5 Sep 2018 08:06:12 +0300 Subject: [PATCH 1085/2269] cfg80211: reg: Init wiphy_idx in regulatory_hint_core() [ Upstream commit 24f33e64fcd0d50a4b1a8e5b41bd0257aa66b0e8 ] Core regulatory hints didn't set wiphy_idx to WIPHY_IDX_INVALID. Since the regulatory request is zeroed, wiphy_idx was always implicitly set to 0. This resulted in updating only phy #0. Fix that. Fixes: 806a9e39670b ("cfg80211: make regulatory_request use wiphy_idx instead of wiphy") Signed-off-by: Andrei Otcheretianski Signed-off-by: Luca Coelho [add fixes tag] Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/wireless/reg.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 6e94f6934a0e7..6f032c7b8732d 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -2384,6 +2384,7 @@ static int regulatory_hint_core(const char *alpha2) request->alpha2[0] = alpha2[0]; request->alpha2[1] = alpha2[1]; request->initiator = NL80211_REGDOM_SET_BY_CORE; + request->wiphy_idx = WIPHY_IDX_INVALID; queue_regulatory_request(request); -- GitLab From 574be53ab82c3525236c8a3dd70f3dcaebd9e9aa Mon Sep 17 00:00:00 2001 From: Bob Copeland Date: Wed, 5 Sep 2018 06:22:59 -0400 Subject: [PATCH 1086/2269] mac80211: fix pending queue hang due to TX_DROP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 6eae4a6c2be387fec41b0d2782c4fffb57159498 ] In our environment running lots of mesh nodes, we are seeing the pending queue hang periodically, with the debugfs queues file showing lines such as: 00: 0x00000000/348 i.e. there are a large number of frames but no stop reason set. One way this could happen is if queue processing from the pending tasklet exited early without processing all frames, and without having some future event (incoming frame, stop reason flag, ...) to reschedule it. Exactly this can occur today if ieee80211_tx() returns false due to packet drops or power-save buffering in the tx handlers. In the past, this function would return true in such cases, and the change to false doesn't seem to be intentional. Fix this case by reverting to the previous behavior. Fixes: bb42f2d13ffc ("mac80211: Move reorder-sensitive TX handlers to after TXQ dequeue") Signed-off-by: Bob Copeland Acked-by: Toke Høiland-Jørgensen Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/mac80211/tx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index d8fddd88bf468..a17a56032a215 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1837,7 +1837,7 @@ static bool ieee80211_tx(struct ieee80211_sub_if_data *sdata, sdata->vif.hw_queue[skb_get_queue_mapping(skb)]; if (invoke_tx_handlers_early(&tx)) - return false; + return true; if (ieee80211_queue_skb(local, sdata, tx.sta, tx.skb)) return true; -- GitLab From 9da59d2e283d424cd9c947b849fc819536bd0882 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Wed, 5 Sep 2018 18:52:22 +0300 Subject: [PATCH 1087/2269] cfg80211: Address some corner cases in scan result channel updating [ Upstream commit 119f94a6fefcc76d47075b83d2b73d04c895df78 ] cfg80211_get_bss_channel() is used to update the RX channel based on the available frame payload information (channel number from DSSS Parameter Set element or HT Operation element). This is needed on 2.4 GHz channels where frames may be received on neighboring channels due to overlapping frequency range. This might of some use on the 5 GHz band in some corner cases, but things are more complex there since there is no n:1 or 1:n mapping between channel numbers and frequencies due to multiple different starting frequencies in different operating classes. This could result in ieee80211_channel_to_frequency() returning incorrect frequency and ieee80211_get_channel() returning incorrect channel information (or indication of no match). In the previous implementation, this could result in some scan results being dropped completely, e.g., for the 4.9 GHz channels. That prevented connection to such BSSs. Fix this by using the driver-provided channel pointer if ieee80211_get_channel() does not find matching channel data for the channel number in the frame payload and if the scan is done with 5 MHz or 10 MHz channel bandwidth. While doing this, also add comments describing what the function is trying to achieve to make it easier to understand what happens here and why. Signed-off-by: Jouni Malinen Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/wireless/scan.c | 58 ++++++++++++++++++++++++++++++++++++++------- 1 file changed, 49 insertions(+), 9 deletions(-) diff --git a/net/wireless/scan.c b/net/wireless/scan.c index f6c5fe4825065..5ed0ed0559dc4 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -1055,13 +1055,23 @@ cfg80211_bss_update(struct cfg80211_registered_device *rdev, return NULL; } +/* + * Update RX channel information based on the available frame payload + * information. This is mainly for the 2.4 GHz band where frames can be received + * from neighboring channels and the Beacon frames use the DSSS Parameter Set + * element to indicate the current (transmitting) channel, but this might also + * be needed on other bands if RX frequency does not match with the actual + * operating channel of a BSS. + */ static struct ieee80211_channel * cfg80211_get_bss_channel(struct wiphy *wiphy, const u8 *ie, size_t ielen, - struct ieee80211_channel *channel) + struct ieee80211_channel *channel, + enum nl80211_bss_scan_width scan_width) { const u8 *tmp; u32 freq; int channel_number = -1; + struct ieee80211_channel *alt_channel; tmp = cfg80211_find_ie(WLAN_EID_DS_PARAMS, ie, ielen); if (tmp && tmp[1] == 1) { @@ -1075,16 +1085,45 @@ cfg80211_get_bss_channel(struct wiphy *wiphy, const u8 *ie, size_t ielen, } } - if (channel_number < 0) + if (channel_number < 0) { + /* No channel information in frame payload */ return channel; + } freq = ieee80211_channel_to_frequency(channel_number, channel->band); - channel = ieee80211_get_channel(wiphy, freq); - if (!channel) - return NULL; - if (channel->flags & IEEE80211_CHAN_DISABLED) + alt_channel = ieee80211_get_channel(wiphy, freq); + if (!alt_channel) { + if (channel->band == NL80211_BAND_2GHZ) { + /* + * Better not allow unexpected channels when that could + * be going beyond the 1-11 range (e.g., discovering + * BSS on channel 12 when radio is configured for + * channel 11. + */ + return NULL; + } + + /* No match for the payload channel number - ignore it */ + return channel; + } + + if (scan_width == NL80211_BSS_CHAN_WIDTH_10 || + scan_width == NL80211_BSS_CHAN_WIDTH_5) { + /* + * Ignore channel number in 5 and 10 MHz channels where there + * may not be an n:1 or 1:n mapping between frequencies and + * channel numbers. + */ + return channel; + } + + /* + * Use the channel determined through the payload channel number + * instead of the RX channel reported by the driver. + */ + if (alt_channel->flags & IEEE80211_CHAN_DISABLED) return NULL; - return channel; + return alt_channel; } /* Returned bss is reference counted and must be cleaned up appropriately. */ @@ -1109,7 +1148,8 @@ cfg80211_inform_bss_data(struct wiphy *wiphy, (data->signal < 0 || data->signal > 100))) return NULL; - channel = cfg80211_get_bss_channel(wiphy, ie, ielen, data->chan); + channel = cfg80211_get_bss_channel(wiphy, ie, ielen, data->chan, + data->scan_width); if (!channel) return NULL; @@ -1207,7 +1247,7 @@ cfg80211_inform_bss_frame_data(struct wiphy *wiphy, return NULL; channel = cfg80211_get_bss_channel(wiphy, mgmt->u.beacon.variable, - ielen, data->chan); + ielen, data->chan, data->scan_width); if (!channel) return NULL; -- GitLab From ebec37ed21a220b824f9ac8f467c81d0955b01ce Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 5 Sep 2018 13:34:02 +0200 Subject: [PATCH 1088/2269] mac80211: TDLS: fix skb queue/priority assignment [ Upstream commit cb59bc14e830028d2244861216df038165d7625d ] If the TDLS setup happens over a connection to an AP that doesn't have QoS, we nevertheless assign a non-zero TID (skb->priority) and queue mapping, which may confuse us or drivers later. Fix it by just assigning the special skb->priority and then using ieee80211_select_queue() just like other data frames would go through. Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/mac80211/tdls.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c index 91093d4a2f841..6e7aa65cf3454 100644 --- a/net/mac80211/tdls.c +++ b/net/mac80211/tdls.c @@ -16,6 +16,7 @@ #include "ieee80211_i.h" #include "driver-ops.h" #include "rate.h" +#include "wme.h" /* give usermode some time for retries in setting up the TDLS session */ #define TDLS_PEER_SETUP_TIMEOUT (15 * HZ) @@ -1006,14 +1007,13 @@ ieee80211_tdls_prep_mgmt_packet(struct wiphy *wiphy, struct net_device *dev, switch (action_code) { case WLAN_TDLS_SETUP_REQUEST: case WLAN_TDLS_SETUP_RESPONSE: - skb_set_queue_mapping(skb, IEEE80211_AC_BK); - skb->priority = 2; + skb->priority = 256 + 2; break; default: - skb_set_queue_mapping(skb, IEEE80211_AC_VI); - skb->priority = 5; + skb->priority = 256 + 5; break; } + skb_set_queue_mapping(skb, ieee80211_select_queue(sdata, skb)); /* * Set the WLAN_TDLS_TEARDOWN flag to indicate a teardown in progress. -- GitLab From 35507aabf0e5e50525de25603328f09a7a629da5 Mon Sep 17 00:00:00 2001 From: Yuan-Chi Pang Date: Thu, 6 Sep 2018 16:57:48 +0800 Subject: [PATCH 1089/2269] mac80211: fix TX status reporting for ieee80211s [ Upstream commit c42055105785580563535e6d3143cad95c7ac7ee ] TX status reporting to ieee80211s is through ieee80211s_update_metric. There are two problems about ieee80211s_update_metric: 1. The purpose is to estimate the fail probability to a specific link. No need to restrict to data frame. 2. Current implementation does not work if wireless driver does not pass tx_status with skb. Fix this by removing ieee80211_is_data condition, passing ieee80211_tx_status directly to ieee80211s_update_metric, and putting it in both __ieee80211_tx_status and ieee80211_tx_status_ext. Signed-off-by: Yuan-Chi Pang Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/mac80211/mesh.h | 3 ++- net/mac80211/mesh_hwmp.c | 9 +++------ net/mac80211/status.c | 4 +++- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h index 7e5f271e3c30d..4f1c61637ce35 100644 --- a/net/mac80211/mesh.h +++ b/net/mac80211/mesh.h @@ -217,7 +217,8 @@ void mesh_rmc_free(struct ieee80211_sub_if_data *sdata); int mesh_rmc_init(struct ieee80211_sub_if_data *sdata); void ieee80211s_init(void); void ieee80211s_update_metric(struct ieee80211_local *local, - struct sta_info *sta, struct sk_buff *skb); + struct sta_info *sta, + struct ieee80211_tx_status *st); void ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata); void ieee80211_mesh_teardown_sdata(struct ieee80211_sub_if_data *sdata); int ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata); diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index 055ea36ff27b3..fab0764c315fe 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -295,15 +295,12 @@ int mesh_path_error_tx(struct ieee80211_sub_if_data *sdata, } void ieee80211s_update_metric(struct ieee80211_local *local, - struct sta_info *sta, struct sk_buff *skb) + struct sta_info *sta, + struct ieee80211_tx_status *st) { - struct ieee80211_tx_info *txinfo = IEEE80211_SKB_CB(skb); - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; + struct ieee80211_tx_info *txinfo = st->info; int failed; - if (!ieee80211_is_data(hdr->frame_control)) - return; - failed = !(txinfo->flags & IEEE80211_TX_STAT_ACK); /* moving average, scaled to 100. diff --git a/net/mac80211/status.c b/net/mac80211/status.c index ccac205e58533..bdf131ed5ce87 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -797,7 +797,7 @@ static void __ieee80211_tx_status(struct ieee80211_hw *hw, rate_control_tx_status(local, sband, status); if (ieee80211_vif_is_mesh(&sta->sdata->vif)) - ieee80211s_update_metric(local, sta, skb); + ieee80211s_update_metric(local, sta, status); if (!(info->flags & IEEE80211_TX_CTL_INJECTED) && acked) ieee80211_frame_acked(sta, skb); @@ -958,6 +958,8 @@ void ieee80211_tx_status_ext(struct ieee80211_hw *hw, } rate_control_tx_status(local, sband, status); + if (ieee80211_vif_is_mesh(&sta->sdata->vif)) + ieee80211s_update_metric(local, sta, status); } if (acked || noack_success) { -- GitLab From 73c6f86060703daf8b7aa72c51451a6339164915 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Tue, 11 Sep 2018 10:31:15 +0200 Subject: [PATCH 1090/2269] xfrm: Fix NULL pointer dereference when skb_dst_force clears the dst_entry. [ Upstream commit 9e1437937807b0122e8da1ca8765be2adca9aee6 ] Since commit 222d7dbd258d ("net: prevent dst uses after free") skb_dst_force() might clear the dst_entry attached to the skb. The xfrm code don't expect this to happen, so we crash with a NULL pointer dereference in this case. Fix it by checking skb_dst(skb) for NULL after skb_dst_force() and drop the packet in cast the dst_entry was cleared. Fixes: 222d7dbd258d ("net: prevent dst uses after free") Reported-by: Tobias Hommel Reported-by: Kristian Evensen Reported-by: Wolfgang Walter Signed-off-by: Steffen Klassert Signed-off-by: Sasha Levin --- net/xfrm/xfrm_output.c | 4 ++++ net/xfrm/xfrm_policy.c | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index 35610cc881a9a..c47660fba4988 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -101,6 +101,10 @@ static int xfrm_output_one(struct sk_buff *skb, int err) spin_unlock_bh(&x->lock); skb_dst_force(skb); + if (!skb_dst(skb)) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR); + goto error_nolock; + } if (xfrm_offload(skb)) { x->type_offload->encap(x, skb); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 2fb7a78308e1a..37c32e73aaef3 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2550,6 +2550,10 @@ int __xfrm_route_forward(struct sk_buff *skb, unsigned short family) } skb_dst_force(skb); + if (!skb_dst(skb)) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMFWDHDRERROR); + return 0; + } dst = xfrm_lookup(net, skb_dst(skb), &fl, NULL, XFRM_LOOKUP_QUEUE); if (IS_ERR(dst)) { -- GitLab From 5d7bf7b4d022ba0f5a01f2fc2be352fad925a627 Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Thu, 13 Sep 2018 16:48:08 +0100 Subject: [PATCH 1091/2269] ARM: 8799/1: mm: fix pci_ioremap_io() offset check [ Upstream commit 3a58ac65e2d7969bcdf1b6acb70fa4d12a88e53e ] IO_SPACE_LIMIT is the ending address of the PCI IO space, i.e something like 0xfffff (and not 0x100000). Therefore, when offset = 0xf0000 is passed as argument, this function fails even though the offset + SZ_64K fits below the IO_SPACE_LIMIT. This makes the last chunk of 64 KB of the I/O space not usable as it cannot be mapped. This patch fixes that by substracing 1 to offset + SZ_64K, so that we compare the addrss of the last byte of the I/O space against IO_SPACE_LIMIT instead of the address of the first byte of what is after the I/O space. Fixes: c2794437091a4 ("ARM: Add fixed PCI i/o mapping") Signed-off-by: Thomas Petazzoni Acked-by: Nicolas Pitre Signed-off-by: Russell King Signed-off-by: Sasha Levin --- arch/arm/mm/ioremap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c index fc91205ff46ce..5bf9443cfbaa6 100644 --- a/arch/arm/mm/ioremap.c +++ b/arch/arm/mm/ioremap.c @@ -473,7 +473,7 @@ void pci_ioremap_set_mem_type(int mem_type) int pci_ioremap_io(unsigned int offset, phys_addr_t phys_addr) { - BUG_ON(offset + SZ_64K > IO_SPACE_LIMIT); + BUG_ON(offset + SZ_64K - 1 > IO_SPACE_LIMIT); return ioremap_page_range(PCI_IO_VIRT_BASE + offset, PCI_IO_VIRT_BASE + offset + SZ_64K, -- GitLab From 26c6b9da7a001cf37e1b2029a33955cdc890d676 Mon Sep 17 00:00:00 2001 From: Sean Tranchetti Date: Wed, 19 Sep 2018 13:54:56 -0600 Subject: [PATCH 1092/2269] xfrm: validate template mode [ Upstream commit 32bf94fb5c2ec4ec842152d0e5937cd4bb6738fa ] XFRM mode parameters passed as part of the user templates in the IP_XFRM_POLICY are never properly validated. Passing values other than valid XFRM modes can cause stack-out-of-bounds reads to occur later in the XFRM processing: [ 140.535608] ================================================================ [ 140.543058] BUG: KASAN: stack-out-of-bounds in xfrm_state_find+0x17e4/0x1cc4 [ 140.550306] Read of size 4 at addr ffffffc0238a7a58 by task repro/5148 [ 140.557369] [ 140.558927] Call trace: [ 140.558936] dump_backtrace+0x0/0x388 [ 140.558940] show_stack+0x24/0x30 [ 140.558946] __dump_stack+0x24/0x2c [ 140.558949] dump_stack+0x8c/0xd0 [ 140.558956] print_address_description+0x74/0x234 [ 140.558960] kasan_report+0x240/0x264 [ 140.558963] __asan_report_load4_noabort+0x2c/0x38 [ 140.558967] xfrm_state_find+0x17e4/0x1cc4 [ 140.558971] xfrm_resolve_and_create_bundle+0x40c/0x1fb8 [ 140.558975] xfrm_lookup+0x238/0x1444 [ 140.558977] xfrm_lookup_route+0x48/0x11c [ 140.558984] ip_route_output_flow+0x88/0xc4 [ 140.558991] raw_sendmsg+0xa74/0x266c [ 140.558996] inet_sendmsg+0x258/0x3b0 [ 140.559002] sock_sendmsg+0xbc/0xec [ 140.559005] SyS_sendto+0x3a8/0x5a8 [ 140.559008] el0_svc_naked+0x34/0x38 [ 140.559009] [ 140.592245] page dumped because: kasan: bad access detected [ 140.597981] page_owner info is not active (free page?) [ 140.603267] [ 140.653503] ================================================================ Signed-off-by: Sean Tranchetti Signed-off-by: Steffen Klassert Signed-off-by: Sasha Levin --- net/xfrm/xfrm_user.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 4292347bf45e7..4e8319766f2bb 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1449,6 +1449,9 @@ static int validate_tmpl(int nr, struct xfrm_user_tmpl *ut, u16 family) (ut[i].family != prev_family)) return -EINVAL; + if (ut[i].mode >= XFRM_MODE_MAX) + return -EINVAL; + prev_family = ut[i].family; switch (ut[i].family) { -- GitLab From 1157713407a691eb48b2d6abfda1bc87b0c3848d Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 17 Sep 2018 08:20:36 -0700 Subject: [PATCH 1093/2269] netfilter: bridge: Don't sabotage nf_hook calls from an l3mdev [ Upstream commit a173f066c7cfc031acb8f541708041e009fc9812 ] For starters, the bridge netfilter code registers operations that are invoked any time nh_hook is called. Specifically, ip_sabotage_in watches for nested calls for NF_INET_PRE_ROUTING when a bridge is in the stack. Packet wise, the bridge netfilter hook runs first. br_nf_pre_routing allocates nf_bridge, sets in_prerouting to 1 and calls NF_HOOK for NF_INET_PRE_ROUTING. It's finish function, br_nf_pre_routing_finish, then resets in_prerouting flag to 0 and the packet continues up the stack. The packet eventually makes it to the VRF driver and it invokes nf_hook for NF_INET_PRE_ROUTING in case any rules have been added against the vrf device. Because of the registered operations the call to nf_hook causes ip_sabotage_in to be invoked. That function sees the nf_bridge on the skb and that in_prerouting is not set. Thinking it is an invalid nested call it steals (drops) the packet. Update ip_sabotage_in to recognize that the bridge or one of its upper devices (e.g., vlan) can be enslaved to a VRF (L3 master device) and allow the packet to go through the nf_hook a second time. Fixes: 73e20b761acf ("net: vrf: Add support for PREROUTING rules on vrf device") Reported-by: D'Souza, Nelson Signed-off-by: David Ahern Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/bridge/br_netfilter_hooks.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index c2eea1b8737a1..7582f28ab3060 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -832,7 +832,8 @@ static unsigned int ip_sabotage_in(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - if (skb->nf_bridge && !skb->nf_bridge->in_prerouting) { + if (skb->nf_bridge && !skb->nf_bridge->in_prerouting && + !netif_is_l3_master(skb->dev)) { state->okfn(state->net, state->sk, skb); return NF_STOLEN; } -- GitLab From 1241679ce248b13db2193ef04b942098fa85e3ec Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Fri, 21 Sep 2018 16:34:04 +0100 Subject: [PATCH 1094/2269] arm64: hugetlb: Fix handling of young ptes [ Upstream commit 469ed9d823b7d240d6b9574f061ded7c3834c167 ] In the contiguous bit hugetlb break-before-make code we assume that all hugetlb pages are young. In fact, remove_migration_pte is able to place an old hugetlb pte so this assumption is not valid. This patch fixes the contiguous hugetlb scanning code to preserve young ptes. Fixes: d8bdcff28764 ("arm64: hugetlb: Add break-before-make logic for contiguous entries") Signed-off-by: Steve Capper Signed-off-by: Will Deacon Signed-off-by: Sasha Levin --- arch/arm64/mm/hugetlbpage.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index 6cb0fa92a6516..9f6ae9686dac6 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -118,11 +118,14 @@ static pte_t get_clear_flush(struct mm_struct *mm, /* * If HW_AFDBM is enabled, then the HW could turn on - * the dirty bit for any page in the set, so check - * them all. All hugetlb entries are already young. + * the dirty or accessed bit for any page in the set, + * so check them all. */ if (pte_dirty(pte)) orig_pte = pte_mkdirty(orig_pte); + + if (pte_young(pte)) + orig_pte = pte_mkyoung(orig_pte); } if (valid) @@ -347,10 +350,13 @@ int huge_ptep_set_access_flags(struct vm_area_struct *vma, if (!pte_same(orig_pte, pte)) changed = 1; - /* Make sure we don't lose the dirty state */ + /* Make sure we don't lose the dirty or young state */ if (pte_dirty(orig_pte)) pte = pte_mkdirty(pte); + if (pte_young(orig_pte)) + pte = pte_mkyoung(pte); + hugeprot = pte_pgprot(pte); for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn) set_pte_at(vma->vm_mm, addr, ptep, pfn_pte(pfn, hugeprot)); -- GitLab From a5bdfc6892b5f6c4d131c246771dc67a55460691 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 19 Sep 2018 17:14:01 -0700 Subject: [PATCH 1095/2269] ARM: dts: BCM63xx: Fix incorrect interrupt specifiers [ Upstream commit 3ab97942d0213b6583a5408630a8cbbfbf54730f ] A number of our interrupts were incorrectly specified, fix both the PPI and SPI interrupts to be correct. Fixes: b5762cacc411 ("ARM: bcm63138: add NAND DT support") Fixes: 46d4bca0445a ("ARM: BCM63XX: add BCM63138 minimal Device Tree") Signed-off-by: Florian Fainelli Signed-off-by: Sasha Levin --- arch/arm/boot/dts/bcm63138.dtsi | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/arch/arm/boot/dts/bcm63138.dtsi b/arch/arm/boot/dts/bcm63138.dtsi index 43ee992ccdcf7..6df61518776f7 100644 --- a/arch/arm/boot/dts/bcm63138.dtsi +++ b/arch/arm/boot/dts/bcm63138.dtsi @@ -106,21 +106,23 @@ global_timer: timer@1e200 { compatible = "arm,cortex-a9-global-timer"; reg = <0x1e200 0x20>; - interrupts = ; + interrupts = ; clocks = <&axi_clk>; }; local_timer: local-timer@1e600 { compatible = "arm,cortex-a9-twd-timer"; reg = <0x1e600 0x20>; - interrupts = ; + interrupts = ; clocks = <&axi_clk>; }; twd_watchdog: watchdog@1e620 { compatible = "arm,cortex-a9-twd-wdt"; reg = <0x1e620 0x20>; - interrupts = ; + interrupts = ; }; armpll: armpll { @@ -158,7 +160,7 @@ serial0: serial@600 { compatible = "brcm,bcm6345-uart"; reg = <0x600 0x1b>; - interrupts = ; + interrupts = ; clocks = <&periph_clk>; clock-names = "periph"; status = "disabled"; @@ -167,7 +169,7 @@ serial1: serial@620 { compatible = "brcm,bcm6345-uart"; reg = <0x620 0x1b>; - interrupts = ; + interrupts = ; clocks = <&periph_clk>; clock-names = "periph"; status = "disabled"; @@ -180,7 +182,7 @@ reg = <0x2000 0x600>, <0xf0 0x10>; reg-names = "nand", "nand-int-base"; status = "disabled"; - interrupts = ; + interrupts = ; interrupt-names = "nand"; }; -- GitLab From d9e742766c763db1e8ead9a9fbd7640232735ede Mon Sep 17 00:00:00 2001 From: Michal Simek Date: Tue, 25 Sep 2018 08:32:50 +0200 Subject: [PATCH 1096/2269] net: macb: Clean 64b dma addresses if they are not detected [ Upstream commit e1e5d8a9fe737d94ccc0ccbaf0c97f69a8f3e000 ] Clear ADDR64 dma bit in DMACFG register in case that HW_DMA_CAP_64B is not detected on 64bit system. The issue was observed when bootloader(u-boot) does not check macb feature at DCFG6 register (DAW64_OFFSET) and enabling 64bit dma support by default. Then macb driver is reading DMACFG register back and only adding 64bit dma configuration but not cleaning it out. Signed-off-by: Michal Simek Acked-by: Nicolas Ferre Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/cadence/macb_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index b4f92de1efbd1..d6f8d6c8b0f1e 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -2000,6 +2000,7 @@ static void macb_configure_dma(struct macb *bp) else dmacfg &= ~GEM_BIT(TXCOEN); + dmacfg &= ~GEM_BIT(ADDR64); #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT if (bp->hw_dma_cap & HW_DMA_CAP_64B) dmacfg |= GEM_BIT(ADDR64); -- GitLab From 795b13034224f1b82fc22d749f9226a8c97977d4 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Thu, 23 Aug 2018 23:36:00 +0200 Subject: [PATCH 1097/2269] soc: fsl: qbman: qman: avoid allocating from non existing gen_pool [ Upstream commit 64e9e22e68512da8df3c9a7430f07621e48db3c2 ] If the qman driver didn't probe, calling qman_alloc_fqid_range, qman_alloc_pool_range or qman_alloc_cgrid_range (as done in dpaa_eth) will pass a NULL pointer to gen_pool_alloc, leading to a NULL pointer dereference. Signed-off-by: Alexandre Belloni Reviewed-by: Roy Pledge Signed-off-by: Li Yang (cherry picked from commit f72487a2788aa70c3aee1d0ebd5470de9bac953a) Signed-off-by: Olof Johansson Signed-off-by: Sasha Levin --- drivers/soc/fsl/qbman/qman.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/soc/fsl/qbman/qman.c b/drivers/soc/fsl/qbman/qman.c index 0c6065dba48a2..4f27e95efcdd3 100644 --- a/drivers/soc/fsl/qbman/qman.c +++ b/drivers/soc/fsl/qbman/qman.c @@ -2699,6 +2699,9 @@ static int qman_alloc_range(struct gen_pool *p, u32 *result, u32 cnt) { unsigned long addr; + if (!p) + return -ENODEV; + addr = gen_pool_alloc(p, cnt); if (!addr) return -ENOMEM; -- GitLab From fa1578ec411d8cf9724166738fd41fc5b686e28e Mon Sep 17 00:00:00 2001 From: Zhao Qiang Date: Thu, 1 Feb 2018 14:54:32 +0800 Subject: [PATCH 1098/2269] soc: fsl: qe: Fix copy/paste bug in ucc_get_tdm_sync_shift() [ Upstream commit 96fc74333f84cfdf8d434c6c07254e215e2aad00 ] There is a copy and paste bug so we accidentally use the RX_ shift when we're in TX_ mode. Fixes: bb8b2062aff3 ("fsl/qe: setup clock source for TDM mode") Signed-off-by: Dan Carpenter Signed-off-by: Zhao Qiang Signed-off-by: Li Yang (cherry picked from commit 3cb31b634052ed458922e0c8e2b4b093d7fb60b9) Signed-off-by: Olof Johansson Signed-off-by: Sasha Levin --- drivers/soc/fsl/qe/ucc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/soc/fsl/qe/ucc.c b/drivers/soc/fsl/qe/ucc.c index c646d87138613..681f7d4b7724f 100644 --- a/drivers/soc/fsl/qe/ucc.c +++ b/drivers/soc/fsl/qe/ucc.c @@ -626,7 +626,7 @@ static u32 ucc_get_tdm_sync_shift(enum comm_dir mode, u32 tdm_num) { u32 shift; - shift = (mode == COMM_DIR_RX) ? RX_SYNC_SHIFT_BASE : RX_SYNC_SHIFT_BASE; + shift = (mode == COMM_DIR_RX) ? RX_SYNC_SHIFT_BASE : TX_SYNC_SHIFT_BASE; shift -= tdm_num * 2; return shift; -- GitLab From c7b66583c9edb63ab7613a6ff545367175f95125 Mon Sep 17 00:00:00 2001 From: Masashi Honma Date: Tue, 25 Sep 2018 11:15:00 +0900 Subject: [PATCH 1099/2269] nl80211: Fix possible Spectre-v1 for NL80211_TXRATE_HT [ Upstream commit 30fe6d50eb088783c8729c7d930f65296b2b3fa7 ] Use array_index_nospec() to sanitize ridx with respect to speculation. Signed-off-by: Masashi Honma Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/wireless/nl80211.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 3de415bca3915..5e7c9b361e8a5 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3480,6 +3480,7 @@ static bool ht_rateset_to_mask(struct ieee80211_supported_band *sband, return false; /* check availability */ + ridx = array_index_nospec(ridx, IEEE80211_HT_MCS_MASK_LEN); if (sband->ht_cap.mcs.rx_mask[ridx] & rbit) mcs[ridx] |= rbit; else -- GitLab From db2a11e706bf59d60dbd250f19a383d102a92e69 Mon Sep 17 00:00:00 2001 From: Martin Willi Date: Tue, 25 Sep 2018 09:51:02 +0200 Subject: [PATCH 1100/2269] mac80211_hwsim: do not omit multicast announce of first added radio [ Upstream commit 28ef8b49a338dc1844e86b7954cfffc7dfa2660a ] The allocation of hwsim radio identifiers uses a post-increment from 0, so the first radio has idx 0. This idx is explicitly excluded from multicast announcements ever since, but it is unclear why. Drop that idx check and announce the first radio as well. This makes userspace happy if it relies on these events. Signed-off-by: Martin Willi Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- drivers/net/wireless/mac80211_hwsim.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index aafa7aa18fbd7..477f9f2f6626c 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -2730,8 +2730,7 @@ static int mac80211_hwsim_new_radio(struct genl_info *info, list_add_tail(&data->list, &hwsim_radios); spin_unlock_bh(&hwsim_radio_lock); - if (idx > 0) - hwsim_mcast_new_radio(idx, info, param); + hwsim_mcast_new_radio(idx, info, param); return idx; -- GitLab From 0f8a689c6a2f93cbbc16b0ff2003b3240241bdc5 Mon Sep 17 00:00:00 2001 From: Matias Karhumaa Date: Wed, 26 Sep 2018 09:13:46 +0300 Subject: [PATCH 1101/2269] Bluetooth: SMP: fix crash in unpairing [ Upstream commit cb28c306b93b71f2741ce1a5a66289db26715f4d ] In case unpair_device() was called through mgmt interface at the same time when pairing was in progress, Bluetooth kernel module crash was seen. [ 600.351225] general protection fault: 0000 [#1] SMP PTI [ 600.351235] CPU: 1 PID: 11096 Comm: btmgmt Tainted: G OE 4.19.0-rc1+ #1 [ 600.351238] Hardware name: Dell Inc. Latitude E5440/08RCYC, BIOS A18 05/14/2017 [ 600.351272] RIP: 0010:smp_chan_destroy.isra.10+0xce/0x2c0 [bluetooth] [ 600.351276] Code: c0 0f 84 b4 01 00 00 80 78 28 04 0f 84 53 01 00 00 4d 85 ed 0f 85 ab 00 00 00 48 8b 08 48 8b 50 08 be 10 00 00 00 48 89 51 08 <48> 89 0a 48 b9 00 02 00 00 00 00 ad de 48 89 48 08 48 8b 83 00 01 [ 600.351279] RSP: 0018:ffffa9be839b3b50 EFLAGS: 00010246 [ 600.351282] RAX: ffff9c999ac565a0 RBX: ffff9c9996e98c00 RCX: ffff9c999aa28b60 [ 600.351285] RDX: dead000000000200 RSI: 0000000000000010 RDI: ffff9c999e403500 [ 600.351287] RBP: ffffa9be839b3b70 R08: 0000000000000000 R09: ffffffff92a25c00 [ 600.351290] R10: ffffa9be839b3ae8 R11: 0000000000000001 R12: ffff9c995375b800 [ 600.351292] R13: 0000000000000000 R14: ffff9c99619a5000 R15: ffff9c9962a01c00 [ 600.351295] FS: 00007fb2be27c700(0000) GS:ffff9c999e880000(0000) knlGS:0000000000000000 [ 600.351298] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 600.351300] CR2: 00007fb2bdadbad0 CR3: 000000041c328001 CR4: 00000000001606e0 [ 600.351302] Call Trace: [ 600.351325] smp_failure+0x4f/0x70 [bluetooth] [ 600.351345] smp_cancel_pairing+0x74/0x80 [bluetooth] [ 600.351370] unpair_device+0x1c1/0x330 [bluetooth] [ 600.351399] hci_sock_sendmsg+0x960/0x9f0 [bluetooth] [ 600.351409] ? apparmor_socket_sendmsg+0x1e/0x20 [ 600.351417] sock_sendmsg+0x3e/0x50 [ 600.351422] sock_write_iter+0x85/0xf0 [ 600.351429] do_iter_readv_writev+0x12b/0x1b0 [ 600.351434] do_iter_write+0x87/0x1a0 [ 600.351439] vfs_writev+0x98/0x110 [ 600.351443] ? ep_poll+0x16d/0x3d0 [ 600.351447] ? ep_modify+0x73/0x170 [ 600.351451] do_writev+0x61/0xf0 [ 600.351455] ? do_writev+0x61/0xf0 [ 600.351460] __x64_sys_writev+0x1c/0x20 [ 600.351465] do_syscall_64+0x5a/0x110 [ 600.351471] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 600.351474] RIP: 0033:0x7fb2bdb62fe0 [ 600.351477] Code: 73 01 c3 48 8b 0d b8 6e 2c 00 f7 d8 64 89 01 48 83 c8 ff c3 66 0f 1f 44 00 00 83 3d 69 c7 2c 00 00 75 10 b8 14 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 31 c3 48 83 ec 08 e8 de 80 01 00 48 89 04 24 [ 600.351479] RSP: 002b:00007ffe062cb8f8 EFLAGS: 00000246 ORIG_RAX: 0000000000000014 [ 600.351484] RAX: ffffffffffffffda RBX: 000000000255b3d0 RCX: 00007fb2bdb62fe0 [ 600.351487] RDX: 0000000000000001 RSI: 00007ffe062cb920 RDI: 0000000000000004 [ 600.351490] RBP: 00007ffe062cb920 R08: 000000000255bd80 R09: 0000000000000000 [ 600.351494] R10: 0000000000000353 R11: 0000000000000246 R12: 0000000000000001 [ 600.351497] R13: 00007ffe062cbbe0 R14: 0000000000000000 R15: 0000000000000000 [ 600.351501] Modules linked in: algif_hash algif_skcipher af_alg cmac ipt_MASQUERADE nf_conntrack_netlink nfnetlink xfrm_user xfrm_algo iptable_nat nf_nat_ipv4 xt_addrtype iptable_filter ip_tables xt_conntrack x_tables nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 libcrc32c br_netfilter bridge stp llc overlay arc4 nls_iso8859_1 dm_crypt intel_rapl x86_pkg_temp_thermal intel_powerclamp coretemp dell_laptop kvm_intel crct10dif_pclmul dell_smm_hwmon crc32_pclmul ghash_clmulni_intel pcbc aesni_intel aes_x86_64 crypto_simd cryptd glue_helper intel_cstate intel_rapl_perf uvcvideo videobuf2_vmalloc videobuf2_memops videobuf2_v4l2 videobuf2_common videodev media hid_multitouch input_leds joydev serio_raw dell_wmi snd_hda_codec_hdmi snd_hda_codec_realtek snd_hda_codec_generic dell_smbios dcdbas sparse_keymap [ 600.351569] snd_hda_intel btusb snd_hda_codec btrtl btbcm btintel snd_hda_core bluetooth(OE) snd_hwdep snd_pcm iwlmvm ecdh_generic wmi_bmof dell_wmi_descriptor snd_seq_midi mac80211 snd_seq_midi_event lpc_ich iwlwifi snd_rawmidi snd_seq snd_seq_device snd_timer cfg80211 snd soundcore mei_me mei dell_rbtn dell_smo8800 mac_hid parport_pc ppdev lp parport autofs4 hid_generic usbhid hid i915 nouveau kvmgt vfio_mdev mdev vfio_iommu_type1 vfio kvm irqbypass i2c_algo_bit ttm drm_kms_helper syscopyarea sysfillrect sysimgblt mxm_wmi psmouse ahci sdhci_pci cqhci libahci fb_sys_fops sdhci drm e1000e video wmi [ 600.351637] ---[ end trace e49e9f1df09c94fb ]--- [ 600.351664] RIP: 0010:smp_chan_destroy.isra.10+0xce/0x2c0 [bluetooth] [ 600.351666] Code: c0 0f 84 b4 01 00 00 80 78 28 04 0f 84 53 01 00 00 4d 85 ed 0f 85 ab 00 00 00 48 8b 08 48 8b 50 08 be 10 00 00 00 48 89 51 08 <48> 89 0a 48 b9 00 02 00 00 00 00 ad de 48 89 48 08 48 8b 83 00 01 [ 600.351669] RSP: 0018:ffffa9be839b3b50 EFLAGS: 00010246 [ 600.351672] RAX: ffff9c999ac565a0 RBX: ffff9c9996e98c00 RCX: ffff9c999aa28b60 [ 600.351674] RDX: dead000000000200 RSI: 0000000000000010 RDI: ffff9c999e403500 [ 600.351676] RBP: ffffa9be839b3b70 R08: 0000000000000000 R09: ffffffff92a25c00 [ 600.351679] R10: ffffa9be839b3ae8 R11: 0000000000000001 R12: ffff9c995375b800 [ 600.351681] R13: 0000000000000000 R14: ffff9c99619a5000 R15: ffff9c9962a01c00 [ 600.351684] FS: 00007fb2be27c700(0000) GS:ffff9c999e880000(0000) knlGS:0000000000000000 [ 600.351686] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 600.351689] CR2: 00007fb2bdadbad0 CR3: 000000041c328001 CR4: 00000000001606e0 Crash happened because list_del_rcu() was called twice for smp->ltk. This was possible if unpair_device was called right after ltk was generated but before keys were distributed. In this commit smp_cancel_pairing was refactored to cancel pairing if it is in progress and otherwise just removes keys. Once keys are removed from rcu list, pointers to smp context's keys are set to NULL to make sure removed list items are not accessed later. This commit also adjusts the functionality of mgmt unpair_device() little bit. Previously pairing was canceled only if pairing was in state that keys were already generated. With this commit unpair_device() cancels pairing already in earlier states. Bug was found by fuzzing kernel SMP implementation using Synopsys Defensics. Reported-by: Pekka Oikarainen Signed-off-by: Matias Karhumaa Signed-off-by: Johan Hedberg Signed-off-by: Sasha Levin --- net/bluetooth/mgmt.c | 7 ++----- net/bluetooth/smp.c | 29 +++++++++++++++++++++++++---- net/bluetooth/smp.h | 3 ++- 3 files changed, 29 insertions(+), 10 deletions(-) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 1fba2a03f8ae8..ba24f613c0fc1 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -2298,9 +2298,8 @@ static int unpair_device(struct sock *sk, struct hci_dev *hdev, void *data, /* LE address type */ addr_type = le_addr_type(cp->addr.type); - hci_remove_irk(hdev, &cp->addr.bdaddr, addr_type); - - err = hci_remove_ltk(hdev, &cp->addr.bdaddr, addr_type); + /* Abort any ongoing SMP pairing. Removes ltk and irk if they exist. */ + err = smp_cancel_and_remove_pairing(hdev, &cp->addr.bdaddr, addr_type); if (err < 0) { err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_UNPAIR_DEVICE, MGMT_STATUS_NOT_PAIRED, &rp, @@ -2314,8 +2313,6 @@ static int unpair_device(struct sock *sk, struct hci_dev *hdev, void *data, goto done; } - /* Abort any ongoing SMP pairing */ - smp_cancel_pairing(conn); /* Defer clearing up the connection parameters until closing to * give a chance of keeping them if a repairing happens. diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index a27704ff13a9d..dbcc439fc78b8 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -2410,30 +2410,51 @@ unlock: return ret; } -void smp_cancel_pairing(struct hci_conn *hcon) +int smp_cancel_and_remove_pairing(struct hci_dev *hdev, bdaddr_t *bdaddr, + u8 addr_type) { - struct l2cap_conn *conn = hcon->l2cap_data; + struct hci_conn *hcon; + struct l2cap_conn *conn; struct l2cap_chan *chan; struct smp_chan *smp; + int err; + + err = hci_remove_ltk(hdev, bdaddr, addr_type); + hci_remove_irk(hdev, bdaddr, addr_type); + + hcon = hci_conn_hash_lookup_le(hdev, bdaddr, addr_type); + if (!hcon) + goto done; + conn = hcon->l2cap_data; if (!conn) - return; + goto done; chan = conn->smp; if (!chan) - return; + goto done; l2cap_chan_lock(chan); smp = chan->data; if (smp) { + /* Set keys to NULL to make sure smp_failure() does not try to + * remove and free already invalidated rcu list entries. */ + smp->ltk = NULL; + smp->slave_ltk = NULL; + smp->remote_irk = NULL; + if (test_bit(SMP_FLAG_COMPLETE, &smp->flags)) smp_failure(conn, 0); else smp_failure(conn, SMP_UNSPECIFIED); + err = 0; } l2cap_chan_unlock(chan); + +done: + return err; } static int smp_cmd_encrypt_info(struct l2cap_conn *conn, struct sk_buff *skb) diff --git a/net/bluetooth/smp.h b/net/bluetooth/smp.h index 0ff6247eaa6c0..121edadd5f8da 100644 --- a/net/bluetooth/smp.h +++ b/net/bluetooth/smp.h @@ -181,7 +181,8 @@ enum smp_key_pref { }; /* SMP Commands */ -void smp_cancel_pairing(struct hci_conn *hcon); +int smp_cancel_and_remove_pairing(struct hci_dev *hdev, bdaddr_t *bdaddr, + u8 addr_type); bool smp_sufficient_security(struct hci_conn *hcon, u8 sec_level, enum smp_key_pref key_pref); int smp_conn_security(struct hci_conn *hcon, __u8 sec_level); -- GitLab From 865741554925b7f53c1ccc01d38d3854dffbbcb3 Mon Sep 17 00:00:00 2001 From: Lubomir Rintel Date: Wed, 26 Sep 2018 18:11:22 +0200 Subject: [PATCH 1102/2269] pxa168fb: prepare the clock [ Upstream commit d85536cde91fcfed6fb8d983783bd2b92c843939 ] Add missing prepare/unprepare operations for fbi->clk, this fixes following kernel warning: ------------[ cut here ]------------ WARNING: CPU: 0 PID: 1 at drivers/clk/clk.c:874 clk_core_enable+0x2c/0x1b0 Enabling unprepared disp0_clk Modules linked in: CPU: 0 PID: 1 Comm: swapper Not tainted 4.18.0-rc8-00032-g02b43ddd4f21-dirty #25 Hardware name: Marvell MMP2 (Device Tree Support) [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [] (show_stack) from [] (__warn+0xd8/0xf0) [] (__warn) from [] (warn_slowpath_fmt+0x44/0x6c) [] (warn_slowpath_fmt) from [] (clk_core_enable+0x2c/0x1b0) [] (clk_core_enable) from [] (clk_core_enable_lock+0x18/0x2c) [] (clk_core_enable_lock) from [] (pxa168fb_probe+0x464/0x6ac) [] (pxa168fb_probe) from [] (platform_drv_probe+0x48/0x94) [] (platform_drv_probe) from [] (driver_probe_device+0x328/0x470) [] (driver_probe_device) from [] (__driver_attach+0xb0/0x124) [] (__driver_attach) from [] (bus_for_each_dev+0x64/0xa0) [] (bus_for_each_dev) from [] (bus_add_driver+0x1b8/0x230) [] (bus_add_driver) from [] (driver_register+0xac/0xf0) [] (driver_register) from [] (do_one_initcall+0xb8/0x1f0) [] (do_one_initcall) from [] (kernel_init_freeable+0x294/0x2e0) [] (kernel_init_freeable) from [] (kernel_init+0x8/0x10c) [] (kernel_init) from [] (ret_from_fork+0x14/0x2c) Exception stack(0xd008bfb0 to 0xd008bff8) bfa0: 00000000 00000000 00000000 00000000 bfc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 bfe0: 00000000 00000000 00000000 00000000 00000013 00000000 ---[ end trace c0af40f9e2ed7cb4 ]--- Signed-off-by: Lubomir Rintel [b.zolnierkie: enhance patch description a bit] Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Sasha Levin --- drivers/video/fbdev/pxa168fb.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/video/fbdev/pxa168fb.c b/drivers/video/fbdev/pxa168fb.c index def3a501acd64..d059d04c63acd 100644 --- a/drivers/video/fbdev/pxa168fb.c +++ b/drivers/video/fbdev/pxa168fb.c @@ -712,7 +712,7 @@ static int pxa168fb_probe(struct platform_device *pdev) /* * enable controller clock */ - clk_enable(fbi->clk); + clk_prepare_enable(fbi->clk); pxa168fb_set_par(info); @@ -767,7 +767,7 @@ static int pxa168fb_probe(struct platform_device *pdev) failed_free_cmap: fb_dealloc_cmap(&info->cmap); failed_free_clk: - clk_disable(fbi->clk); + clk_disable_unprepare(fbi->clk); failed_free_fbmem: dma_free_coherent(fbi->dev, info->fix.smem_len, info->screen_base, fbi->fb_start_dma); @@ -807,7 +807,7 @@ static int pxa168fb_remove(struct platform_device *pdev) dma_free_wc(fbi->dev, PAGE_ALIGN(info->fix.smem_len), info->screen_base, info->fix.smem_start); - clk_disable(fbi->clk); + clk_disable_unprepare(fbi->clk); framebuffer_release(info); -- GitLab From 76f86190774e9b1655cc53781311e065462f1fc4 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Mon, 24 Sep 2018 13:53:34 -0700 Subject: [PATCH 1103/2269] qed: Avoid implicit enum conversion in qed_set_tunn_cls_info [ Upstream commit a898fba32229efd5e6b6154f83fa86a7145156b9 ] Clang warns when one enumerated type is implicitly converted to another. drivers/net/ethernet/qlogic/qed/qed_sp_commands.c:163:25: warning: implicit conversion from enumeration type 'enum tunnel_clss' to different enumeration type 'enum qed_tunn_clss' [-Wenum-conversion] p_tun->vxlan.tun_cls = type; ~ ^~~~ drivers/net/ethernet/qlogic/qed/qed_sp_commands.c:165:26: warning: implicit conversion from enumeration type 'enum tunnel_clss' to different enumeration type 'enum qed_tunn_clss' [-Wenum-conversion] p_tun->l2_gre.tun_cls = type; ~ ^~~~ drivers/net/ethernet/qlogic/qed/qed_sp_commands.c:167:26: warning: implicit conversion from enumeration type 'enum tunnel_clss' to different enumeration type 'enum qed_tunn_clss' [-Wenum-conversion] p_tun->ip_gre.tun_cls = type; ~ ^~~~ drivers/net/ethernet/qlogic/qed/qed_sp_commands.c:169:29: warning: implicit conversion from enumeration type 'enum tunnel_clss' to different enumeration type 'enum qed_tunn_clss' [-Wenum-conversion] p_tun->l2_geneve.tun_cls = type; ~ ^~~~ drivers/net/ethernet/qlogic/qed/qed_sp_commands.c:171:29: warning: implicit conversion from enumeration type 'enum tunnel_clss' to different enumeration type 'enum qed_tunn_clss' [-Wenum-conversion] p_tun->ip_geneve.tun_cls = type; ~ ^~~~ 5 warnings generated. Avoid this by changing type to an int. Link: https://github.com/ClangBuiltLinux/linux/issues/125 Signed-off-by: Nathan Chancellor Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/qlogic/qed/qed_sp_commands.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c b/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c index 46d0c3cb83a56..d7c5965328be8 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c +++ b/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c @@ -154,7 +154,7 @@ qed_set_pf_update_tunn_mode(struct qed_tunnel_info *p_tun, static void qed_set_tunn_cls_info(struct qed_tunnel_info *p_tun, struct qed_tunnel_info *p_src) { - enum tunnel_clss type; + int type; p_tun->b_update_rx_cls = p_src->b_update_rx_cls; p_tun->b_update_tx_cls = p_src->b_update_tx_cls; -- GitLab From ab7998abfea24e59acd3d387e46760628f95ff28 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Mon, 24 Sep 2018 14:05:27 -0700 Subject: [PATCH 1104/2269] qed: Fix mask parameter in qed_vf_prep_tunn_req_tlv [ Upstream commit db803f36e56f23b5a2266807e190d1dc11554d54 ] Clang complains when one enumerated type is implicitly converted to another. drivers/net/ethernet/qlogic/qed/qed_vf.c:686:6: warning: implicit conversion from enumeration type 'enum qed_tunn_mode' to different enumeration type 'enum qed_tunn_clss' [-Wenum-conversion] QED_MODE_L2GENEVE_TUNN, ^~~~~~~~~~~~~~~~~~~~~~ Update mask's parameter to expect qed_tunn_mode, which is what was intended. Link: https://github.com/ClangBuiltLinux/linux/issues/125 Signed-off-by: Nathan Chancellor Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/qlogic/qed/qed_vf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_vf.c b/drivers/net/ethernet/qlogic/qed/qed_vf.c index 6eb85db69f9a9..b8b1a791a4faa 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_vf.c +++ b/drivers/net/ethernet/qlogic/qed/qed_vf.c @@ -572,7 +572,7 @@ free_p_iov: static void __qed_vf_prep_tunn_req_tlv(struct vfpf_update_tunn_param_tlv *p_req, struct qed_tunn_update_type *p_src, - enum qed_tunn_clss mask, u8 *p_cls) + enum qed_tunn_mode mask, u8 *p_cls) { if (p_src->b_update_mode) { p_req->tun_mode_update_mask |= BIT(mask); @@ -587,7 +587,7 @@ __qed_vf_prep_tunn_req_tlv(struct vfpf_update_tunn_param_tlv *p_req, static void qed_vf_prep_tunn_req_tlv(struct vfpf_update_tunn_param_tlv *p_req, struct qed_tunn_update_type *p_src, - enum qed_tunn_clss mask, + enum qed_tunn_mode mask, u8 *p_cls, struct qed_tunn_update_udp_port *p_port, u8 *p_update_port, u16 *p_udp_port) { -- GitLab From 935d441968b1877707367cc7ac78b1822bf483fc Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Mon, 24 Sep 2018 14:34:53 -0700 Subject: [PATCH 1105/2269] qed: Avoid implicit enum conversion in qed_roce_mode_to_flavor [ Upstream commit d3a315795b4ce8b105a64a90699103121bde04a8 ] Clang warns when one enumerated type is implicitly converted to another. drivers/net/ethernet/qlogic/qed/qed_roce.c:153:12: warning: implicit conversion from enumeration type 'enum roce_mode' to different enumeration type 'enum roce_flavor' [-Wenum-conversion] flavor = ROCE_V2_IPV6; ~ ^~~~~~~~~~~~ drivers/net/ethernet/qlogic/qed/qed_roce.c:156:12: warning: implicit conversion from enumeration type 'enum roce_mode' to different enumeration type 'enum roce_flavor' [-Wenum-conversion] flavor = MAX_ROCE_MODE; ~ ^~~~~~~~~~~~~ 2 warnings generated. Use the appropriate values from the expected type, roce_flavor: ROCE_V2_IPV6 = RROCE_IPV6 = 2 MAX_ROCE_MODE = MAX_ROCE_FLAVOR = 3 While we're add it, ditch the local variable flavor, we can just return the value directly from the switch statement. Link: https://github.com/ClangBuiltLinux/linux/issues/125 Signed-off-by: Nathan Chancellor Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/qlogic/qed/qed_roce.c | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_roce.c b/drivers/net/ethernet/qlogic/qed/qed_roce.c index fb7c2d1562ae7..bedbf840fd7d8 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_roce.c +++ b/drivers/net/ethernet/qlogic/qed/qed_roce.c @@ -129,23 +129,16 @@ static void qed_rdma_copy_gids(struct qed_rdma_qp *qp, __le32 *src_gid, static enum roce_flavor qed_roce_mode_to_flavor(enum roce_mode roce_mode) { - enum roce_flavor flavor; - switch (roce_mode) { case ROCE_V1: - flavor = PLAIN_ROCE; - break; + return PLAIN_ROCE; case ROCE_V2_IPV4: - flavor = RROCE_IPV4; - break; + return RROCE_IPV4; case ROCE_V2_IPV6: - flavor = ROCE_V2_IPV6; - break; + return RROCE_IPV6; default: - flavor = MAX_ROCE_MODE; - break; + return MAX_ROCE_FLAVOR; } - return flavor; } void qed_roce_free_cid_pair(struct qed_hwfn *p_hwfn, u16 cid) -- GitLab From ee86b4d659c23f735c3162b0c9c461a2e6f07a8b Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Mon, 24 Sep 2018 15:17:03 -0700 Subject: [PATCH 1106/2269] qed: Avoid constant logical operation warning in qed_vf_pf_acquire [ Upstream commit 1c492a9d55ba99079210ed901dd8a5423f980487 ] Clang warns when a constant is used in a boolean context as it thinks a bitwise operation may have been intended. drivers/net/ethernet/qlogic/qed/qed_vf.c:415:27: warning: use of logical '&&' with constant operand [-Wconstant-logical-operand] if (!p_iov->b_pre_fp_hsi && ^ drivers/net/ethernet/qlogic/qed/qed_vf.c:415:27: note: use '&' for a bitwise operation if (!p_iov->b_pre_fp_hsi && ^~ & drivers/net/ethernet/qlogic/qed/qed_vf.c:415:27: note: remove constant to silence this warning if (!p_iov->b_pre_fp_hsi && ~^~ 1 warning generated. This has been here since commit 1fe614d10f45 ("qed: Relax VF firmware requirements") and I am not entirely sure why since 0 isn't a special case. Just remove the statement causing Clang to warn since it isn't required. Link: https://github.com/ClangBuiltLinux/linux/issues/126 Signed-off-by: Nathan Chancellor Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/qlogic/qed/qed_vf.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_vf.c b/drivers/net/ethernet/qlogic/qed/qed_vf.c index b8b1a791a4faa..dd8ebf6d380f9 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_vf.c +++ b/drivers/net/ethernet/qlogic/qed/qed_vf.c @@ -413,7 +413,6 @@ static int qed_vf_pf_acquire(struct qed_hwfn *p_hwfn) } if (!p_iov->b_pre_fp_hsi && - ETH_HSI_VER_MINOR && (resp->pfdev_info.minor_fp_hsi < ETH_HSI_VER_MINOR)) { DP_INFO(p_hwfn, "PF is using older fastpath HSI; %02x.%02x is configured\n", -- GitLab From b93393d63d8f66135acc32a3ff8f3680f923aa08 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Mon, 24 Sep 2018 14:42:12 -0700 Subject: [PATCH 1107/2269] qed: Avoid implicit enum conversion in qed_iwarp_parse_rx_pkt [ Upstream commit 77f2d753819b7d50c16abfb778caf1fe075faed0 ] Clang warns when one enumerated type is implicitly converted to another. drivers/net/ethernet/qlogic/qed/qed_iwarp.c:1713:25: warning: implicit conversion from enumeration type 'enum tcp_ip_version' to different enumeration type 'enum qed_tcp_ip_version' [-Wenum-conversion] cm_info->ip_version = TCP_IPV4; ~ ^~~~~~~~ drivers/net/ethernet/qlogic/qed/qed_iwarp.c:1733:25: warning: implicit conversion from enumeration type 'enum tcp_ip_version' to different enumeration type 'enum qed_tcp_ip_version' [-Wenum-conversion] cm_info->ip_version = TCP_IPV6; ~ ^~~~~~~~ 2 warnings generated. Use the appropriate values from the expected type, qed_tcp_ip_version: TCP_IPV4 = QED_TCP_IPV4 = 0 TCP_IPV6 = QED_TCP_IPV6 = 1 Link: https://github.com/ClangBuiltLinux/linux/issues/125 Signed-off-by: Nathan Chancellor Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/qlogic/qed/qed_iwarp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c index e41f28602535b..eb666877d1aa9 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c @@ -1672,7 +1672,7 @@ qed_iwarp_parse_rx_pkt(struct qed_hwfn *p_hwfn, cm_info->local_ip[0] = ntohl(iph->daddr); cm_info->remote_ip[0] = ntohl(iph->saddr); - cm_info->ip_version = TCP_IPV4; + cm_info->ip_version = QED_TCP_IPV4; ip_hlen = (iph->ihl) * sizeof(u32); *payload_len = ntohs(iph->tot_len) - ip_hlen; @@ -1692,7 +1692,7 @@ qed_iwarp_parse_rx_pkt(struct qed_hwfn *p_hwfn, cm_info->remote_ip[i] = ntohl(ip6h->saddr.in6_u.u6_addr32[i]); } - cm_info->ip_version = TCP_IPV6; + cm_info->ip_version = QED_TCP_IPV6; ip_hlen = sizeof(*ip6h); *payload_len = ntohs(ip6h->payload_len); -- GitLab From 915670c48eb3d073ef8dc29a12db92cbde233195 Mon Sep 17 00:00:00 2001 From: Masashi Honma Date: Tue, 25 Sep 2018 11:15:01 +0900 Subject: [PATCH 1108/2269] nl80211: Fix possible Spectre-v1 for CQM RSSI thresholds [ Upstream commit 1222a16014888ed9733c11e221730d4a8196222b ] Use array_index_nospec() to sanitize i with respect to speculation. Note that the user doesn't control i directly, but can make it out of bounds by not finding a threshold in the array. Signed-off-by: Masashi Honma [add note about user control, as explained by Masashi] Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/wireless/nl80211.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 5e7c9b361e8a5..46e9812d13c02 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -9720,7 +9720,7 @@ static int cfg80211_cqm_rssi_update(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev = dev->ieee80211_ptr; s32 last, low, high; u32 hyst; - int i, n; + int i, n, low_index; int err; /* RSSI reporting disabled? */ @@ -9757,10 +9757,19 @@ static int cfg80211_cqm_rssi_update(struct cfg80211_registered_device *rdev, if (last < wdev->cqm_config->rssi_thresholds[i]) break; - low = i > 0 ? - (wdev->cqm_config->rssi_thresholds[i - 1] - hyst) : S32_MIN; - high = i < n ? - (wdev->cqm_config->rssi_thresholds[i] + hyst - 1) : S32_MAX; + low_index = i - 1; + if (low_index >= 0) { + low_index = array_index_nospec(low_index, n); + low = wdev->cqm_config->rssi_thresholds[low_index] - hyst; + } else { + low = S32_MIN; + } + if (i < n) { + i = array_index_nospec(i, n); + high = wdev->cqm_config->rssi_thresholds[i] + hyst - 1; + } else { + high = S32_MAX; + } return rdev_set_cqm_rssi_range_config(rdev, dev, low, high); } -- GitLab From eef5935c2d7a349479c6992ddb60b2c4d9e17263 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 28 Sep 2018 16:18:50 -0700 Subject: [PATCH 1109/2269] asix: Check for supported Wake-on-LAN modes [ Upstream commit c4ce446e33d7a0e978256ac6fea4c80e59d9de5f ] The driver currently silently accepts unsupported Wake-on-LAN modes (other than WAKE_PHY or WAKE_MAGIC) without reporting that to the user, which is confusing. Fixes: 2e55cc7210fe ("[PATCH] USB: usbnet (3/9) module for ASIX Ethernet adapters") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/usb/asix_common.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/usb/asix_common.c b/drivers/net/usb/asix_common.c index 522d2900cd1dd..e9fcf6ef716a8 100644 --- a/drivers/net/usb/asix_common.c +++ b/drivers/net/usb/asix_common.c @@ -607,6 +607,9 @@ int asix_set_wol(struct net_device *net, struct ethtool_wolinfo *wolinfo) struct usbnet *dev = netdev_priv(net); u8 opt = 0; + if (wolinfo->wolopts & ~(WAKE_PHY | WAKE_MAGIC)) + return -EINVAL; + if (wolinfo->wolopts & WAKE_PHY) opt |= AX_MONITOR_LINK; if (wolinfo->wolopts & WAKE_MAGIC) -- GitLab From a0fc063355300102c96ee8b9b9c460102a93a7c0 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 28 Sep 2018 16:18:51 -0700 Subject: [PATCH 1110/2269] ax88179_178a: Check for supported Wake-on-LAN modes [ Upstream commit 5ba6b4aa9a410c5e2c6417df52b5e2118ea9b467 ] The driver currently silently accepts unsupported Wake-on-LAN modes (other than WAKE_PHY or WAKE_MAGIC) without reporting that to the user, which is confusing. Fixes: e2ca90c276e1 ("ax88179_178a: ASIX AX88179_178A USB 3.0/2.0 to gigabit ethernet adapter driver") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/usb/ax88179_178a.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c index f32261ecd2150..0f69b77e8502b 100644 --- a/drivers/net/usb/ax88179_178a.c +++ b/drivers/net/usb/ax88179_178a.c @@ -566,6 +566,9 @@ ax88179_set_wol(struct net_device *net, struct ethtool_wolinfo *wolinfo) struct usbnet *dev = netdev_priv(net); u8 opt = 0; + if (wolinfo->wolopts & ~(WAKE_PHY | WAKE_MAGIC)) + return -EINVAL; + if (wolinfo->wolopts & WAKE_PHY) opt |= AX_MONITOR_MODE_RWLC; if (wolinfo->wolopts & WAKE_MAGIC) -- GitLab From 9f0962c0f6f7126c0d407989e86daeb1317e0dda Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 28 Sep 2018 16:18:52 -0700 Subject: [PATCH 1111/2269] lan78xx: Check for supported Wake-on-LAN modes [ Upstream commit eb9ad088f96653a26b340f7c447c44cf023d5cdc ] The driver supports a fair amount of Wake-on-LAN modes, but is not checking that the user specified one that is supported. Fixes: 55d7de9de6c3 ("Microchip's LAN7800 family USB 2/3 to 10/100/1000 Ethernet device driver") Signed-off-by: Florian Fainelli Reviewed-by: Woojung Huh Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/usb/lan78xx.c | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index 9e3f632e22f14..00ddcaf09014e 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -1375,19 +1375,10 @@ static int lan78xx_set_wol(struct net_device *netdev, if (ret < 0) return ret; - pdata->wol = 0; - if (wol->wolopts & WAKE_UCAST) - pdata->wol |= WAKE_UCAST; - if (wol->wolopts & WAKE_MCAST) - pdata->wol |= WAKE_MCAST; - if (wol->wolopts & WAKE_BCAST) - pdata->wol |= WAKE_BCAST; - if (wol->wolopts & WAKE_MAGIC) - pdata->wol |= WAKE_MAGIC; - if (wol->wolopts & WAKE_PHY) - pdata->wol |= WAKE_PHY; - if (wol->wolopts & WAKE_ARP) - pdata->wol |= WAKE_ARP; + if (wol->wolopts & ~WAKE_ALL) + return -EINVAL; + + pdata->wol = wol->wolopts; device_set_wakeup_enable(&dev->udev->dev, (bool)wol->wolopts); -- GitLab From 4301fb3391eddf9c697bac653320749735d5a2a9 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 28 Sep 2018 16:18:53 -0700 Subject: [PATCH 1112/2269] sr9800: Check for supported Wake-on-LAN modes [ Upstream commit c5cb93e994ffb43b7b3b1ff10b9f928f54574a36 ] The driver currently silently accepts unsupported Wake-on-LAN modes (other than WAKE_PHY or WAKE_MAGIC) without reporting that to the user, which is confusing. Fixes: 19a38d8e0aa3 ("USB2NET : SR9800 : One chip USB2.0 USB2NET SR9800 Device Driver Support") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/usb/sr9800.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/usb/sr9800.c b/drivers/net/usb/sr9800.c index 9277a0f228dfa..35f39f23d8814 100644 --- a/drivers/net/usb/sr9800.c +++ b/drivers/net/usb/sr9800.c @@ -421,6 +421,9 @@ sr_set_wol(struct net_device *net, struct ethtool_wolinfo *wolinfo) struct usbnet *dev = netdev_priv(net); u8 opt = 0; + if (wolinfo->wolopts & ~(WAKE_PHY | WAKE_MAGIC)) + return -EINVAL; + if (wolinfo->wolopts & WAKE_PHY) opt |= SR_MONITOR_LINK; if (wolinfo->wolopts & WAKE_MAGIC) -- GitLab From 9eaabc0a77799b3563a5be757e551ae7463d2706 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 28 Sep 2018 16:18:54 -0700 Subject: [PATCH 1113/2269] r8152: Check for supported Wake-on-LAN Modes [ Upstream commit f2750df1548bd8a2b060eb609fc43ca82811af4c ] The driver does not check for Wake-on-LAN modes specified by an user, but will conditionally set the device as wake-up enabled or not based on that, which could be a very confusing user experience. Fixes: 21ff2e8976b1 ("r8152: support WOL") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/usb/r8152.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 0fa64cc1a0118..66beff4d76467 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -4497,6 +4497,9 @@ static int rtl8152_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol) if (!rtl_can_wakeup(tp)) return -EOPNOTSUPP; + if (wol->wolopts & ~WAKE_ANY) + return -EINVAL; + ret = usb_autopm_get_interface(tp->intf); if (ret < 0) goto out_set_wol; -- GitLab From 9e3a0366365bf82da3668d6e20cc753476fb9519 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 28 Sep 2018 16:18:55 -0700 Subject: [PATCH 1114/2269] smsc75xx: Check for Wake-on-LAN modes [ Upstream commit 9c734b2769a73eea2e9e9767c0e0bf839ff23679 ] The driver does not check for Wake-on-LAN modes specified by an user, but will conditionally set the device as wake-up enabled or not based on that, which could be a very confusing user experience. Fixes: 6c636503260d ("smsc75xx: add wol magic packet support") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/usb/smsc75xx.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/usb/smsc75xx.c b/drivers/net/usb/smsc75xx.c index b64b1ee56d2d9..ec287c9741e83 100644 --- a/drivers/net/usb/smsc75xx.c +++ b/drivers/net/usb/smsc75xx.c @@ -731,6 +731,9 @@ static int smsc75xx_ethtool_set_wol(struct net_device *net, struct smsc75xx_priv *pdata = (struct smsc75xx_priv *)(dev->data[0]); int ret; + if (wolinfo->wolopts & ~SUPPORTED_WAKE) + return -EINVAL; + pdata->wolopts = wolinfo->wolopts & SUPPORTED_WAKE; ret = device_set_wakeup_enable(&dev->udev->dev, pdata->wolopts); -- GitLab From 35ffbd523797f58a3017906904a718db6035fc56 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 28 Sep 2018 16:18:56 -0700 Subject: [PATCH 1115/2269] smsc95xx: Check for Wake-on-LAN modes [ Upstream commit c530c471ba37bdd9fe1c7185b01455c00ae606fb ] The driver does not check for Wake-on-LAN modes specified by an user, but will conditionally set the device as wake-up enabled or not based on that, which could be a very confusing user experience. Fixes: e0e474a83c18 ("smsc95xx: add wol magic packet support") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/usb/smsc95xx.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c index 309b88acd3d0b..99e684e39d355 100644 --- a/drivers/net/usb/smsc95xx.c +++ b/drivers/net/usb/smsc95xx.c @@ -774,6 +774,9 @@ static int smsc95xx_ethtool_set_wol(struct net_device *net, struct smsc95xx_priv *pdata = (struct smsc95xx_priv *)(dev->data[0]); int ret; + if (wolinfo->wolopts & ~SUPPORTED_WAKE) + return -EINVAL; + pdata->wolopts = wolinfo->wolopts & SUPPORTED_WAKE; ret = device_set_wakeup_enable(&dev->udev->dev, pdata->wolopts); -- GitLab From 492a81318e8cf41f18ad6845152b54f4a3387556 Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Thu, 27 Sep 2018 17:05:04 -0600 Subject: [PATCH 1116/2269] cfg80211: fix use-after-free in reg_process_hint() [ Upstream commit 1db58529454742f67ebd96e3588315e880b72837 ] reg_process_hint_country_ie() can free regulatory_request and return REG_REQ_ALREADY_SET. We shouldn't use regulatory_request after it's called. KASAN error was observed when this happens. BUG: KASAN: use-after-free in reg_process_hint+0x839/0x8aa [cfg80211] Read of size 4 at addr ffff8800c430d434 by task kworker/1:3/89 Workqueue: events reg_todo [cfg80211] Call Trace: dump_stack+0xc1/0x10c ? _atomic_dec_and_lock+0x1ad/0x1ad ? _raw_spin_lock_irqsave+0xa0/0xd2 print_address_description+0x86/0x26f ? reg_process_hint+0x839/0x8aa [cfg80211] kasan_report+0x241/0x29b reg_process_hint+0x839/0x8aa [cfg80211] reg_todo+0x204/0x5b9 [cfg80211] process_one_work+0x55f/0x8d0 ? worker_detach_from_pool+0x1b5/0x1b5 ? _raw_spin_unlock_irq+0x65/0xdd ? _raw_spin_unlock_irqrestore+0xf3/0xf3 worker_thread+0x5dd/0x841 ? kthread_parkme+0x1d/0x1d kthread+0x270/0x285 ? pr_cont_work+0xe3/0xe3 ? rcu_read_unlock_sched_notrace+0xca/0xca ret_from_fork+0x22/0x40 Allocated by task 2718: set_track+0x63/0xfa __kmalloc+0x119/0x1ac regulatory_hint_country_ie+0x38/0x329 [cfg80211] __cfg80211_connect_result+0x854/0xadd [cfg80211] cfg80211_rx_assoc_resp+0x3bc/0x4f0 [cfg80211] smsc95xx v1.0.6 ieee80211_sta_rx_queued_mgmt+0x1803/0x7ed5 [mac80211] ieee80211_iface_work+0x411/0x696 [mac80211] process_one_work+0x55f/0x8d0 worker_thread+0x5dd/0x841 kthread+0x270/0x285 ret_from_fork+0x22/0x40 Freed by task 89: set_track+0x63/0xfa kasan_slab_free+0x6a/0x87 kfree+0xdc/0x470 reg_process_hint+0x31e/0x8aa [cfg80211] reg_todo+0x204/0x5b9 [cfg80211] process_one_work+0x55f/0x8d0 worker_thread+0x5dd/0x841 kthread+0x270/0x285 ret_from_fork+0x22/0x40 Signed-off-by: Yu Zhao Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/wireless/reg.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 6f032c7b8732d..bd91de4160353 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -2170,11 +2170,12 @@ static void reg_process_hint(struct regulatory_request *reg_request) { struct wiphy *wiphy = NULL; enum reg_request_treatment treatment; + enum nl80211_reg_initiator initiator = reg_request->initiator; if (reg_request->wiphy_idx != WIPHY_IDX_INVALID) wiphy = wiphy_idx_to_wiphy(reg_request->wiphy_idx); - switch (reg_request->initiator) { + switch (initiator) { case NL80211_REGDOM_SET_BY_CORE: treatment = reg_process_hint_core(reg_request); break; @@ -2192,7 +2193,7 @@ static void reg_process_hint(struct regulatory_request *reg_request) treatment = reg_process_hint_country_ie(wiphy, reg_request); break; default: - WARN(1, "invalid initiator %d\n", reg_request->initiator); + WARN(1, "invalid initiator %d\n", initiator); goto out_free; } @@ -2207,7 +2208,7 @@ static void reg_process_hint(struct regulatory_request *reg_request) */ if (treatment == REG_REQ_ALREADY_SET && wiphy && wiphy->regulatory_flags & REGULATORY_STRICT_REG) { - wiphy_update_regulatory(wiphy, reg_request->initiator); + wiphy_update_regulatory(wiphy, initiator); wiphy_all_share_dfs_chan_state(wiphy); reg_check_channels(); } -- GitLab From ffc3cb561ecea45c95ba1690876c5e905eef3791 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 25 Sep 2018 17:58:35 +0200 Subject: [PATCH 1117/2269] perf/core: Fix perf_pmu_unregister() locking [ Upstream commit a9f9772114c8b07ae75bcb3654bd017461248095 ] When we unregister a PMU, we fail to serialize the @pmu_idr properly. Fix that by doing the entire thing under pmu_lock. Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Fixes: 2e80a82a49c4 ("perf: Dynamic pmu types") Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin --- kernel/events/core.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 4dbce29a9313d..ee1c07c0b8331 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -9020,9 +9020,7 @@ static void free_pmu_context(struct pmu *pmu) if (pmu->task_ctx_nr > perf_invalid_context) return; - mutex_lock(&pmus_lock); free_percpu(pmu->pmu_cpu_context); - mutex_unlock(&pmus_lock); } /* @@ -9278,12 +9276,8 @@ EXPORT_SYMBOL_GPL(perf_pmu_register); void perf_pmu_unregister(struct pmu *pmu) { - int remove_device; - mutex_lock(&pmus_lock); - remove_device = pmu_bus_running; list_del_rcu(&pmu->entry); - mutex_unlock(&pmus_lock); /* * We dereference the pmu list under both SRCU and regular RCU, so @@ -9295,13 +9289,14 @@ void perf_pmu_unregister(struct pmu *pmu) free_percpu(pmu->pmu_disable_count); if (pmu->type >= PERF_TYPE_MAX) idr_remove(&pmu_idr, pmu->type); - if (remove_device) { + if (pmu_bus_running) { if (pmu->nr_addr_filters) device_remove_file(pmu->dev, &dev_attr_nr_addr_filters); device_del(pmu->dev); put_device(pmu->dev); } free_pmu_context(pmu); + mutex_unlock(&pmus_lock); } EXPORT_SYMBOL_GPL(perf_pmu_unregister); -- GitLab From a18e2159c3ffcc8fe18155302d208adec443cd66 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 23 Sep 2018 18:13:43 +0200 Subject: [PATCH 1118/2269] perf/ring_buffer: Prevent concurent ring buffer access [ Upstream commit cd6fb677ce7e460c25bdd66f689734102ec7d642 ] Some of the scheduling tracepoints allow the perf_tp_event code to write to ring buffer under different cpu than the code is running on. This results in corrupted ring buffer data demonstrated in following perf commands: # perf record -e 'sched:sched_switch,sched:sched_wakeup' perf bench sched messaging # Running 'sched/messaging' benchmark: # 20 sender and receiver processes per group # 10 groups == 400 processes run Total time: 0.383 [sec] [ perf record: Woken up 8 times to write data ] 0x42b890 [0]: failed to process type: -1765585640 [ perf record: Captured and wrote 4.825 MB perf.data (29669 samples) ] # perf report --stdio 0x42b890 [0]: failed to process type: -1765585640 The reason for the corruption are some of the scheduling tracepoints, that have __perf_task dfined and thus allow to store data to another cpu ring buffer: sched_waking sched_wakeup sched_wakeup_new sched_stat_wait sched_stat_sleep sched_stat_iowait sched_stat_blocked The perf_tp_event function first store samples for current cpu related events defined for tracepoint: hlist_for_each_entry_rcu(event, head, hlist_entry) perf_swevent_event(event, count, &data, regs); And then iterates events of the 'task' and store the sample for any task's event that passes tracepoint checks: ctx = rcu_dereference(task->perf_event_ctxp[perf_sw_context]); list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { if (event->attr.type != PERF_TYPE_TRACEPOINT) continue; if (event->attr.config != entry->type) continue; perf_swevent_event(event, count, &data, regs); } Above code can race with same code running on another cpu, ending up with 2 cpus trying to store under the same ring buffer, which is specifically not allowed. This patch prevents the problem, by allowing only events with the same current cpu to receive the event. NOTE: this requires the use of (per-task-)per-cpu buffers for this feature to work; perf-record does this. Signed-off-by: Jiri Olsa [peterz: small edits to Changelog] Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Andrew Vagin Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Fixes: e6dab5ffab59 ("perf/trace: Add ability to set a target task for events") Link: http://lkml.kernel.org/r/20180923161343.GB15054@krava Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin --- kernel/events/core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/events/core.c b/kernel/events/core.c index ee1c07c0b8331..991af683ef9e8 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -8058,6 +8058,8 @@ void perf_tp_event(u16 event_type, u64 count, void *record, int entry_size, goto unlock; list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { + if (event->cpu != smp_processor_id()) + continue; if (event->attr.type != PERF_TYPE_TRACEPOINT) continue; if (event->attr.config != entry->type) -- GitLab From 40568f21f235aefb8755650f2a09797b3d297ec1 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Fri, 21 Sep 2018 07:07:06 -0700 Subject: [PATCH 1119/2269] perf/x86/intel/uncore: Fix PCI BDF address of M3UPI on SKX [ Upstream commit 9d92cfeaf5215158d26d2991be7f7ff865cb98f3 ] The counters on M3UPI Link 0 and Link 3 don't count properly, and writing 0 to these counters may causes system crash on some machines. The PCI BDF addresses of the M3UPI in the current code are incorrect. The correct addresses should be: D18:F1 0x204D D18:F2 0x204E D18:F5 0x204D Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Fixes: cd34cd97b7b4 ("perf/x86/intel/uncore: Add Skylake server uncore support") Link: http://lkml.kernel.org/r/1537538826-55489-1-git-send-email-kan.liang@linux.intel.com Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin --- arch/x86/events/intel/uncore_snbep.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index 2dae3f585c015..a68aba8a482fe 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -3807,16 +3807,16 @@ static const struct pci_device_id skx_uncore_pci_ids[] = { .driver_data = UNCORE_PCI_DEV_FULL_DATA(21, 5, SKX_PCI_UNCORE_M2PCIE, 3), }, { /* M3UPI0 Link 0 */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x204C), - .driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 0, SKX_PCI_UNCORE_M3UPI, 0), + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x204D), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 1, SKX_PCI_UNCORE_M3UPI, 0), }, { /* M3UPI0 Link 1 */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x204D), - .driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 1, SKX_PCI_UNCORE_M3UPI, 1), + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x204E), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 2, SKX_PCI_UNCORE_M3UPI, 1), }, { /* M3UPI1 Link 2 */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x204C), - .driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 4, SKX_PCI_UNCORE_M3UPI, 2), + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x204D), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 5, SKX_PCI_UNCORE_M3UPI, 2), }, { /* end: all zeroes */ } }; -- GitLab From 784f8395895f7b7c56b3c1195c7ce0f0376085dc Mon Sep 17 00:00:00 2001 From: "Natarajan, Janakarajan" Date: Thu, 27 Sep 2018 15:51:55 +0000 Subject: [PATCH 1120/2269] perf/x86/amd/uncore: Set ThreadMask and SliceMask for L3 Cache perf events [ Upstream commit d7cbbe49a9304520181fb8c9272d1327deec8453 ] In Family 17h, some L3 Cache Performance events require the ThreadMask and SliceMask to be set. For other events, these fields do not affect the count either way. Set ThreadMask and SliceMask to 0xFF and 0xF respectively. Signed-off-by: Janakarajan Natarajan Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Borislav Petkov Cc: H . Peter Anvin Cc: Jiri Olsa Cc: Linus Torvalds Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Suravee Cc: Thomas Gleixner Cc: Vince Weaver Link: http://lkml.kernel.org/r/Message-ID: Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin --- arch/x86/events/amd/uncore.c | 10 ++++++++++ arch/x86/include/asm/perf_event.h | 8 ++++++++ 2 files changed, 18 insertions(+) diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c index f5cbbba992833..4e1d7483b78c6 100644 --- a/arch/x86/events/amd/uncore.c +++ b/arch/x86/events/amd/uncore.c @@ -35,6 +35,7 @@ static int num_counters_llc; static int num_counters_nb; +static bool l3_mask; static HLIST_HEAD(uncore_unused_list); @@ -208,6 +209,13 @@ static int amd_uncore_event_init(struct perf_event *event) hwc->config = event->attr.config & AMD64_RAW_EVENT_MASK_NB; hwc->idx = -1; + /* + * SliceMask and ThreadMask need to be set for certain L3 events in + * Family 17h. For other events, the two fields do not affect the count. + */ + if (l3_mask) + hwc->config |= (AMD64_L3_SLICE_MASK | AMD64_L3_THREAD_MASK); + if (event->cpu < 0) return -EINVAL; @@ -542,6 +550,7 @@ static int __init amd_uncore_init(void) amd_llc_pmu.name = "amd_l3"; format_attr_event_df.show = &event_show_df; format_attr_event_l3.show = &event_show_l3; + l3_mask = true; } else { num_counters_nb = NUM_COUNTERS_NB; num_counters_llc = NUM_COUNTERS_L2; @@ -549,6 +558,7 @@ static int __init amd_uncore_init(void) amd_llc_pmu.name = "amd_l2"; format_attr_event_df = format_attr_event; format_attr_event_l3 = format_attr_event; + l3_mask = false; } amd_nb_pmu.attr_groups = amd_uncore_attr_groups_df; diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 12f54082f4c8e..78241b736f2a0 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -46,6 +46,14 @@ #define INTEL_ARCH_EVENT_MASK \ (ARCH_PERFMON_EVENTSEL_UMASK | ARCH_PERFMON_EVENTSEL_EVENT) +#define AMD64_L3_SLICE_SHIFT 48 +#define AMD64_L3_SLICE_MASK \ + ((0xFULL) << AMD64_L3_SLICE_SHIFT) + +#define AMD64_L3_THREAD_SHIFT 56 +#define AMD64_L3_THREAD_MASK \ + ((0xFFULL) << AMD64_L3_THREAD_SHIFT) + #define X86_RAW_EVENT_MASK \ (ARCH_PERFMON_EVENTSEL_EVENT | \ ARCH_PERFMON_EVENTSEL_UMASK | \ -- GitLab From 1837dbb25cc517d96d51e4c9e1e8bffd80e9b48c Mon Sep 17 00:00:00 2001 From: Rickard x Andersson Date: Tue, 2 Oct 2018 14:49:32 +0200 Subject: [PATCH 1121/2269] net: fec: fix rare tx timeout [ Upstream commit 657ade07df72847f591ccdb36bd9b91ed0edbac3 ] During certain heavy network loads TX could time out with TX ring dump. TX is sometimes never restarted after reaching "tx_stop_threshold" because function "fec_enet_tx_queue" only tests the first queue. In addition the TX timeout callback function failed to recover because it also operated only on the first queue. Signed-off-by: Rickard x Andersson Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/freescale/fec_main.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index eb2ea231c7ca7..8bfa6ef826a9f 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -1155,7 +1155,7 @@ static void fec_enet_timeout_work(struct work_struct *work) napi_disable(&fep->napi); netif_tx_lock_bh(ndev); fec_restart(ndev); - netif_wake_queue(ndev); + netif_tx_wake_all_queues(ndev); netif_tx_unlock_bh(ndev); napi_enable(&fep->napi); } @@ -1270,7 +1270,7 @@ skb_done: /* Since we have freed up a buffer, the ring is no longer full */ - if (netif_queue_stopped(ndev)) { + if (netif_tx_queue_stopped(nq)) { entries_free = fec_enet_get_free_txdesc_num(txq); if (entries_free >= txq->tx_wake_threshold) netif_tx_wake_queue(nq); @@ -1747,7 +1747,7 @@ static void fec_enet_adjust_link(struct net_device *ndev) napi_disable(&fep->napi); netif_tx_lock_bh(ndev); fec_restart(ndev); - netif_wake_queue(ndev); + netif_tx_wake_all_queues(ndev); netif_tx_unlock_bh(ndev); napi_enable(&fep->napi); } @@ -2249,7 +2249,7 @@ static int fec_enet_set_pauseparam(struct net_device *ndev, napi_disable(&fep->napi); netif_tx_lock_bh(ndev); fec_restart(ndev); - netif_wake_queue(ndev); + netif_tx_wake_all_queues(ndev); netif_tx_unlock_bh(ndev); napi_enable(&fep->napi); } -- GitLab From 3095f0c98c6a2a3e279c801955efdd73ff54c76e Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Tue, 2 Oct 2018 14:23:45 +0100 Subject: [PATCH 1122/2269] declance: Fix continuation with the adapter identification message [ Upstream commit fe3a83af6a50199bf250fa331e94216912f79395 ] Fix a commit 4bcc595ccd80 ("printk: reinstate KERN_CONT for printing continuation lines") regression with the `declance' driver, which caused the adapter identification message to be split between two lines, e.g.: declance.c: v0.011 by Linux MIPS DECstation task force tc6: PMAD-AA , addr = 08:00:2b:1b:2a:6a, irq = 14 tc6: registered as eth0. Address that properly, by printing identification with a single call, making the messages now look like: declance.c: v0.011 by Linux MIPS DECstation task force tc6: PMAD-AA, addr = 08:00:2b:1b:2a:6a, irq = 14 tc6: registered as eth0. Signed-off-by: Maciej W. Rozycki Fixes: 4bcc595ccd80 ("printk: reinstate KERN_CONT for printing continuation lines") Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/amd/declance.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/amd/declance.c b/drivers/net/ethernet/amd/declance.c index 82cc813850330..c7cde58feaf7a 100644 --- a/drivers/net/ethernet/amd/declance.c +++ b/drivers/net/ethernet/amd/declance.c @@ -1029,6 +1029,7 @@ static int dec_lance_probe(struct device *bdev, const int type) int i, ret; unsigned long esar_base; unsigned char *esar; + const char *desc; if (dec_lance_debug && version_printed++ == 0) printk(version); @@ -1214,19 +1215,20 @@ static int dec_lance_probe(struct device *bdev, const int type) */ switch (type) { case ASIC_LANCE: - printk("%s: IOASIC onboard LANCE", name); + desc = "IOASIC onboard LANCE"; break; case PMAD_LANCE: - printk("%s: PMAD-AA", name); + desc = "PMAD-AA"; break; case PMAX_LANCE: - printk("%s: PMAX onboard LANCE", name); + desc = "PMAX onboard LANCE"; break; } for (i = 0; i < 6; i++) dev->dev_addr[i] = esar[i * 4]; - printk(", addr = %pM, irq = %d\n", dev->dev_addr, dev->irq); + printk("%s: %s, addr = %pM, irq = %d\n", + name, desc, dev->dev_addr, dev->irq); dev->netdev_ops = &lance_netdev_ops; dev->watchdog_timeo = 5*HZ; -- GitLab From a0ab962b674dd64a7bce9de8b5a108f4832d6139 Mon Sep 17 00:00:00 2001 From: Sean Tranchetti Date: Tue, 2 Oct 2018 18:52:01 -0600 Subject: [PATCH 1123/2269] net: qualcomm: rmnet: Skip processing loopback packets [ Upstream commit a07f388e2cde2be74b263f85df6f672fea0305a1 ] RMNET RX handler was processing invalid packets that were originally sent on the real device and were looped back via dev_loopback_xmit(). This was detected using syzkaller. Fixes: ceed73a2cf4a ("drivers: net: ethernet: qualcomm: rmnet: Initial implementation") Signed-off-by: Sean Tranchetti Signed-off-by: Subash Abhinov Kasiviswanathan Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c index 929fb8d96ec0c..8d979fef5fc72 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c @@ -205,6 +205,9 @@ rx_handler_result_t rmnet_rx_handler(struct sk_buff **pskb) if (!skb) return RX_HANDLER_CONSUMED; + if (skb->pkt_type == PACKET_LOOPBACK) + return RX_HANDLER_PASS; + dev = skb->dev; port = rmnet_get_port(dev); -- GitLab From 45894023bee9e2733083891650b03eaa2ce794a1 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Tue, 2 Oct 2018 14:48:49 -0700 Subject: [PATCH 1124/2269] locking/ww_mutex: Fix runtime warning in the WW mutex selftest [ Upstream commit e4a02ed2aaf447fa849e3254bfdb3b9b01e1e520 ] If CONFIG_WW_MUTEX_SELFTEST=y is enabled, booting an image in an arm64 virtual machine results in the following traceback if 8 CPUs are enabled: DEBUG_LOCKS_WARN_ON(__owner_task(owner) != current) WARNING: CPU: 2 PID: 537 at kernel/locking/mutex.c:1033 __mutex_unlock_slowpath+0x1a8/0x2e0 ... Call trace: __mutex_unlock_slowpath() ww_mutex_unlock() test_cycle_work() process_one_work() worker_thread() kthread() ret_from_fork() If requesting b_mutex fails with -EDEADLK, the error variable is reassigned to the return value from calling ww_mutex_lock on a_mutex again. If this call fails, a_mutex is not locked. It is, however, unconditionally unlocked subsequently, causing the reported warning. Fix the problem by using two error variables. With this change, the selftest still fails as follows: cyclic deadlock not resolved, ret[7/8] = -35 However, the traceback is gone. Signed-off-by: Guenter Roeck Cc: Chris Wilson Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Will Deacon Fixes: d1b42b800e5d0 ("locking/ww_mutex: Add kselftests for resolving ww_mutex cyclic deadlocks") Link: http://lkml.kernel.org/r/1538516929-9734-1-git-send-email-linux@roeck-us.net Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin --- kernel/locking/test-ww_mutex.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/kernel/locking/test-ww_mutex.c b/kernel/locking/test-ww_mutex.c index 0e4cd64ad2c01..654977862b06b 100644 --- a/kernel/locking/test-ww_mutex.c +++ b/kernel/locking/test-ww_mutex.c @@ -260,7 +260,7 @@ static void test_cycle_work(struct work_struct *work) { struct test_cycle *cycle = container_of(work, typeof(*cycle), work); struct ww_acquire_ctx ctx; - int err; + int err, erra = 0; ww_acquire_init(&ctx, &ww_class); ww_mutex_lock(&cycle->a_mutex, &ctx); @@ -270,17 +270,19 @@ static void test_cycle_work(struct work_struct *work) err = ww_mutex_lock(cycle->b_mutex, &ctx); if (err == -EDEADLK) { + err = 0; ww_mutex_unlock(&cycle->a_mutex); ww_mutex_lock_slow(cycle->b_mutex, &ctx); - err = ww_mutex_lock(&cycle->a_mutex, &ctx); + erra = ww_mutex_lock(&cycle->a_mutex, &ctx); } if (!err) ww_mutex_unlock(cycle->b_mutex); - ww_mutex_unlock(&cycle->a_mutex); + if (!erra) + ww_mutex_unlock(&cycle->a_mutex); ww_acquire_fini(&ctx); - cycle->result = err; + cycle->result = err ?: erra; } static int __test_cycle(unsigned int nthreads) -- GitLab From 049b662eeaddf34fab850cc4023d8cc1a7709ef6 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Wed, 3 Oct 2018 15:20:58 +0200 Subject: [PATCH 1125/2269] be2net: don't flip hw_features when VXLANs are added/deleted [ Upstream commit 2d52527e80c2dc0c5f43f50adf183781262ec565 ] the be2net implementation of .ndo_tunnel_{add,del}() changes the value of NETIF_F_GSO_UDP_TUNNEL bit in 'features' and 'hw_features', but it forgets to call netdev_features_change(). Moreover, ethtool setting for that bit can potentially be reverted after a tunnel is added or removed. GSO already does software segmentation when 'hw_enc_features' is 0, even if VXLAN offload is turned on. In addition, commit 096de2f83ebc ("benet: stricter vxlan offloading check in be_features_check") avoids hardware segmentation of non-VXLAN tunneled packets, or VXLAN packets having wrong destination port. So, it's safe to avoid flipping the above feature on addition/deletion of VXLAN tunnels. Fixes: 630f4b70567f ("be2net: Export tunnel offloads only when a VxLAN tunnel is created") Signed-off-by: Davide Caratti Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/emulex/benet/be_main.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 7e2b70c2bba30..39f399741647f 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -3900,8 +3900,6 @@ static int be_enable_vxlan_offloads(struct be_adapter *adapter) netdev->hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_GSO_UDP_TUNNEL; - netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL; - netdev->features |= NETIF_F_GSO_UDP_TUNNEL; dev_info(dev, "Enabled VxLAN offloads for UDP port %d\n", be16_to_cpu(port)); @@ -3923,8 +3921,6 @@ static void be_disable_vxlan_offloads(struct be_adapter *adapter) adapter->vxlan_port = 0; netdev->hw_enc_features = 0; - netdev->hw_features &= ~(NETIF_F_GSO_UDP_TUNNEL); - netdev->features &= ~(NETIF_F_GSO_UDP_TUNNEL); } static void be_calculate_vf_res(struct be_adapter *adapter, u16 num_vfs, @@ -5215,6 +5211,7 @@ static void be_netdev_init(struct net_device *netdev) struct be_adapter *adapter = netdev_priv(netdev); netdev->hw_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 | + NETIF_F_GSO_UDP_TUNNEL | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM | NETIF_F_HW_VLAN_CTAG_TX; if ((be_if_cap_flags(adapter) & BE_IF_FLAGS_RSS)) -- GitLab From ec4e9618d1fdc980b7018c698cccbc1bbcd6e3fc Mon Sep 17 00:00:00 2001 From: Wenwen Wang Date: Fri, 5 Oct 2018 08:48:27 -0500 Subject: [PATCH 1126/2269] net: cxgb3_main: fix a missing-check bug [ Upstream commit 2c05d88818ab6571816b93edce4d53703870d7ae ] In cxgb_extension_ioctl(), the command of the ioctl is firstly copied from the user-space buffer 'useraddr' to 'cmd' and checked through the switch statement. If the command is not as expected, an error code EOPNOTSUPP is returned. In the following execution, i.e., the cases of the switch statement, the whole buffer of 'useraddr' is copied again to a specific data structure, according to what kind of command is requested. However, after the second copy, there is no re-check on the newly-copied command. Given that the buffer 'useraddr' is in the user space, a malicious user can race to change the command between the two copies. By doing so, the attacker can supply malicious data to the kernel and cause undefined behavior. This patch adds a re-check in each case of the switch statement if there is a second copy in that case, to re-check whether the command obtained in the second copy is the same as the one in the first copy. If not, an error code EINVAL is returned. Signed-off-by: Wenwen Wang Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c index bf291e90cdb0f..79053d2ce7a36 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c +++ b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c @@ -2159,6 +2159,8 @@ static int cxgb_extension_ioctl(struct net_device *dev, void __user *useraddr) return -EPERM; if (copy_from_user(&t, useraddr, sizeof(t))) return -EFAULT; + if (t.cmd != CHELSIO_SET_QSET_PARAMS) + return -EINVAL; if (t.qset_idx >= SGE_QSETS) return -EINVAL; if (!in_range(t.intr_lat, 0, M_NEWTIMER) || @@ -2258,6 +2260,9 @@ static int cxgb_extension_ioctl(struct net_device *dev, void __user *useraddr) if (copy_from_user(&t, useraddr, sizeof(t))) return -EFAULT; + if (t.cmd != CHELSIO_GET_QSET_PARAMS) + return -EINVAL; + /* Display qsets for all ports when offload enabled */ if (test_bit(OFFLOAD_DEVMAP_BIT, &adapter->open_device_map)) { q1 = 0; @@ -2303,6 +2308,8 @@ static int cxgb_extension_ioctl(struct net_device *dev, void __user *useraddr) return -EBUSY; if (copy_from_user(&edata, useraddr, sizeof(edata))) return -EFAULT; + if (edata.cmd != CHELSIO_SET_QSET_NUM) + return -EINVAL; if (edata.val < 1 || (edata.val > 1 && !(adapter->flags & USING_MSIX))) return -EINVAL; @@ -2343,6 +2350,8 @@ static int cxgb_extension_ioctl(struct net_device *dev, void __user *useraddr) return -EPERM; if (copy_from_user(&t, useraddr, sizeof(t))) return -EFAULT; + if (t.cmd != CHELSIO_LOAD_FW) + return -EINVAL; /* Check t.len sanity ? */ fw_data = memdup_user(useraddr + sizeof(t), t.len); if (IS_ERR(fw_data)) @@ -2366,6 +2375,8 @@ static int cxgb_extension_ioctl(struct net_device *dev, void __user *useraddr) return -EBUSY; if (copy_from_user(&m, useraddr, sizeof(m))) return -EFAULT; + if (m.cmd != CHELSIO_SETMTUTAB) + return -EINVAL; if (m.nmtus != NMTUS) return -EINVAL; if (m.mtus[0] < 81) /* accommodate SACK */ @@ -2407,6 +2418,8 @@ static int cxgb_extension_ioctl(struct net_device *dev, void __user *useraddr) return -EBUSY; if (copy_from_user(&m, useraddr, sizeof(m))) return -EFAULT; + if (m.cmd != CHELSIO_SET_PM) + return -EINVAL; if (!is_power_of_2(m.rx_pg_sz) || !is_power_of_2(m.tx_pg_sz)) return -EINVAL; /* not power of 2 */ @@ -2440,6 +2453,8 @@ static int cxgb_extension_ioctl(struct net_device *dev, void __user *useraddr) return -EIO; /* need the memory controllers */ if (copy_from_user(&t, useraddr, sizeof(t))) return -EFAULT; + if (t.cmd != CHELSIO_GET_MEM) + return -EINVAL; if ((t.addr & 7) || (t.len & 7)) return -EINVAL; if (t.mem_id == MEM_CM) @@ -2492,6 +2507,8 @@ static int cxgb_extension_ioctl(struct net_device *dev, void __user *useraddr) return -EAGAIN; if (copy_from_user(&t, useraddr, sizeof(t))) return -EFAULT; + if (t.cmd != CHELSIO_SET_TRACE_FILTER) + return -EINVAL; tp = (const struct trace_params *)&t.sip; if (t.config_tx) -- GitLab From d0539c56391d449603f7ff2bcddf38995df49749 Mon Sep 17 00:00:00 2001 From: Wenwen Wang Date: Fri, 5 Oct 2018 10:59:36 -0500 Subject: [PATCH 1127/2269] yam: fix a missing-check bug [ Upstream commit 0781168e23a2fc8dceb989f11fc5b39b3ccacc35 ] In yam_ioctl(), the concrete ioctl command is firstly copied from the user-space buffer 'ifr->ifr_data' to 'ioctl_cmd' and checked through the following switch statement. If the command is not as expected, an error code EINVAL is returned. In the following execution the buffer 'ifr->ifr_data' is copied again in the cases of the switch statement to specific data structures according to what kind of ioctl command is requested. However, after the second copy, no re-check is enforced on the newly-copied command. Given that the buffer 'ifr->ifr_data' is in the user space, a malicious user can race to change the command between the two copies. This way, the attacker can inject inconsistent data and cause undefined behavior. This patch adds a re-check in each case of the switch statement if there is a second copy in that case, to re-check whether the command obtained in the second copy is the same as the one in the first copy. If not, an error code EINVAL will be returned. Signed-off-by: Wenwen Wang Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/hamradio/yam.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/hamradio/yam.c b/drivers/net/hamradio/yam.c index 7a7c5224a3368..16a6e11939122 100644 --- a/drivers/net/hamradio/yam.c +++ b/drivers/net/hamradio/yam.c @@ -980,6 +980,8 @@ static int yam_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) sizeof(struct yamdrv_ioctl_mcs)); if (IS_ERR(ym)) return PTR_ERR(ym); + if (ym->cmd != SIOCYAMSMCS) + return -EINVAL; if (ym->bitrate > YAM_MAXBITRATE) { kfree(ym); return -EINVAL; @@ -995,6 +997,8 @@ static int yam_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) if (copy_from_user(&yi, ifr->ifr_data, sizeof(struct yamdrv_ioctl_cfg))) return -EFAULT; + if (yi.cmd != SIOCYAMSCFG) + return -EINVAL; if ((yi.cfg.mask & YAM_IOBASE) && netif_running(dev)) return -EINVAL; /* Cannot change this parameter when up */ if ((yi.cfg.mask & YAM_IRQ) && netif_running(dev)) -- GitLab From f83f38fcf17fffd875f1f7f20213aaf4dfdbe5d0 Mon Sep 17 00:00:00 2001 From: Larry Chen Date: Fri, 5 Oct 2018 15:51:37 -0700 Subject: [PATCH 1128/2269] ocfs2: fix crash in ocfs2_duplicate_clusters_by_page() [ Upstream commit 69eb7765b9c6902444c89c54e7043242faf981e5 ] ocfs2_duplicate_clusters_by_page() may crash if one of the extent's pages is dirty. When a page has not been written back, it is still in dirty state. If ocfs2_duplicate_clusters_by_page() is called against the dirty page, the crash happens. To fix this bug, we can just unlock the page and wait until the page until its not dirty. The following is the backtrace: kernel BUG at /root/code/ocfs2/refcounttree.c:2961! [exception RIP: ocfs2_duplicate_clusters_by_page+822] __ocfs2_move_extent+0x80/0x450 [ocfs2] ? __ocfs2_claim_clusters+0x130/0x250 [ocfs2] ocfs2_defrag_extent+0x5b8/0x5e0 [ocfs2] __ocfs2_move_extents_range+0x2a4/0x470 [ocfs2] ocfs2_move_extents+0x180/0x3b0 [ocfs2] ? ocfs2_wait_for_recovery+0x13/0x70 [ocfs2] ocfs2_ioctl_move_extents+0x133/0x2d0 [ocfs2] ocfs2_ioctl+0x253/0x640 [ocfs2] do_vfs_ioctl+0x90/0x5f0 SyS_ioctl+0x74/0x80 do_syscall_64+0x74/0x140 entry_SYSCALL_64_after_hwframe+0x3d/0xa2 Once we find the page is dirty, we do not wait until it's clean, rather we use write_one_page() to write it back Link: http://lkml.kernel.org/r/20180829074740.9438-1-lchen@suse.com [lchen@suse.com: update comments] Link: http://lkml.kernel.org/r/20180830075041.14879-1-lchen@suse.com [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Larry Chen Acked-by: Changwei Ge Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Joseph Qi Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- fs/ocfs2/refcounttree.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index 1b1283f07941b..824f407df1db1 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c @@ -2946,6 +2946,7 @@ int ocfs2_duplicate_clusters_by_page(handle_t *handle, if (map_end & (PAGE_SIZE - 1)) to = map_end & (PAGE_SIZE - 1); +retry: page = find_or_create_page(mapping, page_index, GFP_NOFS); if (!page) { ret = -ENOMEM; @@ -2954,11 +2955,18 @@ int ocfs2_duplicate_clusters_by_page(handle_t *handle, } /* - * In case PAGE_SIZE <= CLUSTER_SIZE, This page - * can't be dirtied before we CoW it out. + * In case PAGE_SIZE <= CLUSTER_SIZE, we do not expect a dirty + * page, so write it back. */ - if (PAGE_SIZE <= OCFS2_SB(sb)->s_clustersize) - BUG_ON(PageDirty(page)); + if (PAGE_SIZE <= OCFS2_SB(sb)->s_clustersize) { + if (PageDirty(page)) { + /* + * write_on_page will unlock the page on return + */ + ret = write_one_page(page); + goto retry; + } + } if (!PageUptodate(page)) { ret = block_read_full_page(page, ocfs2_get_block); -- GitLab From d1efab095c20ee5051d3273123d950da4e88361e Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Sun, 29 Oct 2017 10:46:39 +0200 Subject: [PATCH 1129/2269] iwlwifi: mvm: check for short GI only for OFDM [ Upstream commit 4c59ff5a9a9c54cc26c807dc2fa6933f7e9fa4ef ] This bit will be used in CCK to indicate short preamble. Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho Signed-off-by: Sasha Levin --- drivers/net/wireless/intel/iwlwifi/mvm/rx.c | 3 ++- drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c | 4 +++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rx.c b/drivers/net/wireless/intel/iwlwifi/mvm/rx.c index 2d14a58cbdd7e..c73e4be9bde3f 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rx.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rx.c @@ -439,7 +439,8 @@ void iwl_mvm_rx_rx_mpdu(struct iwl_mvm *mvm, struct napi_struct *napi, rx_status->bw = RATE_INFO_BW_160; break; } - if (rate_n_flags & RATE_MCS_SGI_MSK) + if (!(rate_n_flags & RATE_MCS_CCK_MSK) && + rate_n_flags & RATE_MCS_SGI_MSK) rx_status->enc_flags |= RX_ENC_FLAG_SHORT_GI; if (rate_n_flags & RATE_HT_MCS_GF_MSK) rx_status->enc_flags |= RX_ENC_FLAG_HT_GF; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c index e2196dc35dc6c..8ba8c70571fb7 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c @@ -981,7 +981,9 @@ void iwl_mvm_rx_mpdu_mq(struct iwl_mvm *mvm, struct napi_struct *napi, rx_status->bw = RATE_INFO_BW_160; break; } - if (rate_n_flags & RATE_MCS_SGI_MSK) + + if (!(rate_n_flags & RATE_MCS_CCK_MSK) && + rate_n_flags & RATE_MCS_SGI_MSK) rx_status->enc_flags |= RX_ENC_FLAG_SHORT_GI; if (rate_n_flags & RATE_HT_MCS_GF_MSK) rx_status->enc_flags |= RX_ENC_FLAG_HT_GF; -- GitLab From 4187fbd60c3993ca4a26b20db218dd4023df841c Mon Sep 17 00:00:00 2001 From: Liad Kaufman Date: Tue, 31 Oct 2017 15:54:50 +0200 Subject: [PATCH 1130/2269] iwlwifi: dbg: allow wrt collection before ALIVE [ Upstream commit dfd4b08cf44f27587e2053e006e43a1603328006 ] Even if no ALIVE was received, the WRT data can still be collected. Add this. Signed-off-by: Liad Kaufman Signed-off-by: Luca Coelho Signed-off-by: Sasha Levin --- drivers/net/wireless/intel/iwlwifi/fw/dbg.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c index 2fa7ec466275d..8390104172410 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c @@ -950,7 +950,20 @@ int iwl_fw_dbg_collect_desc(struct iwl_fw_runtime *fwrt, if (trigger) delay = msecs_to_jiffies(le32_to_cpu(trigger->stop_delay)); - if (WARN(fwrt->trans->state == IWL_TRANS_NO_FW, + /* + * If the loading of the FW completed successfully, the next step is to + * get the SMEM config data. Thus, if fwrt->smem_cfg.num_lmacs is non + * zero, the FW was already loaded successully. If the state is "NO_FW" + * in such a case - WARN and exit, since FW may be dead. Otherwise, we + * can try to collect the data, since FW might just not be fully + * loaded (no "ALIVE" yet), and the debug data is accessible. + * + * Corner case: got the FW alive but crashed before getting the SMEM + * config. In such a case, due to HW access problems, we might + * collect garbage. + */ + if (WARN((fwrt->trans->state == IWL_TRANS_NO_FW) && + fwrt->smem_cfg.num_lmacs, "Can't collect dbg data when FW isn't alive\n")) return -EIO; -- GitLab From 18a83c0133928c5162c607a45fe541687adc03b1 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Tue, 7 Nov 2017 23:54:17 +0200 Subject: [PATCH 1131/2269] iwlwifi: fix the ALIVE notification layout [ Upstream commit 5cd2d8fc6c6bca979ac5dd8ad0e41153f1f982f9 ] The ucode_major and ucode_minor were swapped. This has no practical consequences since those fields are not used. Same goes for umac_major and umac_minor which were only printed under certain debug flags. Signed-off-by: Emmanuel Grumbach Signed-off-by: Luca Coelho Signed-off-by: Sasha Levin --- drivers/net/wireless/intel/iwlwifi/fw/api/alive.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/alive.h b/drivers/net/wireless/intel/iwlwifi/fw/api/alive.h index 3684a3e180e52..007bfe7656a4a 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/api/alive.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/api/alive.h @@ -95,8 +95,8 @@ enum { #define IWL_ALIVE_FLG_RFKILL BIT(0) struct iwl_lmac_alive { - __le32 ucode_minor; __le32 ucode_major; + __le32 ucode_minor; u8 ver_subtype; u8 ver_type; u8 mac; @@ -113,8 +113,8 @@ struct iwl_lmac_alive { } __packed; /* UCODE_ALIVE_NTFY_API_S_VER_3 */ struct iwl_umac_alive { - __le32 umac_minor; /* UMAC version: minor */ __le32 umac_major; /* UMAC version: major */ + __le32 umac_minor; /* UMAC version: minor */ __le32 error_info_addr; /* SRAM address for UMAC error log */ __le32 dbg_print_buff_addr; } __packed; /* UMAC_ALIVE_DATA_API_S_VER_2 */ -- GitLab From e37e383a4e6610b5a78111f88d2a74f2a533f78c Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sun, 12 Nov 2017 14:54:23 -0800 Subject: [PATCH 1132/2269] tools/testing/nvdimm: unit test clear-error commands [ Upstream commit fb2a1748355161e050e9f49f1ea9a0ae707a148b ] Validate command parsing in acpi_nfit_ctl for the clear error command. This tests for a crash condition introduced by commit 4b27db7e26cd "acpi, nfit: add support for the _LSI, _LSR, and _LSW label methods". Cc: Vishal Verma Signed-off-by: Dan Williams Signed-off-by: Sasha Levin --- tools/testing/nvdimm/test/nfit.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index bef419d4266df..3ad0b3a3317ba 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -1589,6 +1589,7 @@ static int nfit_ctl_test(struct device *dev) unsigned long mask, cmd_size, offset; union { struct nd_cmd_get_config_size cfg_size; + struct nd_cmd_clear_error clear_err; struct nd_cmd_ars_status ars_stat; struct nd_cmd_ars_cap ars_cap; char buf[sizeof(struct nd_cmd_ars_status) @@ -1767,6 +1768,23 @@ static int nfit_ctl_test(struct device *dev) return -EIO; } + /* test clear error */ + cmd_size = sizeof(cmds.clear_err); + cmds.clear_err = (struct nd_cmd_clear_error) { + .length = 512, + .cleared = 512, + }; + rc = setup_result(cmds.buf, cmd_size); + if (rc) + return rc; + rc = acpi_nfit_ctl(&acpi_desc->nd_desc, NULL, ND_CMD_CLEAR_ERROR, + cmds.buf, cmd_size, &cmd_rc); + if (rc < 0 || cmd_rc) { + dev_dbg(dev, "%s: failed at: %d rc: %d cmd_rc: %d\n", + __func__, __LINE__, rc, cmd_rc); + return -EIO; + } + return 0; } -- GitLab From 3b6c6d9cbdea50aed8f6bf22575461d8009e3951 Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Thu, 18 Jan 2018 17:25:30 -0700 Subject: [PATCH 1133/2269] usbip: vhci_hcd: update 'status' file header and format [ Upstream commit 5468099c747240ed97dbb34340fece8ca87be34f ] Commit 2f2d0088eb93 ("usbip: prevent vhci_hcd driver from leaking a socket pointer address") in the /sys/devices/platform/vhci_hcd/status. Fix the header and field alignment to reflect the changes and make it easier to read. Signed-off-by: Shuah Khan Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/usbip/vhci_sysfs.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/usb/usbip/vhci_sysfs.c b/drivers/usb/usbip/vhci_sysfs.c index 4a22a9f06d964..eb78983534579 100644 --- a/drivers/usb/usbip/vhci_sysfs.c +++ b/drivers/usb/usbip/vhci_sysfs.c @@ -34,10 +34,10 @@ /* * output example: - * hub port sta spd dev sockfd local_busid - * hs 0000 004 000 00000000 3 1-2.3 + * hub port sta spd dev sockfd local_busid + * hs 0000 004 000 00000000 000003 1-2.3 * ................................................ - * ss 0008 004 000 00000000 4 2-3.4 + * ss 0008 004 000 00000000 000004 2-3.4 * ................................................ * * Output includes socket fd instead of socket pointer address to avoid @@ -61,13 +61,13 @@ static void port_show_vhci(char **out, int hub, int port, struct vhci_device *vd if (vdev->ud.status == VDEV_ST_USED) { *out += sprintf(*out, "%03u %08x ", vdev->speed, vdev->devid); - *out += sprintf(*out, "%u %s", + *out += sprintf(*out, "%06u %s", vdev->ud.sockfd, dev_name(&vdev->udev->dev)); } else { *out += sprintf(*out, "000 00000000 "); - *out += sprintf(*out, "0000000000000000 0-0"); + *out += sprintf(*out, "000000 0-0"); } *out += sprintf(*out, "\n"); @@ -165,7 +165,7 @@ static ssize_t status_show(struct device *dev, int pdev_nr; out += sprintf(out, - "hub port sta spd dev socket local_busid\n"); + "hub port sta spd dev sockfd local_busid\n"); pdev_nr = status_name_to_id(attr->attr.name); if (pdev_nr < 0) -- GitLab From c750773f3942863a37e7415934a646a0da7b1040 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 28 Nov 2017 14:25:25 +0100 Subject: [PATCH 1134/2269] scsi: aacraid: address UBSAN warning regression [ Upstream commit d18539754d97876503275efc7d00a1901bb0cfad ] As reported by Meelis Roos, my previous patch causes an incorrect calculation of the timeout, through an undefined signed integer overflow: [ 12.228155] UBSAN: Undefined behaviour in drivers/scsi/aacraid/commsup.c:2514:49 [ 12.228229] signed integer overflow: [ 12.228283] 964297611 * 250 cannot be represented in type 'long int' The problem is that doing a multiplication with HZ first and then dividing by USEC_PER_SEC worked correctly for 32-bit microseconds, but not for 32-bit nanoseconds, which would require up to 41 bits. This reworks the calculation to first convert the nanoseconds into jiffies, which should give us the same result as before and not overflow. Unfortunately I did not understand the exact intention of the algorithm, in particular the part where we add half a second, so it's possible that there is still a preexisting problem in this function. I added a comment that this would be handled more nicely using usleep_range(), which generally works better for waking up at a particular time than the current schedule_timeout() based implementation. I did not feel comfortable trying to implement that without being sure what the intent is here though. Fixes: 820f18865912 ("scsi: aacraid: use timespec64 instead of timeval") Tested-by: Meelis Roos Signed-off-by: Arnd Bergmann Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/aacraid/commsup.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/aacraid/commsup.c b/drivers/scsi/aacraid/commsup.c index 998788a967be8..3e38bae6ecde2 100644 --- a/drivers/scsi/aacraid/commsup.c +++ b/drivers/scsi/aacraid/commsup.c @@ -2506,8 +2506,8 @@ int aac_command_thread(void *data) /* Synchronize our watches */ if (((NSEC_PER_SEC - (NSEC_PER_SEC / HZ)) > now.tv_nsec) && (now.tv_nsec > (NSEC_PER_SEC / HZ))) - difference = (((NSEC_PER_SEC - now.tv_nsec) * HZ) - + NSEC_PER_SEC / 2) / NSEC_PER_SEC; + difference = HZ + HZ / 2 - + now.tv_nsec / (NSEC_PER_SEC / HZ); else { if (now.tv_nsec > NSEC_PER_SEC / 2) ++now.tv_sec; @@ -2531,6 +2531,10 @@ int aac_command_thread(void *data) if (kthread_should_stop()) break; + /* + * we probably want usleep_range() here instead of the + * jiffies computation + */ schedule_timeout(difference); if (kthread_should_stop()) -- GitLab From 8c954368de6915c4dcbbceca279758ab654bad37 Mon Sep 17 00:00:00 2001 From: Alex Vesker Date: Thu, 21 Dec 2017 17:38:27 +0200 Subject: [PATCH 1135/2269] IB/ipoib: Fix lockdep issue found on ipoib_ib_dev_heavy_flush [ Upstream commit 1f80bd6a6cc8358b81194e1f5fc16449947396ec ] The locking order of vlan_rwsem (LOCK A) and then rtnl (LOCK B), contradicts other flows such as ipoib_open possibly causing a deadlock. To prevent this deadlock heavy flush is called with RTNL locked and only then tries to acquire vlan_rwsem. This deadlock is possible only when there are child interfaces. [ 140.941758] ====================================================== [ 140.946276] WARNING: possible circular locking dependency detected [ 140.950950] 4.15.0-rc1+ #9 Tainted: G O [ 140.954797] ------------------------------------------------------ [ 140.959424] kworker/u32:1/146 is trying to acquire lock: [ 140.963450] (rtnl_mutex){+.+.}, at: [] __ipoib_ib_dev_flush+0x2da/0x4e0 [ib_ipoib] [ 140.970006] but task is already holding lock: [ 140.975141] (&priv->vlan_rwsem){++++}, at: [] __ipoib_ib_dev_flush+0x51/0x4e0 [ib_ipoib] [ 140.982105] which lock already depends on the new lock. [ 140.990023] the existing dependency chain (in reverse order) is: [ 140.998650] -> #1 (&priv->vlan_rwsem){++++}: [ 141.005276] down_read+0x4d/0xb0 [ 141.009560] ipoib_open+0xad/0x120 [ib_ipoib] [ 141.014400] __dev_open+0xcb/0x140 [ 141.017919] __dev_change_flags+0x1a4/0x1e0 [ 141.022133] dev_change_flags+0x23/0x60 [ 141.025695] devinet_ioctl+0x704/0x7d0 [ 141.029156] sock_do_ioctl+0x20/0x50 [ 141.032526] sock_ioctl+0x221/0x300 [ 141.036079] do_vfs_ioctl+0xa6/0x6d0 [ 141.039656] SyS_ioctl+0x74/0x80 [ 141.042811] entry_SYSCALL_64_fastpath+0x1f/0x96 [ 141.046891] -> #0 (rtnl_mutex){+.+.}: [ 141.051701] lock_acquire+0xd4/0x220 [ 141.055212] __mutex_lock+0x88/0x970 [ 141.058631] __ipoib_ib_dev_flush+0x2da/0x4e0 [ib_ipoib] [ 141.063160] __ipoib_ib_dev_flush+0x71/0x4e0 [ib_ipoib] [ 141.067648] process_one_work+0x1f5/0x610 [ 141.071429] worker_thread+0x4a/0x3f0 [ 141.074890] kthread+0x141/0x180 [ 141.078085] ret_from_fork+0x24/0x30 [ 141.081559] other info that might help us debug this: [ 141.088967] Possible unsafe locking scenario: [ 141.094280] CPU0 CPU1 [ 141.097953] ---- ---- [ 141.101640] lock(&priv->vlan_rwsem); [ 141.104771] lock(rtnl_mutex); [ 141.109207] lock(&priv->vlan_rwsem); [ 141.114032] lock(rtnl_mutex); [ 141.116800] *** DEADLOCK *** Fixes: b4b678b06f6e ("IB/ipoib: Grab rtnl lock on heavy flush when calling ndo_open/stop") Signed-off-by: Alex Vesker Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/ulp/ipoib/ipoib_ib.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index c97384c914a42..d77e8e2ae05f2 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -1203,13 +1203,10 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, ipoib_ib_dev_down(dev); if (level == IPOIB_FLUSH_HEAVY) { - rtnl_lock(); if (test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags)) ipoib_ib_dev_stop(dev); - result = ipoib_ib_dev_open(dev); - rtnl_unlock(); - if (result) + if (ipoib_ib_dev_open(dev)) return; if (netif_queue_stopped(dev)) @@ -1249,7 +1246,9 @@ void ipoib_ib_dev_flush_heavy(struct work_struct *work) struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv, flush_heavy); + rtnl_lock(); __ipoib_ib_dev_flush(priv, IPOIB_FLUSH_HEAVY, 0); + rtnl_unlock(); } void ipoib_ib_dev_cleanup(struct net_device *dev) -- GitLab From 18addd960fe854df99727139040535e72addc6ea Mon Sep 17 00:00:00 2001 From: Doug Ledford Date: Mon, 9 Oct 2017 09:11:32 -0400 Subject: [PATCH 1136/2269] IB/rxe: put the pool on allocation failure [ Upstream commit 6b9f8970cd30929cb6b372fa44fa66da9e59c650 ] If the allocation of elem fails, it is not sufficient to simply check for NULL and return. We need to also put our reference on the pool or else we will leave the pool with a permanent ref count and we will never be able to free it. Fixes: 4831ca9e4a8e ("IB/rxe: check for allocation failure on elem") Suggested-by: Leon Romanovsky Signed-off-by: Doug Ledford Signed-off-by: Sasha Levin --- drivers/infiniband/sw/rxe/rxe_pool.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c index 3b4916680018a..b4a8acc7bb7d6 100644 --- a/drivers/infiniband/sw/rxe/rxe_pool.c +++ b/drivers/infiniband/sw/rxe/rxe_pool.c @@ -394,23 +394,25 @@ void *rxe_alloc(struct rxe_pool *pool) kref_get(&pool->rxe->ref_cnt); - if (atomic_inc_return(&pool->num_elem) > pool->max_elem) { - atomic_dec(&pool->num_elem); - rxe_dev_put(pool->rxe); - rxe_pool_put(pool); - return NULL; - } + if (atomic_inc_return(&pool->num_elem) > pool->max_elem) + goto out_put_pool; elem = kmem_cache_zalloc(pool_cache(pool), (pool->flags & RXE_POOL_ATOMIC) ? GFP_ATOMIC : GFP_KERNEL); if (!elem) - return NULL; + goto out_put_pool; elem->pool = pool; kref_init(&elem->ref_cnt); return elem; + +out_put_pool: + atomic_dec(&pool->num_elem); + rxe_dev_put(pool->rxe); + rxe_pool_put(pool); + return NULL; } void rxe_elem_release(struct kref *kref) -- GitLab From 049fee28bba39dcf1bcec5fa56fa2a8eefd2e853 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Thu, 19 Apr 2018 12:52:06 +0200 Subject: [PATCH 1137/2269] s390/qeth: fix error handling in adapter command callbacks [ Upstream commit 686c97ee29c886ee07d17987d0059874c5c3b5af ] Make sure to check both return code fields before(!) processing the command response. Otherwise we risk operating on invalid data. This matches an earlier fix for SETASSPARMS commands, see commit ad3cbf613329 ("s390/qeth: fix error handling in checksum cmd callback"). Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/s390/net/qeth_core_main.c | 85 ++++++++++++++----------------- 1 file changed, 37 insertions(+), 48 deletions(-) diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 4f2747cd15a62..169dd7127f9ef 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -3001,28 +3001,23 @@ static int qeth_send_startlan(struct qeth_card *card) return rc; } -static int qeth_default_setadapterparms_cb(struct qeth_card *card, - struct qeth_reply *reply, unsigned long data) +static int qeth_setadpparms_inspect_rc(struct qeth_ipa_cmd *cmd) { - struct qeth_ipa_cmd *cmd; - - QETH_CARD_TEXT(card, 4, "defadpcb"); - - cmd = (struct qeth_ipa_cmd *) data; - if (cmd->hdr.return_code == 0) + if (!cmd->hdr.return_code) cmd->hdr.return_code = cmd->data.setadapterparms.hdr.return_code; - return 0; + return cmd->hdr.return_code; } static int qeth_query_setadapterparms_cb(struct qeth_card *card, struct qeth_reply *reply, unsigned long data) { - struct qeth_ipa_cmd *cmd; + struct qeth_ipa_cmd *cmd = (struct qeth_ipa_cmd *) data; QETH_CARD_TEXT(card, 3, "quyadpcb"); + if (qeth_setadpparms_inspect_rc(cmd)) + return 0; - cmd = (struct qeth_ipa_cmd *) data; if (cmd->data.setadapterparms.data.query_cmds_supp.lan_type & 0x7f) { card->info.link_type = cmd->data.setadapterparms.data.query_cmds_supp.lan_type; @@ -3030,7 +3025,7 @@ static int qeth_query_setadapterparms_cb(struct qeth_card *card, } card->options.adp.supported_funcs = cmd->data.setadapterparms.data.query_cmds_supp.supported_cmds; - return qeth_default_setadapterparms_cb(card, reply, (unsigned long)cmd); + return 0; } static struct qeth_cmd_buffer *qeth_get_adapter_cmd(struct qeth_card *card, @@ -3122,22 +3117,20 @@ EXPORT_SYMBOL_GPL(qeth_query_ipassists); static int qeth_query_switch_attributes_cb(struct qeth_card *card, struct qeth_reply *reply, unsigned long data) { - struct qeth_ipa_cmd *cmd; - struct qeth_switch_info *sw_info; + struct qeth_ipa_cmd *cmd = (struct qeth_ipa_cmd *) data; struct qeth_query_switch_attributes *attrs; + struct qeth_switch_info *sw_info; QETH_CARD_TEXT(card, 2, "qswiatcb"); - cmd = (struct qeth_ipa_cmd *) data; - sw_info = (struct qeth_switch_info *)reply->param; - if (cmd->data.setadapterparms.hdr.return_code == 0) { - attrs = &cmd->data.setadapterparms.data.query_switch_attributes; - sw_info->capabilities = attrs->capabilities; - sw_info->settings = attrs->settings; - QETH_CARD_TEXT_(card, 2, "%04x%04x", sw_info->capabilities, - sw_info->settings); - } - qeth_default_setadapterparms_cb(card, reply, (unsigned long) cmd); + if (qeth_setadpparms_inspect_rc(cmd)) + return 0; + sw_info = (struct qeth_switch_info *)reply->param; + attrs = &cmd->data.setadapterparms.data.query_switch_attributes; + sw_info->capabilities = attrs->capabilities; + sw_info->settings = attrs->settings; + QETH_CARD_TEXT_(card, 2, "%04x%04x", sw_info->capabilities, + sw_info->settings); return 0; } @@ -4188,16 +4181,13 @@ EXPORT_SYMBOL_GPL(qeth_do_send_packet); static int qeth_setadp_promisc_mode_cb(struct qeth_card *card, struct qeth_reply *reply, unsigned long data) { - struct qeth_ipa_cmd *cmd; + struct qeth_ipa_cmd *cmd = (struct qeth_ipa_cmd *) data; struct qeth_ipacmd_setadpparms *setparms; QETH_CARD_TEXT(card, 4, "prmadpcb"); - cmd = (struct qeth_ipa_cmd *) data; setparms = &(cmd->data.setadapterparms); - - qeth_default_setadapterparms_cb(card, reply, (unsigned long)cmd); - if (cmd->hdr.return_code) { + if (qeth_setadpparms_inspect_rc(cmd)) { QETH_CARD_TEXT_(card, 4, "prmrc%x", cmd->hdr.return_code); setparms->data.mode = SET_PROMISC_MODE_OFF; } @@ -4267,11 +4257,12 @@ EXPORT_SYMBOL_GPL(qeth_get_stats); static int qeth_setadpparms_change_macaddr_cb(struct qeth_card *card, struct qeth_reply *reply, unsigned long data) { - struct qeth_ipa_cmd *cmd; + struct qeth_ipa_cmd *cmd = (struct qeth_ipa_cmd *) data; QETH_CARD_TEXT(card, 4, "chgmaccb"); + if (qeth_setadpparms_inspect_rc(cmd)) + return 0; - cmd = (struct qeth_ipa_cmd *) data; if (!card->options.layer2 || !(card->info.mac_bits & QETH_LAYER2_MAC_READ)) { memcpy(card->dev->dev_addr, @@ -4279,7 +4270,6 @@ static int qeth_setadpparms_change_macaddr_cb(struct qeth_card *card, OSA_ADDR_LEN); card->info.mac_bits |= QETH_LAYER2_MAC_READ; } - qeth_default_setadapterparms_cb(card, reply, (unsigned long) cmd); return 0; } @@ -4310,13 +4300,15 @@ EXPORT_SYMBOL_GPL(qeth_setadpparms_change_macaddr); static int qeth_setadpparms_set_access_ctrl_cb(struct qeth_card *card, struct qeth_reply *reply, unsigned long data) { - struct qeth_ipa_cmd *cmd; + struct qeth_ipa_cmd *cmd = (struct qeth_ipa_cmd *) data; struct qeth_set_access_ctrl *access_ctrl_req; int fallback = *(int *)reply->param; QETH_CARD_TEXT(card, 4, "setaccb"); + if (cmd->hdr.return_code) + return 0; + qeth_setadpparms_inspect_rc(cmd); - cmd = (struct qeth_ipa_cmd *) data; access_ctrl_req = &cmd->data.setadapterparms.data.set_access_ctrl; QETH_DBF_TEXT_(SETUP, 2, "setaccb"); QETH_DBF_TEXT_(SETUP, 2, "%s", card->gdev->dev.kobj.name); @@ -4389,7 +4381,6 @@ static int qeth_setadpparms_set_access_ctrl_cb(struct qeth_card *card, card->options.isolation = card->options.prev_isolation; break; } - qeth_default_setadapterparms_cb(card, reply, (unsigned long) cmd); return 0; } @@ -4677,14 +4668,15 @@ out: static int qeth_setadpparms_query_oat_cb(struct qeth_card *card, struct qeth_reply *reply, unsigned long data) { - struct qeth_ipa_cmd *cmd; + struct qeth_ipa_cmd *cmd = (struct qeth_ipa_cmd *)data; struct qeth_qoat_priv *priv; char *resdata; int resdatalen; QETH_CARD_TEXT(card, 3, "qoatcb"); + if (qeth_setadpparms_inspect_rc(cmd)) + return 0; - cmd = (struct qeth_ipa_cmd *)data; priv = (struct qeth_qoat_priv *)reply->param; resdatalen = cmd->data.setadapterparms.hdr.cmdlength; resdata = (char *)data + 28; @@ -4778,21 +4770,18 @@ out: static int qeth_query_card_info_cb(struct qeth_card *card, struct qeth_reply *reply, unsigned long data) { - struct qeth_ipa_cmd *cmd; + struct carrier_info *carrier_info = (struct carrier_info *)reply->param; + struct qeth_ipa_cmd *cmd = (struct qeth_ipa_cmd *)data; struct qeth_query_card_info *card_info; - struct carrier_info *carrier_info; QETH_CARD_TEXT(card, 2, "qcrdincb"); - carrier_info = (struct carrier_info *)reply->param; - cmd = (struct qeth_ipa_cmd *)data; - card_info = &cmd->data.setadapterparms.data.card_info; - if (cmd->data.setadapterparms.hdr.return_code == 0) { - carrier_info->card_type = card_info->card_type; - carrier_info->port_mode = card_info->port_mode; - carrier_info->port_speed = card_info->port_speed; - } + if (qeth_setadpparms_inspect_rc(cmd)) + return 0; - qeth_default_setadapterparms_cb(card, reply, (unsigned long) cmd); + card_info = &cmd->data.setadapterparms.data.card_info; + carrier_info->card_type = card_info->card_type; + carrier_info->port_mode = card_info->port_mode; + carrier_info->port_speed = card_info->port_speed; return 0; } -- GitLab From 71a9d1240a30504af52fdd48995a3c9ef2cc467c Mon Sep 17 00:00:00 2001 From: Israel Rukshin Date: Thu, 12 Apr 2018 09:49:11 +0000 Subject: [PATCH 1138/2269] net/mlx5: Fix mlx5_get_vector_affinity function [ Upstream commit 6082d9c9c94a408d7409b5f2e4e42ac9e8b16d0d ] Adding the vector offset when calling to mlx5_vector2eqn() is wrong. This is because mlx5_vector2eqn() checks if EQ index is equal to vector number and the fact that the internal completion vectors that mlx5 allocates don't get an EQ index. The second problem here is that using effective_affinity_mask gives the same CPU for different vectors. This leads to unmapped queues when calling it from blk_mq_rdma_map_queues(). This doesn't happen when using affinity_hint mask. Fixes: 2572cf57d75a ("mlx5: fix mlx5_get_vector_affinity to start from completion vector 0") Fixes: 05e0cc84e00c ("net/mlx5: Fix get vector affinity helper function") Signed-off-by: Israel Rukshin Reviewed-by: Max Gurtovoy Reviewed-by: Sagi Grimberg Signed-off-by: Sasha Levin --- drivers/infiniband/hw/mlx5/main.c | 2 +- include/linux/mlx5/driver.h | 12 +++--------- 2 files changed, 4 insertions(+), 10 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index ab70194a73db3..c3a4f5d92391f 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -3911,7 +3911,7 @@ mlx5_ib_get_vector_affinity(struct ib_device *ibdev, int comp_vector) { struct mlx5_ib_dev *dev = to_mdev(ibdev); - return mlx5_get_vector_affinity(dev->mdev, comp_vector); + return mlx5_get_vector_affinity_hint(dev->mdev, comp_vector); } static void *mlx5_ib_add(struct mlx5_core_dev *mdev) diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index c4d19e77fea8b..5eff332092bcc 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1193,25 +1193,19 @@ enum { }; static inline const struct cpumask * -mlx5_get_vector_affinity(struct mlx5_core_dev *dev, int vector) +mlx5_get_vector_affinity_hint(struct mlx5_core_dev *dev, int vector) { - const struct cpumask *mask; struct irq_desc *desc; unsigned int irq; int eqn; int err; - err = mlx5_vector2eqn(dev, MLX5_EQ_VEC_COMP_BASE + vector, &eqn, &irq); + err = mlx5_vector2eqn(dev, vector, &eqn, &irq); if (err) return NULL; desc = irq_to_desc(irq); -#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK - mask = irq_data_get_effective_affinity_mask(&desc->irq_data); -#else - mask = desc->irq_common_data.affinity; -#endif - return mask; + return desc->affinity_hint; } #endif /* MLX5_DRIVER_H */ -- GitLab From aa2a0c23c9b7e6fb17db4c77cbe0967512f5d274 Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Wed, 14 Feb 2018 12:17:47 +0000 Subject: [PATCH 1139/2269] powerpc/pseries: Add empty update_numa_cpu_lookup_table() for NUMA=n MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit c1e150ceb61e4a585bad156da15c33bfe89f5858 ] When CONFIG_NUMA is not set, the build fails with: arch/powerpc/platforms/pseries/hotplug-cpu.c:335:4: error: déclaration implicite de la fonction « update_numa_cpu_lookup_table » So we have to add update_numa_cpu_lookup_table() as an empty function when CONFIG_NUMA is not set. Fixes: 1d9a090783be ("powerpc/numa: Invalidate numa_cpu_lookup_table on cpu remove") Signed-off-by: Corentin Labbe Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin --- arch/powerpc/include/asm/topology.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h index d5f2ee882f748..66c72b356ac0e 100644 --- a/arch/powerpc/include/asm/topology.h +++ b/arch/powerpc/include/asm/topology.h @@ -81,6 +81,9 @@ static inline int numa_update_cpu_topology(bool cpus_locked) { return 0; } + +static inline void update_numa_cpu_lookup_table(unsigned int cpu, int node) {} + #endif /* CONFIG_NUMA */ #if defined(CONFIG_NUMA) && defined(CONFIG_PPC_SPLPAR) -- GitLab From 4fbd26a1484c5819208b87e54d35515fd678a140 Mon Sep 17 00:00:00 2001 From: Milan Broz Date: Tue, 13 Feb 2018 14:50:50 +0100 Subject: [PATCH 1140/2269] dm integrity: fail early if required HMAC key is not available [ Upstream commit e16b4f99f0f79682a7efe191a8ce694d87ca9fc8 ] Since crypto API commit 9fa68f62004 ("crypto: hash - prevent using keyed hashes without setting key") dm-integrity cannot use keyed algorithms without the key being set. The dm-integrity recognizes this too late (during use of HMAC), so it allows creation and formatting of superblock, but the device is in fact unusable. Fix it by detecting the key requirement in integrity table constructor. Signed-off-by: Milan Broz Signed-off-by: Mike Snitzer Signed-off-by: Sasha Levin --- drivers/md/dm-integrity.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 898286ed47a10..b10e4c5641ea3 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -2547,6 +2547,9 @@ static int get_mac(struct crypto_shash **hash, struct alg_spec *a, char **error, *error = error_key; return r; } + } else if (crypto_shash_get_flags(*hash) & CRYPTO_TFM_NEED_KEY) { + *error = error_key; + return -ENOKEY; } } -- GitLab From 8d9d7f018464129de99ba5c63e00866a3a88750c Mon Sep 17 00:00:00 2001 From: Kevin Hao Date: Tue, 20 Mar 2018 09:44:53 +0800 Subject: [PATCH 1141/2269] net: phy: realtek: Use the dummy stubs for MMD register access for rtl8211b [ Upstream commit 0231b1a074c672f8c00da00a57144072890d816b ] The Ethernet on mpc8315erdb is broken since commit b6b5e8a69118 ("gianfar: Disable EEE autoneg by default"). The reason is that even though the rtl8211b doesn't support the MMD extended registers access, it does return some random values if we trying to access the MMD register via indirect method. This makes it seem that the EEE is supported by this phy device. And the subsequent writing to the MMD registers does cause the phy malfunction. So use the dummy stubs for the MMD register access to fix this issue. Fixes: b6b5e8a69118 ("gianfar: Disable EEE autoneg by default") Signed-off-by: Kevin Hao Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/phy/realtek.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/phy/realtek.c b/drivers/net/phy/realtek.c index 9cbe645e3d89c..7d38af5ed4b52 100644 --- a/drivers/net/phy/realtek.c +++ b/drivers/net/phy/realtek.c @@ -138,6 +138,8 @@ static struct phy_driver realtek_drvs[] = { .read_status = &genphy_read_status, .ack_interrupt = &rtl821x_ack_interrupt, .config_intr = &rtl8211b_config_intr, + .read_mmd = &genphy_read_mmd_unsupported, + .write_mmd = &genphy_write_mmd_unsupported, }, { .phy_id = 0x001cc914, .name = "RTL8211DN Gigabit Ethernet", -- GitLab From 8b882dbba71768ff1d9a70e85488a9b5210f7b8f Mon Sep 17 00:00:00 2001 From: Kevin Hao Date: Tue, 20 Mar 2018 09:44:52 +0800 Subject: [PATCH 1142/2269] net: phy: Add general dummy stubs for MMD register access [ Upstream commit 5df7af85ecd88e8b5f1f31d6456c3cf38a8bbdda ] For some phy devices, even though they don't support the MMD extended register access, it does have some side effect if we are trying to read/write the MMD registers via indirect method. So introduce general dummy stubs for MMD register access which these devices can use to avoid such side effect. Fixes: b6b5e8a69118 ("gianfar: Disable EEE autoneg by default") Signed-off-by: Kevin Hao Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/phy/phy_device.c | 17 +++++++++++++++++ include/linux/phy.h | 4 ++++ 2 files changed, 21 insertions(+) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index a174d05a97522..fe76e2c4022af 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -1641,6 +1641,23 @@ int genphy_config_init(struct phy_device *phydev) } EXPORT_SYMBOL(genphy_config_init); +/* This is used for the phy device which doesn't support the MMD extended + * register access, but it does have side effect when we are trying to access + * the MMD register via indirect method. + */ +int genphy_read_mmd_unsupported(struct phy_device *phdev, int devad, u16 regnum) +{ + return -EOPNOTSUPP; +} +EXPORT_SYMBOL(genphy_read_mmd_unsupported); + +int genphy_write_mmd_unsupported(struct phy_device *phdev, int devnum, + u16 regnum, u16 val) +{ + return -EOPNOTSUPP; +} +EXPORT_SYMBOL(genphy_write_mmd_unsupported); + int genphy_suspend(struct phy_device *phydev) { int value; diff --git a/include/linux/phy.h b/include/linux/phy.h index dca9e926b88f9..efc04c2d92c93 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -879,6 +879,10 @@ static inline int genphy_no_soft_reset(struct phy_device *phydev) { return 0; } +int genphy_read_mmd_unsupported(struct phy_device *phdev, int devad, + u16 regnum); +int genphy_write_mmd_unsupported(struct phy_device *phdev, int devnum, + u16 regnum, u16 val); /* Clause 45 PHY */ int genphy_c45_restart_aneg(struct phy_device *phydev); -- GitLab From 651f11a83af35ea3c57d53dcb8a4a6640bb3fb5d Mon Sep 17 00:00:00 2001 From: Shay Agroskin Date: Wed, 27 Jun 2018 15:43:07 +0300 Subject: [PATCH 1143/2269] net/mlx5e: Refine ets validation function [ Upstream commit e279d634f3d57452eb106a0c0e99a6add3fba1a6 ] Removed an error message received when configuring ETS total bandwidth to be zero. Our hardware doesn't support such configuration, so we shall reject it in the driver. Nevertheless, we removed the error message in order to eliminate error messages caused by old userspace tools who try to pass such configuration. Fixes: ff0891915cd7 ("net/mlx5e: Fix ETS BW check") Signed-off-by: Shay Agroskin Reviewed-by: Huy Nguyen Reviewed-by: Eran Ben Elisha Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- .../net/ethernet/mellanox/mlx5/core/en_dcbnl.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c index 9d64d0759ee97..a5dd99aaf3212 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c @@ -257,7 +257,8 @@ int mlx5e_dcbnl_ieee_setets_core(struct mlx5e_priv *priv, struct ieee_ets *ets) } static int mlx5e_dbcnl_validate_ets(struct net_device *netdev, - struct ieee_ets *ets) + struct ieee_ets *ets, + bool zero_sum_allowed) { bool have_ets_tc = false; int bw_sum = 0; @@ -282,8 +283,9 @@ static int mlx5e_dbcnl_validate_ets(struct net_device *netdev, } if (have_ets_tc && bw_sum != 100) { - netdev_err(netdev, - "Failed to validate ETS: BW sum is illegal\n"); + if (bw_sum || (!bw_sum && !zero_sum_allowed)) + netdev_err(netdev, + "Failed to validate ETS: BW sum is illegal\n"); return -EINVAL; } return 0; @@ -298,7 +300,7 @@ static int mlx5e_dcbnl_ieee_setets(struct net_device *netdev, if (!MLX5_CAP_GEN(priv->mdev, ets)) return -EOPNOTSUPP; - err = mlx5e_dbcnl_validate_ets(netdev, ets); + err = mlx5e_dbcnl_validate_ets(netdev, ets, false); if (err) return err; @@ -477,12 +479,9 @@ static u8 mlx5e_dcbnl_setall(struct net_device *netdev) ets.prio_tc[i] = cee_cfg->prio_to_pg_map[i]; } - err = mlx5e_dbcnl_validate_ets(netdev, &ets); - if (err) { - netdev_err(netdev, - "%s, Failed to validate ETS: %d\n", __func__, err); + err = mlx5e_dbcnl_validate_ets(netdev, &ets, true); + if (err) goto out; - } err = mlx5e_dcbnl_ieee_setets_core(priv, &ets); if (err) { -- GitLab From a972222a6a3004df0ba6295d9ea14617dff0964a Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Tue, 20 Mar 2018 21:05:48 +0000 Subject: [PATCH 1144/2269] scsi: qla2xxx: Avoid double completion of abort command [ Upstream commit 3a9910d7b686546dcc9986e790af17e148f1c888 ] qla2x00_tmf_sp_done() now deletes the timer that will run qla2x00_tmf_iocb_timeout(), but doesn't check whether the timer already expired. Check the return value from del_timer() to avoid calling complete() a second time. Fixes: 4440e46d5db7 ("[SCSI] qla2xxx: Add IOCB Abort command asynchronous ...") Fixes: 1514839b3664 ("scsi: qla2xxx: Fix NULL pointer crash due to active ...") Signed-off-by: Ben Hutchings Acked-by: Himanshu Madhani Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/qla2xxx/qla_init.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index 1d42d38f5a45a..0e19f6bc24ffb 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -1365,8 +1365,8 @@ qla24xx_abort_sp_done(void *ptr, int res) srb_t *sp = ptr; struct srb_iocb *abt = &sp->u.iocb_cmd; - del_timer(&sp->u.iocb_cmd.timer); - complete(&abt->u.abt.comp); + if (del_timer(&sp->u.iocb_cmd.timer)) + complete(&abt->u.abt.comp); } int -- GitLab From 6cfb67394a42072981c2884114b3850a3cdd5b2b Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Mon, 19 Mar 2018 22:12:53 +0100 Subject: [PATCH 1145/2269] kbuild: set no-integrated-as before incl. arch Makefile [ Upstream commit 0f0e8de334c54c38818a4a5390a39aa09deff5bf ] In order to make sure compiler flag detection for ARM works correctly the no-integrated-as flags need to be set before including the arch specific Makefile. Fixes: cfe17c9bbe6a ("kbuild: move cc-option and cc-disable-warning after incl. arch Makefile") Signed-off-by: Stefan Agner Signed-off-by: Masahiro Yamada Signed-off-by: Sasha Levin --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 89574ee68d6b4..7f6579d53911c 100644 --- a/Makefile +++ b/Makefile @@ -487,6 +487,8 @@ CLANG_GCC_TC := --gcc-toolchain=$(GCC_TOOLCHAIN) endif KBUILD_CFLAGS += $(CLANG_TARGET) $(CLANG_GCC_TC) KBUILD_AFLAGS += $(CLANG_TARGET) $(CLANG_GCC_TC) +KBUILD_CFLAGS += $(call cc-option, -no-integrated-as) +KBUILD_AFLAGS += $(call cc-option, -no-integrated-as) endif RETPOLINE_CFLAGS_GCC := -mindirect-branch=thunk-extern -mindirect-branch-register @@ -721,8 +723,6 @@ KBUILD_CFLAGS += $(call cc-disable-warning, tautological-compare) # See modpost pattern 2 KBUILD_CFLAGS += $(call cc-option, -mno-global-merge,) KBUILD_CFLAGS += $(call cc-option, -fcatch-undefined-behavior) -KBUILD_CFLAGS += $(call cc-option, -no-integrated-as) -KBUILD_AFLAGS += $(call cc-option, -no-integrated-as) else # These warnings generated too much noise in a regular build. -- GitLab From 579493b9f689910b49c071d0acdec43c4f447d65 Mon Sep 17 00:00:00 2001 From: Noa Osherovich Date: Sun, 25 Feb 2018 13:39:51 +0200 Subject: [PATCH 1146/2269] IB/mlx5: Avoid passing an invalid QP type to firmware [ Upstream commit e7b169f34403becd3c9fd3b6e46614ab788f2187 ] During QP creation, the mlx5 driver translates the QP type to an internal value which is passed on to FW. There was no check to make sure that the translated value is valid, and -EINVAL was coerced into the mailbox command. Current firmware refuses this as an invalid QP type, but future/past firmware may do something else. Fixes: 09a7d9eca1a6c ('{net,IB}/mlx5: QP/XRCD commands via mlx5 ifc') Reviewed-by: Ilya Lesokhin Signed-off-by: Noa Osherovich Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/hw/mlx5/qp.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index ef9ee6c328a1d..dfc1900551673 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1527,6 +1527,7 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, u32 uidx = MLX5_IB_DEFAULT_UIDX; struct mlx5_ib_create_qp ucmd; struct mlx5_ib_qp_base *base; + int mlx5_st; void *qpc; u32 *in; int err; @@ -1535,6 +1536,10 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, spin_lock_init(&qp->sq.lock); spin_lock_init(&qp->rq.lock); + mlx5_st = to_mlx5_st(init_attr->qp_type); + if (mlx5_st < 0) + return -EINVAL; + if (init_attr->rwq_ind_tbl) { if (!udata) return -ENOSYS; @@ -1688,7 +1693,7 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, qpc = MLX5_ADDR_OF(create_qp_in, in, qpc); - MLX5_SET(qpc, qpc, st, to_mlx5_st(init_attr->qp_type)); + MLX5_SET(qpc, qpc, st, mlx5_st); MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED); if (init_attr->qp_type != MLX5_IB_QPT_REG_UMR) -- GitLab From b39ac54215190bc178ae7de799e74d327a3c1a33 Mon Sep 17 00:00:00 2001 From: Marcel Ziswiler Date: Thu, 22 Feb 2018 15:38:25 +0100 Subject: [PATCH 1147/2269] ARM: tegra: Fix ULPI regression on Tegra20 [ Upstream commit 4c9a27a6c66d4427f3cba4019d4ba738fe99fa87 ] Since commit f8f8f1d04494 ("clk: Don't touch hardware when reparenting during registration") ULPI has been broken on Tegra20 leading to the following error message during boot: [ 1.974698] ulpi_phy_power_on: ulpi write failed [ 1.979384] tegra-ehci c5004000.usb: Failed to power on the phy [ 1.985434] tegra-ehci: probe of c5004000.usb failed with error -110 Debugging through the changes and finally also consulting the TRM revealed that rather than the CDEV2 clock off OSC requiring such pin muxing actually the PLL_P_OUT4 clock is in use. It looks like so far it just worked by chance of that one having been enabled which Stephen's commit now changed when reparenting sclk away from pll_p_out4 leaving that one disabled. Fix this by properly assigning the PLL_P_OUT4 clock as the ULPI PHY clock. Signed-off-by: Marcel Ziswiler Reviewed-by: Dmitry Osipenko Reviewed-by: Rob Herring Signed-off-by: Thierry Reding Signed-off-by: Sasha Levin --- arch/arm/boot/dts/tegra20.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/tegra20.dtsi b/arch/arm/boot/dts/tegra20.dtsi index 914f59166a995..2780e68a853bf 100644 --- a/arch/arm/boot/dts/tegra20.dtsi +++ b/arch/arm/boot/dts/tegra20.dtsi @@ -706,7 +706,7 @@ phy_type = "ulpi"; clocks = <&tegra_car TEGRA20_CLK_USB2>, <&tegra_car TEGRA20_CLK_PLL_U>, - <&tegra_car TEGRA20_CLK_CDEV2>; + <&tegra_car TEGRA20_CLK_PLL_P_OUT4>; clock-names = "reg", "pll_u", "ulpi-link"; resets = <&tegra_car 58>, <&tegra_car 22>; reset-names = "usb", "utmi-pads"; -- GitLab From 5c097f55e980d5578bec4e5d99759ead834a6358 Mon Sep 17 00:00:00 2001 From: James Chapman Date: Wed, 3 Jan 2018 22:48:06 +0000 Subject: [PATCH 1148/2269] l2tp: remove configurable payload offset [ Upstream commit 900631ee6a2651dc4fbaecb8ef9fa5f1e3378853 ] If L2TP_ATTR_OFFSET is set to a non-zero value in L2TPv3 tunnels, it results in L2TPv3 packets being transmitted which might not be compliant with the L2TPv3 RFC. This patch has l2tp ignore the offset setting and send all packets with no offset. In more detail: L2TPv2 supports a variable offset from the L2TPv2 header to the payload. The offset value is indicated by an optional field in the L2TP header. Our L2TP implementation already detects the presence of the optional offset and skips that many bytes when handling data received packets. All transmitted packets are always transmitted with no offset. L2TPv3 has no optional offset field in the L2TPv3 packet header. Instead, L2TPv3 defines optional fields in a "Layer-2 Specific Sublayer". At the time when the original L2TP code was written, there was talk at IETF of offset being implemented in a new Layer-2 Specific Sublayer. A L2TP_ATTR_OFFSET netlink attribute was added so that this offset could be configured and the intention was to allow it to be also used to set the tx offset for L2TPv2. However, no L2TPv3 offset was ever specified and the L2TP_ATTR_OFFSET parameter was forgotten about. Setting L2TP_ATTR_OFFSET results in L2TPv3 packets being transmitted with the specified number of bytes padding between L2TPv3 header and payload. This is not compliant with L2TPv3 RFC3931. This change removes the configurable offset altogether while retaining L2TP_ATTR_OFFSET for backwards compatibility. Any L2TP_ATTR_OFFSET value is ignored. Signed-off-by: James Chapman Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/l2tp/l2tp_core.c | 14 ++++---------- net/l2tp/l2tp_core.h | 3 --- net/l2tp/l2tp_debugfs.c | 4 ++-- net/l2tp/l2tp_netlink.c | 3 --- 4 files changed, 6 insertions(+), 18 deletions(-) diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 5c87f1d3e525c..33ea389ee0158 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -808,10 +808,8 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, } } - /* Session data offset is handled differently for L2TPv2 and - * L2TPv3. For L2TPv2, there is an optional 16-bit value in - * the header. For L2TPv3, the offset is negotiated using AVPs - * in the session setup control protocol. + /* Session data offset is defined only for L2TPv2 and is + * indicated by an optional 16-bit value in the header. */ if (tunnel->version == L2TP_HDR_VER_2) { /* If offset bit set, skip it. */ @@ -819,8 +817,7 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, offset = ntohs(*(__be16 *)ptr); ptr += 2 + offset; } - } else - ptr += session->offset; + } offset = ptr - optr; if (!pskb_may_pull(skb, offset)) @@ -1104,8 +1101,6 @@ static int l2tp_build_l2tpv3_header(struct l2tp_session *session, void *buf) } bufp += session->l2specific_len; } - if (session->offset) - bufp += session->offset; return bufp - optr; } @@ -1779,7 +1774,7 @@ void l2tp_session_set_header_len(struct l2tp_session *session, int version) if (session->send_seq) session->hdr_len += 4; } else { - session->hdr_len = 4 + session->cookie_len + session->l2specific_len + session->offset; + session->hdr_len = 4 + session->cookie_len + session->l2specific_len; if (session->tunnel->encap == L2TP_ENCAPTYPE_UDP) session->hdr_len += 4; } @@ -1830,7 +1825,6 @@ struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunn session->recv_seq = cfg->recv_seq; session->lns_mode = cfg->lns_mode; session->reorder_timeout = cfg->reorder_timeout; - session->offset = cfg->offset; session->l2specific_type = cfg->l2specific_type; session->l2specific_len = cfg->l2specific_len; session->cookie_len = cfg->cookie_len; diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index 9e2f1fda1b037..0a58c0754526a 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -59,7 +59,6 @@ struct l2tp_session_cfg { int debug; /* bitmask of debug message * categories */ u16 vlan_id; /* VLAN pseudowire only */ - u16 offset; /* offset to payload */ u16 l2specific_len; /* Layer 2 specific length */ u16 l2specific_type; /* Layer 2 specific type */ u8 cookie[8]; /* optional cookie */ @@ -86,8 +85,6 @@ struct l2tp_session { int cookie_len; u8 peer_cookie[8]; int peer_cookie_len; - u16 offset; /* offset from end of L2TP header - to beginning of data */ u16 l2specific_len; u16 l2specific_type; u16 hdr_len; diff --git a/net/l2tp/l2tp_debugfs.c b/net/l2tp/l2tp_debugfs.c index 53bae54c4d6e8..534cad03b9e96 100644 --- a/net/l2tp/l2tp_debugfs.c +++ b/net/l2tp/l2tp_debugfs.c @@ -180,8 +180,8 @@ static void l2tp_dfs_seq_session_show(struct seq_file *m, void *v) session->lns_mode ? "LNS" : "LAC", session->debug, jiffies_to_msecs(session->reorder_timeout)); - seq_printf(m, " offset %hu l2specific %hu/%hu\n", - session->offset, session->l2specific_type, session->l2specific_len); + seq_printf(m, " offset 0 l2specific %hu/%hu\n", + session->l2specific_type, session->l2specific_len); if (session->cookie_len) { seq_printf(m, " cookie %02x%02x%02x%02x", session->cookie[0], session->cookie[1], diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index c28223d8092b1..001797ce4084a 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -549,9 +549,6 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf } if (tunnel->version > 2) { - if (info->attrs[L2TP_ATTR_OFFSET]) - cfg.offset = nla_get_u16(info->attrs[L2TP_ATTR_OFFSET]); - if (info->attrs[L2TP_ATTR_DATA_SEQ]) cfg.data_seq = nla_get_u8(info->attrs[L2TP_ATTR_DATA_SEQ]); -- GitLab From 8f041940aaf2674dcccd15753d68ff885f90d5e7 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 5 Apr 2018 14:57:11 +0200 Subject: [PATCH 1149/2269] cifs: Use ULL suffix for 64-bit constant MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 3995bbf53bd2047f2720c6fdd4bf38f6d942a0c0 ] On 32-bit (e.g. with m68k-linux-gnu-gcc-4.1): fs/cifs/inode.c: In function ‘simple_hashstr’: fs/cifs/inode.c:713: warning: integer constant is too large for ‘long’ type Fixes: 7ea884c77e5c97f1 ("smb3: Fix root directory when server returns inode number of zero") Signed-off-by: Geert Uytterhoeven Signed-off-by: Steve French Reviewed-by: Aurelien Aptel Signed-off-by: Sasha Levin --- fs/cifs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 2cd0b3053439f..d01cbca84701f 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -712,7 +712,7 @@ cgfi_exit: /* Simple function to return a 64 bit hash of string. Rarely called */ static __u64 simple_hashstr(const char *str) { - const __u64 hash_mult = 1125899906842597L; /* a big enough prime */ + const __u64 hash_mult = 1125899906842597ULL; /* a big enough prime */ __u64 hash = 0; while (*str) -- GitLab From 7d768c84ec311468d7fd61250700ef8b21f8cf6d Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Tue, 20 Mar 2018 09:58:51 -0300 Subject: [PATCH 1150/2269] test_bpf: Fix testing with CONFIG_BPF_JIT_ALWAYS_ON=y on other arches [ Upstream commit 52fda36d63bfc8c8e8ae5eda8eb5ac6f52cd67ed ] Function bpf_fill_maxinsns11 is designed to not be able to be JITed on x86_64. So, it fails when CONFIG_BPF_JIT_ALWAYS_ON=y, and commit 09584b406742 ("bpf: fix selftests/bpf test_kmod.sh failure when CONFIG_BPF_JIT_ALWAYS_ON=y") makes sure that failure is detected on that case. However, it does not fail on other architectures, which have a different JIT compiler design. So, test_bpf has started to fail to load on those. After this fix, test_bpf loads fine on both x86_64 and ppc64el. Fixes: 09584b406742 ("bpf: fix selftests/bpf test_kmod.sh failure when CONFIG_BPF_JIT_ALWAYS_ON=y") Signed-off-by: Thadeu Lima de Souza Cascardo Reviewed-by: Yonghong Song Signed-off-by: Daniel Borkmann Signed-off-by: Sasha Levin --- lib/test_bpf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/test_bpf.c b/lib/test_bpf.c index 64701b4c99001..75ebf2bbc2eeb 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -5427,7 +5427,7 @@ static struct bpf_test tests[] = { { "BPF_MAXINSNS: Jump, gap, jump, ...", { }, -#ifdef CONFIG_BPF_JIT_ALWAYS_ON +#if defined(CONFIG_BPF_JIT_ALWAYS_ON) && defined(CONFIG_X86) CLASSIC | FLAG_NO_DATA | FLAG_EXPECTED_FAIL, #else CLASSIC | FLAG_NO_DATA, -- GitLab From 090246ff5f6c38b0219564d0d46ae52cac22e243 Mon Sep 17 00:00:00 2001 From: KarimAllah Ahmed Date: Wed, 28 Feb 2018 19:06:48 +0100 Subject: [PATCH 1151/2269] KVM: x86: Update the exit_qualification access bits while walking an address MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit ddd6f0e94d3153951580d5b88b9d97c7e26a0e00 ] ... to avoid having a stale value when handling an EPT misconfig for MMIO regions. MMIO regions that are not passed-through to the guest are handled through EPT misconfigs. The first time a certain MMIO page is touched it causes an EPT violation, then KVM marks the EPT entry to cause an EPT misconfig instead. Any subsequent accesses to the entry will generate an EPT misconfig. Things gets slightly complicated with nested guest handling for MMIO regions that are not passed through from L0 (i.e. emulated by L0 user-space). An EPT violation for one of these MMIO regions from L2, exits to L0 hypervisor. L0 would then look at the EPT12 mapping for L1 hypervisor and realize it is not present (or not sufficient to serve the request). Then L0 injects an EPT violation to L1. L1 would then update its EPT mappings. The EXIT_QUALIFICATION value for L1 would come from exit_qualification variable in "struct vcpu". The problem is that this variable is only updated on EPT violation and not on EPT misconfig. So if an EPT violation because of a read happened first, then an EPT misconfig because of a write happened afterwards. The L0 hypervisor will still contain exit_qualification value from the previous read instead of the write and end up injecting an EPT violation to the L1 hypervisor with an out of date EXIT_QUALIFICATION. The EPT violation that is injected from L0 to L1 needs to have the correct EXIT_QUALIFICATION specially for the access bits because the individual access bits for MMIO EPTs are updated only on actual access of this specific type. So for the example above, the L1 hypervisor will keep updating only the read bit in the EPT then resume the L2 guest. The L2 guest would end up causing another exit where the L0 *again* will inject another EPT violation to L1 hypervisor with *again* an out of date exit_qualification which indicates a read and not a write. Then this ping-pong just keeps happening without making any forward progress. The behavior of mapping MMIO regions changed in: commit a340b3e229b24 ("kvm: Map PFN-type memory regions as writable (if possible)") ... where an EPT violation for a read would also fixup the write bits to avoid another EPT violation which by acciddent would fix the bug mentioned above. This commit fixes this situation and ensures that the access bits for the exit_qualifcation is up to date. That ensures that even L1 hypervisor running with a KVM version before the commit mentioned above would still work. ( The description above assumes EPT to be available and used by L1 hypervisor + the L1 hypervisor is passing through the MMIO region to the L2 guest while this MMIO region is emulated by the L0 user-space ). Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Thomas Gleixner Cc: Ingo Molnar Cc: H. Peter Anvin Cc: x86@kernel.org Cc: kvm@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: KarimAllah Ahmed Signed-off-by: Radim Krčmář Signed-off-by: Sasha Levin --- arch/x86/kvm/paging_tmpl.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 5abae72266b77..6288e9d7068e1 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -452,14 +452,21 @@ error: * done by is_rsvd_bits_set() above. * * We set up the value of exit_qualification to inject: - * [2:0] - Derive from [2:0] of real exit_qualification at EPT violation + * [2:0] - Derive from the access bits. The exit_qualification might be + * out of date if it is serving an EPT misconfiguration. * [5:3] - Calculated by the page walk of the guest EPT page tables * [7:8] - Derived from [7:8] of real exit_qualification * * The other bits are set to 0. */ if (!(errcode & PFERR_RSVD_MASK)) { - vcpu->arch.exit_qualification &= 0x187; + vcpu->arch.exit_qualification &= 0x180; + if (write_fault) + vcpu->arch.exit_qualification |= EPT_VIOLATION_ACC_WRITE; + if (user_fault) + vcpu->arch.exit_qualification |= EPT_VIOLATION_ACC_READ; + if (fetch_fault) + vcpu->arch.exit_qualification |= EPT_VIOLATION_ACC_INSTR; vcpu->arch.exit_qualification |= (pte_access & 0x7) << 3; } #endif -- GitLab From c1fdafea627c843099518ebb5ec3fcc9396ae4a8 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 15 Mar 2018 14:18:00 -0700 Subject: [PATCH 1152/2269] sparc64: Fix regression in pmdp_invalidate(). [ Upstream commit cfb61b5e3e09f8b49bc4d685429df75f45127adc ] pmdp_invalidate() was changed to update the pmd atomically (to not lose dirty/access bits) and return the original pmd value. However, in doing so, we lost a lot of the essential work that set_pmd_at() does, namely to update hugepage mapping counts and queuing up the batched TLB flush entry. Thus we were not flushing entries out of the TLB when making such PMD changes. Fix this by abstracting the accounting work of set_pmd_at() out into a separate function, and call it from pmdp_establish(). Fixes: a8e654f01cb7 ("sparc64: update pmdp_invalidate() to return old pmd value") Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- arch/sparc/mm/tlb.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/arch/sparc/mm/tlb.c b/arch/sparc/mm/tlb.c index 847ddffbf38ad..b5cfab7116514 100644 --- a/arch/sparc/mm/tlb.c +++ b/arch/sparc/mm/tlb.c @@ -163,13 +163,10 @@ static void tlb_batch_pmd_scan(struct mm_struct *mm, unsigned long vaddr, pte_unmap(pte); } -void set_pmd_at(struct mm_struct *mm, unsigned long addr, - pmd_t *pmdp, pmd_t pmd) -{ - pmd_t orig = *pmdp; - - *pmdp = pmd; +static void __set_pmd_acct(struct mm_struct *mm, unsigned long addr, + pmd_t orig, pmd_t pmd) +{ if (mm == &init_mm) return; @@ -219,6 +216,15 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr, } } +void set_pmd_at(struct mm_struct *mm, unsigned long addr, + pmd_t *pmdp, pmd_t pmd) +{ + pmd_t orig = *pmdp; + + *pmdp = pmd; + __set_pmd_acct(mm, addr, orig, pmd); +} + static inline pmd_t pmdp_establish(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp, pmd_t pmd) { @@ -227,6 +233,7 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma, do { old = *pmdp; } while (cmpxchg64(&pmdp->pmd, old.pmd, pmd.pmd) != old.pmd); + __set_pmd_acct(vma->vm_mm, address, old, pmd); return old; } -- GitLab From db9d15988afb923b6e559c8aa3ef56d596f74524 Mon Sep 17 00:00:00 2001 From: Nayna Jain Date: Mon, 2 Apr 2018 21:50:06 +0530 Subject: [PATCH 1153/2269] tpm: move the delay_msec increment after sleep in tpm_transmit() [ Upstream commit 92980756979a9c51be0275f395f4e89c42cf199a ] Commit e2fb992d82c6 ("tpm: add retry logic") introduced a new loop to handle the TPM2_RC_RETRY error. The loop retries the command after sleeping for the specified time, which is incremented exponentially in every iteration. Unfortunately, the loop doubles the time before sleeping, causing the initial sleep to be doubled. This patch fixes the initial sleep time. Fixes: commit e2fb992d82c6 ("tpm: add retry logic") Signed-off-by: Nayna Jain Reviewed-by: Mimi Zohar Tested-by: Jarkko Sakkinen Reviewed-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen Signed-off-by: Sasha Levin --- drivers/char/tpm/tpm-interface.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/char/tpm/tpm-interface.c b/drivers/char/tpm/tpm-interface.c index a2070ab86c824..89d5915b1a3fc 100644 --- a/drivers/char/tpm/tpm-interface.c +++ b/drivers/char/tpm/tpm-interface.c @@ -611,12 +611,13 @@ ssize_t tpm_transmit(struct tpm_chip *chip, struct tpm_space *space, rc = be32_to_cpu(header->return_code); if (rc != TPM2_RC_RETRY) break; - delay_msec *= 2; + if (delay_msec > TPM2_DURATION_LONG) { dev_err(&chip->dev, "TPM is in retry loop\n"); break; } tpm_msleep(delay_msec); + delay_msec *= 2; memcpy(buf, save, save_size); } return ret; -- GitLab From 3c0cff34e91e4ea1fdf74df65db98ee455190dfa Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Mon, 23 Apr 2018 15:39:23 -0700 Subject: [PATCH 1154/2269] bpf: sockmap, map_release does not hold refcnt for pinned maps [ Upstream commit ba6b8de423f8d0dee48d6030288ed81c03ddf9f0 ] Relying on map_release hook to decrement the reference counts when a map is removed only works if the map is not being pinned. In the pinned case the ref is decremented immediately and the BPF programs released. After this BPF programs may not be in-use which is not what the user would expect. This patch moves the release logic into bpf_map_put_uref() and brings sockmap in-line with how a similar case is handled in prog array maps. Fixes: 3d9e952697de ("bpf: sockmap, fix leaking maps with attached but not detached progs") Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann Signed-off-by: Sasha Levin --- include/linux/bpf.h | 2 +- kernel/bpf/arraymap.c | 3 ++- kernel/bpf/sockmap.c | 4 ++-- kernel/bpf/syscall.c | 4 ++-- 4 files changed, 7 insertions(+), 6 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 5c5be80ce802c..c9d2a1a3ef111 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -27,6 +27,7 @@ struct bpf_map_ops { void (*map_release)(struct bpf_map *map, struct file *map_file); void (*map_free)(struct bpf_map *map); int (*map_get_next_key)(struct bpf_map *map, void *key, void *next_key); + void (*map_release_uref)(struct bpf_map *map); /* funcs callable from userspace and from eBPF programs */ void *(*map_lookup_elem)(struct bpf_map *map, void *key); @@ -300,7 +301,6 @@ int bpf_stackmap_copy(struct bpf_map *map, void *key, void *value); int bpf_fd_array_map_update_elem(struct bpf_map *map, struct file *map_file, void *key, void *value, u64 map_flags); int bpf_fd_array_map_lookup_elem(struct bpf_map *map, void *key, u32 *value); -void bpf_fd_array_map_clear(struct bpf_map *map); int bpf_fd_htab_map_update_elem(struct bpf_map *map, struct file *map_file, void *key, void *value, u64 map_flags); int bpf_fd_htab_map_lookup_elem(struct bpf_map *map, void *key, u32 *value); diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index f57d0bdf3c9e7..a8f55ea4146b2 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -467,7 +467,7 @@ static u32 prog_fd_array_sys_lookup_elem(void *ptr) } /* decrement refcnt of all bpf_progs that are stored in this map */ -void bpf_fd_array_map_clear(struct bpf_map *map) +static void bpf_fd_array_map_clear(struct bpf_map *map) { struct bpf_array *array = container_of(map, struct bpf_array, map); int i; @@ -485,6 +485,7 @@ const struct bpf_map_ops prog_array_map_ops = { .map_fd_get_ptr = prog_fd_array_get_ptr, .map_fd_put_ptr = prog_fd_array_put_ptr, .map_fd_sys_lookup_elem = prog_fd_array_sys_lookup_elem, + .map_release_uref = bpf_fd_array_map_clear, }; static struct bpf_event_entry *bpf_event_entry_gen(struct file *perf_file, diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c index 20eaddfa691c2..22991e19c01c8 100644 --- a/kernel/bpf/sockmap.c +++ b/kernel/bpf/sockmap.c @@ -875,7 +875,7 @@ static int sock_map_update_elem(struct bpf_map *map, return err; } -static void sock_map_release(struct bpf_map *map, struct file *map_file) +static void sock_map_release(struct bpf_map *map) { struct bpf_stab *stab = container_of(map, struct bpf_stab, map); struct bpf_prog *orig; @@ -895,7 +895,7 @@ const struct bpf_map_ops sock_map_ops = { .map_get_next_key = sock_map_get_next_key, .map_update_elem = sock_map_update_elem, .map_delete_elem = sock_map_delete_elem, - .map_release = sock_map_release, + .map_release_uref = sock_map_release, }; BPF_CALL_4(bpf_sock_map_update, struct bpf_sock_ops_kern *, bpf_sock, diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 4e933219fec6b..ea22d0b6a9f0a 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -214,8 +214,8 @@ static void bpf_map_free_deferred(struct work_struct *work) static void bpf_map_put_uref(struct bpf_map *map) { if (atomic_dec_and_test(&map->usercnt)) { - if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY) - bpf_fd_array_map_clear(map); + if (map->ops->map_release_uref) + map->ops->map_release_uref(map); } } -- GitLab From 9de255666ce3729cdd50e7defb649b82dbb538f6 Mon Sep 17 00:00:00 2001 From: "Winkler, Tomas" Date: Sat, 7 Apr 2018 19:12:36 +0300 Subject: [PATCH 1155/2269] tpm: tpm_crb: relinquish locality on error path. [ Upstream commit 1fbad3028664e114d210dc65d768947a3a553eaa ] In crb_map_io() function, __crb_request_locality() is called prior to crb_cmd_ready(), but if one of the consecutive function fails the flow bails out instead of trying to relinquish locality. This patch adds goto jump to __crb_relinquish_locality() on the error path. Fixes: 888d867df441 (tpm: cmd_ready command can be issued only after granting locality) Signed-off-by: Tomas Winkler Tested-by: Jarkko Sakkinen Reviewed-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen Signed-off-by: Sasha Levin --- drivers/char/tpm/tpm_crb.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/char/tpm/tpm_crb.c b/drivers/char/tpm/tpm_crb.c index 5c7ce5aaaf6fb..b4ad169836e94 100644 --- a/drivers/char/tpm/tpm_crb.c +++ b/drivers/char/tpm/tpm_crb.c @@ -520,8 +520,10 @@ static int crb_map_io(struct acpi_device *device, struct crb_priv *priv, priv->regs_t = crb_map_res(dev, priv, &io_res, buf->control_address, sizeof(struct crb_regs_tail)); - if (IS_ERR(priv->regs_t)) - return PTR_ERR(priv->regs_t); + if (IS_ERR(priv->regs_t)) { + ret = PTR_ERR(priv->regs_t); + goto out_relinquish_locality; + } /* * PTT HW bug w/a: wake up the device to access @@ -529,7 +531,7 @@ static int crb_map_io(struct acpi_device *device, struct crb_priv *priv, */ ret = __crb_cmd_ready(dev, priv); if (ret) - return ret; + goto out_relinquish_locality; pa_high = ioread32(&priv->regs_t->ctrl_cmd_pa_high); pa_low = ioread32(&priv->regs_t->ctrl_cmd_pa_low); @@ -574,6 +576,8 @@ out: __crb_go_idle(dev, priv); +out_relinquish_locality: + __crb_relinquish_locality(dev, priv, 0); return ret; -- GitLab From a0fcefb70022c7b39a873b16e36fca40eb354ec8 Mon Sep 17 00:00:00 2001 From: Ross Lagerwall Date: Thu, 21 Jun 2018 14:00:21 +0100 Subject: [PATCH 1156/2269] xen-netfront: Update features after registering netdev [ Upstream commit 45c8184c1bed1ca8a7f02918552063a00b909bf5 ] Update the features after calling register_netdev() otherwise the device features are not set up correctly and it not possible to change the MTU of the device. After this change, the features reported by ethtool match the device's features before the commit which introduced the issue and it is possible to change the device's MTU. Fixes: f599c64fdf7d ("xen-netfront: Fix race between device setup and open") Reported-by: Liam Shepherd Signed-off-by: Ross Lagerwall Reviewed-by: Juergen Gross Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/xen-netfront.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 1a40fc3517a8f..ca239912c0e6f 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -1964,10 +1964,6 @@ static int xennet_connect(struct net_device *dev) /* talk_to_netback() sets the correct number of queues */ num_queues = dev->real_num_tx_queues; - rtnl_lock(); - netdev_update_features(dev); - rtnl_unlock(); - if (dev->reg_state == NETREG_UNINITIALIZED) { err = register_netdev(dev); if (err) { @@ -1977,6 +1973,10 @@ static int xennet_connect(struct net_device *dev) } } + rtnl_lock(); + netdev_update_features(dev); + rtnl_unlock(); + /* * All public and private state should now be sane. Get * ready to start sending and receiving packets and give the driver -- GitLab From d29f27192a2d88d9255265b258abaeef1ee1726f Mon Sep 17 00:00:00 2001 From: Ross Lagerwall Date: Thu, 21 Jun 2018 14:00:20 +0100 Subject: [PATCH 1157/2269] xen-netfront: Fix mismatched rtnl_unlock [ Upstream commit cb257783c2927b73614b20f915a91ff78aa6f3e8 ] Fixes: f599c64fdf7d ("xen-netfront: Fix race between device setup and open") Reported-by: Ben Hutchings Signed-off-by: Ross Lagerwall Reviewed-by: Juergen Gross Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/xen-netfront.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index ca239912c0e6f..6ea95b316256c 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -1824,7 +1824,7 @@ static int talk_to_netback(struct xenbus_device *dev, err = xen_net_read_mac(dev, info->netdev->dev_addr); if (err) { xenbus_dev_fatal(dev, err, "parsing %s/mac", dev->nodename); - goto out; + goto out_unlocked; } rtnl_lock(); @@ -1939,6 +1939,7 @@ abort_transaction_no_dev_fatal: xennet_destroy_queues(info); out: rtnl_unlock(); +out_unlocked: device_unregister(&dev->dev); return err; } -- GitLab From e6df57b60b86698cd2e3cc00cb773e06e96df604 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 13 Jun 2018 11:19:42 -0600 Subject: [PATCH 1158/2269] IB/usnic: Update with bug fixes from core code [ Upstream commit 43cbd64b1fdc1da89abdad88a022d9e87a98e9c6 ] usnic has a modified version of the core codes' ib_umem_get() and related, and the copy misses many of the bug fixes done over the years: Commit bc3e53f682d9 ("mm: distinguish between mlocked and pinned pages") Commit 87773dd56d54 ("IB: ib_umem_release() should decrement mm->pinned_vm from ib_umem_get") Commit 8494057ab5e4 ("IB/uverbs: Prevent integer overflow in ib_umem_get address arithmetic") Commit 8abaae62f3fd ("IB/core: disallow registering 0-sized memory region") Commit 66578b0b2f69 ("IB/core: don't disallow registering region starting at 0x0") Commit 53376fedb9da ("RDMA/core: not to set page dirty bit if it's already set.") Commit 8e907ed48827 ("IB/umem: Use the correct mm during ib_umem_release") Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/hw/usnic/usnic_ib_verbs.c | 2 +- drivers/infiniband/hw/usnic/usnic_uiom.c | 40 ++++++++++++++------ drivers/infiniband/hw/usnic/usnic_uiom.h | 5 ++- 3 files changed, 33 insertions(+), 14 deletions(-) diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c index e4113ef093159..3c3453d213dcf 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c @@ -642,7 +642,7 @@ int usnic_ib_dereg_mr(struct ib_mr *ibmr) usnic_dbg("va 0x%lx length 0x%zx\n", mr->umem->va, mr->umem->length); - usnic_uiom_reg_release(mr->umem, ibmr->pd->uobject->context->closing); + usnic_uiom_reg_release(mr->umem, ibmr->uobject->context); kfree(mr); return 0; } diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.c b/drivers/infiniband/hw/usnic/usnic_uiom.c index 4381c0a9a8738..9dd39daa602bd 100644 --- a/drivers/infiniband/hw/usnic/usnic_uiom.c +++ b/drivers/infiniband/hw/usnic/usnic_uiom.c @@ -41,6 +41,7 @@ #include #include #include +#include #include "usnic_log.h" #include "usnic_uiom.h" @@ -88,7 +89,7 @@ static void usnic_uiom_put_pages(struct list_head *chunk_list, int dirty) for_each_sg(chunk->page_list, sg, chunk->nents, i) { page = sg_page(sg); pa = sg_phys(sg); - if (dirty) + if (!PageDirty(page) && dirty) set_page_dirty_lock(page); put_page(page); usnic_dbg("pa: %pa\n", &pa); @@ -114,6 +115,16 @@ static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable, dma_addr_t pa; unsigned int gup_flags; + /* + * If the combination of the addr and size requested for this memory + * region causes an integer overflow, return error. + */ + if (((addr + size) < addr) || PAGE_ALIGN(addr + size) < (addr + size)) + return -EINVAL; + + if (!size) + return -EINVAL; + if (!can_do_mlock()) return -EPERM; @@ -127,7 +138,7 @@ static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable, down_write(¤t->mm->mmap_sem); - locked = npages + current->mm->locked_vm; + locked = npages + current->mm->pinned_vm; lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) { @@ -143,7 +154,7 @@ static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable, ret = 0; while (npages) { - ret = get_user_pages(cur_base, + ret = get_user_pages_longterm(cur_base, min_t(unsigned long, npages, PAGE_SIZE / sizeof(struct page *)), gup_flags, page_list, NULL); @@ -186,7 +197,7 @@ out: if (ret < 0) usnic_uiom_put_pages(chunk_list, 0); else - current->mm->locked_vm = locked; + current->mm->pinned_vm = locked; up_write(¤t->mm->mmap_sem); free_page((unsigned long) page_list); @@ -420,18 +431,22 @@ out_free_uiomr: return ERR_PTR(err); } -void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr, int closing) +void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr, + struct ib_ucontext *ucontext) { + struct task_struct *task; struct mm_struct *mm; unsigned long diff; __usnic_uiom_reg_release(uiomr->pd, uiomr, 1); - mm = get_task_mm(current); - if (!mm) { - kfree(uiomr); - return; - } + task = get_pid_task(ucontext->tgid, PIDTYPE_PID); + if (!task) + goto out; + mm = get_task_mm(task); + put_task_struct(task); + if (!mm) + goto out; diff = PAGE_ALIGN(uiomr->length + uiomr->offset) >> PAGE_SHIFT; @@ -443,7 +458,7 @@ void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr, int closing) * up here and not be able to take the mmap_sem. In that case * we defer the vm_locked accounting to the system workqueue. */ - if (closing) { + if (ucontext->closing) { if (!down_write_trylock(&mm->mmap_sem)) { INIT_WORK(&uiomr->work, usnic_uiom_reg_account); uiomr->mm = mm; @@ -455,9 +470,10 @@ void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr, int closing) } else down_write(&mm->mmap_sem); - current->mm->locked_vm -= diff; + mm->pinned_vm -= diff; up_write(&mm->mmap_sem); mmput(mm); +out: kfree(uiomr); } diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.h b/drivers/infiniband/hw/usnic/usnic_uiom.h index 431efe4143f40..8c096acff1235 100644 --- a/drivers/infiniband/hw/usnic/usnic_uiom.h +++ b/drivers/infiniband/hw/usnic/usnic_uiom.h @@ -39,6 +39,8 @@ #include "usnic_uiom_interval_tree.h" +struct ib_ucontext; + #define USNIC_UIOM_READ (1) #define USNIC_UIOM_WRITE (2) @@ -89,7 +91,8 @@ void usnic_uiom_free_dev_list(struct device **devs); struct usnic_uiom_reg *usnic_uiom_reg_get(struct usnic_uiom_pd *pd, unsigned long addr, size_t size, int access, int dmasync); -void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr, int closing); +void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr, + struct ib_ucontext *ucontext); int usnic_uiom_init(char *drv_name); void usnic_uiom_fini(void); #endif /* USNIC_UIOM_H_ */ -- GitLab From f274c57f0479e860f40bad85b046967e3af04f40 Mon Sep 17 00:00:00 2001 From: John Keeping Date: Thu, 1 Mar 2018 10:36:25 +0000 Subject: [PATCH 1159/2269] mmc: dw_mmc-rockchip: correct property names in debug [ Upstream commit e988867fd774d00aeaf5d3c332032bf5b97a4147 ] Following up the device tree fixed in commits e78c637127ee ("ARM: dts: rockchip: Fix DWMMC clocks") and ca9eee95a2de ("arm64: dts: rockchip: Fix DWMMC clocks", 2018-02-15), avoid confusion by using the correct property name in the debug output if clocks are not found. Signed-off-by: John Keeping Reviewed-by: Robin Murphy Reviewed-by: Shawn Lin Signed-off-by: Ulf Hansson Signed-off-by: Sasha Levin --- drivers/mmc/host/dw_mmc-rockchip.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mmc/host/dw_mmc-rockchip.c b/drivers/mmc/host/dw_mmc-rockchip.c index 3392952129355..40d7de2eea121 100644 --- a/drivers/mmc/host/dw_mmc-rockchip.c +++ b/drivers/mmc/host/dw_mmc-rockchip.c @@ -282,11 +282,11 @@ static int dw_mci_rk3288_parse_dt(struct dw_mci *host) priv->drv_clk = devm_clk_get(host->dev, "ciu-drive"); if (IS_ERR(priv->drv_clk)) - dev_dbg(host->dev, "ciu_drv not available\n"); + dev_dbg(host->dev, "ciu-drive not available\n"); priv->sample_clk = devm_clk_get(host->dev, "ciu-sample"); if (IS_ERR(priv->sample_clk)) - dev_dbg(host->dev, "ciu_sample not available\n"); + dev_dbg(host->dev, "ciu-sample not available\n"); host->priv = priv; -- GitLab From f18ed65d70f019aae09437e5b6a317d365571208 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Mon, 20 Aug 2018 15:36:18 -0700 Subject: [PATCH 1160/2269] MIPS: Workaround GCC __builtin_unreachable reordering bug [ Upstream commit 906d441febc0de974b2a6ef848a8f058f3bfada3 ] Some versions of GCC for the MIPS architecture suffer from a bug which can lead to instructions from beyond an unreachable statement being incorrectly reordered into earlier branch delay slots if the unreachable statement is the only content of a case in a switch statement. This can lead to seemingly random behaviour, such as invalid memory accesses from incorrectly reordered loads or stores, and link failures on microMIPS builds. See this potential GCC fix for details: https://gcc.gnu.org/ml/gcc-patches/2015-09/msg00360.html Runtime problems resulting from this bug were initially observed using a maltasmvp_defconfig v4.4 kernel built using GCC 4.9.2 (from a Codescape SDK 2015.06-05 toolchain), with the result being an address exception taken after log messages about the L1 caches (during probe of the L2 cache): Initmem setup node 0 [mem 0x0000000080000000-0x000000009fffffff] VPE topology {2,2} total 4 Primary instruction cache 64kB, VIPT, 4-way, linesize 32 bytes. Primary data cache 64kB, 4-way, PIPT, no aliases, linesize 32 bytes This is early enough that the kernel exception vectors are not in use, so any further output depends upon the bootloader. This is reproducible in QEMU where no further output occurs - ie. the system hangs here. Given the nature of the bug it may potentially be hit with differing symptoms. The bug is known to affect GCC versions as recent as 7.3, and it is unclear whether GCC 8 fixed it or just happens not to encounter the bug in the testcase found at the link above due to differing optimizations. This bug can be worked around by placing a volatile asm statement, which GCC is prevented from reordering past, prior to the __builtin_unreachable call. That was actually done already for other reasons by commit 173a3efd3edb ("bug.h: work around GCC PR82365 in BUG()"), but creates problems for microMIPS builds due to the lack of a .insn directive. The microMIPS ISA allows for interlinking with regular MIPS32 code by repurposing bit 0 of the program counter as an ISA mode bit. To switch modes one changes the value of this bit in the PC. However typical branch instructions encode their offsets as multiples of 2-byte instruction halfwords, which means they cannot change ISA mode - this must be done using either an indirect branch (a jump-register in MIPS terminology) or a dedicated jalx instruction. In order to ensure that regular branches don't attempt to target code in a different ISA which they can't actually switch to, the linker will check that branch targets are code in the same ISA as the branch. Unfortunately our empty asm volatile statements don't qualify as code, and the link for microMIPS builds fails with errors such as: arch/mips/mm/dma-default.s:3265: Error: branch to a symbol in another ISA mode arch/mips/mm/dma-default.s:5027: Error: branch to a symbol in another ISA mode Resolve this by adding a .insn directive within the asm statement which declares that what comes next is code. This may or may not be true, since we don't really know what comes next, but as this code is in an unreachable path anyway that doesn't matter since we won't execute it. We do this in asm/compiler.h & select CONFIG_HAVE_ARCH_COMPILER_H in order to have this included by linux/compiler_types.h after linux/compiler-gcc.h. This will result in asm/compiler.h being included in all C compilations via the -include linux/compiler_types.h argument in c_flags, which should be harmless. Signed-off-by: Paul Burton Fixes: 173a3efd3edb ("bug.h: work around GCC PR82365 in BUG()") Patchwork: https://patchwork.linux-mips.org/patch/20270/ Cc: James Hogan Cc: Ralf Baechle Cc: Arnd Bergmann Cc: linux-mips@linux-mips.org Signed-off-by: Sasha Levin --- arch/mips/Kconfig | 1 + arch/mips/include/asm/compiler.h | 35 ++++++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+) diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index c82457b0e7333..23e3d3e0ee5b5 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -29,6 +29,7 @@ config MIPS select GENERIC_SMP_IDLE_THREAD select GENERIC_TIME_VSYSCALL select HANDLE_DOMAIN_IRQ + select HAVE_ARCH_COMPILER_H select HAVE_ARCH_JUMP_LABEL select HAVE_ARCH_KGDB select HAVE_ARCH_MMAP_RND_BITS if MMU diff --git a/arch/mips/include/asm/compiler.h b/arch/mips/include/asm/compiler.h index e081a265f4227..cc2eb1b06050c 100644 --- a/arch/mips/include/asm/compiler.h +++ b/arch/mips/include/asm/compiler.h @@ -8,6 +8,41 @@ #ifndef _ASM_COMPILER_H #define _ASM_COMPILER_H +/* + * With GCC 4.5 onwards we can use __builtin_unreachable to indicate to the + * compiler that a particular code path will never be hit. This allows it to be + * optimised out of the generated binary. + * + * Unfortunately at least GCC 4.6.3 through 7.3.0 inclusive suffer from a bug + * that can lead to instructions from beyond an unreachable statement being + * incorrectly reordered into earlier delay slots if the unreachable statement + * is the only content of a case in a switch statement. This can lead to + * seemingly random behaviour, such as invalid memory accesses from incorrectly + * reordered loads or stores. See this potential GCC fix for details: + * + * https://gcc.gnu.org/ml/gcc-patches/2015-09/msg00360.html + * + * It is unclear whether GCC 8 onwards suffer from the same issue - nothing + * relevant is mentioned in GCC 8 release notes and nothing obviously relevant + * stands out in GCC commit logs, but these newer GCC versions generate very + * different code for the testcase which doesn't exhibit the bug. + * + * GCC also handles stack allocation suboptimally when calling noreturn + * functions or calling __builtin_unreachable(): + * + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82365 + * + * We work around both of these issues by placing a volatile asm statement, + * which GCC is prevented from reordering past, prior to __builtin_unreachable + * calls. + * + * The .insn statement is required to ensure that any branches to the + * statement, which sadly must be kept due to the asm statement, are known to + * be branches to code and satisfy linker requirements for microMIPS kernels. + */ +#undef barrier_before_unreachable +#define barrier_before_unreachable() asm volatile(".insn") + #if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) #define GCC_IMM_ASM() "n" #define GCC_REG_ACCUM "$0" -- GitLab From b9627a218e2b2a42b812499ccb5d92707482d5d0 Mon Sep 17 00:00:00 2001 From: Phil Elwell Date: Tue, 10 Apr 2018 13:18:25 +0100 Subject: [PATCH 1161/2269] lan78xx: Don't reset the interface on open [ Upstream commit 47b998653fea4ef69e3e89574956386f262bccca ] Commit 92571a1aae40 ("lan78xx: Connect phy early") moves the PHY initialisation into lan78xx_probe, but lan78xx_open subsequently calls lan78xx_reset. As well as forcing a second round of link negotiation, this reset frequently prevents the phy interrupt from being generated (even though the link is up), rendering the interface unusable. Fix this issue by removing the lan78xx_reset call from lan78xx_open. Fixes: 92571a1aae40 ("lan78xx: Connect phy early") Signed-off-by: Phil Elwell Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/usb/lan78xx.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index 00ddcaf09014e..50e2e10a9050d 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -2508,10 +2508,6 @@ static int lan78xx_open(struct net_device *net) if (ret < 0) goto out; - ret = lan78xx_reset(dev); - if (ret < 0) - goto done; - phy_start(net->phydev); netif_dbg(dev, ifup, dev->net, "phy initialised successfully"); -- GitLab From 1bee5f3d9530c5e4153c017190994b9a6b3144b7 Mon Sep 17 00:00:00 2001 From: Govindarajulu Varadarajan Date: Mon, 18 Jun 2018 10:01:05 -0700 Subject: [PATCH 1162/2269] enic: do not overwrite error code [ Upstream commit 56f772279a762984f6e9ebbf24a7c829faba5712 ] In failure path, we overwrite err to what vnic_rq_disable() returns. In case it returns 0, enic_open() returns success in case of error. Reported-by: Ben Hutchings Fixes: e8588e268509 ("enic: enable rq before updating rq descriptors") Signed-off-by: Govindarajulu Varadarajan Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/cisco/enic/enic_main.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c index 2bfaf3e118b1e..03f4fee1bbc99 100644 --- a/drivers/net/ethernet/cisco/enic/enic_main.c +++ b/drivers/net/ethernet/cisco/enic/enic_main.c @@ -1879,7 +1879,7 @@ static int enic_open(struct net_device *netdev) { struct enic *enic = netdev_priv(netdev); unsigned int i; - int err; + int err, ret; err = enic_request_intr(enic); if (err) { @@ -1936,10 +1936,9 @@ static int enic_open(struct net_device *netdev) err_out_free_rq: for (i = 0; i < enic->rq_count; i++) { - err = vnic_rq_disable(&enic->rq[i]); - if (err) - return err; - vnic_rq_clean(&enic->rq[i], enic_free_rq_buf); + ret = vnic_rq_disable(&enic->rq[i]); + if (!ret) + vnic_rq_clean(&enic->rq[i], enic_free_rq_buf); } enic_dev_notify_unset(enic); err_out_free_intr: -- GitLab From 672fdbd5966e8182a0afbc0b178f03bb133cad5a Mon Sep 17 00:00:00 2001 From: Phil Reid Date: Tue, 5 Jun 2018 14:15:10 +0800 Subject: [PATCH 1163/2269] iio: buffer: fix the function signature to match implementation [ Upstream commit 92397a6c38d139d50fabbe9e2dc09b61d53b2377 ] linux/iio/buffer-dma.h was not updated to when length was changed to unsigned int. Fixes: c043ec1ca5ba ("iio:buffer: make length types match kfifo types") Signed-off-by: Phil Reid Signed-off-by: Jonathan Cameron Signed-off-by: Sasha Levin --- include/linux/iio/buffer-dma.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/iio/buffer-dma.h b/include/linux/iio/buffer-dma.h index 767467d886de4..67c75372b6915 100644 --- a/include/linux/iio/buffer-dma.h +++ b/include/linux/iio/buffer-dma.h @@ -141,7 +141,7 @@ int iio_dma_buffer_read(struct iio_buffer *buffer, size_t n, char __user *user_buffer); size_t iio_dma_buffer_data_available(struct iio_buffer *buffer); int iio_dma_buffer_set_bytes_per_datum(struct iio_buffer *buffer, size_t bpd); -int iio_dma_buffer_set_length(struct iio_buffer *buffer, int length); +int iio_dma_buffer_set_length(struct iio_buffer *buffer, unsigned int length); int iio_dma_buffer_request_update(struct iio_buffer *buffer); int iio_dma_buffer_init(struct iio_dma_buffer_queue *queue, -- GitLab From 07e2121f4f2b8f6534a3dc42d67fd32403c6b65e Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Tue, 22 May 2018 16:14:27 +1000 Subject: [PATCH 1164/2269] selftests/powerpc: Add ptrace hw breakpoint test [ Upstream commit 9c2ddfe55c42bf4b9bc336a0650ab78f9222a159 ] This test the ptrace hw breakpoints via PTRACE_SET_DEBUGREG and PPC_PTRACE_SETHWDEBUG. This test was use to find the bugs fixed by these recent commits: 4f7c06e26e powerpc/ptrace: Fix setting 512B aligned breakpoints with PTRACE_SET_DEBUGREG cd6ef7eebf powerpc/ptrace: Fix enforcement of DAWR constraints Signed-off-by: Michael Neuling [mpe: Add SPDX tag, clang format it] Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin --- .../selftests/powerpc/ptrace/.gitignore | 1 + .../testing/selftests/powerpc/ptrace/Makefile | 2 +- .../selftests/powerpc/ptrace/ptrace-hwbreak.c | 342 ++++++++++++++++++ 3 files changed, 344 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c diff --git a/tools/testing/selftests/powerpc/ptrace/.gitignore b/tools/testing/selftests/powerpc/ptrace/.gitignore index 349acfafc95b6..9dcc16ea8179c 100644 --- a/tools/testing/selftests/powerpc/ptrace/.gitignore +++ b/tools/testing/selftests/powerpc/ptrace/.gitignore @@ -8,3 +8,4 @@ ptrace-vsx ptrace-tm-vsx ptrace-tm-spd-vsx ptrace-tm-spr +ptrace-hwbreak diff --git a/tools/testing/selftests/powerpc/ptrace/Makefile b/tools/testing/selftests/powerpc/ptrace/Makefile index 4803052665043..0e2f4601d1a8e 100644 --- a/tools/testing/selftests/powerpc/ptrace/Makefile +++ b/tools/testing/selftests/powerpc/ptrace/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 TEST_PROGS := ptrace-gpr ptrace-tm-gpr ptrace-tm-spd-gpr \ ptrace-tar ptrace-tm-tar ptrace-tm-spd-tar ptrace-vsx ptrace-tm-vsx \ - ptrace-tm-spd-vsx ptrace-tm-spr + ptrace-tm-spd-vsx ptrace-tm-spr ptrace-hwbreak include ../../lib.mk diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c b/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c new file mode 100644 index 0000000000000..3066d310f32b6 --- /dev/null +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c @@ -0,0 +1,342 @@ +// SPDX-License-Identifier: GPL-2.0+ + +/* + * Ptrace test for hw breakpoints + * + * Based on tools/testing/selftests/breakpoints/breakpoint_test.c + * + * This test forks and the parent then traces the child doing various + * types of ptrace enabled breakpoints + * + * Copyright (C) 2018 Michael Neuling, IBM Corporation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "ptrace.h" + +/* Breakpoint access modes */ +enum { + BP_X = 1, + BP_RW = 2, + BP_W = 4, +}; + +static pid_t child_pid; +static struct ppc_debug_info dbginfo; + +static void get_dbginfo(void) +{ + int ret; + + ret = ptrace(PPC_PTRACE_GETHWDBGINFO, child_pid, NULL, &dbginfo); + if (ret) { + perror("Can't get breakpoint info\n"); + exit(-1); + } +} + +static bool hwbreak_present(void) +{ + return (dbginfo.num_data_bps != 0); +} + +static bool dawr_present(void) +{ + return !!(dbginfo.features & PPC_DEBUG_FEATURE_DATA_BP_DAWR); +} + +static void set_breakpoint_addr(void *addr) +{ + int ret; + + ret = ptrace(PTRACE_SET_DEBUGREG, child_pid, 0, addr); + if (ret) { + perror("Can't set breakpoint addr\n"); + exit(-1); + } +} + +static int set_hwbreakpoint_addr(void *addr, int range) +{ + int ret; + + struct ppc_hw_breakpoint info; + + info.version = 1; + info.trigger_type = PPC_BREAKPOINT_TRIGGER_RW; + info.addr_mode = PPC_BREAKPOINT_MODE_EXACT; + if (range > 0) + info.addr_mode = PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE; + info.condition_mode = PPC_BREAKPOINT_CONDITION_NONE; + info.addr = (__u64)addr; + info.addr2 = (__u64)addr + range; + info.condition_value = 0; + + ret = ptrace(PPC_PTRACE_SETHWDEBUG, child_pid, 0, &info); + if (ret < 0) { + perror("Can't set breakpoint\n"); + exit(-1); + } + return ret; +} + +static int del_hwbreakpoint_addr(int watchpoint_handle) +{ + int ret; + + ret = ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, watchpoint_handle); + if (ret < 0) { + perror("Can't delete hw breakpoint\n"); + exit(-1); + } + return ret; +} + +#define DAWR_LENGTH_MAX 512 + +/* Dummy variables to test read/write accesses */ +static unsigned long long + dummy_array[DAWR_LENGTH_MAX / sizeof(unsigned long long)] + __attribute__((aligned(512))); +static unsigned long long *dummy_var = dummy_array; + +static void write_var(int len) +{ + long long *plval; + char *pcval; + short *psval; + int *pival; + + switch (len) { + case 1: + pcval = (char *)dummy_var; + *pcval = 0xff; + break; + case 2: + psval = (short *)dummy_var; + *psval = 0xffff; + break; + case 4: + pival = (int *)dummy_var; + *pival = 0xffffffff; + break; + case 8: + plval = (long long *)dummy_var; + *plval = 0xffffffffffffffffLL; + break; + } +} + +static void read_var(int len) +{ + char cval __attribute__((unused)); + short sval __attribute__((unused)); + int ival __attribute__((unused)); + long long lval __attribute__((unused)); + + switch (len) { + case 1: + cval = *(char *)dummy_var; + break; + case 2: + sval = *(short *)dummy_var; + break; + case 4: + ival = *(int *)dummy_var; + break; + case 8: + lval = *(long long *)dummy_var; + break; + } +} + +/* + * Do the r/w accesses to trigger the breakpoints. And run + * the usual traps. + */ +static void trigger_tests(void) +{ + int len, ret; + + ret = ptrace(PTRACE_TRACEME, 0, NULL, 0); + if (ret) { + perror("Can't be traced?\n"); + return; + } + + /* Wake up father so that it sets up the first test */ + kill(getpid(), SIGUSR1); + + /* Test write watchpoints */ + for (len = 1; len <= sizeof(long); len <<= 1) + write_var(len); + + /* Test read/write watchpoints (on read accesses) */ + for (len = 1; len <= sizeof(long); len <<= 1) + read_var(len); + + /* Test when breakpoint is unset */ + + /* Test write watchpoints */ + for (len = 1; len <= sizeof(long); len <<= 1) + write_var(len); + + /* Test read/write watchpoints (on read accesses) */ + for (len = 1; len <= sizeof(long); len <<= 1) + read_var(len); +} + +static void check_success(const char *msg) +{ + const char *msg2; + int status; + + /* Wait for the child to SIGTRAP */ + wait(&status); + + msg2 = "Failed"; + + if (WIFSTOPPED(status) && WSTOPSIG(status) == SIGTRAP) { + msg2 = "Child process hit the breakpoint"; + } + + printf("%s Result: [%s]\n", msg, msg2); +} + +static void launch_watchpoints(char *buf, int mode, int len, + struct ppc_debug_info *dbginfo, bool dawr) +{ + const char *mode_str; + unsigned long data = (unsigned long)(dummy_var); + int wh, range; + + data &= ~0x7UL; + + if (mode == BP_W) { + data |= (1UL << 1); + mode_str = "write"; + } else { + data |= (1UL << 0); + data |= (1UL << 1); + mode_str = "read"; + } + + /* Set DABR_TRANSLATION bit */ + data |= (1UL << 2); + + /* use PTRACE_SET_DEBUGREG breakpoints */ + set_breakpoint_addr((void *)data); + ptrace(PTRACE_CONT, child_pid, NULL, 0); + sprintf(buf, "Test %s watchpoint with len: %d ", mode_str, len); + check_success(buf); + /* Unregister hw brkpoint */ + set_breakpoint_addr(NULL); + + data = (data & ~7); /* remove dabr control bits */ + + /* use PPC_PTRACE_SETHWDEBUG breakpoint */ + if (!(dbginfo->features & PPC_DEBUG_FEATURE_DATA_BP_RANGE)) + return; /* not supported */ + wh = set_hwbreakpoint_addr((void *)data, 0); + ptrace(PTRACE_CONT, child_pid, NULL, 0); + sprintf(buf, "Test %s watchpoint with len: %d ", mode_str, len); + check_success(buf); + /* Unregister hw brkpoint */ + del_hwbreakpoint_addr(wh); + + /* try a wider range */ + range = 8; + if (dawr) + range = 512 - ((int)data & (DAWR_LENGTH_MAX - 1)); + wh = set_hwbreakpoint_addr((void *)data, range); + ptrace(PTRACE_CONT, child_pid, NULL, 0); + sprintf(buf, "Test %s watchpoint with len: %d ", mode_str, len); + check_success(buf); + /* Unregister hw brkpoint */ + del_hwbreakpoint_addr(wh); +} + +/* Set the breakpoints and check the child successfully trigger them */ +static int launch_tests(bool dawr) +{ + char buf[1024]; + int len, i, status; + + struct ppc_debug_info dbginfo; + + i = ptrace(PPC_PTRACE_GETHWDBGINFO, child_pid, NULL, &dbginfo); + if (i) { + perror("Can't set breakpoint info\n"); + exit(-1); + } + if (!(dbginfo.features & PPC_DEBUG_FEATURE_DATA_BP_RANGE)) + printf("WARNING: Kernel doesn't support PPC_PTRACE_SETHWDEBUG\n"); + + /* Write watchpoint */ + for (len = 1; len <= sizeof(long); len <<= 1) + launch_watchpoints(buf, BP_W, len, &dbginfo, dawr); + + /* Read-Write watchpoint */ + for (len = 1; len <= sizeof(long); len <<= 1) + launch_watchpoints(buf, BP_RW, len, &dbginfo, dawr); + + ptrace(PTRACE_CONT, child_pid, NULL, 0); + + /* + * Now we have unregistered the breakpoint, access by child + * should not cause SIGTRAP. + */ + + wait(&status); + + if (WIFSTOPPED(status) && WSTOPSIG(status) == SIGTRAP) { + printf("FAIL: Child process hit the breakpoint, which is not expected\n"); + ptrace(PTRACE_CONT, child_pid, NULL, 0); + return TEST_FAIL; + } + + if (WIFEXITED(status)) + printf("Child exited normally\n"); + + return TEST_PASS; +} + +static int ptrace_hwbreak(void) +{ + pid_t pid; + int ret; + bool dawr; + + pid = fork(); + if (!pid) { + trigger_tests(); + return 0; + } + + wait(NULL); + + child_pid = pid; + + get_dbginfo(); + SKIP_IF(!hwbreak_present()); + dawr = dawr_present(); + + ret = launch_tests(dawr); + + wait(NULL); + + return ret; +} + +int main(int argc, char **argv, char **envp) +{ + return test_harness(ptrace_hwbreak, "ptrace-hwbreak"); +} -- GitLab From 5aa0d86363b2f89789619907dd6f886989eb1c4a Mon Sep 17 00:00:00 2001 From: Brian King Date: Wed, 14 Mar 2018 17:13:39 -0500 Subject: [PATCH 1165/2269] scsi: ibmvfc: Avoid unnecessary port relogin [ Upstream commit 09dd15e0d9547ca424de4043bcd429bab6f285c8 ] Following an RSCN, ibmvfc will issue an ADISC to determine if the underlying target has changed, comparing the SCSI ID, WWPN, and WWNN to determine how to handle the rport in discovery. However, the comparison of the WWPN and WWNN was performing a memcmp between a big endian field against a CPU endian field, which resulted in the wrong answer on LE systems. This was observed as unexpected errors getting logged at boot time as targets were getting relogins when not needed. Signed-off-by: Brian King Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/ibmvscsi/ibmvfc.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c index b491af31a5f8e..a06b24a61622c 100644 --- a/drivers/scsi/ibmvscsi/ibmvfc.c +++ b/drivers/scsi/ibmvscsi/ibmvfc.c @@ -3580,11 +3580,9 @@ static void ibmvfc_tgt_implicit_logout(struct ibmvfc_target *tgt) static int ibmvfc_adisc_needs_plogi(struct ibmvfc_passthru_mad *mad, struct ibmvfc_target *tgt) { - if (memcmp(&mad->fc_iu.response[2], &tgt->ids.port_name, - sizeof(tgt->ids.port_name))) + if (wwn_to_u64((u8 *)&mad->fc_iu.response[2]) != tgt->ids.port_name) return 1; - if (memcmp(&mad->fc_iu.response[4], &tgt->ids.node_name, - sizeof(tgt->ids.node_name))) + if (wwn_to_u64((u8 *)&mad->fc_iu.response[4]) != tgt->ids.node_name) return 1; if (be32_to_cpu(mad->fc_iu.response[6]) != tgt->scsi_id) return 1; -- GitLab From 3c92c7e96fc447f62d3a7b8f6d1e932388b0b8df Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Wed, 14 Mar 2018 12:15:56 -0400 Subject: [PATCH 1166/2269] scsi: sd: Remember that READ CAPACITY(16) succeeded [ Upstream commit 597d74005ba85e87c256cd732128ebf7faf54247 ] The USB storage glue sets the try_rc_10_first flag in an attempt to avoid wedging poorly implemented legacy USB devices. If the device capacity is too large to be expressed in the provided response buffer field of READ CAPACITY(10), a well-behaved device will set the reported capacity to 0xFFFFFFFF. We will then attempt to issue a READ CAPACITY(16) to obtain the real capacity. Since this part of the discovery logic is not covered by the first_scan flag, a warning will be printed a couple of times times per revalidate attempt if we upgrade from READ CAPACITY(10) to READ CAPACITY(16). Remember that we have successfully issued READ CAPACITY(16) so we can take the fast path on subsequent revalidate attempts. Reported-by: Menion Reviewed-by: Laurence Oberman Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/sd.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 6d3091ff9b928..c7b2845873651 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -2498,6 +2498,8 @@ sd_read_capacity(struct scsi_disk *sdkp, unsigned char *buffer) sector_size = old_sector_size; goto got_data; } + /* Remember that READ CAPACITY(16) succeeded */ + sdp->try_rc_10_first = 0; } } -- GitLab From f72388e3670e870a32854058eff63d0021036457 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Wed, 27 Jun 2018 18:19:55 +0800 Subject: [PATCH 1167/2269] btrfs: quota: Set rescan progress to (u64)-1 if we hit last leaf [ Upstream commit 6f7de19ed3d4d3526ca5eca428009f97cf969c2f ] Commit ff3d27a048d9 ("btrfs: qgroup: Finish rescan when hit the last leaf of extent tree") added a new exit for rescan finish. However after finishing quota rescan, we set fs_info->qgroup_rescan_progress to (u64)-1 before we exit through the original exit path. While we missed that assignment of (u64)-1 in the new exit path. The end result is, the quota status item doesn't have the same value. (-1 vs the last bytenr + 1) Although it doesn't affect quota accounting, it's still better to keep the original behavior. Reported-by: Misono Tomohiro Fixes: ff3d27a048d9 ("btrfs: qgroup: Finish rescan when hit the last leaf of extent tree") Signed-off-by: Qu Wenruo Reviewed-by: Misono Tomohiro Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/qgroup.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 473ad5985aa37..47dec283628dd 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -2603,8 +2603,10 @@ out: } btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem); - if (done && !ret) + if (done && !ret) { ret = 1; + fs_info->qgroup_rescan_progress.objectid = (u64)-1; + } return ret; } -- GitLab From aa3aff5b469b7520932fa8816efb78a0eaf012d4 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Sun, 20 May 2018 20:49:47 -0700 Subject: [PATCH 1168/2269] net: phy: phylink: Don't release NULL GPIO [ Upstream commit 3bcd47726c3b744fd08781795cca905cc59a1382 ] If CONFIG_GPIOLIB is disabled, gpiod_put() becomes a stub that produces a warning, this helped identify that we could be attempting to release a NULL pl->link_gpio GPIO descriptor, so guard against that. Fixes: daab3349ad1a ("net: phy: phylink: Release link GPIO") Signed-off-by: Florian Fainelli Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/phy/phylink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index e4a6ed88b9cf0..79f28b9186c69 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -561,7 +561,7 @@ void phylink_destroy(struct phylink *pl) { if (pl->sfp_bus) sfp_unregister_upstream(pl->sfp_bus); - if (!IS_ERR(pl->link_gpio)) + if (!IS_ERR_OR_NULL(pl->link_gpio)) gpiod_put(pl->link_gpio); cancel_work_sync(&pl->resolve); -- GitLab From 726f05951941cc88e91b25b98921182cfc8d45f7 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 19 Sep 2018 13:35:53 +0300 Subject: [PATCH 1169/2269] x86/paravirt: Fix some warning messages [ Upstream commit 571d0563c8881595f4ab027aef9ed1c55e3e7b7c ] The first argument to WARN_ONCE() is a condition. Fixes: 5800dc5c19f3 ("x86/paravirt: Fix spectre-v2 mitigations for paravirt guests") Signed-off-by: Dan Carpenter Signed-off-by: Thomas Gleixner Reviewed-by: Juergen Gross Cc: Peter Zijlstra Cc: Alok Kataria Cc: "H. Peter Anvin" Cc: virtualization@lists.linux-foundation.org Cc: kernel-janitors@vger.kernel.org Link: https://lkml.kernel.org/r/20180919103553.GD9238@mwanda Signed-off-by: Sasha Levin --- arch/x86/kernel/paravirt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index f3559b84cd753..04da826381c9c 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -90,7 +90,7 @@ unsigned paravirt_patch_call(void *insnbuf, if (len < 5) { #ifdef CONFIG_RETPOLINE - WARN_ONCE("Failing to patch indirect CALL in %ps\n", (void *)addr); + WARN_ONCE(1, "Failing to patch indirect CALL in %ps\n", (void *)addr); #endif return len; /* call too long for patch site */ } @@ -110,7 +110,7 @@ unsigned paravirt_patch_jmp(void *insnbuf, const void *target, if (len < 5) { #ifdef CONFIG_RETPOLINE - WARN_ONCE("Failing to patch indirect JMP in %ps\n", (void *)addr); + WARN_ONCE(1, "Failing to patch indirect JMP in %ps\n", (void *)addr); #endif return len; /* call too long for patch site */ } -- GitLab From cbc38304947ea2e57440bb38b733613e3f003605 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 13 Aug 2018 23:50:41 +0200 Subject: [PATCH 1170/2269] net: stmmac: mark PM functions as __maybe_unused [ Upstream commit 81a8b0799632627b587af31ecd06112397e4ec36 ] The newly added suspend/resume functions cause a build warning when CONFIG_PM is disabled: drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c:324:12: error: 'stmmac_pci_resume' defined but not used [-Werror=unused-function] drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c:306:12: error: 'stmmac_pci_suspend' defined but not used [-Werror=unused-function] Mark them as __maybe_unused so gcc can drop them silently. Fixes: b7d0f08e9129 ("net: stmmac: Fix WoL for PCI-based setups") Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c index 6a393b16a1fca..c54a50dbd5ac2 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c @@ -303,7 +303,7 @@ static void stmmac_pci_remove(struct pci_dev *pdev) pci_disable_device(pdev); } -static int stmmac_pci_suspend(struct device *dev) +static int __maybe_unused stmmac_pci_suspend(struct device *dev) { struct pci_dev *pdev = to_pci_dev(dev); int ret; @@ -321,7 +321,7 @@ static int stmmac_pci_suspend(struct device *dev) return 0; } -static int stmmac_pci_resume(struct device *dev) +static int __maybe_unused stmmac_pci_resume(struct device *dev) { struct pci_dev *pdev = to_pci_dev(dev); int ret; -- GitLab From 48f1b3b56ee4f936c105b550ca4357722d6460da Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 9 Aug 2018 15:47:06 +0900 Subject: [PATCH 1171/2269] kconfig: fix the rule of mainmenu_stmt symbol [ Upstream commit 56869d45e364244a721de34ce9c5dc9ed022779e ] The rule of mainmenu_stmt does not have debug print of zconf_lineno(), but if it had, it would print a wrong line number for the same reason as commit b2d00d7c61c8 ("kconfig: fix line numbers for if-entries in menu tree"). The mainmenu_stmt does not need to eat following empty lines because they are reduced to common_stmt. Signed-off-by: Masahiro Yamada Signed-off-by: Sasha Levin --- scripts/kconfig/zconf.y | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/kconfig/zconf.y b/scripts/kconfig/zconf.y index 126e3f2e1ed74..2b0adeb5fc42e 100644 --- a/scripts/kconfig/zconf.y +++ b/scripts/kconfig/zconf.y @@ -31,7 +31,7 @@ struct symbol *symbol_hash[SYMBOL_HASHSIZE]; static struct menu *current_menu, *current_entry; %} -%expect 31 +%expect 30 %union { @@ -112,7 +112,7 @@ start: mainmenu_stmt stmt_list | no_mainmenu_stmt stmt_list; /* mainmenu entry */ -mainmenu_stmt: T_MAINMENU prompt nl +mainmenu_stmt: T_MAINMENU prompt T_EOL { menu_add_prompt(P_MENU, $2, NULL); }; -- GitLab From 543f1084b67fb8ad62fc7398620e0ecd839594ea Mon Sep 17 00:00:00 2001 From: Daniel Mack Date: Mon, 8 Oct 2018 22:03:57 +0200 Subject: [PATCH 1172/2269] libertas: call into generic suspend code before turning off power [ Upstream commit 4f666675cdff0b986195413215eb062b7da6586f ] When powering down a SDIO connected card during suspend, make sure to call into the generic lbs_suspend() function before pulling the plug. This will make sure the card is successfully deregistered from the system to avoid communication to the card starving out. Fixes: 7444a8092906 ("libertas: fix suspend and resume for SDIO connected cards") Signed-off-by: Daniel Mack Reviewed-by: Ulf Hansson Acked-by: Kalle Valo Signed-off-by: Ulf Hansson Signed-off-by: Sasha Levin --- drivers/net/wireless/marvell/libertas/if_sdio.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/wireless/marvell/libertas/if_sdio.c b/drivers/net/wireless/marvell/libertas/if_sdio.c index 43743c26c071f..39bf85d0ade0e 100644 --- a/drivers/net/wireless/marvell/libertas/if_sdio.c +++ b/drivers/net/wireless/marvell/libertas/if_sdio.c @@ -1317,6 +1317,10 @@ static int if_sdio_suspend(struct device *dev) if (priv->wol_criteria == EHS_REMOVE_WAKEUP) { dev_info(dev, "Suspend without wake params -- powering down card\n"); if (priv->fw_ready) { + ret = lbs_suspend(priv); + if (ret) + return ret; + priv->power_up_on_resume = true; if_sdio_power_off(card); } -- GitLab From b8f4d375cd1ebc1bb5fbb174a0237eff24dcbd68 Mon Sep 17 00:00:00 2001 From: Sandipan Das Date: Thu, 26 Jul 2018 22:47:33 +0530 Subject: [PATCH 1173/2269] perf tests: Fix indexing when invoking subtests [ Upstream commit aa90f9f9554616d5738f7bedb4a8f0e5e14d1bc6 ] Recently, the subtest numbering was changed to start from 1. While it is fine for displaying results, this should not be the case when the subtests are actually invoked. Typically, the subtests are stored in zero-indexed arrays and invoked based on the index passed to the main test function. Since the index now starts from 1, the second subtest in the array (index 1) gets invoked instead of the first (index 0). This applies to all of the following subtests but for the last one, the subtest always fails because it does not meet the boundary condition of the subtest index being lesser than the number of subtests. This can be observed on powerpc64 and x86_64 systems running Fedora 28 as shown below. Before: # perf test "builtin clang support" 55: builtin clang support : 55.1: builtin clang compile C source to IR : Ok 55.2: builtin clang compile C source to ELF object : FAILED! # perf test "LLVM search and compile" 38: LLVM search and compile : 38.1: Basic BPF llvm compile : Ok 38.2: kbuild searching : Ok 38.3: Compile source for BPF prologue generation : Ok 38.4: Compile source for BPF relocation : FAILED! # perf test "BPF filter" 40: BPF filter : 40.1: Basic BPF filtering : Ok 40.2: BPF pinning : Ok 40.3: BPF prologue generation : Ok 40.4: BPF relocation checker : FAILED! After: # perf test "builtin clang support" 55: builtin clang support : 55.1: builtin clang compile C source to IR : Ok 55.2: builtin clang compile C source to ELF object : Ok # perf test "LLVM search and compile" 38: LLVM search and compile : 38.1: Basic BPF llvm compile : Ok 38.2: kbuild searching : Ok 38.3: Compile source for BPF prologue generation : Ok 38.4: Compile source for BPF relocation : Ok # perf test "BPF filter" 40: BPF filter : 40.1: Basic BPF filtering : Ok 40.2: BPF pinning : Ok 40.3: BPF prologue generation : Ok 40.4: BPF relocation checker : Ok Signed-off-by: Sandipan Das Reported-by: Arnaldo Carvalho de Melo Tested-by: Arnaldo Carvalho de Melo Cc: Heiko Carstens Cc: Hendrik Brueckner Cc: Jiri Olsa Cc: Martin Schwidefsky Cc: Naveen N. Rao Cc: Ravi Bangoria Cc: Thomas Richter Fixes: 9ef0112442bd ("perf test: Fix subtest number when showing results") Link: http://lkml.kernel.org/r/20180726171733.33208-1-sandipan@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/tests/builtin-test.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index 5966f1f9b160e..1c9bc3516f8b1 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -375,7 +375,7 @@ static int test_and_print(struct test *t, bool force_skip, int subtest) if (!t->subtest.get_nr) pr_debug("%s:", t->desc); else - pr_debug("%s subtest %d:", t->desc, subtest); + pr_debug("%s subtest %d:", t->desc, subtest + 1); switch (err) { case TEST_OK: @@ -589,7 +589,7 @@ static int __cmd_test(int argc, const char *argv[], struct intlist *skiplist) for (subi = 0; subi < subn; subi++) { pr_info("%2d.%1d: %-*s:", i, subi + 1, subw, t->subtest.get_desc(subi)); - err = test_and_print(t, skip, subi + 1); + err = test_and_print(t, skip, subi); if (err != TEST_OK && t->subtest.skip_if_fail) skip = true; } -- GitLab From 3c3bec81e267a6be198c540ea8d6f80e8f51b433 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Mon, 20 Aug 2018 15:36:17 -0700 Subject: [PATCH 1174/2269] compiler.h: Allow arch-specific asm/compiler.h [ Upstream commit 04f264d3a8b0eb25d378127bd78c3c9a0261c828 ] We have a need to override the definition of barrier_before_unreachable() for MIPS, which means we either need to add architecture-specific code into linux/compiler-gcc.h or we need to allow the architecture to provide a header that can define the macro before the generic definition. The latter seems like the better approach. A straightforward approach to the per-arch header is to make use of asm-generic to provide a default empty header & adjust architectures which don't need anything specific to make use of that by adding the header to generic-y. Unfortunately this doesn't work so well due to commit 28128c61e08e ("kconfig.h: Include compiler types to avoid missed struct attributes") which caused linux/compiler_types.h to be included in the compilation of every C file via the -include linux/kconfig.h flag in c_flags. Because the -include flag is present for all C files we compile, we need the architecture-provided header to be present before any C files are compiled. If any C files can be compiled prior to the asm-generic header wrappers being generated then we hit a build failure due to missing header. Such cases do exist - one pointed out by the kbuild test robot is the compilation of arch/ia64/kernel/nr-irqs.c, which occurs as part of the archprepare target [1]. This leaves us with a few options: 1) Use generic-y & fix any build failures we find by enforcing ordering such that the asm-generic target occurs before any C compilation, such that linux/compiler_types.h can always include the generated asm-generic wrapper which in turn includes the empty asm-generic header. This would rely on us finding all the problematic cases - I don't know for sure that the ia64 issue is the only one. 2) Add an actual empty header to each architecture, so that we don't need the generated asm-generic wrapper. This seems messy. 3) Give up & add #ifdef CONFIG_MIPS or similar to linux/compiler_types.h. This seems messy too. 4) Include the arch header only when it's actually needed, removing the need for the asm-generic wrapper for all other architectures. This patch allows us to use approach 4, by including an asm/compiler.h header from linux/compiler_types.h after the inclusion of the compiler-specific linux/compiler-*.h header(s). We do this conditionally, only when CONFIG_HAVE_ARCH_COMPILER_H is selected, in order to avoid the need for asm-generic wrappers & the associated build ordering issue described above. The asm/compiler.h header is included after the generic linux/compiler-*.h header(s) for consistency with the way linux/compiler-intel.h & linux/compiler-clang.h are included after the linux/compiler-gcc.h header that they override. [1] https://lists.01.org/pipermail/kbuild-all/2018-August/051175.html Signed-off-by: Paul Burton Reviewed-by: Masahiro Yamada Patchwork: https://patchwork.linux-mips.org/patch/20269/ Cc: Arnd Bergmann Cc: James Hogan Cc: Masahiro Yamada Cc: Ralf Baechle Cc: linux-arch@vger.kernel.org Cc: linux-kbuild@vger.kernel.org Cc: linux-mips@linux-mips.org Signed-off-by: Sasha Levin --- arch/Kconfig | 8 ++++++++ include/linux/compiler_types.h | 12 ++++++++++++ 2 files changed, 20 insertions(+) diff --git a/arch/Kconfig b/arch/Kconfig index 40dc31fea90ca..77b3e21c4844e 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -965,4 +965,12 @@ config REFCOUNT_FULL against various use-after-free conditions that can be used in security flaw exploits. +config HAVE_ARCH_COMPILER_H + bool + help + An architecture can select this if it provides an + asm/compiler.h header that should be included after + linux/compiler-*.h in order to override macro definitions that those + headers generally provide. + source "kernel/gcov/Kconfig" diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 6b79a9bba9a76..4be464a076123 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -78,6 +78,18 @@ extern void __chk_io_ptr(const volatile void __iomem *); #include #endif +/* + * Some architectures need to provide custom definitions of macros provided + * by linux/compiler-*.h, and can do so using asm/compiler.h. We include that + * conditionally rather than using an asm-generic wrapper in order to avoid + * build failures if any C compilation, which will include this file via an + * -include argument in c_flags, occurs prior to the asm-generic wrappers being + * generated. + */ +#ifdef CONFIG_HAVE_ARCH_COMPILER_H +#include +#endif + /* * Generic compiler-dependent macros required for kernel * build go below this comment. Actual compiler/compiler version -- GitLab From c005e014fbc6391ed6f22b141865bf826bbc278d Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Wed, 12 Sep 2018 08:23:01 +0200 Subject: [PATCH 1175/2269] ARM: dts: imx53-qsb: disable 1.2GHz OPP [ Upstream commit eea96566c189c77e5272585984eb2729881a2f1d ] The maximum CPU frequency for the i.MX53 QSB is 1GHz, so disable the 1.2GHz OPP. This makes the board work again with configs that have cpufreq enabled like imx_v6_v7_defconfig on which the board stopped working with the addition of cpufreq-dt support. Fixes: 791f416608 ("ARM: dts: imx53: add cpufreq-dt support") Signed-off-by: Sascha Hauer Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- arch/arm/boot/dts/imx53-qsb-common.dtsi | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/arch/arm/boot/dts/imx53-qsb-common.dtsi b/arch/arm/boot/dts/imx53-qsb-common.dtsi index 683dcbe27cbd6..8c11190c52183 100644 --- a/arch/arm/boot/dts/imx53-qsb-common.dtsi +++ b/arch/arm/boot/dts/imx53-qsb-common.dtsi @@ -130,6 +130,17 @@ }; }; +&cpu0 { + /* CPU rated to 1GHz, not 1.2GHz as per the default settings */ + operating-points = < + /* kHz uV */ + 166666 850000 + 400000 900000 + 800000 1050000 + 1000000 1200000 + >; +}; + &esdhc1 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_esdhc1>; -- GitLab From 4fdaadbce403a7cb20ac0c9c3be260d0cf17177d Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 18 Sep 2018 16:08:02 -0300 Subject: [PATCH 1176/2269] perf python: Use -Wno-redundant-decls to build with PYTHON=python3 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 05a2f54679861deb188750ba2a70187000b2c71f ] When building in ClearLinux using 'make PYTHON=python3' with gcc 8.2.1 it fails with: GEN /tmp/build/perf/python/perf.so In file included from /usr/include/python3.7m/Python.h:126, from /git/linux/tools/perf/util/python.c:2: /usr/include/python3.7m/import.h:58:24: error: redundant redeclaration of ‘_PyImport_AddModuleObject’ [-Werror=redundant-decls] PyAPI_FUNC(PyObject *) _PyImport_AddModuleObject(PyObject *, PyObject *); ^~~~~~~~~~~~~~~~~~~~~~~~~ /usr/include/python3.7m/import.h:47:24: note: previous declaration of ‘_PyImport_AddModuleObject’ was here PyAPI_FUNC(PyObject *) _PyImport_AddModuleObject(PyObject *name, ^~~~~~~~~~~~~~~~~~~~~~~~~ cc1: all warnings being treated as errors error: command 'gcc' failed with exit status 1 And indeed there is a redundant declaration in that Python.h file, one with parameter names and the other without, so just add -Wno-error=redundant-decls to the python setup instructions. Now perf builds with gcc in ClearLinux with the following Dockerfile: # docker.io/acmel/linux-perf-tools-build-clearlinux:latest FROM docker.io/clearlinux:latest MAINTAINER Arnaldo Carvalho de Melo RUN swupd update && \ swupd bundle-add sysadmin-basic-dev RUN mkdir -m 777 -p /git /tmp/build/perf /tmp/build/objtool /tmp/build/linux && \ groupadd -r perfbuilder && \ useradd -m -r -g perfbuilder perfbuilder && \ chown -R perfbuilder.perfbuilder /tmp/build/ /git/ USER perfbuilder COPY rx_and_build.sh / ENV EXTRA_MAKE_ARGS=PYTHON=python3 ENTRYPOINT ["/rx_and_build.sh"] Now to figure out why the build fails with clang, that is present in the above container as detected by the rx_and_build.sh script: clang version 6.0.1 (tags/RELEASE_601/final) Target: x86_64-unknown-linux-gnu Thread model: posix InstalledDir: /usr/sbin make: Entering directory '/git/linux/tools/perf' BUILD: Doing 'make -j4' parallel build HOSTCC /tmp/build/perf/fixdep.o HOSTLD /tmp/build/perf/fixdep-in.o LINK /tmp/build/perf/fixdep Auto-detecting system features: ... dwarf: [ OFF ] ... dwarf_getlocations: [ OFF ] ... glibc: [ OFF ] ... gtk2: [ OFF ] ... libaudit: [ OFF ] ... libbfd: [ OFF ] ... libelf: [ OFF ] ... libnuma: [ OFF ] ... numa_num_possible_cpus: [ OFF ] ... libperl: [ OFF ] ... libpython: [ OFF ] ... libslang: [ OFF ] ... libcrypto: [ OFF ] ... libunwind: [ OFF ] ... libdw-dwarf-unwind: [ OFF ] ... zlib: [ OFF ] ... lzma: [ OFF ] ... get_cpuid: [ OFF ] ... bpf: [ OFF ] Makefile.config:331: *** No gnu/libc-version.h found, please install glibc-dev[el]. Stop. make[1]: *** [Makefile.perf:206: sub-make] Error 2 make: *** [Makefile:70: all] Error 2 make: Leaving directory '/git/linux/tools/perf' Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Thiago Macieira Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-c3khb9ac86s00qxzjrueomme@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/util/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py index da4df7fd43a2f..23f1bf1751797 100644 --- a/tools/perf/util/setup.py +++ b/tools/perf/util/setup.py @@ -27,7 +27,7 @@ class install_lib(_install_lib): cflags = getenv('CFLAGS', '').split() # switch off several checks (need to be at the end of cflags list) -cflags += ['-fno-strict-aliasing', '-Wno-write-strings', '-Wno-unused-parameter' ] +cflags += ['-fno-strict-aliasing', '-Wno-write-strings', '-Wno-unused-parameter', '-Wno-redundant-decls' ] if cc != "clang": cflags += ['-Wno-cast-function-type' ] -- GitLab From 9a6d45700adf03085add4ab26bcdca6ee2487a05 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 8 Oct 2018 15:46:01 +0100 Subject: [PATCH 1177/2269] rxrpc: Don't check RXRPC_CALL_TX_LAST after calling rxrpc_rotate_tx_window() [ Upstream commit c479d5f2c2e1ce609da08c075054440d97ddff52 ] We should only call the function to end a call's Tx phase if we rotated the marked-last packet out of the transmission buffer. Make rxrpc_rotate_tx_window() return an indication of whether it just rotated the packet marked as the last out of the transmit buffer, carrying the information out of the locked section in that function. We can then check the return value instead of examining RXRPC_CALL_TX_LAST. Fixes: 70790dbe3f66 ("rxrpc: Pass the last Tx packet marker in the annotation buffer") Signed-off-by: David Howells Signed-off-by: Sasha Levin --- net/rxrpc/input.c | 35 +++++++++++++++++++---------------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 5edb636dbc4d6..3a501bf0fc1a7 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -216,10 +216,11 @@ static void rxrpc_send_ping(struct rxrpc_call *call, struct sk_buff *skb, /* * Apply a hard ACK by advancing the Tx window. */ -static void rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to, +static bool rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to, struct rxrpc_ack_summary *summary) { struct sk_buff *skb, *list = NULL; + bool rot_last = false; int ix; u8 annotation; @@ -243,15 +244,17 @@ static void rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to, skb->next = list; list = skb; - if (annotation & RXRPC_TX_ANNO_LAST) + if (annotation & RXRPC_TX_ANNO_LAST) { set_bit(RXRPC_CALL_TX_LAST, &call->flags); + rot_last = true; + } if ((annotation & RXRPC_TX_ANNO_MASK) != RXRPC_TX_ANNO_ACK) summary->nr_rot_new_acks++; } spin_unlock(&call->lock); - trace_rxrpc_transmit(call, (test_bit(RXRPC_CALL_TX_LAST, &call->flags) ? + trace_rxrpc_transmit(call, (rot_last ? rxrpc_transmit_rotate_last : rxrpc_transmit_rotate)); wake_up(&call->waitq); @@ -262,6 +265,8 @@ static void rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to, skb->next = NULL; rxrpc_free_skb(skb, rxrpc_skb_tx_freed); } + + return rot_last; } /* @@ -332,11 +337,11 @@ static bool rxrpc_receiving_reply(struct rxrpc_call *call) ktime_get_real()); } - if (!test_bit(RXRPC_CALL_TX_LAST, &call->flags)) - rxrpc_rotate_tx_window(call, top, &summary); if (!test_bit(RXRPC_CALL_TX_LAST, &call->flags)) { - rxrpc_proto_abort("TXL", call, top); - return false; + if (!rxrpc_rotate_tx_window(call, top, &summary)) { + rxrpc_proto_abort("TXL", call, top); + return false; + } } if (!rxrpc_end_tx_phase(call, true, "ETD")) return false; @@ -837,8 +842,12 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, if (nr_acks > call->tx_top - hard_ack) return rxrpc_proto_abort("AKN", call, 0); - if (after(hard_ack, call->tx_hard_ack)) - rxrpc_rotate_tx_window(call, hard_ack, &summary); + if (after(hard_ack, call->tx_hard_ack)) { + if (rxrpc_rotate_tx_window(call, hard_ack, &summary)) { + rxrpc_end_tx_phase(call, false, "ETA"); + return; + } + } if (nr_acks > 0) { if (skb_copy_bits(skb, offset, buf.acks, nr_acks) < 0) @@ -847,11 +856,6 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, &summary); } - if (test_bit(RXRPC_CALL_TX_LAST, &call->flags)) { - rxrpc_end_tx_phase(call, false, "ETA"); - return; - } - if (call->rxtx_annotations[call->tx_top & RXRPC_RXTX_BUFF_MASK] & RXRPC_TX_ANNO_LAST && summary.nr_acks == call->tx_top - hard_ack && @@ -873,8 +877,7 @@ static void rxrpc_input_ackall(struct rxrpc_call *call, struct sk_buff *skb) _proto("Rx ACKALL %%%u", sp->hdr.serial); - rxrpc_rotate_tx_window(call, call->tx_top, &summary); - if (test_bit(RXRPC_CALL_TX_LAST, &call->flags)) + if (rxrpc_rotate_tx_window(call, call->tx_top, &summary)) rxrpc_end_tx_phase(call, false, "ETL"); } -- GitLab From f1d27ff6e027204e6bec8dd01153a196219d9d9b Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 8 Oct 2018 15:46:11 +0100 Subject: [PATCH 1178/2269] rxrpc: Only take the rwind and mtu values from latest ACK [ Upstream commit 298bc15b2079c324e82d0a6fda39c3d762af7282 ] Move the out-of-order and duplicate ACK packet check to before the call to rxrpc_input_ackinfo() so that the receive window size and MTU size are only checked in the latest ACK packet and don't regress. Fixes: 248f219cb8bc ("rxrpc: Rewrite the data and ack handling code") Signed-off-by: David Howells Signed-off-by: Sasha Levin --- net/rxrpc/input.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 3a501bf0fc1a7..ea506a77f3c81 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -806,6 +806,16 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, rxrpc_propose_ack_respond_to_ack); } + /* Discard any out-of-order or duplicate ACKs. */ + if (before_eq(sp->hdr.serial, call->acks_latest)) { + _debug("discard ACK %d <= %d", + sp->hdr.serial, call->acks_latest); + return; + } + call->acks_latest_ts = skb->tstamp; + call->acks_latest = sp->hdr.serial; + + /* Parse rwind and mtu sizes if provided. */ ioffset = offset + nr_acks + 3; if (skb->len >= ioffset + sizeof(buf.info)) { if (skb_copy_bits(skb, ioffset, &buf.info, sizeof(buf.info)) < 0) @@ -827,15 +837,6 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, return; } - /* Discard any out-of-order or duplicate ACKs. */ - if (before_eq(sp->hdr.serial, call->acks_latest)) { - _debug("discard ACK %d <= %d", - sp->hdr.serial, call->acks_latest); - return; - } - call->acks_latest_ts = skb->tstamp; - call->acks_latest = sp->hdr.serial; - if (before(hard_ack, call->tx_hard_ack) || after(hard_ack, call->tx_top)) return rxrpc_proto_abort("AKW", call, 0); -- GitLab From 252869e5fb49b6397584d9dbe46b246bb1787ef3 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 8 Oct 2018 15:46:17 +0100 Subject: [PATCH 1179/2269] rxrpc: Fix connection-level abort handling [ Upstream commit 647530924f47c93db472ee3cf43b7ef1425581b6 ] Fix connection-level abort handling to cache the abort and error codes properly so that a new incoming call can be properly aborted if it races with the parent connection being aborted by another CPU. The abort_code and error parameters can then be dropped from rxrpc_abort_calls(). Fixes: f5c17aaeb2ae ("rxrpc: Calls should only have one terminal state") Signed-off-by: David Howells Signed-off-by: Sasha Levin --- net/rxrpc/ar-internal.h | 4 ++-- net/rxrpc/call_accept.c | 4 ++-- net/rxrpc/conn_event.c | 26 +++++++++++++++----------- 3 files changed, 19 insertions(+), 15 deletions(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index e6c2c4f56fb12..71c7f1dd4599e 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -424,8 +424,7 @@ struct rxrpc_connection { spinlock_t state_lock; /* state-change lock */ enum rxrpc_conn_cache_state cache_state; enum rxrpc_conn_proto_state state; /* current state of connection */ - u32 local_abort; /* local abort code */ - u32 remote_abort; /* remote abort code */ + u32 abort_code; /* Abort code of connection abort */ int debug_id; /* debug ID for printks */ atomic_t serial; /* packet serial number counter */ unsigned int hi_serial; /* highest serial number received */ @@ -435,6 +434,7 @@ struct rxrpc_connection { u8 security_size; /* security header size */ u8 security_ix; /* security type */ u8 out_clientflag; /* RXRPC_CLIENT_INITIATED if we are client */ + short error; /* Local error code */ }; /* diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index 62b1581d44a5a..2dd13f5c47c86 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -418,11 +418,11 @@ found_service: case RXRPC_CONN_REMOTELY_ABORTED: rxrpc_set_call_completion(call, RXRPC_CALL_REMOTELY_ABORTED, - conn->remote_abort, -ECONNABORTED); + conn->abort_code, conn->error); break; case RXRPC_CONN_LOCALLY_ABORTED: rxrpc_abort_call("CON", call, sp->hdr.seq, - conn->local_abort, -ECONNABORTED); + conn->abort_code, conn->error); break; default: BUG(); diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index 0435c4167a1ae..75ec1ad595b72 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -117,7 +117,7 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, switch (chan->last_type) { case RXRPC_PACKET_TYPE_ABORT: - _proto("Tx ABORT %%%u { %d } [re]", serial, conn->local_abort); + _proto("Tx ABORT %%%u { %d } [re]", serial, conn->abort_code); break; case RXRPC_PACKET_TYPE_ACK: trace_rxrpc_tx_ack(NULL, serial, chan->last_seq, 0, @@ -135,13 +135,12 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, * pass a connection-level abort onto all calls on that connection */ static void rxrpc_abort_calls(struct rxrpc_connection *conn, - enum rxrpc_call_completion compl, - u32 abort_code, int error) + enum rxrpc_call_completion compl) { struct rxrpc_call *call; int i; - _enter("{%d},%x", conn->debug_id, abort_code); + _enter("{%d},%x", conn->debug_id, conn->abort_code); spin_lock(&conn->channel_lock); @@ -153,9 +152,11 @@ static void rxrpc_abort_calls(struct rxrpc_connection *conn, if (compl == RXRPC_CALL_LOCALLY_ABORTED) trace_rxrpc_abort("CON", call->cid, call->call_id, 0, - abort_code, error); + conn->abort_code, + conn->error); if (rxrpc_set_call_completion(call, compl, - abort_code, error)) + conn->abort_code, + conn->error)) rxrpc_notify_socket(call); } } @@ -188,10 +189,12 @@ static int rxrpc_abort_connection(struct rxrpc_connection *conn, return 0; } + conn->error = error; + conn->abort_code = abort_code; conn->state = RXRPC_CONN_LOCALLY_ABORTED; spin_unlock_bh(&conn->state_lock); - rxrpc_abort_calls(conn, RXRPC_CALL_LOCALLY_ABORTED, abort_code, error); + rxrpc_abort_calls(conn, RXRPC_CALL_LOCALLY_ABORTED); msg.msg_name = &conn->params.peer->srx.transport; msg.msg_namelen = conn->params.peer->srx.transport_len; @@ -210,7 +213,7 @@ static int rxrpc_abort_connection(struct rxrpc_connection *conn, whdr._rsvd = 0; whdr.serviceId = htons(conn->service_id); - word = htonl(conn->local_abort); + word = htonl(conn->abort_code); iov[0].iov_base = &whdr; iov[0].iov_len = sizeof(whdr); @@ -221,7 +224,7 @@ static int rxrpc_abort_connection(struct rxrpc_connection *conn, serial = atomic_inc_return(&conn->serial); whdr.serial = htonl(serial); - _proto("Tx CONN ABORT %%%u { %d }", serial, conn->local_abort); + _proto("Tx CONN ABORT %%%u { %d }", serial, conn->abort_code); ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 2, len); if (ret < 0) { @@ -289,9 +292,10 @@ static int rxrpc_process_event(struct rxrpc_connection *conn, abort_code = ntohl(wtmp); _proto("Rx ABORT %%%u { ac=%d }", sp->hdr.serial, abort_code); + conn->error = -ECONNABORTED; + conn->abort_code = abort_code; conn->state = RXRPC_CONN_REMOTELY_ABORTED; - rxrpc_abort_calls(conn, RXRPC_CALL_REMOTELY_ABORTED, - abort_code, -ECONNABORTED); + rxrpc_abort_calls(conn, RXRPC_CALL_REMOTELY_ABORTED); return -ECONNABORTED; case RXRPC_PACKET_TYPE_CHALLENGE: -- GitLab From ba4cf1eefd54fbeadf90cfe8236643bed5681548 Mon Sep 17 00:00:00 2001 From: Arthur Kiyanovski Date: Tue, 9 Oct 2018 11:21:27 +0300 Subject: [PATCH 1180/2269] net: ena: fix warning in rmmod caused by double iounmap [ Upstream commit d79c3888bde6581da7ff9f9d6f581900ecb5e632 ] Memory mapped with devm_ioremap is automatically freed when the driver is disconnected from the device. Therefore there is no need to explicitly call devm_iounmap. Fixes: 0857d92f71b6 ("net: ena: add missing unmap bars on device removal") Fixes: 411838e7b41c ("net: ena: fix rare kernel crash when bar memory remap fails") Signed-off-by: Arthur Kiyanovski Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/amazon/ena/ena_netdev.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 60b3ee29d82c8..08c9c99a8331a 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -3059,15 +3059,8 @@ err_rss_init: static void ena_release_bars(struct ena_com_dev *ena_dev, struct pci_dev *pdev) { - int release_bars; + int release_bars = pci_select_bars(pdev, IORESOURCE_MEM) & ENA_BAR_MASK; - if (ena_dev->mem_bar) - devm_iounmap(&pdev->dev, ena_dev->mem_bar); - - if (ena_dev->reg_bar) - devm_iounmap(&pdev->dev, ena_dev->reg_bar); - - release_bars = pci_select_bars(pdev, IORESOURCE_MEM) & ENA_BAR_MASK; pci_release_selected_regions(pdev, release_bars); } -- GitLab From 1e4f8315a5c8e914fb6365c930b45fffd73b02cd Mon Sep 17 00:00:00 2001 From: Arthur Kiyanovski Date: Tue, 9 Oct 2018 11:21:29 +0300 Subject: [PATCH 1181/2269] net: ena: fix NULL dereference due to untimely napi initialization [ Upstream commit 78a55d05def95144ca5fa9a64c49b2a0636a9866 ] napi poll functions should be initialized before running request_irq(), to handle a rare condition where there is a pending interrupt, causing the ISR to fire immediately while the poll function wasn't set yet, causing a NULL dereference. Fixes: 1738cd3ed342 ("net: ena: Add a driver for Amazon Elastic Network Adapters (ENA)") Signed-off-by: Arthur Kiyanovski Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/amazon/ena/ena_netdev.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 08c9c99a8331a..3c7813f04962b 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -1571,8 +1571,6 @@ static int ena_up_complete(struct ena_adapter *adapter) if (rc) return rc; - ena_init_napi(adapter); - ena_change_mtu(adapter->netdev, adapter->netdev->mtu); ena_refill_all_rx_bufs(adapter); @@ -1726,6 +1724,13 @@ static int ena_up(struct ena_adapter *adapter) ena_setup_io_intr(adapter); + /* napi poll functions should be initialized before running + * request_irq(), to handle a rare condition where there is a pending + * interrupt, causing the ISR to fire immediately while the poll + * function wasn't set yet, causing a null dereference + */ + ena_init_napi(adapter); + rc = ena_request_io_irq(adapter); if (rc) goto err_req_irq; -- GitLab From 5309191ec0496e58167421042d42263bd6e13dac Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 11 Oct 2018 10:54:52 +0200 Subject: [PATCH 1182/2269] selftests: rtnetlink.sh explicitly requires bash. [ Upstream commit 3c718e677c2b35b449992adc36ecce883c467e98 ] the script rtnetlink.sh requires a bash-only features (sleep with sub-second precision). This may cause random test failure if the default shell is not bash. Address the above explicitly requiring bash as the script interpreter. Fixes: 33b01b7b4f19 ("selftests: add rtnetlink test script") Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- tools/testing/selftests/net/rtnetlink.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh index 57b5ff5762403..891130daac7ce 100755 --- a/tools/testing/selftests/net/rtnetlink.sh +++ b/tools/testing/selftests/net/rtnetlink.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # # This test is for checking rtnetlink callpaths, and get as much coverage as possible. # -- GitLab From 3d69b85e14be7a6604a8820f80c1b712c65241d7 Mon Sep 17 00:00:00 2001 From: Khazhismel Kumykov Date: Fri, 12 Oct 2018 21:34:40 -0700 Subject: [PATCH 1183/2269] fs/fat/fatent.c: add cond_resched() to fat_count_free_clusters() [ Upstream commit ac081c3be3fae6d0cc3e1862507fca3862d30b67 ] On non-preempt kernels this loop can take a long time (more than 50 ticks) processing through entries. Link: http://lkml.kernel.org/r/20181010172623.57033-1-khazhy@google.com Signed-off-by: Khazhismel Kumykov Acked-by: OGAWA Hirofumi Reviewed-by: Andrew Morton Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- fs/fat/fatent.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c index a40f36b1b292e..9635df94db7d9 100644 --- a/fs/fat/fatent.c +++ b/fs/fat/fatent.c @@ -681,6 +681,7 @@ int fat_count_free_clusters(struct super_block *sb) if (ops->ent_get(&fatent) == FAT_ENT_FREE) free++; } while (fat_ent_next(sbi, &fatent)); + cond_resched(); } sbi->free_clusters = free; sbi->free_clus_valid = 1; -- GitLab From bd6df7a19559f9b52049f97c3188a7d1544e16df Mon Sep 17 00:00:00 2001 From: Christoph Paasch Date: Thu, 18 Oct 2018 13:38:40 -0700 Subject: [PATCH 1184/2269] sch_netem: restore skb->dev after dequeuing from the rbtree Upstream commit bffa72cf7f9d ("net: sk_buff rbnode reorg") got backported as commit 6b921536f170 ("net: sk_buff rbnode reorg") into the v4.14.x-tree. However, the backport does not include the changes in sch_netem.c We need these, as otherwise the skb->dev pointer is not set when dequeueing from the netem rbtree, resulting in a panic: [ 15.427748] BUG: unable to handle kernel NULL pointer dereference at 00000000000000d0 [ 15.428863] IP: netif_skb_features+0x24/0x230 [ 15.429402] PGD 0 P4D 0 [ 15.429733] Oops: 0000 [#1] SMP PTI [ 15.430169] Modules linked in: [ 15.430614] CPU: 3 PID: 0 Comm: swapper/3 Not tainted 4.14.77.mptcp #77 [ 15.431497] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 0.5.1 01/01/2011 [ 15.432568] task: ffff88042db19680 task.stack: ffffc90000070000 [ 15.433356] RIP: 0010:netif_skb_features+0x24/0x230 [ 15.433977] RSP: 0018:ffff88043fd83e70 EFLAGS: 00010286 [ 15.434665] RAX: ffff880429ad80c0 RBX: ffff88042bd0e400 RCX: ffff880429ad8000 [ 15.435585] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff88042bd0e400 [ 15.436551] RBP: ffff88042bd0e400 R08: ffff88042a4b6c9c R09: 0000000000000001 [ 15.437485] R10: 0000000000000004 R11: 0000000000000000 R12: ffff88042c700000 [ 15.438393] R13: ffff88042c700000 R14: ffff88042a4b6c00 R15: ffff88042c6bb000 [ 15.439315] FS: 0000000000000000(0000) GS:ffff88043fd80000(0000) knlGS:0000000000000000 [ 15.440314] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 15.441084] CR2: 00000000000000d0 CR3: 000000042c374000 CR4: 00000000000006e0 [ 15.442016] Call Trace: [ 15.442333] [ 15.442596] validate_xmit_skb+0x17/0x270 [ 15.443134] validate_xmit_skb_list+0x38/0x60 [ 15.443698] sch_direct_xmit+0x102/0x190 [ 15.444198] __qdisc_run+0xe3/0x240 [ 15.444671] net_tx_action+0x121/0x140 [ 15.445177] __do_softirq+0xe2/0x224 [ 15.445654] irq_exit+0xbf/0xd0 [ 15.446072] smp_apic_timer_interrupt+0x5d/0x90 [ 15.446654] apic_timer_interrupt+0x7d/0x90 [ 15.447185] [ 15.447460] RIP: 0010:native_safe_halt+0x2/0x10 [ 15.447992] RSP: 0018:ffffc90000073f10 EFLAGS: 00000282 ORIG_RAX: ffffffffffffff10 [ 15.449008] RAX: ffffffff816667d0 RBX: ffffffff820946b0 RCX: 0000000000000000 [ 15.449895] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 [ 15.450768] RBP: ffffffff82026940 R08: 00000004e858e5e1 R09: ffff88042a4b6d58 [ 15.451643] R10: 0000000000000000 R11: 000000d0d56879bb R12: 0000000000000000 [ 15.452478] R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 [ 15.453340] ? __sched_text_end+0x2/0x2 [ 15.453835] default_idle+0xf/0x20 [ 15.454259] do_idle+0x170/0x200 [ 15.454653] cpu_startup_entry+0x14/0x20 [ 15.455142] secondary_startup_64+0xa5/0xb0 [ 15.455715] Code: 1f 84 00 00 00 00 00 55 53 48 89 fd 48 83 ec 08 8b 87 bc 00 00 00 48 8b 8f c0 00 00 00 0f b6 97 81 00 00 00 48 8b 77 10 48 01 c8 <48> 8b 9 [ 15.458138] RIP: netif_skb_features+0x24/0x230 RSP: ffff88043fd83e70 [ 15.458933] CR2: 00000000000000d0 [ 15.459352] ---[ end trace 083925903ae60570 ]--- Fixes: 6b921536f170 ("net: sk_buff rbnode reorg") Cc: Stephen Hemminger Cc: Eric Dumazet Cc: Soheil Hassas Yeganeh Cc: Wei Wang Cc: Willem de Bruijn Signed-off-by: Christoph Paasch Signed-off-by: Sasha Levin --- net/sched/sch_netem.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 2a2ab6bfe5d85..3d325b8408022 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -624,6 +624,10 @@ deliver: skb->next = NULL; skb->prev = NULL; skb->tstamp = netem_skb_cb(skb)->tstamp_save; + /* skb->dev shares skb->rbnode area, + * we need to restore its value. + */ + skb->dev = qdisc_dev(sch); #ifdef CONFIG_NET_CLS_ACT /* -- GitLab From 3642e35447495208665e30ffbc2466a2a488b99e Mon Sep 17 00:00:00 2001 From: Kimmo Rautkoski Date: Sat, 20 Oct 2018 22:44:42 +0300 Subject: [PATCH 1185/2269] mtd: spi-nor: Add support for is25wp series chips [ Upstream commit d616f81cdd2a21edfa90a595a4e9b143f5ba8414 ] Added support for is25wp032, is25wp064 and is25wp128. Signed-off-by: Kimmo Rautkoski Reviewed-by: Marek Vasut Signed-off-by: Boris Brezillon [ Adrian Bunk: Trivial adaption to changed context. ] Signed-off-by: Adrian Bunk Signed-off-by: Sasha Levin --- drivers/mtd/spi-nor/spi-nor.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c index 19c000722cbc8..34ecc12ee3d93 100644 --- a/drivers/mtd/spi-nor/spi-nor.c +++ b/drivers/mtd/spi-nor/spi-nor.c @@ -1005,6 +1005,12 @@ static const struct flash_info spi_nor_ids[] = { /* ISSI */ { "is25cd512", INFO(0x7f9d20, 0, 32 * 1024, 2, SECT_4K) }, + { "is25wp032", INFO(0x9d7016, 0, 64 * 1024, 64, + SECT_4K | SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ) }, + { "is25wp064", INFO(0x9d7017, 0, 64 * 1024, 128, + SECT_4K | SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ) }, + { "is25wp128", INFO(0x9d7018, 0, 64 * 1024, 256, + SECT_4K | SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ) }, /* Macronix */ { "mx25l512e", INFO(0xc22010, 0, 64 * 1024, 1, SECT_4K) }, -- GitLab From 871424f0c3cfcf51727cb56a0a5f1cbfc9e429c6 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Tue, 12 Dec 2017 17:15:02 +0100 Subject: [PATCH 1186/2269] kvm: x86: fix WARN due to uninitialized guest FPU state commit 5663d8f9bbe4bf15488f7351efb61ea20fa6de06 upstream ------------[ cut here ]------------ Bad FPU state detected at kvm_put_guest_fpu+0xd8/0x2d0 [kvm], reinitializing FPU registers. WARNING: CPU: 1 PID: 4594 at arch/x86/mm/extable.c:103 ex_handler_fprestore+0x88/0x90 CPU: 1 PID: 4594 Comm: qemu-system-x86 Tainted: G B OE 4.15.0-rc2+ #10 RIP: 0010:ex_handler_fprestore+0x88/0x90 Call Trace: fixup_exception+0x4e/0x60 do_general_protection+0xff/0x270 general_protection+0x22/0x30 RIP: 0010:kvm_put_guest_fpu+0xd8/0x2d0 [kvm] RSP: 0018:ffff8803d5627810 EFLAGS: 00010246 kvm_vcpu_reset+0x3b4/0x3c0 [kvm] kvm_apic_accept_events+0x1c0/0x240 [kvm] kvm_arch_vcpu_ioctl_run+0x1658/0x2fb0 [kvm] kvm_vcpu_ioctl+0x479/0x880 [kvm] do_vfs_ioctl+0x142/0x9a0 SyS_ioctl+0x74/0x80 do_syscall_64+0x15f/0x600 where kvm_put_guest_fpu is called without a prior kvm_load_guest_fpu. To fix it, move kvm_load_guest_fpu to the very beginning of kvm_arch_vcpu_ioctl_run. Cc: stable@vger.kernel.org Fixes: f775b13eedee2f7f3c6fdd4e90fb79090ce5d339 Signed-off-by: Peter Xu Signed-off-by: Paolo Bonzini Signed-off-by: Sudip Mukherjee Acked-by: Paolo Bonzini Signed-off-by: Sasha Levin --- arch/x86/kvm/x86.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 3856828ee1dc9..8d688b213504c 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7393,13 +7393,12 @@ static int complete_emulated_mmio(struct kvm_vcpu *vcpu) int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { - struct fpu *fpu = ¤t->thread.fpu; int r; - fpu__initialize(fpu); - kvm_sigset_activate(vcpu); + kvm_load_guest_fpu(vcpu); + if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) { if (kvm_run->immediate_exit) { r = -EINTR; @@ -7440,6 +7439,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) r = vcpu_run(vcpu); out: + kvm_put_guest_fpu(vcpu); post_kvm_run_save(vcpu); kvm_sigset_deactivate(vcpu); -- GitLab From cdb44bdafc3a846aa785102f65a26762fbe32037 Mon Sep 17 00:00:00 2001 From: Chris Paterson Date: Thu, 14 Dec 2017 09:08:39 +0000 Subject: [PATCH 1187/2269] ARM: dts: r8a7790: Correct critical CPU temperature MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit fcab5651fae4258a993170b7aaf443adbd3d4d84 upstream The R-Car H2 hardware manual states that Tc = –40°C to +105°C. The thermal sensor has an accuracy of ±5°C and there can be a temperature difference of 1 or 2 degrees between Tjmax and the thermal sensor due to the location of the latter. This means that 95°C is a safer value to use. Fixes: a8b805f3606f7af7 ("ARM: dts: r8a7790: enable to use thermal-zone") Signed-off-by: Chris Paterson Reviewed-by: Geert Uytterhoeven Signed-off-by: Simon Horman Signed-off-by: Sudip Mukherjee Signed-off-by: Sasha Levin --- arch/arm/boot/dts/r8a7790.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/r8a7790.dtsi b/arch/arm/boot/dts/r8a7790.dtsi index 16358bf8d1dbf..97e8b9b0b7501 100644 --- a/arch/arm/boot/dts/r8a7790.dtsi +++ b/arch/arm/boot/dts/r8a7790.dtsi @@ -153,7 +153,7 @@ trips { cpu-crit { - temperature = <115000>; + temperature = <95000>; hysteresis = <0>; type = "critical"; }; -- GitLab From e75194d294421704baf4c577e504cb9c92ec7ce2 Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Mon, 21 May 2018 06:24:58 -0400 Subject: [PATCH 1188/2269] media: uvcvideo: Fix driver reference counting commit f9ffcb0a21e1fa8e64d09ed613d884e054ae8191 upstream kref_init initializes the reference count to 1, not 0. This additional reference is never released since the conversion to reference counters. As a result, uvc_delete is not called anymore when UVC cameras are disconnected. Fix this by adding an additional kref_put in uvc_disconnect and in the probe error path. This also allows to remove the temporary additional reference in uvc_unregister_video. Fixes: 9d15cd958c17 ("media: uvcvideo: Convert from using an atomic variable to a reference count") Signed-off-by: Philipp Zabel Reviewed-by: Laurent Pinchart Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sudip Mukherjee Signed-off-by: Sasha Levin --- drivers/media/usb/uvc/uvc_driver.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/drivers/media/usb/uvc/uvc_driver.c b/drivers/media/usb/uvc/uvc_driver.c index 6d22b22cb35b6..064d88299adc0 100644 --- a/drivers/media/usb/uvc/uvc_driver.c +++ b/drivers/media/usb/uvc/uvc_driver.c @@ -1865,13 +1865,6 @@ static void uvc_unregister_video(struct uvc_device *dev) { struct uvc_streaming *stream; - /* Unregistering all video devices might result in uvc_delete() being - * called from inside the loop if there's no open file handle. To avoid - * that, increment the refcount before iterating over the streams and - * decrement it when done. - */ - kref_get(&dev->ref); - list_for_each_entry(stream, &dev->streams, list) { if (!video_is_registered(&stream->vdev)) continue; @@ -1880,8 +1873,6 @@ static void uvc_unregister_video(struct uvc_device *dev) uvc_debugfs_cleanup_stream(stream); } - - kref_put(&dev->ref, uvc_delete); } static int uvc_register_video(struct uvc_device *dev, @@ -2129,6 +2120,7 @@ static int uvc_probe(struct usb_interface *intf, error: uvc_unregister_video(dev); + kref_put(&dev->ref, uvc_delete); return -ENODEV; } @@ -2146,6 +2138,7 @@ static void uvc_disconnect(struct usb_interface *intf) return; uvc_unregister_video(dev); + kref_put(&dev->ref, uvc_delete); } static int uvc_suspend(struct usb_interface *intf, pm_message_t message) -- GitLab From ee66ad5896aef9b67b7b1e3db6fa9430b907fa58 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 6 Nov 2017 10:47:14 +0100 Subject: [PATCH 1189/2269] ALSA: usx2y: Fix invalid stream URBs commit f9a1c372299fed53d4b72bb601f7f3bfe6f9999c upstream The us122l driver creates URBs per the fixed endpoints, and this may end up with URBs with inconsistent pipes when a fuzzer or a malicious program deals with the manipulated endpoints. It ends up with a kernel warning like: usb 1-1: BOGUS urb xfer, pipe 0 != type 3 ------------[ cut here ]------------ WARNING: CPU: 0 PID: 24 at drivers/usb/core/urb.c:471 usb_submit_urb+0x113e/0x1400 Call Trace: usb_stream_start+0x48a/0x9f0 sound/usb/usx2y/usb_stream.c:690 us122l_start+0x116/0x290 sound/usb/usx2y/us122l.c:365 us122l_create_card sound/usb/usx2y/us122l.c:502 us122l_usb_probe sound/usb/usx2y/us122l.c:588 .... For avoiding the bad access, this patch adds a few sanity checks of the validity of created URBs like previous similar fixes using the new usb_urb_ep_type_check() helper function. Reported-by: Andrey Konovalov Tested-by: Andrey Konovalov Signed-off-by: Takashi Iwai Signed-off-by: Sudip Mukherjee Signed-off-by: Sasha Levin --- sound/usb/usx2y/usb_stream.c | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/sound/usb/usx2y/usb_stream.c b/sound/usb/usx2y/usb_stream.c index e229abd216526..b0f8979ff2d2f 100644 --- a/sound/usb/usx2y/usb_stream.c +++ b/sound/usb/usx2y/usb_stream.c @@ -56,7 +56,7 @@ check: lb, s->period_size); } -static void init_pipe_urbs(struct usb_stream_kernel *sk, unsigned use_packsize, +static int init_pipe_urbs(struct usb_stream_kernel *sk, unsigned use_packsize, struct urb **urbs, char *transfer, struct usb_device *dev, int pipe) { @@ -77,6 +77,8 @@ static void init_pipe_urbs(struct usb_stream_kernel *sk, unsigned use_packsize, urb->interval = 1; if (usb_pipeout(pipe)) continue; + if (usb_urb_ep_type_check(urb)) + return -EINVAL; urb->transfer_buffer_length = transfer_length; desc = urb->iso_frame_desc; @@ -87,9 +89,11 @@ static void init_pipe_urbs(struct usb_stream_kernel *sk, unsigned use_packsize, desc[p].length = maxpacket; } } + + return 0; } -static void init_urbs(struct usb_stream_kernel *sk, unsigned use_packsize, +static int init_urbs(struct usb_stream_kernel *sk, unsigned use_packsize, struct usb_device *dev, int in_pipe, int out_pipe) { struct usb_stream *s = sk->s; @@ -103,9 +107,12 @@ static void init_urbs(struct usb_stream_kernel *sk, unsigned use_packsize, sk->outurb[u] = usb_alloc_urb(sk->n_o_ps, GFP_KERNEL); } - init_pipe_urbs(sk, use_packsize, sk->inurb, indata, dev, in_pipe); - init_pipe_urbs(sk, use_packsize, sk->outurb, sk->write_page, dev, - out_pipe); + if (init_pipe_urbs(sk, use_packsize, sk->inurb, indata, dev, in_pipe) || + init_pipe_urbs(sk, use_packsize, sk->outurb, sk->write_page, dev, + out_pipe)) + return -EINVAL; + + return 0; } @@ -226,7 +233,11 @@ struct usb_stream *usb_stream_new(struct usb_stream_kernel *sk, else sk->freqn = get_usb_high_speed_rate(sample_rate); - init_urbs(sk, use_packsize, dev, in_pipe, out_pipe); + if (init_urbs(sk, use_packsize, dev, in_pipe, out_pipe) < 0) { + usb_stream_free(sk); + return NULL; + } + sk->s->state = usb_stream_stopped; out: return sk->s; -- GitLab From ac7c2bb59870b8fd41c09502e73e72fcb63b85c3 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Wed, 31 Oct 2018 11:05:19 -0400 Subject: [PATCH 1190/2269] Revert "netfilter: ipv6: nf_defrag: drop skb dst before queueing" This reverts commit 28c74ff85efd192aeca9005499ca50c24d795f61. From Florian Westphal : It causes kernel crash for locally generated ipv6 fragments when netfilter ipv6 defragmentation is used. The faulty commit is not essential for -stable, it only delays netns teardown for longer than needed when that netns still has ipv6 frags queued. Much better than crash :-/ Signed-off-by: Sasha Levin --- net/ipv6/netfilter/nf_conntrack_reasm.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 2ed8536e10b64..611d406c4656d 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -598,8 +598,6 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user) fq->q.meat == fq->q.len && nf_ct_frag6_reasm(fq, skb, dev)) ret = 0; - else - skb_dst_drop(skb); out_unlock: spin_unlock_bh(&fq->q.lock); -- GitLab From 2980235974cdb243b06d8eb9d12a3fce44f401a1 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Thu, 5 Jul 2018 15:15:27 +0200 Subject: [PATCH 1191/2269] perf tools: Disable parallelism for 'make clean' [ Upstream commit da15fc2fa9c07b23db8f5e479bd8a9f0d741ca07 ] The Yocto build system does a 'make clean' when rebuilding due to changed dependencies, and that consistently fails for me (causing the whole BSP build to fail) with errors such as | find: '[...]/perf/1.0-r9/perf-1.0/plugin_mac80211.so': No such file or directory | find: '[...]/perf/1.0-r9/perf-1.0/plugin_mac80211.so': No such file or directory | find: find: '[...]/perf/1.0-r9/perf-1.0/libtraceevent.a''[...]/perf/1.0-r9/perf-1.0/libtraceevent.a': No such file or directory: No such file or directory | [...] | find: cannot delete '/mnt/xfs/devel/pil/yocto/tmp-glibc/work/wandboard-oe-linux-gnueabi/perf/1.0-r9/perf-1.0/util/.pstack.o.cmd': No such file or directory Apparently (despite the comment), 'make clean' ends up launching multiple sub-makes that all want to remove the same things - perhaps this only happens in combination with a O=... parameter. In any case, we don't lose much by explicitly disabling the parallelism for the clean target, and it makes automated builds much more reliable. Signed-off-by: Rasmus Villemoes Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180705131527.19749-1-linux@rasmusvillemoes.dk Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 225454416ed54..7902a5681fc89 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -84,10 +84,10 @@ endif # has_clean endif # MAKECMDGOALS # -# The clean target is not really parallel, don't print the jobs info: +# Explicitly disable parallelism for the clean target. # clean: - $(make) + $(make) -j1 # # The build-test target is not really parallel, don't print the jobs info, -- GitLab From b6534b3e08e8e10e871b53ce5a1e9ce4bcb94907 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 10 Apr 2018 13:33:12 +0100 Subject: [PATCH 1192/2269] drm/i915/gvt: fix memory leak of a cmd_entry struct on error exit path commit ffdf16edfbbe77f5f5c3c87fe8d7387ecd16241b upstream The error exit path when a duplicate is found does not kfree and cmd_entry struct and hence there is a small memory leak. Fix this by kfree'ing it. Detected by CoverityScan, CID#1370198 ("Resource Leak") Fixes: be1da7070aea ("drm/i915/gvt: vGPU command scanner") Signed-off-by: Colin Ian King Reviewed-by: Chris Wilson Signed-off-by: Zhenyu Wang Signed-off-by: Sudip Mukherjee Acked-by: Zhenyu Wang Signed-off-by: Sasha Levin --- drivers/gpu/drm/i915/gvt/cmd_parser.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c index d4726a3358a4a..d6993c2707d1b 100644 --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c @@ -2802,6 +2802,7 @@ static int init_cmd_table(struct intel_gvt *gvt) if (info) { gvt_err("%s %s duplicated\n", e->info->name, info->name); + kfree(e); return -EEXIST; } -- GitLab From 1bd68d5612af83ed392f066861eb5033bfcad28c Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Fri, 26 Oct 2018 10:28:43 +0800 Subject: [PATCH 1193/2269] bridge: do not add port to router list when receives query with source 0.0.0.0 commit 5a2de63fd1a59c30c02526d427bc014b98adf508 upstream. Based on RFC 4541, 2.1.1. IGMP Forwarding Rules The switch supporting IGMP snooping must maintain a list of multicast routers and the ports on which they are attached. This list can be constructed in any combination of the following ways: a) This list should be built by the snooping switch sending Multicast Router Solicitation messages as described in IGMP Multicast Router Discovery [MRDISC]. It may also snoop Multicast Router Advertisement messages sent by and to other nodes. b) The arrival port for IGMP Queries (sent by multicast routers) where the source address is not 0.0.0.0. We should not add the port to router list when receives query with source 0.0.0.0. Reported-by: Ying Xu Signed-off-by: Hangbin Liu Acked-by: Nikolay Aleksandrov Acked-by: Roopa Prabhu Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/bridge/br_multicast.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 8dc5c8d69bcd7..efbcc811297cc 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1390,7 +1390,15 @@ static void br_multicast_query_received(struct net_bridge *br, return; br_multicast_update_query_timer(br, query, max_delay); - br_multicast_mark_router(br, port); + + /* Based on RFC4541, section 2.1.1 IGMP Forwarding Rules, + * the arrival port for IGMP Queries where the source address + * is 0.0.0.0 should not be added to router port list. + */ + if ((saddr->proto == htons(ETH_P_IP) && saddr->u.ip4) || + (saddr->proto == htons(ETH_P_IPV6) && + !ipv6_addr_any(&saddr->u.ip6))) + br_multicast_mark_router(br, port); } static int br_ip4_multicast_query(struct net_bridge *br, -- GitLab From c3cf86dac56dbacab38d27c072b492cbbe89f4f1 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Sat, 27 Oct 2018 12:07:47 +0300 Subject: [PATCH 1194/2269] net: bridge: remove ipv6 zero address check in mcast queries commit 0fe5119e267f3e3d8ac206895f5922195ec55a8a upstream. Recently a check was added which prevents marking of routers with zero source address, but for IPv6 that cannot happen as the relevant RFCs actually forbid such packets: RFC 2710 (MLDv1): "To be valid, the Query message MUST come from a link-local IPv6 Source Address, be at least 24 octets long, and have a correct MLD checksum." Same goes for RFC 3810. And also it can be seen as a requirement in ipv6_mc_check_mld_query() which is used by the bridge to validate the message before processing it. Thus any queries with :: source address won't be processed anyway. So just remove the check for zero IPv6 source address from the query processing function. Fixes: 5a2de63fd1a5 ("bridge: do not add port to router list when receives query with source 0.0.0.0") Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller Cc: Hangbin Liu Signed-off-by: Greg Kroah-Hartman --- net/bridge/br_multicast.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index efbcc811297cc..a813dfe2dc2cf 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1396,8 +1396,7 @@ static void br_multicast_query_received(struct net_bridge *br, * is 0.0.0.0 should not be added to router port list. */ if ((saddr->proto == htons(ETH_P_IP) && saddr->u.ip4) || - (saddr->proto == htons(ETH_P_IPV6) && - !ipv6_addr_any(&saddr->u.ip6))) + saddr->proto == htons(ETH_P_IPV6)) br_multicast_mark_router(br, port); } -- GitLab From 5d4c5861211f62be25e36254981521c1e5030b19 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 12 Oct 2018 18:58:53 -0700 Subject: [PATCH 1195/2269] ipv6: mcast: fix a use-after-free in inet6_mc_check [ Upstream commit dc012f3628eaecfb5ba68404a5c30ef501daf63d ] syzbot found a use-after-free in inet6_mc_check [1] The problem here is that inet6_mc_check() uses rcu and read_lock(&iml->sflock) So the fact that ip6_mc_leave_src() is called under RTNL and the socket lock does not help us, we need to acquire iml->sflock in write mode. In the future, we should convert all this stuff to RCU. [1] BUG: KASAN: use-after-free in ipv6_addr_equal include/net/ipv6.h:521 [inline] BUG: KASAN: use-after-free in inet6_mc_check+0xae7/0xb40 net/ipv6/mcast.c:649 Read of size 8 at addr ffff8801ce7f2510 by task syz-executor0/22432 CPU: 1 PID: 22432 Comm: syz-executor0 Not tainted 4.19.0-rc7+ #280 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x1c4/0x2b4 lib/dump_stack.c:113 print_address_description.cold.8+0x9/0x1ff mm/kasan/report.c:256 kasan_report_error mm/kasan/report.c:354 [inline] kasan_report.cold.9+0x242/0x309 mm/kasan/report.c:412 __asan_report_load8_noabort+0x14/0x20 mm/kasan/report.c:433 ipv6_addr_equal include/net/ipv6.h:521 [inline] inet6_mc_check+0xae7/0xb40 net/ipv6/mcast.c:649 __raw_v6_lookup+0x320/0x3f0 net/ipv6/raw.c:98 ipv6_raw_deliver net/ipv6/raw.c:183 [inline] raw6_local_deliver+0x3d3/0xcb0 net/ipv6/raw.c:240 ip6_input_finish+0x467/0x1aa0 net/ipv6/ip6_input.c:345 NF_HOOK include/linux/netfilter.h:289 [inline] ip6_input+0xe9/0x600 net/ipv6/ip6_input.c:426 ip6_mc_input+0x48a/0xd20 net/ipv6/ip6_input.c:503 dst_input include/net/dst.h:450 [inline] ip6_rcv_finish+0x17a/0x330 net/ipv6/ip6_input.c:76 NF_HOOK include/linux/netfilter.h:289 [inline] ipv6_rcv+0x120/0x640 net/ipv6/ip6_input.c:271 __netif_receive_skb_one_core+0x14d/0x200 net/core/dev.c:4913 __netif_receive_skb+0x2c/0x1e0 net/core/dev.c:5023 netif_receive_skb_internal+0x12c/0x620 net/core/dev.c:5126 napi_frags_finish net/core/dev.c:5664 [inline] napi_gro_frags+0x75a/0xc90 net/core/dev.c:5737 tun_get_user+0x3189/0x4250 drivers/net/tun.c:1923 tun_chr_write_iter+0xb9/0x154 drivers/net/tun.c:1968 call_write_iter include/linux/fs.h:1808 [inline] do_iter_readv_writev+0x8b0/0xa80 fs/read_write.c:680 do_iter_write+0x185/0x5f0 fs/read_write.c:959 vfs_writev+0x1f1/0x360 fs/read_write.c:1004 do_writev+0x11a/0x310 fs/read_write.c:1039 __do_sys_writev fs/read_write.c:1112 [inline] __se_sys_writev fs/read_write.c:1109 [inline] __x64_sys_writev+0x75/0xb0 fs/read_write.c:1109 do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x457421 Code: 75 14 b8 14 00 00 00 0f 05 48 3d 01 f0 ff ff 0f 83 34 b5 fb ff c3 48 83 ec 08 e8 1a 2d 00 00 48 89 04 24 b8 14 00 00 00 0f 05 <48> 8b 3c 24 48 89 c2 e8 63 2d 00 00 48 89 d0 48 83 c4 08 48 3d 01 RSP: 002b:00007f2d30ecaba0 EFLAGS: 00000293 ORIG_RAX: 0000000000000014 RAX: ffffffffffffffda RBX: 000000000000003e RCX: 0000000000457421 RDX: 0000000000000001 RSI: 00007f2d30ecabf0 RDI: 00000000000000f0 RBP: 0000000020000500 R08: 00000000000000f0 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000293 R12: 00007f2d30ecb6d4 R13: 00000000004c4890 R14: 00000000004d7b90 R15: 00000000ffffffff Allocated by task 22437: save_stack+0x43/0xd0 mm/kasan/kasan.c:448 set_track mm/kasan/kasan.c:460 [inline] kasan_kmalloc+0xc7/0xe0 mm/kasan/kasan.c:553 __do_kmalloc mm/slab.c:3718 [inline] __kmalloc+0x14e/0x760 mm/slab.c:3727 kmalloc include/linux/slab.h:518 [inline] sock_kmalloc+0x15a/0x1f0 net/core/sock.c:1983 ip6_mc_source+0x14dd/0x1960 net/ipv6/mcast.c:427 do_ipv6_setsockopt.isra.9+0x3afb/0x45d0 net/ipv6/ipv6_sockglue.c:743 ipv6_setsockopt+0xbd/0x170 net/ipv6/ipv6_sockglue.c:933 rawv6_setsockopt+0x59/0x140 net/ipv6/raw.c:1069 sock_common_setsockopt+0x9a/0xe0 net/core/sock.c:3038 __sys_setsockopt+0x1ba/0x3c0 net/socket.c:1902 __do_sys_setsockopt net/socket.c:1913 [inline] __se_sys_setsockopt net/socket.c:1910 [inline] __x64_sys_setsockopt+0xbe/0x150 net/socket.c:1910 do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe Freed by task 22430: save_stack+0x43/0xd0 mm/kasan/kasan.c:448 set_track mm/kasan/kasan.c:460 [inline] __kasan_slab_free+0x102/0x150 mm/kasan/kasan.c:521 kasan_slab_free+0xe/0x10 mm/kasan/kasan.c:528 __cache_free mm/slab.c:3498 [inline] kfree+0xcf/0x230 mm/slab.c:3813 __sock_kfree_s net/core/sock.c:2004 [inline] sock_kfree_s+0x29/0x60 net/core/sock.c:2010 ip6_mc_leave_src+0x11a/0x1d0 net/ipv6/mcast.c:2448 __ipv6_sock_mc_close+0x20b/0x4e0 net/ipv6/mcast.c:310 ipv6_sock_mc_close+0x158/0x1d0 net/ipv6/mcast.c:328 inet6_release+0x40/0x70 net/ipv6/af_inet6.c:452 __sock_release+0xd7/0x250 net/socket.c:579 sock_close+0x19/0x20 net/socket.c:1141 __fput+0x385/0xa30 fs/file_table.c:278 ____fput+0x15/0x20 fs/file_table.c:309 task_work_run+0x1e8/0x2a0 kernel/task_work.c:113 tracehook_notify_resume include/linux/tracehook.h:193 [inline] exit_to_usermode_loop+0x318/0x380 arch/x86/entry/common.c:166 prepare_exit_to_usermode arch/x86/entry/common.c:197 [inline] syscall_return_slowpath arch/x86/entry/common.c:268 [inline] do_syscall_64+0x6be/0x820 arch/x86/entry/common.c:293 entry_SYSCALL_64_after_hwframe+0x49/0xbe The buggy address belongs to the object at ffff8801ce7f2500 which belongs to the cache kmalloc-192 of size 192 The buggy address is located 16 bytes inside of 192-byte region [ffff8801ce7f2500, ffff8801ce7f25c0) The buggy address belongs to the page: page:ffffea000739fc80 count:1 mapcount:0 mapping:ffff8801da800040 index:0x0 flags: 0x2fffc0000000100(slab) raw: 02fffc0000000100 ffffea0006f6e548 ffffea000737b948 ffff8801da800040 raw: 0000000000000000 ffff8801ce7f2000 0000000100000010 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff8801ce7f2400: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff8801ce7f2480: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc >ffff8801ce7f2500: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff8801ce7f2580: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc ffff8801ce7f2600: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/mcast.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index d112762b4cb86..bd269e78272a8 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -2412,17 +2412,17 @@ static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml, { int err; - /* callers have the socket lock and rtnl lock - * so no other readers or writers of iml or its sflist - */ + write_lock_bh(&iml->sflock); if (!iml->sflist) { /* any-source empty exclude case */ - return ip6_mc_del_src(idev, &iml->addr, iml->sfmode, 0, NULL, 0); + err = ip6_mc_del_src(idev, &iml->addr, iml->sfmode, 0, NULL, 0); + } else { + err = ip6_mc_del_src(idev, &iml->addr, iml->sfmode, + iml->sflist->sl_count, iml->sflist->sl_addr, 0); + sock_kfree_s(sk, iml->sflist, IP6_SFLSIZE(iml->sflist->sl_max)); + iml->sflist = NULL; } - err = ip6_mc_del_src(idev, &iml->addr, iml->sfmode, - iml->sflist->sl_count, iml->sflist->sl_addr, 0); - sock_kfree_s(sk, iml->sflist, IP6_SFLSIZE(iml->sflist->sl_max)); - iml->sflist = NULL; + write_unlock_bh(&iml->sflock); return err; } -- GitLab From 1e42e97af9832848bbeba5c1f7f35dbcffec5642 Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Wed, 24 Oct 2018 14:37:21 +0200 Subject: [PATCH 1196/2269] ipv6/ndisc: Preserve IPv6 control buffer if protocol error handlers are called [ Upstream commit ee1abcf689353f36d9322231b4320926096bdee0 ] Commit a61bbcf28a8c ("[NET]: Store skb->timestamp as offset to a base timestamp") introduces a neighbour control buffer and zeroes it out in ndisc_rcv(), as ndisc_recv_ns() uses it. Commit f2776ff04722 ("[IPV6]: Fix address/interface handling in UDP and DCCP, according to the scoping architecture.") introduces the usage of the IPv6 control buffer in protocol error handlers (e.g. inet6_iif() in present-day __udp6_lib_err()). Now, with commit b94f1c0904da ("ipv6: Use icmpv6_notify() to propagate redirect, instead of rt6_redirect()."), we call protocol error handlers from ndisc_redirect_rcv(), after the control buffer is already stolen and some parts are already zeroed out. This implies that inet6_iif() on this path will always return zero. This gives unexpected results on UDP socket lookup in __udp6_lib_err(), as we might actually need to match sockets for a given interface. Instead of always claiming the control buffer in ndisc_rcv(), do that only when needed. Fixes: b94f1c0904da ("ipv6: Use icmpv6_notify() to propagate redirect, instead of rt6_redirect().") Signed-off-by: Stefano Brivio Reviewed-by: Sabrina Dubroca Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ndisc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 528218460bc59..5f80e57e93ed9 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1722,10 +1722,9 @@ int ndisc_rcv(struct sk_buff *skb) return 0; } - memset(NEIGH_CB(skb), 0, sizeof(struct neighbour_cb)); - switch (msg->icmph.icmp6_type) { case NDISC_NEIGHBOUR_SOLICITATION: + memset(NEIGH_CB(skb), 0, sizeof(struct neighbour_cb)); ndisc_recv_ns(skb); break; -- GitLab From 6bb1381cc8650c9b86e41ea639db4dfaed750ad4 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 11 Oct 2018 11:15:13 -0700 Subject: [PATCH 1197/2269] llc: set SOCK_RCU_FREE in llc_sap_add_socket() [ Upstream commit 5a8e7aea953bdb6d4da13aff6f1e7f9c62023499 ] WHen an llc sock is added into the sk_laddr_hash of an llc_sap, it is not marked with SOCK_RCU_FREE. This causes that the sock could be freed while it is still being read by __llc_lookup_established() with RCU read lock. sock is refcounted, but with RCU read lock, nothing prevents the readers getting a zero refcnt. Fix it by setting SOCK_RCU_FREE in llc_sap_add_socket(). Reported-by: syzbot+11e05f04c15e03be5254@syzkaller.appspotmail.com Signed-off-by: Cong Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/llc/llc_conn.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c index b084fd19ad325..56c3fb5cc8058 100644 --- a/net/llc/llc_conn.c +++ b/net/llc/llc_conn.c @@ -734,6 +734,7 @@ void llc_sap_add_socket(struct llc_sap *sap, struct sock *sk) llc_sk(sk)->sap = sap; spin_lock_bh(&sap->sk_lock); + sock_set_flag(sk, SOCK_RCU_FREE); sap->sk_count++; sk_nulls_add_node_rcu(sk, laddr_hb); hlist_add_head(&llc->dev_hash_node, dev_hb); -- GitLab From ff96a397744084e6eb13ab4afe47add5c03435bf Mon Sep 17 00:00:00 2001 From: Fugang Duan Date: Mon, 15 Oct 2018 05:19:00 +0000 Subject: [PATCH 1198/2269] net: fec: don't dump RX FIFO register when not available [ Upstream commit ec20a63aa8b8ec3223fb25cdb2a49f9f9dfda88c ] Commit db65f35f50e0 ("net: fec: add support of ethtool get_regs") introduce ethool "--register-dump" interface to dump all FEC registers. But not all silicon implementations of the Freescale FEC hardware module have the FRBR (FIFO Receive Bound Register) and FRSR (FIFO Receive Start Register) register, so we should not be trying to dump them on those that don't. To fix it we create a quirk flag, FEC_QUIRK_HAS_RFREG, and check it before dump those RX FIFO registers. Signed-off-by: Fugang Duan Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/freescale/fec.h | 4 ++++ drivers/net/ethernet/freescale/fec_main.c | 16 ++++++++++++---- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h index 44720f83af273..4d4f16ad88c3b 100644 --- a/drivers/net/ethernet/freescale/fec.h +++ b/drivers/net/ethernet/freescale/fec.h @@ -451,6 +451,10 @@ struct bufdesc_ex { * initialisation. */ #define FEC_QUIRK_MIB_CLEAR (1 << 15) +/* Only i.MX25/i.MX27/i.MX28 controller supports FRBR,FRSR registers, + * those FIFO receive registers are resolved in other platforms. + */ +#define FEC_QUIRK_HAS_FRREG (1 << 16) struct bufdesc_prop { int qid; diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 8bfa6ef826a9f..ce55c8f7f33a4 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -89,14 +89,16 @@ static struct platform_device_id fec_devtype[] = { .driver_data = 0, }, { .name = "imx25-fec", - .driver_data = FEC_QUIRK_USE_GASKET | FEC_QUIRK_MIB_CLEAR, + .driver_data = FEC_QUIRK_USE_GASKET | FEC_QUIRK_MIB_CLEAR | + FEC_QUIRK_HAS_FRREG, }, { .name = "imx27-fec", - .driver_data = FEC_QUIRK_MIB_CLEAR, + .driver_data = FEC_QUIRK_MIB_CLEAR | FEC_QUIRK_HAS_FRREG, }, { .name = "imx28-fec", .driver_data = FEC_QUIRK_ENET_MAC | FEC_QUIRK_SWAP_FRAME | - FEC_QUIRK_SINGLE_MDIO | FEC_QUIRK_HAS_RACC, + FEC_QUIRK_SINGLE_MDIO | FEC_QUIRK_HAS_RACC | + FEC_QUIRK_HAS_FRREG, }, { .name = "imx6q-fec", .driver_data = FEC_QUIRK_ENET_MAC | FEC_QUIRK_HAS_GBIT | @@ -2166,7 +2168,13 @@ static void fec_enet_get_regs(struct net_device *ndev, memset(buf, 0, regs->len); for (i = 0; i < ARRAY_SIZE(fec_enet_register_offset); i++) { - off = fec_enet_register_offset[i] / 4; + off = fec_enet_register_offset[i]; + + if ((off == FEC_R_BOUND || off == FEC_R_FSTART) && + !(fep->quirks & FEC_QUIRK_HAS_FRREG)) + continue; + + off >>= 2; buf[off] = readl(&theregs[off]); } } -- GitLab From e581e28f9dcbb461751df6eeb6d24335f94e9f11 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Fri, 19 Oct 2018 10:00:19 -0700 Subject: [PATCH 1199/2269] net/ipv6: Fix index counter for unicast addresses in in6_dump_addrs [ Upstream commit 4ba4c566ba8448a05e6257e0b98a21f1a0d55315 ] The loop wants to skip previously dumped addresses, so loops until current index >= saved index. If the message fills it wants to save the index for the next address to dump - ie., the one that did not fit in the current message. Currently, it is incrementing the index counter before comparing to the saved index, and then the saved index is off by 1 - it assumes the current address is going to fit in the message. Change the index handling to increment only after a succesful dump. Fixes: 502a2ffd7376a ("ipv6: convert idev_list to list macros") Signed-off-by: David Ahern Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/addrconf.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 569f7c3f6b956..9ac6f62322946 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -4793,8 +4793,8 @@ static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb, /* unicast address incl. temp addr */ list_for_each_entry(ifa, &idev->addr_list, if_list) { - if (++ip_idx < s_ip_idx) - continue; + if (ip_idx < s_ip_idx) + goto next; err = inet6_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, @@ -4803,6 +4803,8 @@ static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb, if (err < 0) break; nl_dump_check_consistent(cb, nlmsg_hdr(skb)); +next: + ip_idx++; } break; } -- GitLab From 29d871195e9e63ff567fe3fb02b01be60e015877 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 26 Oct 2018 15:51:06 -0700 Subject: [PATCH 1200/2269] net: sched: gred: pass the right attribute to gred_change_table_def() [ Upstream commit 38b4f18d56372e1e21771ab7b0357b853330186c ] gred_change_table_def() takes a pointer to TCA_GRED_DPS attribute, and expects it will be able to interpret its contents as struct tc_gred_sopt. Pass the correct gred attribute, instead of TCA_OPTIONS. This bug meant the table definition could never be changed after Qdisc was initialized (unless whatever TCA_OPTIONS contained both passed netlink validation and was a valid struct tc_gred_sopt...). Old behaviour: $ ip link add type dummy $ tc qdisc replace dev dummy0 parent root handle 7: \ gred setup vqs 4 default 0 $ tc qdisc replace dev dummy0 parent root handle 7: \ gred setup vqs 4 default 0 RTNETLINK answers: Invalid argument Now: $ ip link add type dummy $ tc qdisc replace dev dummy0 parent root handle 7: \ gred setup vqs 4 default 0 $ tc qdisc replace dev dummy0 parent root handle 7: \ gred setup vqs 4 default 0 $ tc qdisc replace dev dummy0 parent root handle 7: \ gred setup vqs 4 default 0 Fixes: f62d6b936df5 ("[PKT_SCHED]: GRED: Use central VQ change procedure") Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/sch_gred.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c index bc30f9186ac67..d3105ee8decfb 100644 --- a/net/sched/sch_gred.c +++ b/net/sched/sch_gred.c @@ -411,7 +411,7 @@ static int gred_change(struct Qdisc *sch, struct nlattr *opt) if (tb[TCA_GRED_PARMS] == NULL && tb[TCA_GRED_STAB] == NULL) { if (tb[TCA_GRED_LIMIT] != NULL) sch->limit = nla_get_u32(tb[TCA_GRED_LIMIT]); - return gred_change_table_def(sch, opt); + return gred_change_table_def(sch, tb[TCA_GRED_DPS]); } if (tb[TCA_GRED_PARMS] == NULL || -- GitLab From 7d58456872c4a65926541e8aa983056bd91f8ed6 Mon Sep 17 00:00:00 2001 From: Wenwen Wang Date: Thu, 18 Oct 2018 09:36:46 -0500 Subject: [PATCH 1201/2269] net: socket: fix a missing-check bug [ Upstream commit b6168562c8ce2bd5a30e213021650422e08764dc ] In ethtool_ioctl(), the ioctl command 'ethcmd' is checked through a switch statement to see whether it is necessary to pre-process the ethtool structure, because, as mentioned in the comment, the structure ethtool_rxnfc is defined with padding. If yes, a user-space buffer 'rxnfc' is allocated through compat_alloc_user_space(). One thing to note here is that, if 'ethcmd' is ETHTOOL_GRXCLSRLALL, the size of the buffer 'rxnfc' is partially determined by 'rule_cnt', which is actually acquired from the user-space buffer 'compat_rxnfc', i.e., 'compat_rxnfc->rule_cnt', through get_user(). After 'rxnfc' is allocated, the data in the original user-space buffer 'compat_rxnfc' is then copied to 'rxnfc' through copy_in_user(), including the 'rule_cnt' field. However, after this copy, no check is re-enforced on 'rxnfc->rule_cnt'. So it is possible that a malicious user race to change the value in the 'compat_rxnfc->rule_cnt' between these two copies. Through this way, the attacker can bypass the previous check on 'rule_cnt' and inject malicious data. This can cause undefined behavior of the kernel and introduce potential security risk. This patch avoids the above issue via copying the value acquired by get_user() to 'rxnfc->rule_cn', if 'ethcmd' is ETHTOOL_GRXCLSRLALL. Signed-off-by: Wenwen Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/socket.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/net/socket.c b/net/socket.c index d27922639a209..a401578f3f284 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2879,9 +2879,14 @@ static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32) copy_in_user(&rxnfc->fs.ring_cookie, &compat_rxnfc->fs.ring_cookie, (void __user *)(&rxnfc->fs.location + 1) - - (void __user *)&rxnfc->fs.ring_cookie) || - copy_in_user(&rxnfc->rule_cnt, &compat_rxnfc->rule_cnt, - sizeof(rxnfc->rule_cnt))) + (void __user *)&rxnfc->fs.ring_cookie)) + return -EFAULT; + if (ethcmd == ETHTOOL_GRXCLSRLALL) { + if (put_user(rule_cnt, &rxnfc->rule_cnt)) + return -EFAULT; + } else if (copy_in_user(&rxnfc->rule_cnt, + &compat_rxnfc->rule_cnt, + sizeof(rxnfc->rule_cnt))) return -EFAULT; } -- GitLab From 441d0e7540824732f25f38cc270343d34427bebe Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Wed, 31 Oct 2018 16:08:10 +0100 Subject: [PATCH 1202/2269] net: stmmac: Fix stmmac_mdio_reset() when building stmmac as modules [ Upstream commit 30549aab146ccb1275230c3b4b4bc6b4181fd54e ] When building stmmac, it is only possible to select CONFIG_DWMAC_GENERIC, or any of the glue drivers, when CONFIG_STMMAC_PLATFORM is set. The only exception is CONFIG_STMMAC_PCI. When calling of_mdiobus_register(), it will call our ->reset() callback, which is set to stmmac_mdio_reset(). Most of the code in stmmac_mdio_reset() is protected by a "#if defined(CONFIG_STMMAC_PLATFORM)", which will evaluate to false when CONFIG_STMMAC_PLATFORM=m. Because of this, the phy reset gpio will only be pulled when stmmac is built as built-in, but not when built as modules. Fix this by using "#if IS_ENABLED()" instead of "#if defined()". Signed-off-by: Niklas Cassel Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c index f5f37bfa1d588..ff2eeddf588e5 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c @@ -133,7 +133,7 @@ static int stmmac_mdio_write(struct mii_bus *bus, int phyaddr, int phyreg, */ int stmmac_mdio_reset(struct mii_bus *bus) { -#if defined(CONFIG_STMMAC_PLATFORM) +#if IS_ENABLED(CONFIG_STMMAC_PLATFORM) struct net_device *ndev = bus->priv; struct stmmac_priv *priv = netdev_priv(ndev); unsigned int mii_address = priv->hw->mii.addr; -- GitLab From 0ecebdfb2e3a8a22ec157c5d388a6ac3a557919b Mon Sep 17 00:00:00 2001 From: Sean Tranchetti Date: Tue, 23 Oct 2018 16:04:31 -0600 Subject: [PATCH 1203/2269] net: udp: fix handling of CHECKSUM_COMPLETE packets [ Upstream commit db4f1be3ca9b0ef7330763d07bf4ace83ad6f913 ] Current handling of CHECKSUM_COMPLETE packets by the UDP stack is incorrect for any packet that has an incorrect checksum value. udp4/6_csum_init() will both make a call to __skb_checksum_validate_complete() to initialize/validate the csum field when receiving a CHECKSUM_COMPLETE packet. When this packet fails validation, skb->csum will be overwritten with the pseudoheader checksum so the packet can be fully validated by software, but the skb->ip_summed value will be left as CHECKSUM_COMPLETE so that way the stack can later warn the user about their hardware spewing bad checksums. Unfortunately, leaving the SKB in this state can cause problems later on in the checksum calculation. Since the the packet is still marked as CHECKSUM_COMPLETE, udp_csum_pull_header() will SUBTRACT the checksum of the UDP header from skb->csum instead of adding it, leaving us with a garbage value in that field. Once we try to copy the packet to userspace in the udp4/6_recvmsg(), we'll make a call to skb_copy_and_csum_datagram_msg() to checksum the packet data and add it in the garbage skb->csum value to perform our final validation check. Since the value we're validating is not the proper checksum, it's possible that the folded value could come out to 0, causing us not to drop the packet. Instead, we believe that the packet was checksummed incorrectly by hardware since skb->ip_summed is still CHECKSUM_COMPLETE, and we attempt to warn the user with netdev_rx_csum_fault(skb->dev); Unfortunately, since this is the UDP path, skb->dev has been overwritten by skb->dev_scratch and is no longer a valid pointer, so we end up reading invalid memory. This patch addresses this problem in two ways: 1) Do not use the dev pointer when calling netdev_rx_csum_fault() from skb_copy_and_csum_datagram_msg(). Since this gets called from the UDP path where skb->dev has been overwritten, we have no way of knowing if the pointer is still valid. Also for the sake of consistency with the other uses of netdev_rx_csum_fault(), don't attempt to call it if the packet was checksummed by software. 2) Add better CHECKSUM_COMPLETE handling to udp4/6_csum_init(). If we receive a packet that's CHECKSUM_COMPLETE that fails verification (i.e. skb->csum_valid == 0), check who performed the calculation. It's possible that the checksum was done in software by the network stack earlier (such as Netfilter's CONNTRACK module), and if that says the checksum is bad, we can drop the packet immediately instead of waiting until we try and copy it to userspace. Otherwise, we need to mark the SKB as CHECKSUM_NONE, since the skb->csum field no longer contains the full packet checksum after the call to __skb_checksum_validate_complete(). Fixes: e6afc8ace6dd ("udp: remove headers from UDP packets before queueing") Fixes: c84d949057ca ("udp: copy skb->truesize in the first cache line") Cc: Sam Kumar Cc: Eric Dumazet Signed-off-by: Sean Tranchetti Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/datagram.c | 5 +++-- net/ipv4/udp.c | 20 ++++++++++++++++++-- net/ipv6/ip6_checksum.c | 20 ++++++++++++++++++-- 3 files changed, 39 insertions(+), 6 deletions(-) diff --git a/net/core/datagram.c b/net/core/datagram.c index 3964c108b1698..d8a0774f76081 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -810,8 +810,9 @@ int skb_copy_and_csum_datagram_msg(struct sk_buff *skb, return -EINVAL; } - if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE)) - netdev_rx_csum_fault(skb->dev); + if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) && + !skb->csum_complete_sw) + netdev_rx_csum_fault(NULL); } return 0; fault: diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index dc0ec227b9d21..b89920c0f2263 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2045,8 +2045,24 @@ static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh, /* Note, we are only interested in != 0 or == 0, thus the * force to int. */ - return (__force int)skb_checksum_init_zero_check(skb, proto, uh->check, - inet_compute_pseudo); + err = (__force int)skb_checksum_init_zero_check(skb, proto, uh->check, + inet_compute_pseudo); + if (err) + return err; + + if (skb->ip_summed == CHECKSUM_COMPLETE && !skb->csum_valid) { + /* If SW calculated the value, we know it's bad */ + if (skb->csum_complete_sw) + return 1; + + /* HW says the value is bad. Let's validate that. + * skb->csum is no longer the full packet checksum, + * so don't treat it as such. + */ + skb_checksum_complete_unset(skb); + } + + return 0; } /* wrapper for udp_queue_rcv_skb tacking care of csum conversion and diff --git a/net/ipv6/ip6_checksum.c b/net/ipv6/ip6_checksum.c index 547515e8450a1..377717045f8f5 100644 --- a/net/ipv6/ip6_checksum.c +++ b/net/ipv6/ip6_checksum.c @@ -88,8 +88,24 @@ int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh, int proto) * Note, we are only interested in != 0 or == 0, thus the * force to int. */ - return (__force int)skb_checksum_init_zero_check(skb, proto, uh->check, - ip6_compute_pseudo); + err = (__force int)skb_checksum_init_zero_check(skb, proto, uh->check, + ip6_compute_pseudo); + if (err) + return err; + + if (skb->ip_summed == CHECKSUM_COMPLETE && !skb->csum_valid) { + /* If SW calculated the value, we know it's bad */ + if (skb->csum_complete_sw) + return 1; + + /* HW says the value is bad. Let's validate that. + * skb->csum is no longer the full packet checksum, + * so don't treat is as such. + */ + skb_checksum_complete_unset(skb); + } + + return 0; } EXPORT_SYMBOL(udp6_csum_init); -- GitLab From aa8d067c8547ad146ef18baecabc032618f7efb6 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Thu, 18 Oct 2018 19:56:01 +0200 Subject: [PATCH 1204/2269] r8169: fix NAPI handling under high load [ Upstream commit 6b839b6cf9eada30b086effb51e5d6076bafc761 ] rtl_rx() and rtl_tx() are called only if the respective bits are set in the interrupt status register. Under high load NAPI may not be able to process all data (work_done == budget) and it will schedule subsequent calls to the poll callback. rtl_ack_events() however resets the bits in the interrupt status register, therefore subsequent calls to rtl8169_poll() won't call rtl_rx() and rtl_tx() - chip interrupts are still disabled. Fix this by calling rtl_rx() and rtl_tx() independent of the bits set in the interrupt status register. Both functions will detect if there's nothing to do for them. Fixes: da78dbff2e05 ("r8169: remove work from irq handler.") Signed-off-by: Heiner Kallweit Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/realtek/r8169.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index f7e540eeb877e..1b61ce3101322 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -7579,17 +7579,15 @@ static int rtl8169_poll(struct napi_struct *napi, int budget) struct rtl8169_private *tp = container_of(napi, struct rtl8169_private, napi); struct net_device *dev = tp->dev; u16 enable_mask = RTL_EVENT_NAPI | tp->event_slow; - int work_done= 0; + int work_done; u16 status; status = rtl_get_events(tp); rtl_ack_events(tp, status & ~tp->event_slow); - if (status & RTL_EVENT_NAPI_RX) - work_done = rtl_rx(dev, tp, (u32) budget); + work_done = rtl_rx(dev, tp, (u32) budget); - if (status & RTL_EVENT_NAPI_TX) - rtl_tx(dev, tp); + rtl_tx(dev, tp); if (status & tp->event_slow) { enable_mask &= ~tp->event_slow; -- GitLab From 606694e5ec81d598c0300f9e16db0464659df557 Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Tue, 16 Oct 2018 15:18:17 -0300 Subject: [PATCH 1205/2269] sctp: fix race on sctp_id2asoc [ Upstream commit b336decab22158937975293aea79396525f92bb3 ] syzbot reported an use-after-free involving sctp_id2asoc. Dmitry Vyukov helped to root cause it and it is because of reading the asoc after it was freed: CPU 1 CPU 2 (working on socket 1) (working on socket 2) sctp_association_destroy sctp_id2asoc spin lock grab the asoc from idr spin unlock spin lock remove asoc from idr spin unlock free(asoc) if asoc->base.sk != sk ... [*] This can only be hit if trying to fetch asocs from different sockets. As we have a single IDR for all asocs, in all SCTP sockets, their id is unique on the system. An application can try to send stuff on an id that matches on another socket, and the if in [*] will protect from such usage. But it didn't consider that as that asoc may belong to another socket, it may be freed in parallel (read: under another socket lock). We fix it by moving the checks in [*] into the protected region. This fixes it because the asoc cannot be freed while the lock is held. Reported-by: syzbot+c7dd55d7aec49d48e49a@syzkaller.appspotmail.com Acked-by: Dmitry Vyukov Signed-off-by: Marcelo Ricardo Leitner Acked-by: Neil Horman Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sctp/socket.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 7900943111431..d87d56978b4c9 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -250,11 +250,10 @@ struct sctp_association *sctp_id2assoc(struct sock *sk, sctp_assoc_t id) spin_lock_bh(&sctp_assocs_id_lock); asoc = (struct sctp_association *)idr_find(&sctp_assocs_id, (int)id); + if (asoc && (asoc->base.sk != sk || asoc->base.dead)) + asoc = NULL; spin_unlock_bh(&sctp_assocs_id_lock); - if (!asoc || (asoc->base.sk != sk) || asoc->base.dead) - return NULL; - return asoc; } -- GitLab From 189771d69e14404b0e6eb57aa40bed2a95f7e2c2 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 17 Oct 2018 11:44:04 +0200 Subject: [PATCH 1206/2269] udp6: fix encap return code for resubmitting [ Upstream commit 84dad55951b0d009372ec21760b650634246e144 ] The commit eb63f2964dbe ("udp6: add missing checks on edumux packet processing") used the same return code convention of the ipv4 counterpart, but ipv6 uses the opposite one: positive values means resubmit. This change addresses the issue, using positive return value for resubmitting. Also update the related comment, which was broken, too. Fixes: eb63f2964dbe ("udp6: add missing checks on edumux packet processing") Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/udp.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 5cee941ab0a9c..8d185a0fc5af3 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -794,11 +794,9 @@ static int udp6_unicast_rcv_skb(struct sock *sk, struct sk_buff *skb, ret = udpv6_queue_rcv_skb(sk, skb); - /* a return value > 0 means to resubmit the input, but - * it wants the return to be -protocol, or 0 - */ + /* a return value > 0 means to resubmit the input */ if (ret > 0) - return -ret; + return ret; return 0; } -- GitLab From b522f279f91b8ba916d941e0e73a01ecee5da77a Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Tue, 30 Oct 2018 14:10:49 +0800 Subject: [PATCH 1207/2269] vhost: Fix Spectre V1 vulnerability [ Upstream commit ff002269a4ee9c769dbf9365acef633ebcbd6cbe ] The idx in vhost_vring_ioctl() was controlled by userspace, hence a potential exploitation of the Spectre variant 1 vulnerability. Fixing this by sanitizing idx before using it to index d->vqs. Cc: Michael S. Tsirkin Cc: Josh Poimboeuf Cc: Andrea Arcangeli Signed-off-by: Jason Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/vhost/vhost.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 7ee3167bc083e..ffdd4e937d1d8 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -30,6 +30,7 @@ #include #include #include +#include #include "vhost.h" @@ -1366,6 +1367,7 @@ long vhost_vring_ioctl(struct vhost_dev *d, int ioctl, void __user *argp) if (idx >= d->nvqs) return -ENOBUFS; + idx = array_index_nospec(idx, d->nvqs); vq = d->vqs[idx]; mutex_lock(&vq->mutex); -- GitLab From 574af67123c3da1e094813d122baa6cd37ccc4af Mon Sep 17 00:00:00 2001 From: Ake Koomsin Date: Wed, 17 Oct 2018 19:44:12 +0900 Subject: [PATCH 1208/2269] virtio_net: avoid using netif_tx_disable() for serializing tx routine [ Upstream commit 05c998b738fdd3e5d6a257bcacc8f34b6284d795 ] Commit 713a98d90c5e ("virtio-net: serialize tx routine during reset") introduces netif_tx_disable() after netif_device_detach() in order to avoid use-after-free of tx queues. However, there are two issues. 1) Its operation is redundant with netif_device_detach() in case the interface is running. 2) In case of the interface is not running before suspending and resuming, the tx does not get resumed by netif_device_attach(). This results in losing network connectivity. It is better to use netif_tx_lock_bh()/netif_tx_unlock_bh() instead for serializing tx routine during reset. This also preserves the symmetry of netif_device_detach() and netif_device_attach(). Fixes commit 713a98d90c5e ("virtio-net: serialize tx routine during reset") Signed-off-by: Ake Koomsin Acked-by: Jason Wang Acked-by: Michael S. Tsirkin Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/virtio_net.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 910c46b477699..f528e9ac3413b 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -1872,8 +1872,9 @@ static void virtnet_freeze_down(struct virtio_device *vdev) /* Make sure no work handler is accessing the device */ flush_work(&vi->config_work); + netif_tx_lock_bh(vi->dev); netif_device_detach(vi->dev); - netif_tx_disable(vi->dev); + netif_tx_unlock_bh(vi->dev); cancel_delayed_work_sync(&vi->refill); if (netif_running(vi->dev)) { @@ -1909,7 +1910,9 @@ static int virtnet_restore_up(struct virtio_device *vdev) } } + netif_tx_lock_bh(vi->dev); netif_device_attach(vi->dev); + netif_tx_unlock_bh(vi->dev); return err; } -- GitLab From 95c337646585c27b5e2d94a1daff53ed5e294cb7 Mon Sep 17 00:00:00 2001 From: Wenwen Wang Date: Mon, 8 Oct 2018 10:49:35 -0500 Subject: [PATCH 1209/2269] ethtool: fix a privilege escalation bug [ Upstream commit 58f5bbe331c566f49c9559568f982202a278aa78 ] In dev_ethtool(), the eth command 'ethcmd' is firstly copied from the use-space buffer 'useraddr' and checked to see whether it is ETHTOOL_PERQUEUE. If yes, the sub-command 'sub_cmd' is further copied from the user space. Otherwise, 'sub_cmd' is the same as 'ethcmd'. Next, according to 'sub_cmd', a permission check is enforced through the function ns_capable(). For example, the permission check is required if 'sub_cmd' is ETHTOOL_SCOALESCE, but it is not necessary if 'sub_cmd' is ETHTOOL_GCOALESCE, as suggested in the comment "Allow some commands to be done by anyone". The following execution invokes different handlers according to 'ethcmd'. Specifically, if 'ethcmd' is ETHTOOL_PERQUEUE, ethtool_set_per_queue() is called. In ethtool_set_per_queue(), the kernel object 'per_queue_opt' is copied again from the user-space buffer 'useraddr' and 'per_queue_opt.sub_command' is used to determine which operation should be performed. Given that the buffer 'useraddr' is in the user space, a malicious user can race to change the sub-command between the two copies. In particular, the attacker can supply ETHTOOL_PERQUEUE and ETHTOOL_GCOALESCE to bypass the permission check in dev_ethtool(). Then before ethtool_set_per_queue() is called, the attacker changes ETHTOOL_GCOALESCE to ETHTOOL_SCOALESCE. In this way, the attacker can bypass the permission check and execute ETHTOOL_SCOALESCE. This patch enforces a check in ethtool_set_per_queue() after the second copy from 'useraddr'. If the sub-command is different from the one obtained in the first copy in dev_ethtool(), an error code EINVAL will be returned. Fixes: f38d138a7da6 ("net/ethtool: support set coalesce per queue") Signed-off-by: Wenwen Wang Reviewed-by: Michal Kubecek Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/ethtool.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 0ae5ac5e090fd..3469f5053c790 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -2410,13 +2410,17 @@ roll_back: return ret; } -static int ethtool_set_per_queue(struct net_device *dev, void __user *useraddr) +static int ethtool_set_per_queue(struct net_device *dev, + void __user *useraddr, u32 sub_cmd) { struct ethtool_per_queue_op per_queue_opt; if (copy_from_user(&per_queue_opt, useraddr, sizeof(per_queue_opt))) return -EFAULT; + if (per_queue_opt.sub_command != sub_cmd) + return -EINVAL; + switch (per_queue_opt.sub_command) { case ETHTOOL_GCOALESCE: return ethtool_get_per_queue_coalesce(dev, useraddr, &per_queue_opt); @@ -2787,7 +2791,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) rc = ethtool_get_phy_stats(dev, useraddr); break; case ETHTOOL_PERQUEUE: - rc = ethtool_set_per_queue(dev, useraddr); + rc = ethtool_set_per_queue(dev, useraddr, sub_cmd); break; case ETHTOOL_GLINKSETTINGS: rc = ethtool_get_link_ksettings(dev, useraddr); -- GitLab From 8a865095d399f3318f30176e5d2f677d0354f8d8 Mon Sep 17 00:00:00 2001 From: Tobias Jungel Date: Sun, 28 Oct 2018 12:54:10 +0100 Subject: [PATCH 1210/2269] bonding: fix length of actor system [ Upstream commit 414dd6fb9a1a1b59983aea7bf0f79f0085ecc5b8 ] The attribute IFLA_BOND_AD_ACTOR_SYSTEM is sent to user space having the length of sizeof(bond->params.ad_actor_system) which is 8 byte. This patch aligns the length to ETH_ALEN to have the same MAC address exposed as using sysfs. Fixes: f87fda00b6ed2 ("bonding: prevent out of bound accesses") Signed-off-by: Tobias Jungel Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/bonding/bond_netlink.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/bonding/bond_netlink.c b/drivers/net/bonding/bond_netlink.c index a1b33aa6054a8..77babf1417a7e 100644 --- a/drivers/net/bonding/bond_netlink.c +++ b/drivers/net/bonding/bond_netlink.c @@ -638,8 +638,7 @@ static int bond_fill_info(struct sk_buff *skb, goto nla_put_failure; if (nla_put(skb, IFLA_BOND_AD_ACTOR_SYSTEM, - sizeof(bond->params.ad_actor_system), - &bond->params.ad_actor_system)) + ETH_ALEN, &bond->params.ad_actor_system)) goto nla_put_failure; } if (!bond_3ad_get_active_agg_info(bond, &info)) { -- GitLab From f288424ea85ef806c0f35adc9fd904ac636ffe81 Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Thu, 18 Oct 2018 21:25:07 +0200 Subject: [PATCH 1211/2269] ip6_tunnel: Fix encapsulation layout [ Upstream commit d4d576f5ab7edcb757bb33e6a5600666a0b1232d ] Commit 058214a4d1df ("ip6_tun: Add infrastructure for doing encapsulation") added the ip6_tnl_encap() call in ip6_tnl_xmit(), before the call to ipv6_push_frag_opts() to append the IPv6 Tunnel Encapsulation Limit option (option 4, RFC 2473, par. 5.1) to the outer IPv6 header. As long as the option didn't actually end up in generated packets, this wasn't an issue. Then commit 89a23c8b528b ("ip6_tunnel: Fix missing tunnel encapsulation limit option") fixed sending of this option, and the resulting layout, e.g. for FoU, is: .-------------------.------------.----------.-------------------.----- - - | Outer IPv6 Header | UDP header | Option 4 | Inner IPv6 Header | Payload '-------------------'------------'----------'-------------------'----- - - Needless to say, FoU and GUE (at least) won't work over IPv6. The option is appended by default, and I couldn't find a way to disable it with the current iproute2. Turn this into a more reasonable: .-------------------.----------.------------.-------------------.----- - - | Outer IPv6 Header | Option 4 | UDP header | Inner IPv6 Header | Payload '-------------------'----------'------------'-------------------'----- - - With this, and with 84dad55951b0 ("udp6: fix encap return code for resubmitting"), FoU and GUE work again over IPv6. Fixes: 058214a4d1df ("ip6_tun: Add infrastructure for doing encapsulation") Signed-off-by: Stefano Brivio Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ip6_tunnel.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 0e9296f44ee47..948f304db0a31 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1185,10 +1185,6 @@ route_lookup: } skb_dst_set(skb, dst); - if (encap_limit >= 0) { - init_tel_txopt(&opt, encap_limit); - ipv6_push_frag_opts(skb, &opt.ops, &proto); - } hop_limit = hop_limit ? : ip6_dst_hoplimit(dst); /* Calculate max headroom for all the headers and adjust @@ -1203,6 +1199,11 @@ route_lookup: if (err) return err; + if (encap_limit >= 0) { + init_tel_txopt(&opt, encap_limit); + ipv6_push_frag_opts(skb, &opt.ops, &proto); + } + skb_push(skb, sizeof(struct ipv6hdr)); skb_reset_network_header(skb); ipv6h = ipv6_hdr(skb); -- GitLab From 1d50b74a33d0b201a4bef20461b585317cbf3eb0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Caama=C3=B1o=20Ruiz?= Date: Wed, 31 Oct 2018 18:52:03 +0100 Subject: [PATCH 1212/2269] openvswitch: Fix push/pop ethernet validation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 46ebe2834ba5b541f28ee72e556a3fed42c47570 ] When there are both pop and push ethernet header actions among the actions to be applied to a packet, an unexpected EINVAL (Invalid argument) error is obtained. This is due to mac_proto not being reset correctly when those actions are validated. Reported-at: https://mail.openvswitch.org/pipermail/ovs-discuss/2018-October/047554.html Fixes: 91820da6ae85 ("openvswitch: add Ethernet push and pop actions") Signed-off-by: Jaime Caamaño Ruiz Tested-by: Greg Rose Reviewed-by: Greg Rose Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/openvswitch/flow_netlink.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 4c9c9458374a9..f70e9cbf33d5c 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -2622,7 +2622,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, * is already present */ if (mac_proto != MAC_PROTO_NONE) return -EINVAL; - mac_proto = MAC_PROTO_NONE; + mac_proto = MAC_PROTO_ETHERNET; break; case OVS_ACTION_ATTR_POP_ETH: @@ -2630,7 +2630,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, return -EINVAL; if (vlan_tci & htons(VLAN_TAG_PRESENT)) return -EINVAL; - mac_proto = MAC_PROTO_ETHERNET; + mac_proto = MAC_PROTO_NONE; break; default: -- GitLab From bc478700368780c5460391282a098d5ba57464db Mon Sep 17 00:00:00 2001 From: Huy Nguyen Date: Tue, 11 Sep 2018 14:58:22 -0500 Subject: [PATCH 1213/2269] net/mlx5: Take only bit 24-26 of wqe.pftype_wq for page fault type [ Upstream commit a48bc513159d4767f9988f0d857b2b0c38a4d614 ] The HW spec defines only bits 24-26 of pftype_wq as the page fault type, use the required mask to ensure that. Fixes: d9aaed838765 ("{net,IB}/mlx5: Refactor page fault handling") Signed-off-by: Huy Nguyen Signed-off-by: Eli Cohen Signed-off-by: Saeed Mahameed Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index eb91de86202b2..5da0b6e11530c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -262,7 +262,7 @@ static void eq_pf_process(struct mlx5_eq *eq) case MLX5_PFAULT_SUBTYPE_WQE: /* WQE based event */ pfault->type = - be32_to_cpu(pf_eqe->wqe.pftype_wq) >> 24; + (be32_to_cpu(pf_eqe->wqe.pftype_wq) >> 24) & 0x7; pfault->token = be32_to_cpu(pf_eqe->wqe.token); pfault->wqe.wq_num = -- GitLab From fd090ba395fb60c71abb0c35bd026c4dcf5a4a1d Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Thu, 18 Oct 2018 10:34:26 +0200 Subject: [PATCH 1214/2269] net: sched: Fix for duplicate class dump [ Upstream commit 3c53ed8fef6881a864f0ee8240ed2793ef73ad0d ] When dumping classes by parent, kernel would return classes twice: | # tc qdisc add dev lo root prio | # tc class show dev lo | class prio 8001:1 parent 8001: | class prio 8001:2 parent 8001: | class prio 8001:3 parent 8001: | # tc class show dev lo parent 8001: | class prio 8001:1 parent 8001: | class prio 8001:2 parent 8001: | class prio 8001:3 parent 8001: | class prio 8001:1 parent 8001: | class prio 8001:2 parent 8001: | class prio 8001:3 parent 8001: This comes from qdisc_match_from_root() potentially returning the root qdisc itself if its handle matched. Though in that case, root's classes were already dumped a few lines above. Fixes: cb395b2010879 ("net: sched: optimize class dumps") Signed-off-by: Phil Sutter Reviewed-by: Jiri Pirko Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/sch_api.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index cd69aa0675430..691ca96f7460b 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1917,7 +1917,8 @@ static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb, if (tcm->tcm_parent) { q = qdisc_match_from_root(root, TC_H_MAJ(tcm->tcm_parent)); - if (q && tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0) + if (q && q != root && + tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0) return -1; return 0; } -- GitLab From 623670a9f20755b389f958b85bcb5b1eb17cdaed Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 1 Nov 2018 12:02:37 -0700 Subject: [PATCH 1215/2269] net: drop skb on failure in ip_check_defrag() [ Upstream commit 7de414a9dd91426318df7b63da024b2b07e53df5 ] Most callers of pskb_trim_rcsum() simply drop the skb when it fails, however, ip_check_defrag() still continues to pass the skb up to stack. This is suspicious. In ip_check_defrag(), after we learn the skb is an IP fragment, passing the skb to callers makes no sense, because callers expect fragments are defrag'ed on success. So, dropping the skb when we can't defrag it is reasonable. Note, prior to commit 88078d98d1bb, this is not a big problem as checksum will be fixed up anyway. After it, the checksum is not correct on failure. Found this during code review. Fixes: 88078d98d1bb ("net: pskb_trim_rcsum() and CHECKSUM_COMPLETE are friends") Cc: Eric Dumazet Signed-off-by: Cong Wang Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ip_fragment.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index e7227128df2c8..cb8fa5d7afe17 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -720,10 +720,14 @@ struct sk_buff *ip_check_defrag(struct net *net, struct sk_buff *skb, u32 user) if (ip_is_fragment(&iph)) { skb = skb_share_check(skb, GFP_ATOMIC); if (skb) { - if (!pskb_may_pull(skb, netoff + iph.ihl * 4)) - return skb; - if (pskb_trim_rcsum(skb, netoff + len)) - return skb; + if (!pskb_may_pull(skb, netoff + iph.ihl * 4)) { + kfree_skb(skb); + return NULL; + } + if (pskb_trim_rcsum(skb, netoff + len)) { + kfree_skb(skb); + return NULL; + } memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); if (ip_defrag(net, skb, user)) return NULL; -- GitLab From 855cb69f4e6d3068ad18738f0cd6ff3cd099ccfc Mon Sep 17 00:00:00 2001 From: Dimitris Michailidis Date: Fri, 19 Oct 2018 17:07:13 -0700 Subject: [PATCH 1216/2269] net: fix pskb_trim_rcsum_slow() with odd trim offset [ Upstream commit d55bef5059dd057bd077155375c581b49d25be7e ] We've been getting checksum errors involving small UDP packets, usually 59B packets with 1 extra non-zero padding byte. netdev_rx_csum_fault() has been complaining that HW is providing bad checksums. Turns out the problem is in pskb_trim_rcsum_slow(), introduced in commit 88078d98d1bb ("net: pskb_trim_rcsum() and CHECKSUM_COMPLETE are friends"). The source of the problem is that when the bytes we are trimming start at an odd address, as in the case of the 1 padding byte above, skb_checksum() returns a byte-swapped value. We cannot just combine this with skb->csum using csum_sub(). We need to use csum_block_sub() here that takes into account the parity of the start address and handles the swapping. Matches existing code in __skb_postpull_rcsum() and esp_remove_trailer(). Fixes: 88078d98d1bb ("net: pskb_trim_rcsum() and CHECKSUM_COMPLETE are friends") Signed-off-by: Dimitris Michailidis Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/skbuff.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 9f80b947f53b1..c19a118f9f827 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -1843,8 +1843,9 @@ int pskb_trim_rcsum_slow(struct sk_buff *skb, unsigned int len) if (skb->ip_summed == CHECKSUM_COMPLETE) { int delta = skb->len - len; - skb->csum = csum_sub(skb->csum, - skb_checksum(skb, len, delta, 0)); + skb->csum = csum_block_sub(skb->csum, + skb_checksum(skb, len, delta, 0), + len); } return __pskb_trim(skb, len); } -- GitLab From ac65fd7094ac5ec86d167801f2101d02bbe1da6e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 30 Oct 2018 00:57:25 -0700 Subject: [PATCH 1217/2269] net/mlx5e: fix csum adjustments caused by RXFCS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit d48051c5b8376038c2b287c3b1bd55b8d391d567 ] As shown by Dmitris, we need to use csum_block_add() instead of csum_add() when adding the FCS contribution to skb csum. Before 4.18 (more exactly commit 88078d98d1bb "net: pskb_trim_rcsum() and CHECKSUM_COMPLETE are friends"), the whole skb csum was thrown away, so RXFCS changes were ignored. Then before commit d55bef5059dd ("net: fix pskb_trim_rcsum_slow() with odd trim offset") both mlx5 and pskb_trim_rcsum_slow() bugs were canceling each other. Now we fixed pskb_trim_rcsum_slow() we need to fix mlx5. Note that this patch also rewrites mlx5e_get_fcs() to : - Use skb_header_pointer() instead of reinventing it. - Use __get_unaligned_cpu32() to avoid possible non aligned accesses as Dmitris pointed out. Fixes: 902a545904c7 ("net/mlx5e: When RXFCS is set, add FCS data into checksum calculation") Reported-by: Paweł Staszewski Signed-off-by: Eric Dumazet Cc: Eran Ben Elisha Cc: Saeed Mahameed Cc: Dimitris Michailidis Cc: Cong Wang Cc: Paweł Staszewski Reviewed-by: Eran Ben Elisha Tested-By: Maria Pasechnik Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- .../net/ethernet/mellanox/mlx5/core/en_rx.c | 45 ++++--------------- 1 file changed, 9 insertions(+), 36 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 8285e6d24f301..3d3fd03fa450c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -635,43 +635,15 @@ static inline bool is_first_ethertype_ip(struct sk_buff *skb) return (ethertype == htons(ETH_P_IP) || ethertype == htons(ETH_P_IPV6)); } -static __be32 mlx5e_get_fcs(struct sk_buff *skb) +static u32 mlx5e_get_fcs(const struct sk_buff *skb) { - int last_frag_sz, bytes_in_prev, nr_frags; - u8 *fcs_p1, *fcs_p2; - skb_frag_t *last_frag; - __be32 fcs_bytes; + const void *fcs_bytes; + u32 _fcs_bytes; - if (!skb_is_nonlinear(skb)) - return *(__be32 *)(skb->data + skb->len - ETH_FCS_LEN); + fcs_bytes = skb_header_pointer(skb, skb->len - ETH_FCS_LEN, + ETH_FCS_LEN, &_fcs_bytes); - nr_frags = skb_shinfo(skb)->nr_frags; - last_frag = &skb_shinfo(skb)->frags[nr_frags - 1]; - last_frag_sz = skb_frag_size(last_frag); - - /* If all FCS data is in last frag */ - if (last_frag_sz >= ETH_FCS_LEN) - return *(__be32 *)(skb_frag_address(last_frag) + - last_frag_sz - ETH_FCS_LEN); - - fcs_p2 = (u8 *)skb_frag_address(last_frag); - bytes_in_prev = ETH_FCS_LEN - last_frag_sz; - - /* Find where the other part of the FCS is - Linear or another frag */ - if (nr_frags == 1) { - fcs_p1 = skb_tail_pointer(skb); - } else { - skb_frag_t *prev_frag = &skb_shinfo(skb)->frags[nr_frags - 2]; - - fcs_p1 = skb_frag_address(prev_frag) + - skb_frag_size(prev_frag); - } - fcs_p1 -= bytes_in_prev; - - memcpy(&fcs_bytes, fcs_p1, bytes_in_prev); - memcpy(((u8 *)&fcs_bytes) + bytes_in_prev, fcs_p2, last_frag_sz); - - return fcs_bytes; + return __get_unaligned_cpu32(fcs_bytes); } static inline void mlx5e_handle_csum(struct net_device *netdev, @@ -693,8 +665,9 @@ static inline void mlx5e_handle_csum(struct net_device *netdev, skb->ip_summed = CHECKSUM_COMPLETE; skb->csum = csum_unfold((__force __sum16)cqe->check_sum); if (unlikely(netdev->features & NETIF_F_RXFCS)) - skb->csum = csum_add(skb->csum, - (__force __wsum)mlx5e_get_fcs(skb)); + skb->csum = csum_block_add(skb->csum, + (__force __wsum)mlx5e_get_fcs(skb), + skb->len - ETH_FCS_LEN); rq->stats.csum_complete++; return; } -- GitLab From 4ca72d6336df434171218f047dbcaf86d65253fe Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 29 Oct 2018 20:36:43 +0000 Subject: [PATCH 1218/2269] rtnetlink: Disallow FDB configuration for non-Ethernet device [ Upstream commit da71577545a52be3e0e9225a946e5fd79cfab015 ] When an FDB entry is configured, the address is validated to have the length of an Ethernet address, but the device for which the address is configured can be of any type. The above can result in the use of uninitialized memory when the address is later compared against existing addresses since 'dev->addr_len' is used and it may be greater than ETH_ALEN, as with ip6tnl devices. Fix this by making sure that FDB entries are only configured for Ethernet devices. BUG: KMSAN: uninit-value in memcmp+0x11d/0x180 lib/string.c:863 CPU: 1 PID: 4318 Comm: syz-executor998 Not tainted 4.19.0-rc3+ #49 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x14b/0x190 lib/dump_stack.c:113 kmsan_report+0x183/0x2b0 mm/kmsan/kmsan.c:956 __msan_warning+0x70/0xc0 mm/kmsan/kmsan_instr.c:645 memcmp+0x11d/0x180 lib/string.c:863 dev_uc_add_excl+0x165/0x7b0 net/core/dev_addr_lists.c:464 ndo_dflt_fdb_add net/core/rtnetlink.c:3463 [inline] rtnl_fdb_add+0x1081/0x1270 net/core/rtnetlink.c:3558 rtnetlink_rcv_msg+0xa0b/0x1530 net/core/rtnetlink.c:4715 netlink_rcv_skb+0x36e/0x5f0 net/netlink/af_netlink.c:2454 rtnetlink_rcv+0x50/0x60 net/core/rtnetlink.c:4733 netlink_unicast_kernel net/netlink/af_netlink.c:1317 [inline] netlink_unicast+0x1638/0x1720 net/netlink/af_netlink.c:1343 netlink_sendmsg+0x1205/0x1290 net/netlink/af_netlink.c:1908 sock_sendmsg_nosec net/socket.c:621 [inline] sock_sendmsg net/socket.c:631 [inline] ___sys_sendmsg+0xe70/0x1290 net/socket.c:2114 __sys_sendmsg net/socket.c:2152 [inline] __do_sys_sendmsg net/socket.c:2161 [inline] __se_sys_sendmsg+0x2a3/0x3d0 net/socket.c:2159 __x64_sys_sendmsg+0x4a/0x70 net/socket.c:2159 do_syscall_64+0xb8/0x100 arch/x86/entry/common.c:291 entry_SYSCALL_64_after_hwframe+0x63/0xe7 RIP: 0033:0x440ee9 Code: e8 cc ab 02 00 48 83 c4 18 c3 0f 1f 80 00 00 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 bb 0a fc ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007fff6a93b518 EFLAGS: 00000213 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 0000000000440ee9 RDX: 0000000000000000 RSI: 0000000020000240 RDI: 0000000000000003 RBP: 0000000000000000 R08: 00000000004002c8 R09: 00000000004002c8 R10: 00000000004002c8 R11: 0000000000000213 R12: 000000000000b4b0 R13: 0000000000401ec0 R14: 0000000000000000 R15: 0000000000000000 Uninit was created at: kmsan_save_stack_with_flags mm/kmsan/kmsan.c:256 [inline] kmsan_internal_poison_shadow+0xb8/0x1b0 mm/kmsan/kmsan.c:181 kmsan_kmalloc+0x98/0x100 mm/kmsan/kmsan_hooks.c:91 kmsan_slab_alloc+0x10/0x20 mm/kmsan/kmsan_hooks.c:100 slab_post_alloc_hook mm/slab.h:446 [inline] slab_alloc_node mm/slub.c:2718 [inline] __kmalloc_node_track_caller+0x9e7/0x1160 mm/slub.c:4351 __kmalloc_reserve net/core/skbuff.c:138 [inline] __alloc_skb+0x2f5/0x9e0 net/core/skbuff.c:206 alloc_skb include/linux/skbuff.h:996 [inline] netlink_alloc_large_skb net/netlink/af_netlink.c:1189 [inline] netlink_sendmsg+0xb49/0x1290 net/netlink/af_netlink.c:1883 sock_sendmsg_nosec net/socket.c:621 [inline] sock_sendmsg net/socket.c:631 [inline] ___sys_sendmsg+0xe70/0x1290 net/socket.c:2114 __sys_sendmsg net/socket.c:2152 [inline] __do_sys_sendmsg net/socket.c:2161 [inline] __se_sys_sendmsg+0x2a3/0x3d0 net/socket.c:2159 __x64_sys_sendmsg+0x4a/0x70 net/socket.c:2159 do_syscall_64+0xb8/0x100 arch/x86/entry/common.c:291 entry_SYSCALL_64_after_hwframe+0x63/0xe7 v2: * Make error message more specific (David) Fixes: 090096bf3db1 ("net: generic fdb support for drivers without ndo_fdb_") Signed-off-by: Ido Schimmel Reported-and-tested-by: syzbot+3a288d5f5530b901310e@syzkaller.appspotmail.com Reported-and-tested-by: syzbot+d53ab4e92a1db04110ff@syzkaller.appspotmail.com Cc: Vlad Yasevich Cc: David Ahern Reviewed-by: David Ahern Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/rtnetlink.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 760364526dc1a..c392a77ff7881 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -3080,6 +3080,11 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, return -EINVAL; } + if (dev->type != ARPHRD_ETHER) { + NL_SET_ERR_MSG(extack, "FDB add only supported for Ethernet devices"); + return -EINVAL; + } + addr = nla_data(tb[NDA_LLADDR]); err = fdb_vid_parse(tb[NDA_VLAN], &vid); @@ -3184,6 +3189,11 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, return -EINVAL; } + if (dev->type != ARPHRD_ETHER) { + NL_SET_ERR_MSG(extack, "FDB delete only supported for Ethernet devices"); + return -EINVAL; + } + addr = nla_data(tb[NDA_LLADDR]); err = fdb_vid_parse(tb[NDA_VLAN], &vid); -- GitLab From 71944eb54288509c7c19ac54b485ac774c8253fe Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Wed, 17 Oct 2018 22:34:34 +0300 Subject: [PATCH 1219/2269] net: ipmr: fix unresolved entry dumps [ Upstream commit eddf016b910486d2123675a6b5fd7d64f77cdca8 ] If the skb space ends in an unresolved entry while dumping we'll miss some unresolved entries. The reason is due to zeroing the entry counter between dumping resolved and unresolved mfc entries. We should just keep counting until the whole table is dumped and zero when we move to the next as we have a separate table counter. Reported-by: Colin Ian King Fixes: 8fb472c09b9d ("ipmr: improve hash scalability") Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ipmr.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index cbd9c0d8a7880..9f314a5e9f27e 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -2499,8 +2499,6 @@ static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb) next_entry: e++; } - e = 0; - s_e = 0; spin_lock_bh(&mfc_unres_lock); list_for_each_entry(mfc, &mrt->mfc_unres_queue, list) { -- GitLab From c8c2df18eedfcb504ed2f5d8ab24e87da7fd3959 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 11 Oct 2018 15:06:33 -0700 Subject: [PATCH 1220/2269] net: bcmgenet: Poll internal PHY for GENETv5 [ Upstream commit 64bd9c8135751b561f27edaaffe93d07093f81af ] On GENETv5, there is a hardware issue which prevents the GENET hardware from generating a link UP interrupt when the link is operating at 10Mbits/sec. Since we do not have any way to configure the link detection logic, fallback to polling in that case. Fixes: 421380856d9c ("net: bcmgenet: add support for the GENETv5 hardware") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/broadcom/genet/bcmmii.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c index 6ad0ca7ed3e91..abbd2894f870a 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmmii.c +++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c @@ -339,9 +339,12 @@ int bcmgenet_mii_probe(struct net_device *dev) phydev->advertising = phydev->supported; /* The internal PHY has its link interrupts routed to the - * Ethernet MAC ISRs - */ - if (priv->internal_phy) + * Ethernet MAC ISRs. On GENETv5 there is a hardware issue + * that prevents the signaling of link UP interrupts when + * the link operates at 10Mbps, so fallback to polling for + * those versions of GENET. + */ + if (priv->internal_phy && !GENET_IS_V5(priv)) priv->phydev->irq = PHY_IGNORE_INTERRUPT; return 0; -- GitLab From b0ee9bd40d74001cd98de6bac252dd728130dbaa Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Wed, 10 Oct 2018 22:00:58 +0200 Subject: [PATCH 1221/2269] net/sched: cls_api: add missing validation of netlink attributes [ Upstream commit e331473fee3d500bb0d2582a1fe598df3326d8cd ] Similarly to what has been done in 8b4c3cdd9dd8 ("net: sched: Add policy validation for tc attributes"), fix classifier code to add validation of TCA_CHAIN and TCA_KIND netlink attributes. tested with: # ./tdc.py -c filter v2: Let sch_api and cls_api share nla_policy they have in common, thanks to David Ahern. v3: Avoid EXPORT_SYMBOL(), as validation of those attributes is not done by TC modules, thanks to Cong Wang. While at it, restore the 'Delete / get qdisc' comment to its orginal position, just above tc_get_qdisc() function prototype. Fixes: 5bc1701881e39 ("net: sched: introduce multichain support for filters") Signed-off-by: Davide Caratti Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/cls_api.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 2f4e1483aced9..04a70793c1fe7 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -31,6 +31,8 @@ #include #include +extern const struct nla_policy rtm_tca_policy[TCA_MAX + 1]; + /* The list of all installed classifier types */ static LIST_HEAD(tcf_proto_base); @@ -559,7 +561,7 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n, replay: tp_created = 0; - err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, NULL, extack); + err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, rtm_tca_policy, extack); if (err < 0) return err; @@ -836,7 +838,8 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) if (nlmsg_len(cb->nlh) < sizeof(*tcm)) return skb->len; - err = nlmsg_parse(cb->nlh, sizeof(*tcm), tca, TCA_MAX, NULL, NULL); + err = nlmsg_parse(cb->nlh, sizeof(*tcm), tca, TCA_MAX, rtm_tca_policy, + NULL); if (err) return err; -- GitLab From fe54a7c4f0d1f313ea341ebd92d1f3ff9eb65007 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Mon, 14 May 2018 15:38:10 -0700 Subject: [PATCH 1222/2269] net/mlx5: Fix build break when CONFIG_SMP=n MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit e3ca34880652250f524022ad89e516f8ba9a805b ] Avoid using the kernel's irq_descriptor and return IRQ vector affinity directly from the driver. This fixes the following build break when CONFIG_SMP=n include/linux/mlx5/driver.h: In function ‘mlx5_get_vector_affinity_hint’: include/linux/mlx5/driver.h:1299:13: error: ‘struct irq_desc’ has no member named ‘affinity_hint’ Fixes: 6082d9c9c94a ("net/mlx5: Fix mlx5_get_vector_affinity function") Signed-off-by: Saeed Mahameed CC: Randy Dunlap CC: Guenter Roeck CC: Thomas Gleixner Tested-by: Israel Rukshin Reported-by: kbuild test robot Reported-by: Randy Dunlap Tested-by: Randy Dunlap Acked-by: Thomas Gleixner Tested-by: Guenter Roeck Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- include/linux/mlx5/driver.h | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 5eff332092bcc..fb677e4f902da 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1195,17 +1195,7 @@ enum { static inline const struct cpumask * mlx5_get_vector_affinity_hint(struct mlx5_core_dev *dev, int vector) { - struct irq_desc *desc; - unsigned int irq; - int eqn; - int err; - - err = mlx5_vector2eqn(dev, vector, &eqn, &irq); - if (err) - return NULL; - - desc = irq_to_desc(irq); - return desc->affinity_hint; + return dev->priv.irq_info[vector].mask; } #endif /* MLX5_DRIVER_H */ -- GitLab From 50961e4888a1d53544ac4ea6f185fc27ee4fee4f Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sun, 4 Nov 2018 14:52:51 +0100 Subject: [PATCH 1223/2269] Linux 4.14.79 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 7f6579d53911c..57a007bf1181e 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 78 +SUBLEVEL = 79 EXTRAVERSION = NAME = Petit Gorille -- GitLab From 2de3f80d5ba260199c92a6ab0ada08daa2ce54ae Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Wed, 11 Oct 2017 14:01:02 +0100 Subject: [PATCH 1224/2269] UPSTREAM: arm64: Expose support for optional ARMv8-A features ARMv8-A adds a few optional features for ARMv8.2 and ARMv8.3. Expose them to the userspace via HWCAPs and mrs emulation. SHA2-512 - Instruction support for SHA512 Hash algorithm (e.g SHA512H, SHA512H2, SHA512U0, SHA512SU1) SHA3 - SHA3 crypto instructions (EOR3, RAX1, XAR, BCAX). SM3 - Instruction support for Chinese cryptography algorithm SM3 SM4 - Instruction support for Chinese cryptography algorithm SM4 DP - Dot Product instructions (UDOT, SDOT). Cc: Will Deacon Cc: Mark Rutland Cc: Dave Martin Cc: Marc Zyngier Reviewed-by: Catalin Marinas Signed-off-by: Suzuki K Poulose Signed-off-by: Will Deacon (cherry picked from commit f5e035f8694c3bdddc66ea46ecda965ee6853718) Bug: 119112014 Change-Id: Ic723fc8c743376d96a3cc2fea1f4855d0178a503 --- Documentation/arm64/cpu-feature-registers.txt | 12 +++++++++++- arch/arm64/include/asm/sysreg.h | 4 ++++ arch/arm64/include/uapi/asm/hwcap.h | 5 +++++ arch/arm64/kernel/cpufeature.c | 9 +++++++++ arch/arm64/kernel/cpuinfo.c | 5 +++++ 5 files changed, 34 insertions(+), 1 deletion(-) diff --git a/Documentation/arm64/cpu-feature-registers.txt b/Documentation/arm64/cpu-feature-registers.txt index dad411d635d88..011ddfc1e570f 100644 --- a/Documentation/arm64/cpu-feature-registers.txt +++ b/Documentation/arm64/cpu-feature-registers.txt @@ -110,10 +110,20 @@ infrastructure: x--------------------------------------------------x | Name | bits | visible | |--------------------------------------------------| - | RES0 | [63-32] | n | + | RES0 | [63-48] | n | + |--------------------------------------------------| + | DP | [47-44] | y | + |--------------------------------------------------| + | SM4 | [43-40] | y | + |--------------------------------------------------| + | SM3 | [39-36] | y | + |--------------------------------------------------| + | SHA3 | [35-32] | y | |--------------------------------------------------| | RDM | [31-28] | y | |--------------------------------------------------| + | RES0 | [27-24] | n | + |--------------------------------------------------| | ATOMICS | [23-20] | y | |--------------------------------------------------| | CRC32 | [19-16] | y | diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 3bdec2f5cbb42..7bbbba5ae6813 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -318,6 +318,10 @@ #define SCTLR_EL1_CP15BEN (1 << 5) /* id_aa64isar0 */ +#define ID_AA64ISAR0_DP_SHIFT 44 +#define ID_AA64ISAR0_SM4_SHIFT 40 +#define ID_AA64ISAR0_SM3_SHIFT 36 +#define ID_AA64ISAR0_SHA3_SHIFT 32 #define ID_AA64ISAR0_RDM_SHIFT 28 #define ID_AA64ISAR0_ATOMICS_SHIFT 20 #define ID_AA64ISAR0_CRC32_SHIFT 16 diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h index b3fdeee739eab..f243c57d1670a 100644 --- a/arch/arm64/include/uapi/asm/hwcap.h +++ b/arch/arm64/include/uapi/asm/hwcap.h @@ -37,5 +37,10 @@ #define HWCAP_FCMA (1 << 14) #define HWCAP_LRCPC (1 << 15) #define HWCAP_DCPOP (1 << 16) +#define HWCAP_SHA3 (1 << 17) +#define HWCAP_SM3 (1 << 18) +#define HWCAP_SM4 (1 << 19) +#define HWCAP_ASIMDDP (1 << 20) +#define HWCAP_SHA512 (1 << 21) #endif /* _UAPI__ASM_HWCAP_H */ diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index f389c9a93c1ad..12941cfd19f28 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -107,6 +107,10 @@ cpufeature_pan_not_uao(const struct arm64_cpu_capabilities *entry, int __unused) * sync with the documentation of the CPU feature register ABI. */ static const struct arm64_ftr_bits ftr_id_aa64isar0[] = { + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64ISAR0_DP_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64ISAR0_SM4_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64ISAR0_SM3_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64ISAR0_SHA3_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64ISAR0_RDM_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_ATOMICS_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_CRC32_SHIFT, 4, 0), @@ -1031,9 +1035,14 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_AES), HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA1_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SHA1), HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA2_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SHA2), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA2_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, HWCAP_SHA512), HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_CRC32_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_CRC32), HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_ATOMICS_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, HWCAP_ATOMICS), HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_RDM_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_ASIMDRDM), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA3_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SHA3), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SM3_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SM3), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SM4_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SM4), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_DP_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_ASIMDDP), HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, HWCAP_FP), HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, HWCAP_FPHP), HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, HWCAP_ASIMD), diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 311885962830b..1ff1c5a670818 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -69,6 +69,11 @@ static const char *const hwcap_str[] = { "fcma", "lrcpc", "dcpop", + "sha3", + "sm3", + "sm4", + "asimddp", + "sha512", NULL }; -- GitLab From 87485dbe777b1114409acb43210000524ca01d15 Mon Sep 17 00:00:00 2001 From: Alistair Strachan Date: Fri, 9 Nov 2018 10:26:38 -0800 Subject: [PATCH 1225/2269] Revert "ANDROID: Kbuild, LLVMLinux: allow overriding clang target triple" This reverts commit b9b964626ea839d4a93a47625c74d29a67aa926e. We will apply a better version backported from experimental/android-4.19. Change-Id: I62b9d7bf6cee3aac63758fbad35d68bf9b6c01d4 Signed-off-by: Alistair Strachan --- Makefile | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Makefile b/Makefile index ec567bc20c809..497685c302d70 100644 --- a/Makefile +++ b/Makefile @@ -480,8 +480,7 @@ endif ifeq ($(cc-name),clang) ifneq ($(CROSS_COMPILE),) -CLANG_TRIPLE ?= $(CROSS_COMPILE) -CLANG_TARGET := --target=$(notdir $(CLANG_TRIPLE:%-=%)) +CLANG_TARGET := --target=$(notdir $(CROSS_COMPILE:%-=%)) GCC_TOOLCHAIN := $(realpath $(dir $(shell which $(LD)))/..) endif ifneq ($(GCC_TOOLCHAIN),) -- GitLab From f63387b9ae1ddf1e121b220ebe917aeed7941054 Mon Sep 17 00:00:00 2001 From: Alan Chiang Date: Wed, 25 Jul 2018 11:20:22 +0800 Subject: [PATCH 1226/2269] eeprom: at24: Add support for address-width property [ Upstream commit a2b3bf4846e5eed62ea6abb096af2c950961033c ] Provide a flexible way to determine the addressing bits of eeprom. Pass the addressing bits to driver through address-width property. Signed-off-by: Alan Chiang Signed-off-by: Andy Yeh Signed-off-by: Bartosz Golaszewski Signed-off-by: Sasha Levin --- drivers/misc/eeprom/at24.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/misc/eeprom/at24.c b/drivers/misc/eeprom/at24.c index 4cc0b42f2acc5..ded48a0c77eeb 100644 --- a/drivers/misc/eeprom/at24.c +++ b/drivers/misc/eeprom/at24.c @@ -577,6 +577,23 @@ static void at24_get_pdata(struct device *dev, struct at24_platform_data *chip) if (device_property_present(dev, "read-only")) chip->flags |= AT24_FLAG_READONLY; + err = device_property_read_u32(dev, "address-width", &val); + if (!err) { + switch (val) { + case 8: + if (chip->flags & AT24_FLAG_ADDR16) + dev_warn(dev, "Override address width to be 8, while default is 16\n"); + chip->flags &= ~AT24_FLAG_ADDR16; + break; + case 16: + chip->flags |= AT24_FLAG_ADDR16; + break; + default: + dev_warn(dev, "Bad \"address-width\" property: %u\n", + val); + } + } + err = device_property_read_u32(dev, "pagesize", &val); if (!err) { chip->page_size = val; -- GitLab From 8a6cee344cc0236d6171bc24506cb593d84b8214 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Mon, 22 Oct 2018 20:56:46 +0300 Subject: [PATCH 1227/2269] vfs: swap names of {do,vfs}_clone_file_range() commit a725356b6659469d182d662f22d770d83d3bc7b5 upstream. Commit 031a072a0b8a ("vfs: call vfs_clone_file_range() under freeze protection") created a wrapper do_clone_file_range() around vfs_clone_file_range() moving the freeze protection to former, so overlayfs could call the latter. The more common vfs practice is to call do_xxx helpers from vfs_xxx helpers, where freeze protecction is taken in the vfs_xxx helper, so this anomality could be a source of confusion. It seems that commit 8ede205541ff ("ovl: add reflink/copyfile/dedup support") may have fallen a victim to this confusion - ovl_clone_file_range() calls the vfs_clone_file_range() helper in the hope of getting freeze protection on upper fs, but in fact results in overlayfs allowing to bypass upper fs freeze protection. Swap the names of the two helpers to conform to common vfs practice and call the correct helpers from overlayfs and nfsd. Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi Fixes: 031a072a0b8a ("vfs: call vfs_clone_file_range() under freeze...") Signed-off-by: Amir Goldstein Signed-off-by: Sasha Levin --- fs/ioctl.c | 2 +- fs/nfsd/vfs.c | 3 ++- fs/overlayfs/copy_up.c | 2 +- fs/read_write.c | 17 +++++++++++++++-- include/linux/fs.h | 17 +++-------------- 5 files changed, 22 insertions(+), 19 deletions(-) diff --git a/fs/ioctl.c b/fs/ioctl.c index 5ace7efb0d04d..9db5ddaf7ef02 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -229,7 +229,7 @@ static long ioctl_file_clone(struct file *dst_file, unsigned long srcfd, ret = -EXDEV; if (src_file.file->f_path.mnt != dst_file->f_path.mnt) goto fdput; - ret = do_clone_file_range(src_file.file, off, dst_file, destoff, olen); + ret = vfs_clone_file_range(src_file.file, off, dst_file, destoff, olen); fdput: fdput(src_file); return ret; diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index a3c9bfa77def8..f55527ef21e84 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -541,7 +541,8 @@ __be32 nfsd4_set_nfs4_label(struct svc_rqst *rqstp, struct svc_fh *fhp, __be32 nfsd4_clone_file_range(struct file *src, u64 src_pos, struct file *dst, u64 dst_pos, u64 count) { - return nfserrno(do_clone_file_range(src, src_pos, dst, dst_pos, count)); + return nfserrno(vfs_clone_file_range(src, src_pos, dst, dst_pos, + count)); } ssize_t nfsd_copy_file_range(struct file *src, u64 src_pos, struct file *dst, diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index c441f9387a1ba..321eae7401481 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -157,7 +157,7 @@ static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len) } /* Try to use clone_file_range to clone up within the same fs */ - error = vfs_clone_file_range(old_file, 0, new_file, 0, len); + error = do_clone_file_range(old_file, 0, new_file, 0, len); if (!error) goto out; /* Couldn't clone, so now we try to copy the data */ diff --git a/fs/read_write.c b/fs/read_write.c index 0046d72efe946..57a00ef895b29 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -1812,8 +1812,8 @@ int vfs_clone_file_prep_inodes(struct inode *inode_in, loff_t pos_in, } EXPORT_SYMBOL(vfs_clone_file_prep_inodes); -int vfs_clone_file_range(struct file *file_in, loff_t pos_in, - struct file *file_out, loff_t pos_out, u64 len) +int do_clone_file_range(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, u64 len) { struct inode *inode_in = file_inode(file_in); struct inode *inode_out = file_inode(file_out); @@ -1860,6 +1860,19 @@ int vfs_clone_file_range(struct file *file_in, loff_t pos_in, return ret; } +EXPORT_SYMBOL(do_clone_file_range); + +int vfs_clone_file_range(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, u64 len) +{ + int ret; + + file_start_write(file_out); + ret = do_clone_file_range(file_in, pos_in, file_out, pos_out, len); + file_end_write(file_out); + + return ret; +} EXPORT_SYMBOL(vfs_clone_file_range); /* diff --git a/include/linux/fs.h b/include/linux/fs.h index cc613f20e5a61..7374639f0aa00 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1792,8 +1792,10 @@ extern ssize_t vfs_copy_file_range(struct file *, loff_t , struct file *, extern int vfs_clone_file_prep_inodes(struct inode *inode_in, loff_t pos_in, struct inode *inode_out, loff_t pos_out, u64 *len, bool is_dedupe); +extern int do_clone_file_range(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, u64 len); extern int vfs_clone_file_range(struct file *file_in, loff_t pos_in, - struct file *file_out, loff_t pos_out, u64 len); + struct file *file_out, loff_t pos_out, u64 len); extern int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff, struct inode *dest, loff_t destoff, loff_t len, bool *is_same); @@ -2712,19 +2714,6 @@ static inline void file_end_write(struct file *file) __sb_end_write(file_inode(file)->i_sb, SB_FREEZE_WRITE); } -static inline int do_clone_file_range(struct file *file_in, loff_t pos_in, - struct file *file_out, loff_t pos_out, - u64 len) -{ - int ret; - - file_start_write(file_out); - ret = vfs_clone_file_range(file_in, pos_in, file_out, pos_out, len); - file_end_write(file_out); - - return ret; -} - /* * get_write_access() gets write permission for a file. * put_write_access() releases this write permission. -- GitLab From 86534d3d824351868fb0992e13777933edb91e6f Mon Sep 17 00:00:00 2001 From: Kristian Evensen Date: Thu, 1 Nov 2018 20:52:46 +0100 Subject: [PATCH 1228/2269] USB: serial: option: improve Quectel EP06 detection commit 36cae568404a298a19a6e8a3f18641075d4cab04 upstream The Quectel EP06 (and EM06/EG06) LTE modem supports updating the USB configuration, without the VID/PID or configuration number changing. When the configuration is updated and interfaces are added/removed, the interface numbers are updated. This causes our current code for matching EP06 not to work as intended, as the assumption about reserved interfaces no longer holds. If for example the diagnostic (first) interface is removed, option will (try to) bind to the QMI interface. This patch improves EP06 detection by replacing the current match with two matches, and those matches check class, subclass and protocol as well as VID and PID. The diag interface exports class, subclass and protocol as 0xff. For the other serial interfaces, class is 0xff and subclass and protocol are both 0x0. The modem can export the following devices and always in this order: diag, nmea, at, ppp. qmi and adb. This means that diag can only ever be interface 0, and interface numbers 1-5 should be marked as reserved. The three other serial devices can have interface numbers 0-3, but I have not marked any interfaces as reserved. The reason is that the serial devices are the only interfaces exported by the device where subclass and protocol is 0x0. QMI exports the same class, subclass and protocol values as the diag interface. However, the two interfaces have different number of endpoints, QMI has three and diag two. I have added a check for number of interfaces if VID/PID matches the EP06, and we ignore the device if number of interfaces equals three (and subclass is set). The upstream commit does not apply cleanly to the 4.14-tree because of differences in option_probe(). In order to make the commit apply, a slight reshuffeling of the code was needed. Signed-off-by: Kristian Evensen Acked-by: Dan Williams [ johan: drop uneeded RSVD(5) for ADB ] Cc: stable Signed-off-by: Johan Hovold Signed-off-by: Kristian Evensen Signed-off-by: Sasha Levin --- drivers/usb/serial/option.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 0600dadd6a0c8..d8d3cb18e9eac 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1084,8 +1084,9 @@ static const struct usb_device_id option_ids[] = { .driver_info = RSVD(4) }, { USB_DEVICE(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_BG96), .driver_info = RSVD(4) }, - { USB_DEVICE(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EP06), - .driver_info = RSVD(4) | RSVD(5) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EP06, 0xff, 0xff, 0xff), + .driver_info = RSVD(1) | RSVD(2) | RSVD(3) | RSVD(4) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EP06, 0xff, 0, 0) }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_6001) }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_CMU_300) }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_6003), @@ -2010,6 +2011,18 @@ static int option_probe(struct usb_serial *serial, iface_desc->bInterfaceClass != USB_CLASS_CDC_DATA) return -ENODEV; + /* + * Don't bind to the QMI device of the Quectel EP06/EG06/EM06. Class, + * subclass and protocol is 0xff for both the diagnostic port and the + * QMI interface, but the diagnostic port only has two endpoints (QMI + * has three). + */ + if (dev_desc->idVendor == cpu_to_le16(QUECTEL_VENDOR_ID) && + dev_desc->idProduct == cpu_to_le16(QUECTEL_PRODUCT_EP06) && + iface_desc->bInterfaceSubClass && iface_desc->bNumEndpoints == 3) { + return -ENODEV; + } + /* Store the device flags so we can use them during attach. */ usb_set_serial_data(serial, (void *)device_flags); -- GitLab From cc2526f1f5544a440c88f48c4681707ca76e3df1 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 1 Nov 2018 20:52:47 +0100 Subject: [PATCH 1229/2269] USB: serial: option: add two-endpoints device-id flag commit 35aecc02b5b621782111f64cbb032c7f6a90bb32 upstream Allow matching on interfaces having two endpoints by adding a new device-id flag. This allows for the handling of devices whose interface numbers can change (e.g. Quectel EP06) to be contained in the device-id table. The upstream commit removes a variable that is still in use in the 4.14 version of the option-driver, so the removal is undone. Tested-by: Kristian Evensen Cc: stable Signed-off-by: Johan Hovold Signed-off-by: Kristian Evensen Signed-off-by: Sasha Levin --- drivers/usb/serial/option.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index d8d3cb18e9eac..392fddc80c444 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -564,6 +564,9 @@ static void option_instat_callback(struct urb *urb); /* Interface is reserved */ #define RSVD(ifnum) ((BIT(ifnum) & 0xff) << 0) +/* Interface must have two endpoints */ +#define NUMEP2 BIT(16) + static const struct usb_device_id option_ids[] = { { USB_DEVICE(OPTION_VENDOR_ID, OPTION_PRODUCT_COLT) }, @@ -1085,7 +1088,7 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_BG96), .driver_info = RSVD(4) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EP06, 0xff, 0xff, 0xff), - .driver_info = RSVD(1) | RSVD(2) | RSVD(3) | RSVD(4) }, + .driver_info = RSVD(1) | RSVD(2) | RSVD(3) | RSVD(4) | NUMEP2 }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EP06, 0xff, 0, 0) }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_6001) }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_CMU_300) }, @@ -2012,16 +2015,11 @@ static int option_probe(struct usb_serial *serial, return -ENODEV; /* - * Don't bind to the QMI device of the Quectel EP06/EG06/EM06. Class, - * subclass and protocol is 0xff for both the diagnostic port and the - * QMI interface, but the diagnostic port only has two endpoints (QMI - * has three). + * Allow matching on bNumEndpoints for devices whose interface numbers + * can change (e.g. Quectel EP06). */ - if (dev_desc->idVendor == cpu_to_le16(QUECTEL_VENDOR_ID) && - dev_desc->idProduct == cpu_to_le16(QUECTEL_PRODUCT_EP06) && - iface_desc->bInterfaceSubClass && iface_desc->bNumEndpoints == 3) { + if (device_flags & NUMEP2 && iface_desc->bNumEndpoints != 2) return -ENODEV; - } /* Store the device flags so we can use them during attach. */ usb_set_serial_data(serial, (void *)device_flags); -- GitLab From eb9b195c53db75c694bf78576925fcb3eed9d0e1 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 1 Nov 2018 22:30:38 +0100 Subject: [PATCH 1230/2269] bpf: fix partial copy of map_ptr when dst is scalar commit 0962590e553331db2cc0aef2dc35c57f6300dbbe upstream. ALU operations on pointers such as scalar_reg += map_value_ptr are handled in adjust_ptr_min_max_vals(). Problem is however that map_ptr and range in the register state share a union, so transferring state through dst_reg->range = ptr_reg->range is just buggy as any new map_ptr in the dst_reg is then truncated (or null) for subsequent checks. Fix this by adding a raw member and use it for copying state over to dst_reg. Fixes: f1174f77b50c ("bpf/verifier: rework value tracking") Signed-off-by: Daniel Borkmann Cc: Edward Cree Acked-by: Alexei Starovoitov Signed-off-by: Alexei Starovoitov Acked-by: Edward Cree Signed-off-by: Sasha Levin --- include/linux/bpf_verifier.h | 3 +++ kernel/bpf/verifier.c | 10 ++++++---- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 73bec75b74c80..a3333004fd2b3 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -50,6 +50,9 @@ struct bpf_reg_state { * PTR_TO_MAP_VALUE_OR_NULL */ struct bpf_map *map_ptr; + + /* Max size from any of the above. */ + unsigned long raw; }; /* Fixed part of pointer offset, pointer types only */ s32 off; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index a0ffc62e76774..013b0cd1958ea 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1935,7 +1935,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, dst_reg->umax_value = umax_ptr; dst_reg->var_off = ptr_reg->var_off; dst_reg->off = ptr_reg->off + smin_val; - dst_reg->range = ptr_reg->range; + dst_reg->raw = ptr_reg->raw; break; } /* A new variable offset is created. Note that off_reg->off @@ -1965,10 +1965,11 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, } dst_reg->var_off = tnum_add(ptr_reg->var_off, off_reg->var_off); dst_reg->off = ptr_reg->off; + dst_reg->raw = ptr_reg->raw; if (ptr_reg->type == PTR_TO_PACKET) { dst_reg->id = ++env->id_gen; /* something was added to pkt_ptr, set range to zero */ - dst_reg->range = 0; + dst_reg->raw = 0; } break; case BPF_SUB: @@ -1999,7 +2000,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, dst_reg->var_off = ptr_reg->var_off; dst_reg->id = ptr_reg->id; dst_reg->off = ptr_reg->off - smin_val; - dst_reg->range = ptr_reg->range; + dst_reg->raw = ptr_reg->raw; break; } /* A new variable offset is created. If the subtrahend is known @@ -2025,11 +2026,12 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, } dst_reg->var_off = tnum_sub(ptr_reg->var_off, off_reg->var_off); dst_reg->off = ptr_reg->off; + dst_reg->raw = ptr_reg->raw; if (ptr_reg->type == PTR_TO_PACKET) { dst_reg->id = ++env->id_gen; /* something was added to pkt_ptr, set range to zero */ if (smin_val < 0) - dst_reg->range = 0; + dst_reg->raw = 0; } break; case BPF_AND: -- GitLab From 0502f1366921284a8f418db753d9fd7af4cd5ba8 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Tue, 6 Nov 2018 01:23:39 -0500 Subject: [PATCH 1231/2269] Revert "ARM: tegra: Fix ULPI regression on Tegra20" This reverts commit b39ac54215190bc178ae7de799e74d327a3c1a33. The issue was fixed by upstream commit 5d797111afe1 ("clk: tegra: Add quirk for getting CDEV1/2 clocks on Tegra20"). Signed-off-by: Sasha Levin --- arch/arm/boot/dts/tegra20.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/tegra20.dtsi b/arch/arm/boot/dts/tegra20.dtsi index 2780e68a853bf..914f59166a995 100644 --- a/arch/arm/boot/dts/tegra20.dtsi +++ b/arch/arm/boot/dts/tegra20.dtsi @@ -706,7 +706,7 @@ phy_type = "ulpi"; clocks = <&tegra_car TEGRA20_CLK_USB2>, <&tegra_car TEGRA20_CLK_PLL_U>, - <&tegra_car TEGRA20_CLK_PLL_P_OUT4>; + <&tegra_car TEGRA20_CLK_CDEV2>; clock-names = "reg", "pll_u", "ulpi-link"; resets = <&tegra_car 58>, <&tegra_car 22>; reset-names = "usb", "utmi-pads"; -- GitLab From 6eb5633da44ef52e37c27e25400d427b9774ae6e Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Sat, 1 Sep 2018 09:40:01 +0300 Subject: [PATCH 1232/2269] fsnotify: fix ignore mask logic in fsnotify() [ Upstream commit 9bdda4e9cf2dcecb60a0683b10ffb8cd7e5f2f45 ] Commit 92183a42898d ("fsnotify: fix ignore mask logic in send_to_group()") acknoledges the use case of ignoring an event on an inode mark, because of an ignore mask on a mount mark of the same group (i.e. I want to get all events on this file, except for the events that came from that mount). This change depends on correctly merging the inode marks and mount marks group lists, so that the mount mark ignore mask would be tested in send_to_group(). Alas, the merging of the lists did not take into account the case where event in question is not in the mask of any of the mount marks. To fix this, completely remove the tests for inode and mount event masks from the lists merging code. Fixes: 92183a42898d ("fsnotify: fix ignore mask logic in send_to_group") Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara [amir: backport to v4.14.y] Signed-off-by: Amir Goldstein Signed-off-by: Sasha Levin --- fs/notify/fsnotify.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index d76c81323dc16..2bc61e7543dd7 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -286,17 +286,13 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is, iter_info.srcu_idx = srcu_read_lock(&fsnotify_mark_srcu); - if ((mask & FS_MODIFY) || - (test_mask & to_tell->i_fsnotify_mask)) { - inode_conn = srcu_dereference(to_tell->i_fsnotify_marks, + inode_conn = srcu_dereference(to_tell->i_fsnotify_marks, + &fsnotify_mark_srcu); + if (inode_conn) + inode_node = srcu_dereference(inode_conn->list.first, &fsnotify_mark_srcu); - if (inode_conn) - inode_node = srcu_dereference(inode_conn->list.first, - &fsnotify_mark_srcu); - } - if (mnt && ((mask & FS_MODIFY) || - (test_mask & mnt->mnt_fsnotify_mask))) { + if (mnt) { inode_conn = srcu_dereference(to_tell->i_fsnotify_marks, &fsnotify_mark_srcu); if (inode_conn) -- GitLab From 43607eb9f70823072e1bfc27f84ed81551ac2adb Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Wed, 29 Aug 2018 17:02:16 +0200 Subject: [PATCH 1233/2269] gpio: mxs: Get rid of external API call [ Upstream commit 833eacc7b5913da9896bacd30db7d490aa777868 ] The MXS driver was calling back into the GPIO API from its irqchip. This is not very elegant, as we are a driver, let's just shortcut back into the gpio_chip .get() function instead. This is a tricky case since the .get() callback is not in this file, instead assigned by bgpio_init(). Calling the function direcly in the gpio_chip is however the lesser evil. Cc: Sascha Hauer Cc: Janusz Uzycki Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- drivers/gpio/gpio-mxs.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/gpio/gpio-mxs.c b/drivers/gpio/gpio-mxs.c index 435def22445d9..f66395524d0e6 100644 --- a/drivers/gpio/gpio-mxs.c +++ b/drivers/gpio/gpio-mxs.c @@ -32,8 +32,6 @@ #include #include #include -/* FIXME: for gpio_get_value(), replace this by direct register read */ -#include #include #define MXS_SET 0x4 @@ -100,7 +98,7 @@ static int mxs_gpio_set_irq_type(struct irq_data *d, unsigned int type) port->both_edges &= ~pin_mask; switch (type) { case IRQ_TYPE_EDGE_BOTH: - val = gpio_get_value(port->gc.base + d->hwirq); + val = port->gc.get(&port->gc, d->hwirq); if (val) edge = GPIO_INT_FALL_EDGE; else -- GitLab From 33e52eb2d00582fad17177918b83a8df19958989 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Sat, 3 Nov 2018 19:15:24 +0200 Subject: [PATCH 1234/2269] xfs: truncate transaction does not modify the inobt [ Upstream commit a606ebdb859e78beb757dfefa08001df366e2ef5 ] The truncate transaction does not ever modify the inode btree, but includes an associated log reservation. Update xfs_calc_itruncate_reservation() to remove the reservation associated with inobt updates. [Amir: This commit was merged for kernel v4.16 and a twin commit was merged for xfsprogs v4.16. As a result, a small xfs filesystem formatted with features -m rmapbt=1,reflink=1 using mkfs.xfs version >= v4.16 cannot be mounted with kernel < v4.16. For example, xfstests generic/17{1,2,3} format a small fs and when trying to mount it, they fail with an assert on this very demonic line: XFS (vdc): Log size 3075 blocks too small, minimum size is 3717 blocks XFS (vdc): AAIEEE! Log failed size checks. Abort! XFS: Assertion failed: 0, file: src/linux/fs/xfs/xfs_log.c, line: 666 The simple solution for stable kernels is to apply this patch, because mkfs.xfs v4.16 is already in the wild, so we have to assume that xfs filesystems with a "too small" log exist. Regardless, xfsprogs maintainers should also consider reverting the twin patch to stop creating those filesystems for the sake of users with unpatched kernels.] Signed-off-by: Brian Foster Reviewed-by: Dave Chinner Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Cc: # v4.9+ Signed-off-by: Amir Goldstein Reviewed-by: Darrick J . Wong Signed-off-by: Sasha Levin --- fs/xfs/libxfs/xfs_trans_resv.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/fs/xfs/libxfs/xfs_trans_resv.c b/fs/xfs/libxfs/xfs_trans_resv.c index 6bd916bd35e24..48eff18c54960 100644 --- a/fs/xfs/libxfs/xfs_trans_resv.c +++ b/fs/xfs/libxfs/xfs_trans_resv.c @@ -232,8 +232,6 @@ xfs_calc_write_reservation( * the super block to reflect the freed blocks: sector size * worst case split in allocation btrees per extent assuming 4 extents: * 4 exts * 2 trees * (2 * max depth - 1) * block size - * the inode btree: max depth * blocksize - * the allocation btrees: 2 trees * (max depth - 1) * block size */ STATIC uint xfs_calc_itruncate_reservation( @@ -245,12 +243,7 @@ xfs_calc_itruncate_reservation( XFS_FSB_TO_B(mp, 1))), (xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) + xfs_calc_buf_res(xfs_allocfree_log_count(mp, 4), - XFS_FSB_TO_B(mp, 1)) + - xfs_calc_buf_res(5, 0) + - xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1), - XFS_FSB_TO_B(mp, 1)) + - xfs_calc_buf_res(2 + mp->m_ialloc_blks + - mp->m_in_maxlevels, 0))); + XFS_FSB_TO_B(mp, 1)))); } /* -- GitLab From ddb595dfe47278fd8ff3fd171b3729ec45d51b74 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 17 Oct 2018 15:23:26 +0100 Subject: [PATCH 1235/2269] cachefiles: fix the race between cachefiles_bury_object() and rmdir(2) commit 169b803397499be85bdd1e3d07d6f5e3d4bd669e upstream. the victim might've been rmdir'ed just before the lock_rename(); unlike the normal callers, we do not look the source up after the parents are locked - we know it beforehand and just recheck that it's still the child of what used to be its parent. Unfortunately, the check is too weak - we don't spot a dead directory since its ->d_parent is unchanged, dentry is positive, etc. So we sail all the way to ->rename(), with hosting filesystems _not_ expecting to be asked renaming an rmdir'ed subdirectory. The fix is easy, fortunately - the lock on parent is sufficient for making IS_DEADDIR() on child safe. Cc: stable@vger.kernel.org Fixes: 9ae326a69004 (CacheFiles: A cache that backs onto a mounted filesystem) Signed-off-by: Al Viro Signed-off-by: David Howells Signed-off-by: Greg Kroah-Hartman --- fs/cachefiles/namei.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c index 5f2f67d220fa9..f6ed795f4af6f 100644 --- a/fs/cachefiles/namei.c +++ b/fs/cachefiles/namei.c @@ -340,7 +340,7 @@ try_again: trap = lock_rename(cache->graveyard, dir); /* do some checks before getting the grave dentry */ - if (rep->d_parent != dir) { + if (rep->d_parent != dir || IS_DEADDIR(d_inode(rep))) { /* the entry was probably culled when we dropped the parent dir * lock */ unlock_rename(cache->graveyard, dir); -- GitLab From 7a4b046b0ce973f641d1b6ba4c6947ed94d77a80 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 16 Oct 2018 15:06:41 +0200 Subject: [PATCH 1236/2269] ptp: fix Spectre v1 vulnerability commit efa61c8cf2950ab5c0e66cff3cabe2a2b24e81ba upstream. pin_index can be indirectly controlled by user-space, hence leading to a potential exploitation of the Spectre variant 1 vulnerability. This issue was detected with the help of Smatch: drivers/ptp/ptp_chardev.c:253 ptp_ioctl() warn: potential spectre issue 'ops->pin_config' [r] (local cap) Fix this by sanitizing pin_index before using it to index ops->pin_config, and before passing it as an argument to function ptp_set_pinfunc(), in which it is used to index info->pin_config. Notice that given that speculation windows are large, the policy is to kill the speculation on the first load and not worry if it can be completed with a dependent load/store [1]. [1] https://marc.info/?l=linux-kernel&m=152449131114778&w=2 Cc: stable@vger.kernel.org Signed-off-by: Gustavo A. R. Silva Acked-by: Richard Cochran Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/ptp/ptp_chardev.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/ptp/ptp_chardev.c b/drivers/ptp/ptp_chardev.c index 51364621f77ce..a421d6c551b66 100644 --- a/drivers/ptp/ptp_chardev.c +++ b/drivers/ptp/ptp_chardev.c @@ -24,6 +24,8 @@ #include #include +#include + #include "ptp_private.h" static int ptp_disable_pinfunc(struct ptp_clock_info *ops, @@ -248,6 +250,7 @@ long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg) err = -EINVAL; break; } + pin_index = array_index_nospec(pin_index, ops->n_pins); if (mutex_lock_interruptible(&ptp->pincfg_mux)) return -ERESTARTSYS; pd = ops->pin_config[pin_index]; @@ -266,6 +269,7 @@ long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg) err = -EINVAL; break; } + pin_index = array_index_nospec(pin_index, ops->n_pins); if (mutex_lock_interruptible(&ptp->pincfg_mux)) return -ERESTARTSYS; err = ptp_set_pinfunc(ptp, pin_index, pd.func, pd.chan); -- GitLab From fd3127215babb993c7470dc556e547f5572ca371 Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Tue, 2 Oct 2018 23:29:11 +0800 Subject: [PATCH 1237/2269] drm/edid: Add 6 bpc quirk for BOE panel in HP Pavilion 15-n233sl commit 0711a43b6d84ff9189adfbf83c8bbf56eef794bf upstream. There's another panel that reports "DFP 1.x compliant TMDS" but it supports 6bpc instead of 8 bpc. Apply 6 bpc quirk for the panel to fix it. BugLink: https://bugs.launchpad.net/bugs/1794387 Cc: # v4.8+ Signed-off-by: Kai-Heng Feng Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20181002152911.4370-1-kai.heng.feng@canonical.com Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_edid.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index c29dea8956056..12be61308ba20 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -111,6 +111,9 @@ static const struct edid_quirk { /* AEO model 0 reports 8 bpc, but is a 6 bpc panel */ { "AEO", 0, EDID_QUIRK_FORCE_6BPC }, + /* BOE model on HP Pavilion 15-n233sl reports 8 bpc, but is a 6 bpc panel */ + { "BOE", 0x78b, EDID_QUIRK_FORCE_6BPC }, + /* CPT panel of Asus UX303LA reports 8 bpc, but is a 6 bpc panel */ { "CPT", 0x17df, EDID_QUIRK_FORCE_6BPC }, -- GitLab From 20ff18553ed84b2bc57567e1ac51bf432719c289 Mon Sep 17 00:00:00 2001 From: Clint Taylor Date: Fri, 5 Oct 2018 14:52:15 -0700 Subject: [PATCH 1238/2269] drm/edid: VSDB yCBCr420 Deep Color mode bit definitions commit 9068e02f58740778d8270840657f1e250a2cc60f upstream. HDMI Forum VSDB YCBCR420 deep color capability bits are 2:0. Correct definitions in the header for the mask to work correctly. Fixes: e6a9a2c3dc43 ("drm/edid: parse ycbcr 420 deep color information") Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=107893 Cc: # v4.14+ Signed-off-by: Clint Taylor Reviewed-by: Jani Nikula Reviewed-by: Shashank Sharma Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/1538776335-12569-1-git-send-email-clinton.a.taylor@intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_edid.c | 2 +- include/drm/drm_edid.h | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index 12be61308ba20..d1191ebed0720 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -4223,7 +4223,7 @@ static void drm_parse_ycbcr420_deep_color_info(struct drm_connector *connector, struct drm_hdmi_info *hdmi = &connector->display_info.hdmi; dc_mask = db[7] & DRM_EDID_YCBCR420_DC_MASK; - hdmi->y420_dc_modes |= dc_mask; + hdmi->y420_dc_modes = dc_mask; } static void drm_parse_hdmi_forum_vsdb(struct drm_connector *connector, diff --git a/include/drm/drm_edid.h b/include/drm/drm_edid.h index a992434ded999..267e0426c4791 100644 --- a/include/drm/drm_edid.h +++ b/include/drm/drm_edid.h @@ -214,9 +214,9 @@ struct detailed_timing { #define DRM_EDID_HDMI_DC_Y444 (1 << 3) /* YCBCR 420 deep color modes */ -#define DRM_EDID_YCBCR420_DC_48 (1 << 6) -#define DRM_EDID_YCBCR420_DC_36 (1 << 5) -#define DRM_EDID_YCBCR420_DC_30 (1 << 4) +#define DRM_EDID_YCBCR420_DC_48 (1 << 2) +#define DRM_EDID_YCBCR420_DC_36 (1 << 1) +#define DRM_EDID_YCBCR420_DC_30 (1 << 0) #define DRM_EDID_YCBCR420_DC_MASK (DRM_EDID_YCBCR420_DC_48 | \ DRM_EDID_YCBCR420_DC_36 | \ DRM_EDID_YCBCR420_DC_30) -- GitLab From c219c2cd34cc1bbf7dc9ffe04c5d7d411349e7c0 Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Wed, 3 Oct 2018 19:45:38 +0300 Subject: [PATCH 1239/2269] drm: fb-helper: Reject all pixel format changing requests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit db05c481977599236f12a85e55de9f5ab37b0a2c upstream. drm fbdev emulation doesn't support changing the pixel format at all, so reject all pixel format changing requests. Cc: stable@vger.kernel.org Signed-off-by: Eugeniy Paltsev Reviewed-by: Ville Syrjälä Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20181003164538.5534-1-Eugeniy.Paltsev@synopsys.com Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_fb_helper.c | 91 ++++++++++----------------------- 1 file changed, 26 insertions(+), 65 deletions(-) diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index 5e93589c335cb..29d1d3df31642 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -1490,6 +1490,25 @@ unlock: } EXPORT_SYMBOL(drm_fb_helper_ioctl); +static bool drm_fb_pixel_format_equal(const struct fb_var_screeninfo *var_1, + const struct fb_var_screeninfo *var_2) +{ + return var_1->bits_per_pixel == var_2->bits_per_pixel && + var_1->grayscale == var_2->grayscale && + var_1->red.offset == var_2->red.offset && + var_1->red.length == var_2->red.length && + var_1->red.msb_right == var_2->red.msb_right && + var_1->green.offset == var_2->green.offset && + var_1->green.length == var_2->green.length && + var_1->green.msb_right == var_2->green.msb_right && + var_1->blue.offset == var_2->blue.offset && + var_1->blue.length == var_2->blue.length && + var_1->blue.msb_right == var_2->blue.msb_right && + var_1->transp.offset == var_2->transp.offset && + var_1->transp.length == var_2->transp.length && + var_1->transp.msb_right == var_2->transp.msb_right; +} + /** * drm_fb_helper_check_var - implementation for &fb_ops.fb_check_var * @var: screeninfo to check @@ -1500,7 +1519,6 @@ int drm_fb_helper_check_var(struct fb_var_screeninfo *var, { struct drm_fb_helper *fb_helper = info->par; struct drm_framebuffer *fb = fb_helper->fb; - int depth; if (var->pixclock != 0 || in_dbg_master()) return -EINVAL; @@ -1520,72 +1538,15 @@ int drm_fb_helper_check_var(struct fb_var_screeninfo *var, return -EINVAL; } - switch (var->bits_per_pixel) { - case 16: - depth = (var->green.length == 6) ? 16 : 15; - break; - case 32: - depth = (var->transp.length > 0) ? 32 : 24; - break; - default: - depth = var->bits_per_pixel; - break; - } - - switch (depth) { - case 8: - var->red.offset = 0; - var->green.offset = 0; - var->blue.offset = 0; - var->red.length = 8; - var->green.length = 8; - var->blue.length = 8; - var->transp.length = 0; - var->transp.offset = 0; - break; - case 15: - var->red.offset = 10; - var->green.offset = 5; - var->blue.offset = 0; - var->red.length = 5; - var->green.length = 5; - var->blue.length = 5; - var->transp.length = 1; - var->transp.offset = 15; - break; - case 16: - var->red.offset = 11; - var->green.offset = 5; - var->blue.offset = 0; - var->red.length = 5; - var->green.length = 6; - var->blue.length = 5; - var->transp.length = 0; - var->transp.offset = 0; - break; - case 24: - var->red.offset = 16; - var->green.offset = 8; - var->blue.offset = 0; - var->red.length = 8; - var->green.length = 8; - var->blue.length = 8; - var->transp.length = 0; - var->transp.offset = 0; - break; - case 32: - var->red.offset = 16; - var->green.offset = 8; - var->blue.offset = 0; - var->red.length = 8; - var->green.length = 8; - var->blue.length = 8; - var->transp.length = 8; - var->transp.offset = 24; - break; - default: + /* + * drm fbdev emulation doesn't support changing the pixel format at all, + * so reject all pixel format changing requests. + */ + if (!drm_fb_pixel_format_equal(var, &info->var)) { + DRM_DEBUG("fbdev emulation doesn't support changing the pixel format\n"); return -EINVAL; } + return 0; } EXPORT_SYMBOL(drm_fb_helper_check_var); -- GitLab From 66448066c2b1ec587e6168a9206ffedd11d94444 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 16 Oct 2018 16:59:01 +0200 Subject: [PATCH 1240/2269] RDMA/ucma: Fix Spectre v1 vulnerability commit a3671a4f973ee9d9621d60166cc3b037c397d604 upstream. hdr.cmd can be indirectly controlled by user-space, hence leading to a potential exploitation of the Spectre variant 1 vulnerability. This issue was detected with the help of Smatch: drivers/infiniband/core/ucma.c:1686 ucma_write() warn: potential spectre issue 'ucma_cmd_table' [r] (local cap) Fix this by sanitizing hdr.cmd before using it to index ucm_cmd_table. Notice that given that speculation windows are large, the policy is to kill the speculation on the first load and not worry if it can be completed with a dependent load/store [1]. [1] https://marc.info/?l=linux-kernel&m=152449131114778&w=2 Cc: stable@vger.kernel.org Signed-off-by: Gustavo A. R. Silva Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/core/ucma.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index 17144a781aebc..c3e5f921da12e 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -44,6 +44,8 @@ #include #include +#include + #include #include #include @@ -1659,6 +1661,7 @@ static ssize_t ucma_write(struct file *filp, const char __user *buf, if (hdr.cmd >= ARRAY_SIZE(ucma_cmd_table)) return -EINVAL; + hdr.cmd = array_index_nospec(hdr.cmd, ARRAY_SIZE(ucma_cmd_table)); if (hdr.in + sizeof(hdr) > len) return -EINVAL; -- GitLab From f0e3b74a4dc0ce27abccdfdcd41eae73cee4b9ac Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 16 Oct 2018 16:32:40 +0200 Subject: [PATCH 1241/2269] IB/ucm: Fix Spectre v1 vulnerability commit 0295e39595e1146522f2722715dba7f7fba42217 upstream. hdr.cmd can be indirectly controlled by user-space, hence leading to a potential exploitation of the Spectre variant 1 vulnerability. This issue was detected with the help of Smatch: drivers/infiniband/core/ucm.c:1127 ib_ucm_write() warn: potential spectre issue 'ucm_cmd_table' [r] (local cap) Fix this by sanitizing hdr.cmd before using it to index ucm_cmd_table. Notice that given that speculation windows are large, the policy is to kill the speculation on the first load and not worry if it can be completed with a dependent load/store [1]. [1] https://marc.info/?l=linux-kernel&m=152449131114778&w=2 Cc: stable@vger.kernel.org Signed-off-by: Gustavo A. R. Silva Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/core/ucm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c index f2a7f62c2834f..09cb24353be39 100644 --- a/drivers/infiniband/core/ucm.c +++ b/drivers/infiniband/core/ucm.c @@ -46,6 +46,8 @@ #include #include +#include + #include #include @@ -1118,6 +1120,7 @@ static ssize_t ib_ucm_write(struct file *filp, const char __user *buf, if (hdr.cmd >= ARRAY_SIZE(ucm_cmd_table)) return -EINVAL; + hdr.cmd = array_index_nospec(hdr.cmd, ARRAY_SIZE(ucm_cmd_table)); if (hdr.in + sizeof(hdr) > len) return -EINVAL; -- GitLab From 8686f337ca17db405ca770c490e725088be58414 Mon Sep 17 00:00:00 2001 From: Tobias Herzog Date: Sat, 22 Sep 2018 22:11:10 +0200 Subject: [PATCH 1242/2269] cdc-acm: do not reset notification buffer index upon urb unlinking commit dae3ddba36f8c337fb59cef07d564da6fc9b7551 upstream. Resetting the write index of the notification buffer on urb unlink (e.g. closing a cdc-acm device from userspace) may lead to wrong interpretation of further received notifications, in case the index is not 0 when urb unlink happens (i.e. when parts of a notification already have been transferred). On the device side there is no "reset" of the notification transimission and thus we would get out of sync with the device. Signed-off-by: Tobias Herzog Acked-by: Oliver Neukum Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/cdc-acm.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index 9f6f402470ac1..9f52859583cb7 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -367,7 +367,6 @@ static void acm_ctrl_irq(struct urb *urb) case -ENOENT: case -ESHUTDOWN: /* this urb is terminated, clean up */ - acm->nb_index = 0; dev_dbg(&acm->control->dev, "%s - urb shutting down with status: %d\n", __func__, status); -- GitLab From 32772ef3f5ed990404c7664782ad77c551d19693 Mon Sep 17 00:00:00 2001 From: Tobias Herzog Date: Sat, 22 Sep 2018 22:11:11 +0200 Subject: [PATCH 1243/2269] cdc-acm: correct counting of UART states in serial state notification commit f976d0e5747ca65ccd0fb2a4118b193d70aa1836 upstream. The usb standard ("Universal Serial Bus Class Definitions for Communication Devices") distiguishes between "consistent signals" (DSR, DCD), and "irregular signals" (break, ring, parity error, framing error, overrun). The bits of "irregular signals" are set, if this error/event occurred on the device side and are immeadeatly unset, if the serial state notification was sent. Like other drivers of real serial ports do, just the occurence of those events should be counted in serial_icounter_struct (but no 1->0 transitions). Signed-off-by: Tobias Herzog Acked-by: Oliver Neukum Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/cdc-acm.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index 9f52859583cb7..5a703c33de1c9 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -322,17 +322,17 @@ static void acm_process_notification(struct acm *acm, unsigned char *buf) if (difference & ACM_CTRL_DSR) acm->iocount.dsr++; - if (difference & ACM_CTRL_BRK) - acm->iocount.brk++; - if (difference & ACM_CTRL_RI) - acm->iocount.rng++; if (difference & ACM_CTRL_DCD) acm->iocount.dcd++; - if (difference & ACM_CTRL_FRAMING) + if (newctrl & ACM_CTRL_BRK) + acm->iocount.brk++; + if (newctrl & ACM_CTRL_RI) + acm->iocount.rng++; + if (newctrl & ACM_CTRL_FRAMING) acm->iocount.frame++; - if (difference & ACM_CTRL_PARITY) + if (newctrl & ACM_CTRL_PARITY) acm->iocount.parity++; - if (difference & ACM_CTRL_OVERRUN) + if (newctrl & ACM_CTRL_OVERRUN) acm->iocount.overrun++; spin_unlock(&acm->read_lock); -- GitLab From 6f053e36bda96461245d0d01f374dac4f7cab54c Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Thu, 4 Oct 2018 15:49:06 +0200 Subject: [PATCH 1244/2269] cdc-acm: fix race between reset and control messaging commit 9397940ed812b942c520e0c25ed4b2c64d57e8b9 upstream. If a device splits up a control message and a reset() happens between the parts, the message is lost and already recieved parts must be dropped. Signed-off-by: Oliver Neukum Fixes: 1aba579f3cf51 ("cdc-acm: handle read pipe errors") Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/cdc-acm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index 5a703c33de1c9..e41d00bc7e974 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -1654,6 +1654,7 @@ static int acm_pre_reset(struct usb_interface *intf) struct acm *acm = usb_get_intfdata(intf); clear_bit(EVENT_RX_STALL, &acm->flags); + acm->nb_index = 0; /* pending control transfers are lost */ return 0; } -- GitLab From 89cd15c962405428bcd11c5f34127698f29bbd60 Mon Sep 17 00:00:00 2001 From: "Shuah Khan (Samsung OSG)" Date: Fri, 5 Oct 2018 16:17:44 -0600 Subject: [PATCH 1245/2269] usb: usbip: Fix BUG: KASAN: slab-out-of-bounds in vhci_hub_control() commit 81f7567c51ad97668d1c3a48e8ecc482e64d4161 upstream. vhci_hub_control() accesses port_status array with out of bounds port value. Fix it to reference port_status[] only with a valid rhport value when invalid_rhport flag is true. The invalid_rhport flag is set early on after detecting in port value is within the bounds or not. The following is used reproduce the problem and verify the fix: C reproducer: https://syzkaller.appspot.com/x/repro.c?x=14ed8ab6400000 Reported-by: syzbot+bccc1fe10b70fadc78d0@syzkaller.appspotmail.com Cc: stable Signed-off-by: Shuah Khan (Samsung OSG) Signed-off-by: Greg Kroah-Hartman --- drivers/usb/usbip/vhci_hcd.c | 57 ++++++++++++++++++++++++++---------- 1 file changed, 42 insertions(+), 15 deletions(-) diff --git a/drivers/usb/usbip/vhci_hcd.c b/drivers/usb/usbip/vhci_hcd.c index 05aa1ba351b6d..84e2d7edaa5c7 100644 --- a/drivers/usb/usbip/vhci_hcd.c +++ b/drivers/usb/usbip/vhci_hcd.c @@ -332,8 +332,9 @@ static int vhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, struct vhci_hcd *vhci_hcd; struct vhci *vhci; int retval = 0; - int rhport; + int rhport = -1; unsigned long flags; + bool invalid_rhport = false; u32 prev_port_status[VHCI_HC_PORTS]; @@ -348,9 +349,19 @@ static int vhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, usbip_dbg_vhci_rh("typeReq %x wValue %x wIndex %x\n", typeReq, wValue, wIndex); - if (wIndex > VHCI_HC_PORTS) - pr_err("invalid port number %d\n", wIndex); - rhport = wIndex - 1; + /* + * wIndex can be 0 for some request types (typeReq). rhport is + * in valid range when wIndex >= 1 and < VHCI_HC_PORTS. + * + * Reference port_status[] only with valid rhport when + * invalid_rhport is false. + */ + if (wIndex < 1 || wIndex > VHCI_HC_PORTS) { + invalid_rhport = true; + if (wIndex > VHCI_HC_PORTS) + pr_err("invalid port number %d\n", wIndex); + } else + rhport = wIndex - 1; vhci_hcd = hcd_to_vhci_hcd(hcd); vhci = vhci_hcd->vhci; @@ -359,8 +370,9 @@ static int vhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, /* store old status and compare now and old later */ if (usbip_dbg_flag_vhci_rh) { - memcpy(prev_port_status, vhci_hcd->port_status, - sizeof(prev_port_status)); + if (!invalid_rhport) + memcpy(prev_port_status, vhci_hcd->port_status, + sizeof(prev_port_status)); } switch (typeReq) { @@ -368,8 +380,10 @@ static int vhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, usbip_dbg_vhci_rh(" ClearHubFeature\n"); break; case ClearPortFeature: - if (rhport < 0) + if (invalid_rhport) { + pr_err("invalid port number %d\n", wIndex); goto error; + } switch (wValue) { case USB_PORT_FEAT_SUSPEND: if (hcd->speed == HCD_USB3) { @@ -429,9 +443,10 @@ static int vhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, break; case GetPortStatus: usbip_dbg_vhci_rh(" GetPortStatus port %x\n", wIndex); - if (wIndex < 1) { + if (invalid_rhport) { pr_err("invalid port number %d\n", wIndex); retval = -EPIPE; + goto error; } /* we do not care about resume. */ @@ -527,16 +542,20 @@ static int vhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, goto error; } - if (rhport < 0) + if (invalid_rhport) { + pr_err("invalid port number %d\n", wIndex); goto error; + } vhci_hcd->port_status[rhport] |= USB_PORT_STAT_SUSPEND; break; case USB_PORT_FEAT_POWER: usbip_dbg_vhci_rh( " SetPortFeature: USB_PORT_FEAT_POWER\n"); - if (rhport < 0) + if (invalid_rhport) { + pr_err("invalid port number %d\n", wIndex); goto error; + } if (hcd->speed == HCD_USB3) vhci_hcd->port_status[rhport] |= USB_SS_PORT_STAT_POWER; else @@ -545,8 +564,10 @@ static int vhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, case USB_PORT_FEAT_BH_PORT_RESET: usbip_dbg_vhci_rh( " SetPortFeature: USB_PORT_FEAT_BH_PORT_RESET\n"); - if (rhport < 0) + if (invalid_rhport) { + pr_err("invalid port number %d\n", wIndex); goto error; + } /* Applicable only for USB3.0 hub */ if (hcd->speed != HCD_USB3) { pr_err("USB_PORT_FEAT_BH_PORT_RESET req not " @@ -557,8 +578,10 @@ static int vhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, case USB_PORT_FEAT_RESET: usbip_dbg_vhci_rh( " SetPortFeature: USB_PORT_FEAT_RESET\n"); - if (rhport < 0) + if (invalid_rhport) { + pr_err("invalid port number %d\n", wIndex); goto error; + } /* if it's already enabled, disable */ if (hcd->speed == HCD_USB3) { vhci_hcd->port_status[rhport] = 0; @@ -579,8 +602,10 @@ static int vhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, default: usbip_dbg_vhci_rh(" SetPortFeature: default %d\n", wValue); - if (rhport < 0) + if (invalid_rhport) { + pr_err("invalid port number %d\n", wIndex); goto error; + } if (hcd->speed == HCD_USB3) { if ((vhci_hcd->port_status[rhport] & USB_SS_PORT_STAT_POWER) != 0) { @@ -622,7 +647,7 @@ error: if (usbip_dbg_flag_vhci_rh) { pr_debug("port %d\n", rhport); /* Only dump valid port status */ - if (rhport >= 0) { + if (!invalid_rhport) { dump_port_status_diff(prev_port_status[rhport], vhci_hcd->port_status[rhport], hcd->speed == HCD_USB3); @@ -632,8 +657,10 @@ error: spin_unlock_irqrestore(&vhci->lock, flags); - if ((vhci_hcd->port_status[rhport] & PORT_C_MASK) != 0) + if (!invalid_rhport && + (vhci_hcd->port_status[rhport] & PORT_C_MASK) != 0) { usb_hcd_poll_rh_status(hcd); + } return retval; } -- GitLab From 509015954a0c67ba7c34dadaf0d3fffde8c482c7 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 16 Oct 2018 12:16:45 +0200 Subject: [PATCH 1246/2269] usb: gadget: storage: Fix Spectre v1 vulnerability commit 9ae24af3669111d418242caec8dd4ebd9ba26860 upstream. num can be indirectly controlled by user-space, hence leading to a potential exploitation of the Spectre variant 1 vulnerability. This issue was detected with the help of Smatch: drivers/usb/gadget/function/f_mass_storage.c:3177 fsg_lun_make() warn: potential spectre issue 'fsg_opts->common->luns' [r] (local cap) Fix this by sanitizing num before using it to index fsg_opts->common->luns Notice that given that speculation windows are large, the policy is to kill the speculation on the first load and not worry if it can be completed with a dependent load/store [1]. [1] https://marc.info/?l=linux-kernel&m=152449131114778&w=2 Cc: stable@vger.kernel.org Signed-off-by: Gustavo A. R. Silva Acked-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_mass_storage.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/gadget/function/f_mass_storage.c b/drivers/usb/gadget/function/f_mass_storage.c index 5153e29870c39..25ba303295332 100644 --- a/drivers/usb/gadget/function/f_mass_storage.c +++ b/drivers/usb/gadget/function/f_mass_storage.c @@ -221,6 +221,8 @@ #include #include +#include + #include "configfs.h" @@ -3170,6 +3172,7 @@ static struct config_group *fsg_lun_make(struct config_group *group, fsg_opts = to_fsg_opts(&group->cg_item); if (num >= FSG_MAX_LUNS) return ERR_PTR(-ERANGE); + num = array_index_nospec(num, FSG_MAX_LUNS); mutex_lock(&fsg_opts->lock); if (fsg_opts->refcnt || fsg_opts->common->luns[num]) { -- GitLab From dad71807595250788821346805ee8976efcc7f29 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Mon, 15 Oct 2018 16:55:04 -0400 Subject: [PATCH 1247/2269] USB: fix the usbfs flag sanitization for control transfers commit 665c365a77fbfeabe52694aedf3446d5f2f1ce42 upstream. Commit 7a68d9fb8510 ("USB: usbdevfs: sanitize flags more") checks the transfer flags for URBs submitted from userspace via usbfs. However, the check for whether the USBDEVFS_URB_SHORT_NOT_OK flag should be allowed for a control transfer was added in the wrong place, before the code has properly determined the direction of the control transfer. (Control transfers are special because for them, the direction is set by the bRequestType byte of the Setup packet rather than direction bit of the endpoint address.) This patch moves code which sets up the allow_short flag for control transfers down after is_in has been set to the correct value. Signed-off-by: Alan Stern Reported-and-tested-by: syzbot+24a30223a4b609bb802e@syzkaller.appspotmail.com Fixes: 7a68d9fb8510 ("USB: usbdevfs: sanitize flags more") CC: Oliver Neukum CC: Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/devio.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c index 76cb9b3649b49..492977f78fdef 100644 --- a/drivers/usb/core/devio.c +++ b/drivers/usb/core/devio.c @@ -1491,8 +1491,6 @@ static int proc_do_submiturb(struct usb_dev_state *ps, struct usbdevfs_urb *uurb u = 0; switch (uurb->type) { case USBDEVFS_URB_TYPE_CONTROL: - if (is_in) - allow_short = true; if (!usb_endpoint_xfer_control(&ep->desc)) return -EINVAL; /* min 8 byte setup packet */ @@ -1522,6 +1520,8 @@ static int proc_do_submiturb(struct usb_dev_state *ps, struct usbdevfs_urb *uurb is_in = 0; uurb->endpoint &= ~USB_DIR_IN; } + if (is_in) + allow_short = true; snoop(&ps->dev->dev, "control urb: bRequestType=%02x " "bRequest=%02x wValue=%04x " "wIndex=%04x wLength=%04x\n", -- GitLab From e5c0a5893c0443d943b0a411802e3e463664c737 Mon Sep 17 00:00:00 2001 From: Mikhail Nikiforov Date: Mon, 15 Oct 2018 11:17:56 -0700 Subject: [PATCH 1248/2269] Input: elan_i2c - add ACPI ID for Lenovo IdeaPad 330-15IGM commit 13c1c5e4d7f887cba36c5e3df3faa22071c1469f upstream. Add ELAN061C to the ACPI table to support Elan touchpad found in Lenovo IdeaPad 330-15IGM. Signed-off-by: Mikhail Nikiforov Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/mouse/elan_i2c_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/mouse/elan_i2c_core.c b/drivers/input/mouse/elan_i2c_core.c index 696e540304fd9..766d30a7b0857 100644 --- a/drivers/input/mouse/elan_i2c_core.c +++ b/drivers/input/mouse/elan_i2c_core.c @@ -1262,6 +1262,7 @@ static const struct acpi_device_id elan_acpi_id[] = { { "ELAN0611", 0 }, { "ELAN0612", 0 }, { "ELAN0618", 0 }, + { "ELAN061C", 0 }, { "ELAN061D", 0 }, { "ELAN0622", 0 }, { "ELAN1000", 0 }, -- GitLab From 1220de22227bbb2773f31c29c33c1271f2f42890 Mon Sep 17 00:00:00 2001 From: Phil Auld Date: Mon, 8 Oct 2018 10:36:40 -0400 Subject: [PATCH 1249/2269] sched/fair: Fix throttle_list starvation with low CFS quota commit baa9be4ffb55876923dc9716abc0a448e510ba30 upstream. With a very low cpu.cfs_quota_us setting, such as the minimum of 1000, distribute_cfs_runtime may not empty the throttled_list before it runs out of runtime to distribute. In that case, due to the change from c06f04c7048 to put throttled entries at the head of the list, later entries on the list will starve. Essentially, the same X processes will get pulled off the list, given CPU time and then, when expired, get put back on the head of the list where distribute_cfs_runtime will give runtime to the same set of processes leaving the rest. Fix the issue by setting a bit in struct cfs_bandwidth when distribute_cfs_runtime is running, so that the code in throttle_cfs_rq can decide to put the throttled entry on the tail or the head of the list. The bit is set/cleared by the callers of distribute_cfs_runtime while they hold cfs_bandwidth->lock. This is easy to reproduce with a handful of CPU consumers. I use 'crash' on the live system. In some cases you can simply look at the throttled list and see the later entries are not changing: crash> list cfs_rq.throttled_list -H 0xffff90b54f6ade40 -s cfs_rq.runtime_remaining | paste - - | awk '{print $1" "$4}' | pr -t -n3 1 ffff90b56cb2d200 -976050 2 ffff90b56cb2cc00 -484925 3 ffff90b56cb2bc00 -658814 4 ffff90b56cb2ba00 -275365 5 ffff90b166a45600 -135138 6 ffff90b56cb2da00 -282505 7 ffff90b56cb2e000 -148065 8 ffff90b56cb2fa00 -872591 9 ffff90b56cb2c000 -84687 10 ffff90b56cb2f000 -87237 11 ffff90b166a40a00 -164582 crash> list cfs_rq.throttled_list -H 0xffff90b54f6ade40 -s cfs_rq.runtime_remaining | paste - - | awk '{print $1" "$4}' | pr -t -n3 1 ffff90b56cb2d200 -994147 2 ffff90b56cb2cc00 -306051 3 ffff90b56cb2bc00 -961321 4 ffff90b56cb2ba00 -24490 5 ffff90b166a45600 -135138 6 ffff90b56cb2da00 -282505 7 ffff90b56cb2e000 -148065 8 ffff90b56cb2fa00 -872591 9 ffff90b56cb2c000 -84687 10 ffff90b56cb2f000 -87237 11 ffff90b166a40a00 -164582 Sometimes it is easier to see by finding a process getting starved and looking at the sched_info: crash> task ffff8eb765994500 sched_info PID: 7800 TASK: ffff8eb765994500 CPU: 16 COMMAND: "cputest" sched_info = { pcount = 8, run_delay = 697094208, last_arrival = 240260125039, last_queued = 240260327513 }, crash> task ffff8eb765994500 sched_info PID: 7800 TASK: ffff8eb765994500 CPU: 16 COMMAND: "cputest" sched_info = { pcount = 8, run_delay = 697094208, last_arrival = 240260125039, last_queued = 240260327513 }, Signed-off-by: Phil Auld Reviewed-by: Ben Segall Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: stable@vger.kernel.org Fixes: c06f04c70489 ("sched: Fix potential near-infinite distribute_cfs_runtime() loop") Link: http://lkml.kernel.org/r/20181008143639.GA4019@pauld.bos.csb Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- kernel/sched/fair.c | 22 +++++++++++++++++++--- kernel/sched/sched.h | 2 ++ 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index b2d699f283046..19bfa21f7197f 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -4299,9 +4299,13 @@ static void throttle_cfs_rq(struct cfs_rq *cfs_rq) /* * Add to the _head_ of the list, so that an already-started - * distribute_cfs_runtime will not see us + * distribute_cfs_runtime will not see us. If disribute_cfs_runtime is + * not running add to the tail so that later runqueues don't get starved. */ - list_add_rcu(&cfs_rq->throttled_list, &cfs_b->throttled_cfs_rq); + if (cfs_b->distribute_running) + list_add_rcu(&cfs_rq->throttled_list, &cfs_b->throttled_cfs_rq); + else + list_add_tail_rcu(&cfs_rq->throttled_list, &cfs_b->throttled_cfs_rq); /* * If we're the first throttled task, make sure the bandwidth @@ -4445,14 +4449,16 @@ static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun) * in us over-using our runtime if it is all used during this loop, but * only by limited amounts in that extreme case. */ - while (throttled && cfs_b->runtime > 0) { + while (throttled && cfs_b->runtime > 0 && !cfs_b->distribute_running) { runtime = cfs_b->runtime; + cfs_b->distribute_running = 1; raw_spin_unlock(&cfs_b->lock); /* we can't nest cfs_b->lock while distributing bandwidth */ runtime = distribute_cfs_runtime(cfs_b, runtime, runtime_expires); raw_spin_lock(&cfs_b->lock); + cfs_b->distribute_running = 0; throttled = !list_empty(&cfs_b->throttled_cfs_rq); cfs_b->runtime -= min(runtime, cfs_b->runtime); @@ -4563,6 +4569,11 @@ static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b) /* confirm we're still not at a refresh boundary */ raw_spin_lock(&cfs_b->lock); + if (cfs_b->distribute_running) { + raw_spin_unlock(&cfs_b->lock); + return; + } + if (runtime_refresh_within(cfs_b, min_bandwidth_expiration)) { raw_spin_unlock(&cfs_b->lock); return; @@ -4572,6 +4583,9 @@ static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b) runtime = cfs_b->runtime; expires = cfs_b->runtime_expires; + if (runtime) + cfs_b->distribute_running = 1; + raw_spin_unlock(&cfs_b->lock); if (!runtime) @@ -4582,6 +4596,7 @@ static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b) raw_spin_lock(&cfs_b->lock); if (expires == cfs_b->runtime_expires) cfs_b->runtime -= min(runtime, cfs_b->runtime); + cfs_b->distribute_running = 0; raw_spin_unlock(&cfs_b->lock); } @@ -4690,6 +4705,7 @@ void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b) cfs_b->period_timer.function = sched_cfs_period_timer; hrtimer_init(&cfs_b->slack_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); cfs_b->slack_timer.function = sched_cfs_slack_timer; + cfs_b->distribute_running = 0; } static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq) diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index b29376169f3f8..63d999dfec80c 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -288,6 +288,8 @@ struct cfs_bandwidth { /* statistics */ int nr_periods, nr_throttled; u64 throttled_time; + + bool distribute_running; #endif }; -- GitLab From 1595a964d7afb79530770a2dd6fc83b1676faef0 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 11 Oct 2018 12:38:26 +0200 Subject: [PATCH 1250/2269] x86/tsc: Force inlining of cyc2ns bits commit 4907c68abd3f60f650f98d5a69d4ec77c0bde44f upstream. Looking at the asm for native_sched_clock() I noticed we don't inline enough. Mostly caused by sharing code with cyc2ns_read_begin(), which we didn't used to do. So mark all that __force_inline to make it DTRT. Fixes: 59eaef78bfea ("x86/tsc: Remodel cyc2ns to use seqcount_latch()") Reported-by: Eric Dumazet Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Cc: hpa@zytor.com Cc: eric.dumazet@gmail.com Cc: bp@alien8.de Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181011104019.695196158@infradead.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/tsc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 0bf06fa3027e0..36d02484e384d 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -60,7 +60,7 @@ struct cyc2ns { static DEFINE_PER_CPU_ALIGNED(struct cyc2ns, cyc2ns); -void cyc2ns_read_begin(struct cyc2ns_data *data) +void __always_inline cyc2ns_read_begin(struct cyc2ns_data *data) { int seq, idx; @@ -77,7 +77,7 @@ void cyc2ns_read_begin(struct cyc2ns_data *data) } while (unlikely(seq != this_cpu_read(cyc2ns.seq.sequence))); } -void cyc2ns_read_end(void) +void __always_inline cyc2ns_read_end(void) { preempt_enable_notrace(); } @@ -123,7 +123,7 @@ static void cyc2ns_init(int cpu) seqcount_init(&c2n->seq); } -static inline unsigned long long cycles_2_ns(unsigned long long cyc) +static __always_inline unsigned long long cycles_2_ns(unsigned long long cyc) { struct cyc2ns_data data; unsigned long long ns; -- GitLab From 7f6273f584fd74bcee9aa16db3f0e2fbcae48e86 Mon Sep 17 00:00:00 2001 From: Zhimin Gu Date: Fri, 21 Sep 2018 14:26:24 +0800 Subject: [PATCH 1251/2269] x86, hibernate: Fix nosave_regions setup for hibernation commit cc55f7537db6af371e9c1c6a71161ee40f918824 upstream. On 32bit systems, nosave_regions(non RAM areas) located between max_low_pfn and max_pfn are not excluded from hibernation snapshot currently, which may result in a machine check exception when trying to access these unsafe regions during hibernation: [ 612.800453] Disabling lock debugging due to kernel taint [ 612.805786] mce: [Hardware Error]: CPU 0: Machine Check Exception: 5 Bank 6: fe00000000801136 [ 612.814344] mce: [Hardware Error]: RIP !INEXACT! 60:<00000000d90be566> {swsusp_save+0x436/0x560} [ 612.823167] mce: [Hardware Error]: TSC 1f5939fe276 ADDR dd000000 MISC 30e0000086 [ 612.830677] mce: [Hardware Error]: PROCESSOR 0:306c3 TIME 1529487426 SOCKET 0 APIC 0 microcode 24 [ 612.839581] mce: [Hardware Error]: Run the above through 'mcelog --ascii' [ 612.846394] mce: [Hardware Error]: Machine check: Processor context corrupt [ 612.853380] Kernel panic - not syncing: Fatal machine check [ 612.858978] Kernel Offset: 0x18000000 from 0xc1000000 (relocation range: 0xc0000000-0xf7ffdfff) This is because on 32bit systems, pages above max_low_pfn are regarded as high memeory, and accessing unsafe pages might cause expected MCE. On the problematic 32bit system, there are reserved memory above low memory, which triggered the MCE: e820 memory mapping: [ 0.000000] BIOS-e820: [mem 0x0000000000000000-0x000000000009d7ff] usable [ 0.000000] BIOS-e820: [mem 0x000000000009d800-0x000000000009ffff] reserved [ 0.000000] BIOS-e820: [mem 0x00000000000e0000-0x00000000000fffff] reserved [ 0.000000] BIOS-e820: [mem 0x0000000000100000-0x00000000d160cfff] usable [ 0.000000] BIOS-e820: [mem 0x00000000d160d000-0x00000000d1613fff] ACPI NVS [ 0.000000] BIOS-e820: [mem 0x00000000d1614000-0x00000000d1a44fff] usable [ 0.000000] BIOS-e820: [mem 0x00000000d1a45000-0x00000000d1ecffff] reserved [ 0.000000] BIOS-e820: [mem 0x00000000d1ed0000-0x00000000d7eeafff] usable [ 0.000000] BIOS-e820: [mem 0x00000000d7eeb000-0x00000000d7ffffff] reserved [ 0.000000] BIOS-e820: [mem 0x00000000d8000000-0x00000000d875ffff] usable [ 0.000000] BIOS-e820: [mem 0x00000000d8760000-0x00000000d87fffff] reserved [ 0.000000] BIOS-e820: [mem 0x00000000d8800000-0x00000000d8fadfff] usable [ 0.000000] BIOS-e820: [mem 0x00000000d8fae000-0x00000000d8ffffff] ACPI data [ 0.000000] BIOS-e820: [mem 0x00000000d9000000-0x00000000da71bfff] usable [ 0.000000] BIOS-e820: [mem 0x00000000da71c000-0x00000000da7fffff] ACPI NVS [ 0.000000] BIOS-e820: [mem 0x00000000da800000-0x00000000dbb8bfff] usable [ 0.000000] BIOS-e820: [mem 0x00000000dbb8c000-0x00000000dbffffff] reserved [ 0.000000] BIOS-e820: [mem 0x00000000dd000000-0x00000000df1fffff] reserved [ 0.000000] BIOS-e820: [mem 0x00000000f8000000-0x00000000fbffffff] reserved [ 0.000000] BIOS-e820: [mem 0x00000000fec00000-0x00000000fec00fff] reserved [ 0.000000] BIOS-e820: [mem 0x00000000fed00000-0x00000000fed03fff] reserved [ 0.000000] BIOS-e820: [mem 0x00000000fed1c000-0x00000000fed1ffff] reserved [ 0.000000] BIOS-e820: [mem 0x00000000fee00000-0x00000000fee00fff] reserved [ 0.000000] BIOS-e820: [mem 0x00000000ff000000-0x00000000ffffffff] reserved [ 0.000000] BIOS-e820: [mem 0x0000000100000000-0x000000041edfffff] usable Fix this problem by changing pfn limit from max_low_pfn to max_pfn. This fix does not impact 64bit system because on 64bit max_low_pfn is the same as max_pfn. Signed-off-by: Zhimin Gu Acked-by: Pavel Machek Signed-off-by: Chen Yu Acked-by: Thomas Gleixner Cc: All applicable Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index dcb00acb6583c..4bc12447a50fc 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -1287,7 +1287,7 @@ void __init setup_arch(char **cmdline_p) kvm_guest_init(); e820__reserve_resources(); - e820__register_nosave_regions(max_low_pfn); + e820__register_nosave_regions(max_pfn); x86_init.resources.reserve_resources(); -- GitLab From 577bfbe31fa9da8c8ed1c045a9be8d3cc21f1664 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 11 Oct 2018 12:38:27 +0200 Subject: [PATCH 1252/2269] x86/percpu: Fix this_cpu_read() commit b59167ac7bafd804c91e49ad53c6d33a7394d4c8 upstream. Eric reported that a sequence count loop using this_cpu_read() got optimized out. This is wrong, this_cpu_read() must imply READ_ONCE() because the interface is IRQ-safe, therefore an interrupt can have changed the per-cpu value. Fixes: 7c3576d261ce ("[PATCH] i386: Convert PDA into the percpu section") Reported-by: Eric Dumazet Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Acked-by: Eric Dumazet Cc: hpa@zytor.com Cc: eric.dumazet@gmail.com Cc: bp@alien8.de Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181011104019.748208519@infradead.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/percpu.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index ba3c523aaf161..12aa2bb6bac4b 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -185,22 +185,22 @@ do { \ typeof(var) pfo_ret__; \ switch (sizeof(var)) { \ case 1: \ - asm(op "b "__percpu_arg(1)",%0" \ + asm volatile(op "b "__percpu_arg(1)",%0"\ : "=q" (pfo_ret__) \ : "m" (var)); \ break; \ case 2: \ - asm(op "w "__percpu_arg(1)",%0" \ + asm volatile(op "w "__percpu_arg(1)",%0"\ : "=r" (pfo_ret__) \ : "m" (var)); \ break; \ case 4: \ - asm(op "l "__percpu_arg(1)",%0" \ + asm volatile(op "l "__percpu_arg(1)",%0"\ : "=r" (pfo_ret__) \ : "m" (var)); \ break; \ case 8: \ - asm(op "q "__percpu_arg(1)",%0" \ + asm volatile(op "q "__percpu_arg(1)",%0"\ : "=r" (pfo_ret__) \ : "m" (var)); \ break; \ -- GitLab From 70931375b112460ab15d601a1e89c6f2d0701012 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Fri, 12 Oct 2018 17:53:12 -0700 Subject: [PATCH 1253/2269] x86/time: Correct the attribute on jiffies' definition commit 53c13ba8ed39e89f21a0b98f4c8a241bb44e483d upstream. Clang warns that the declaration of jiffies in include/linux/jiffies.h doesn't match the definition in arch/x86/time/kernel.c: arch/x86/kernel/time.c:29:42: warning: section does not match previous declaration [-Wsection] __visible volatile unsigned long jiffies __cacheline_aligned = INITIAL_JIFFIES; ^ ./include/linux/cache.h:49:4: note: expanded from macro '__cacheline_aligned' __section__(".data..cacheline_aligned"))) ^ ./include/linux/jiffies.h:81:31: note: previous attribute is here extern unsigned long volatile __cacheline_aligned_in_smp __jiffy_arch_data jiffies; ^ ./arch/x86/include/asm/cache.h:20:2: note: expanded from macro '__cacheline_aligned_in_smp' __page_aligned_data ^ ./include/linux/linkage.h:39:29: note: expanded from macro '__page_aligned_data' #define __page_aligned_data __section(.data..page_aligned) __aligned(PAGE_SIZE) ^ ./include/linux/compiler_attributes.h:233:56: note: expanded from macro '__section' #define __section(S) __attribute__((__section__(#S))) ^ 1 warning generated. The declaration was changed in commit 7c30f352c852 ("jiffies.h: declare jiffies and jiffies_64 with ____cacheline_aligned_in_smp") but wasn't updated here. Make them match so Clang no longer warns. Fixes: 7c30f352c852 ("jiffies.h: declare jiffies and jiffies_64 with ____cacheline_aligned_in_smp") Signed-off-by: Nathan Chancellor Signed-off-by: Thomas Gleixner Cc: Borislav Petkov Cc: "H. Peter Anvin" Cc: Nick Desaulniers Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181013005311.28617-1-natechancellor@gmail.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/time.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c index 49a5c394f3ed4..ab0176ae985b8 100644 --- a/arch/x86/kernel/time.c +++ b/arch/x86/kernel/time.c @@ -25,7 +25,7 @@ #include #ifdef CONFIG_X86_64 -__visible volatile unsigned long jiffies __cacheline_aligned = INITIAL_JIFFIES; +__visible volatile unsigned long jiffies __cacheline_aligned_in_smp = INITIAL_JIFFIES; #endif unsigned long profile_pc(struct pt_regs *regs) -- GitLab From ce4454ff2b2de585f059b011dc1b77205f76d607 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 16 Oct 2018 22:25:25 +0200 Subject: [PATCH 1254/2269] x86/fpu: Fix i486 + no387 boot crash by only saving FPU registers on context switch if there is an FPU commit 2224d616528194b02424c91c2ee254b3d29942c3 upstream. Booting an i486 with "no387 nofxsr" ends with with the following crash: math_emulate: 0060:c101987d Kernel panic - not syncing: Math emulation needed in kernel on the first context switch in user land. The reason is that copy_fpregs_to_fpstate() tries FNSAVE which does not work as the FPU is turned off. This bug was introduced in: f1c8cd0176078 ("x86/fpu: Change fpu->fpregs_active users to fpu->fpstate_active") Add a check for X86_FEATURE_FPU before trying to save FPU registers (we have such a check in switch_fpu_finish() already). Signed-off-by: Sebastian Andrzej Siewior Reviewed-by: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: stable@vger.kernel.org Fixes: f1c8cd0176078 ("x86/fpu: Change fpu->fpregs_active users to fpu->fpstate_active") Link: http://lkml.kernel.org/r/20181016202525.29437-4-bigeasy@linutronix.de Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/fpu/internal.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index a38bf5a1e37ad..69dcdf195b611 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -528,7 +528,7 @@ static inline void fpregs_activate(struct fpu *fpu) static inline void switch_fpu_prepare(struct fpu *old_fpu, int cpu) { - if (old_fpu->initialized) { + if (static_cpu_has(X86_FEATURE_FPU) && old_fpu->initialized) { if (!copy_fpregs_to_fpstate(old_fpu)) old_fpu->last_cpu = -1; else -- GitLab From 3a2b1d50bb294b7d5c31308ae4c04a87b6c82176 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 16 Jan 2018 10:33:05 +0100 Subject: [PATCH 1255/2269] net: fs_enet: do not call phy_stop() in interrupts [ Upstream commit f8b39039cbf2a15f2b8c9f081e1cbd5dee00aaf5 ] In case of TX timeout, fs_timeout() calls phy_stop(), which triggers the following BUG_ON() as we are in interrupt. [92708.199889] kernel BUG at drivers/net/phy/mdio_bus.c:482! [92708.204985] Oops: Exception in kernel mode, sig: 5 [#1] [92708.210119] PREEMPT [92708.212107] CMPC885 [92708.214216] CPU: 0 PID: 3 Comm: ksoftirqd/0 Tainted: G W 4.9.61 #39 [92708.223227] task: c60f0a40 task.stack: c6104000 [92708.227697] NIP: c02a84bc LR: c02a947c CTR: c02a93d8 [92708.232614] REGS: c6105c70 TRAP: 0700 Tainted: G W (4.9.61) [92708.241193] MSR: 00021032 [92708.244818] CR: 24000822 XER: 20000000 [92708.248767] GPR00: c02a947c c6105d20 c60f0a40 c62b4c00 00000005 0000001f c069aad8 0001a688 GPR08: 00000007 00000100 c02a93d8 00000000 000005fc 00000000 c6213240 c06338e4 GPR16: 00000001 c06330d4 c0633094 00000000 c0680000 c6104000 c6104000 00000000 GPR24: 00000200 00000000 ffffffff 00000004 00000078 00009032 00000000 c62b4c00 NIP [c02a84bc] mdiobus_read+0x20/0x74 [92708.281517] LR [c02a947c] kszphy_config_intr+0xa4/0xc4 [92708.286547] Call Trace: [92708.288980] [c6105d20] [c6104000] 0xc6104000 (unreliable) [92708.294339] [c6105d40] [c02a947c] kszphy_config_intr+0xa4/0xc4 [92708.300098] [c6105d50] [c02a5330] phy_stop+0x60/0x9c [92708.305007] [c6105d60] [c02c84d0] fs_timeout+0xdc/0x110 [92708.310197] [c6105d80] [c035cd48] dev_watchdog+0x268/0x2a0 [92708.315593] [c6105db0] [c0060288] call_timer_fn+0x34/0x17c [92708.321014] [c6105dd0] [c00605f0] run_timer_softirq+0x21c/0x2e4 [92708.326887] [c6105e50] [c001e19c] __do_softirq+0xf4/0x2f4 [92708.332207] [c6105eb0] [c001e3c8] run_ksoftirqd+0x2c/0x40 [92708.337560] [c6105ec0] [c003b420] smpboot_thread_fn+0x1f0/0x258 [92708.343405] [c6105ef0] [c003745c] kthread+0xbc/0xd0 [92708.348217] [c6105f40] [c000c400] ret_from_kernel_thread+0x5c/0x64 [92708.354275] Instruction dump: [92708.357207] 7c0803a6 bbc10018 38210020 4e800020 7c0802a6 9421ffe0 54290024 bfc10018 [92708.364865] 90010024 7c7f1b78 81290008 552902ee <0f090000> 3bc3002c 7fc3f378 90810008 [92708.372711] ---[ end trace 42b05441616fafd7 ]--- This patch moves fs_timeout() actions into an async worker. Fixes: commit 48257c4f168e5 ("Add fs_enet ethernet network driver, for several embedded platforms") Signed-off-by: Christophe Leroy Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- .../ethernet/freescale/fs_enet/fs_enet-main.c | 16 +++++++++++++--- drivers/net/ethernet/freescale/fs_enet/fs_enet.h | 1 + 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c index 753259091b227..28bd4cf61741b 100644 --- a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c +++ b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c @@ -613,9 +613,11 @@ static int fs_enet_start_xmit(struct sk_buff *skb, struct net_device *dev) return NETDEV_TX_OK; } -static void fs_timeout(struct net_device *dev) +static void fs_timeout_work(struct work_struct *work) { - struct fs_enet_private *fep = netdev_priv(dev); + struct fs_enet_private *fep = container_of(work, struct fs_enet_private, + timeout_work); + struct net_device *dev = fep->ndev; unsigned long flags; int wake = 0; @@ -627,7 +629,6 @@ static void fs_timeout(struct net_device *dev) phy_stop(dev->phydev); (*fep->ops->stop)(dev); (*fep->ops->restart)(dev); - phy_start(dev->phydev); } phy_start(dev->phydev); @@ -639,6 +640,13 @@ static void fs_timeout(struct net_device *dev) netif_wake_queue(dev); } +static void fs_timeout(struct net_device *dev) +{ + struct fs_enet_private *fep = netdev_priv(dev); + + schedule_work(&fep->timeout_work); +} + /*----------------------------------------------------------------------------- * generic link-change handler - should be sufficient for most cases *-----------------------------------------------------------------------------*/ @@ -759,6 +767,7 @@ static int fs_enet_close(struct net_device *dev) netif_stop_queue(dev); netif_carrier_off(dev); napi_disable(&fep->napi); + cancel_work_sync(&fep->timeout_work); phy_stop(dev->phydev); spin_lock_irqsave(&fep->lock, flags); @@ -1019,6 +1028,7 @@ static int fs_enet_probe(struct platform_device *ofdev) ndev->netdev_ops = &fs_enet_netdev_ops; ndev->watchdog_timeo = 2 * HZ; + INIT_WORK(&fep->timeout_work, fs_timeout_work); netif_napi_add(ndev, &fep->napi, fs_enet_napi, fpi->napi_weight); ndev->ethtool_ops = &fs_ethtool_ops; diff --git a/drivers/net/ethernet/freescale/fs_enet/fs_enet.h b/drivers/net/ethernet/freescale/fs_enet/fs_enet.h index 168e10ea487f6..837c802ca3024 100644 --- a/drivers/net/ethernet/freescale/fs_enet/fs_enet.h +++ b/drivers/net/ethernet/freescale/fs_enet/fs_enet.h @@ -125,6 +125,7 @@ struct fs_enet_private { spinlock_t lock; /* during all ops except TX pckt processing */ spinlock_t tx_lock; /* during fs_start_xmit and fs_tx */ struct fs_platform_info *fpi; + struct work_struct timeout_work; const struct fs_ops *ops; int rx_ring, tx_ring; dma_addr_t ring_mem_addr; -- GitLab From 0b047cbc44ae7d0cea41a99cd7ec1f009360a605 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 10 Nov 2018 07:48:36 -0800 Subject: [PATCH 1256/2269] Linux 4.14.80 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 57a007bf1181e..f4cad5e035617 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 79 +SUBLEVEL = 80 EXTRAVERSION = NAME = Petit Gorille -- GitLab From 97c308ca4091e113432090ceaf242b9985d8c7c4 Mon Sep 17 00:00:00 2001 From: Peter Kalauskas Date: Thu, 8 Nov 2018 11:15:41 -0800 Subject: [PATCH 1257/2269] ANDROID: zram: set comp_len to PAGE_SIZE when page is huge This bug was introduced when two patches were applied out of order. * zram: drop max_zpage_size and use zs_huge_class_size() * zram: mark incompressible page as ZRAM_HUGE Signed-off-by: Peter Kalauskas Bug: 119260394 Change-Id: I7b34670b1fcb749c23d3765cd1c1592df25ad741 --- drivers/block/zram/zram_drv.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 06f2623d01990..fc1c7845f1416 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -1102,6 +1102,7 @@ compress_again: } if (unlikely(comp_len >= huge_class_size)) { + comp_len = PAGE_SIZE; if (zram_wb_enabled(zram) && allow_wb) { zcomp_stream_put(zram->comp); ret = write_to_bdev(zram, bvec, index, bio, &element); -- GitLab From 31354285b2467a5838f131786181dd706b28688b Mon Sep 17 00:00:00 2001 From: Liu Xiang Date: Tue, 28 Aug 2018 22:32:57 +0800 Subject: [PATCH 1258/2269] mtd: spi-nor: fsl-quadspi: fix read error for flash size larger than 16MB commit 41fe242979e463d6ad251077ded01b825a330b7e upstream. If the size of spi-nor flash is larger than 16MB, the read_opcode is set to SPINOR_OP_READ_1_1_4_4B, and fsl_qspi_get_seqid() will return -EINVAL when cmd is SPINOR_OP_READ_1_1_4_4B. This can cause read operation fail. Fixes: e46ecda764dc ("mtd: spi-nor: Add Freescale QuadSPI driver") Cc: Signed-off-by: Liu Xiang Signed-off-by: Boris Brezillon Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/spi-nor/fsl-quadspi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/mtd/spi-nor/fsl-quadspi.c b/drivers/mtd/spi-nor/fsl-quadspi.c index f17d22435bfcf..62f5763482b3b 100644 --- a/drivers/mtd/spi-nor/fsl-quadspi.c +++ b/drivers/mtd/spi-nor/fsl-quadspi.c @@ -468,6 +468,7 @@ static int fsl_qspi_get_seqid(struct fsl_qspi *q, u8 cmd) { switch (cmd) { case SPINOR_OP_READ_1_1_4: + case SPINOR_OP_READ_1_1_4_4B: return SEQID_READ; case SPINOR_OP_WREN: return SEQID_WREN; -- GitLab From 615bda31cc01cfe937c35c8b969807a38614a0a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Thu, 11 Oct 2018 09:42:17 +0200 Subject: [PATCH 1259/2269] spi: bcm-qspi: switch back to reading flash using smaller chunks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 940ec770c295682993d1cccce3081fd7c74fece8 upstream. Fixing/optimizing bcm_qspi_bspi_read() performance introduced two changes: 1) It added a loop to read all requested data using multiple BSPI ops. 2) It bumped max size of a single BSPI block request from 256 to 512 B. The later change resulted in occasional BSPI timeouts causing a regression. For some unknown reason hardware doesn't always handle reads as expected when using 512 B chunks. In such cases it may happen that BSPI returns amount of requested bytes without the last 1-3 ones. It provides the remaining bytes later but doesn't raise an interrupt until another LR start. Switching back to 256 B reads fixes that problem and regression. Fixes: 345309fa7c0c ("spi: bcm-qspi: Fix bcm_qspi_bspi_read() performance") Signed-off-by: Rafał Miłecki Signed-off-by: Mark Brown Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi-bcm-qspi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-bcm-qspi.c b/drivers/spi/spi-bcm-qspi.c index 6573152ce8936..0316fae20cfec 100644 --- a/drivers/spi/spi-bcm-qspi.c +++ b/drivers/spi/spi-bcm-qspi.c @@ -88,7 +88,7 @@ #define BSPI_BPP_MODE_SELECT_MASK BIT(8) #define BSPI_BPP_ADDR_SELECT_MASK BIT(16) -#define BSPI_READ_LENGTH 512 +#define BSPI_READ_LENGTH 256 /* MSPI register offsets */ #define MSPI_SPCR0_LSB 0x000 -- GitLab From 82239bda9dac002ffb68c6019134c5245aeb4cae Mon Sep 17 00:00:00 2001 From: Tang Junhui Date: Mon, 8 Oct 2018 20:41:08 +0800 Subject: [PATCH 1260/2269] bcache: trace missed reading by cache_missed commit 502b291568fc7faf1ebdb2c2590f12851db0ff76 upstream. Missed reading IOs are identified by s->cache_missed, not the s->cache_miss, so in trace_bcache_read() using trace_bcache_read to identify whether the IO is missed or not. Signed-off-by: Tang Junhui Cc: stable@vger.kernel.org Signed-off-by: Coly Li Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/md/bcache/request.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index 5b63afff46d58..69b336d8c05a7 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -792,7 +792,7 @@ static void cached_dev_read_done_bh(struct closure *cl) bch_mark_cache_accounting(s->iop.c, s->d, !s->cache_missed, s->iop.bypass); - trace_bcache_read(s->orig_bio, !s->cache_miss, s->iop.bypass); + trace_bcache_read(s->orig_bio, !s->cache_missed, s->iop.bypass); if (s->iop.status) continue_at_nobarrier(cl, cached_dev_read_error, bcache_wq); -- GitLab From dcd048d6bb08d3da1e9cb1923bce80dbc0f90fed Mon Sep 17 00:00:00 2001 From: Tang Junhui Date: Mon, 8 Oct 2018 20:41:14 +0800 Subject: [PATCH 1261/2269] bcache: fix miss key refill->end in writeback commit 2d6cb6edd2c7fb4f40998895bda45006281b1ac5 upstream. refill->end record the last key of writeback, for example, at the first time, keys (1,128K) to (1,1024K) are flush to the backend device, but the end key (1,1024K) is not included, since the bellow code: if (bkey_cmp(k, refill->end) >= 0) { ret = MAP_DONE; goto out; } And in the next time when we refill writeback keybuf again, we searched key start from (1,1024K), and got a key bigger than it, so the key (1,1024K) missed. This patch modify the above code, and let the end key to be included to the writeback key buffer. Signed-off-by: Tang Junhui Cc: stable@vger.kernel.org Signed-off-by: Coly Li Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/md/bcache/btree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 89d088cf95d96..9406326216f17 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -2371,7 +2371,7 @@ static int refill_keybuf_fn(struct btree_op *op, struct btree *b, struct keybuf *buf = refill->buf; int ret = MAP_CONTINUE; - if (bkey_cmp(k, refill->end) >= 0) { + if (bkey_cmp(k, refill->end) > 0) { ret = MAP_DONE; goto out; } -- GitLab From 0be4ef1290cfe370caf52e3dac2325cf7ee12007 Mon Sep 17 00:00:00 2001 From: Dmitry Bazhenov Date: Mon, 15 Oct 2018 14:21:22 +0500 Subject: [PATCH 1262/2269] hwmon: (pmbus) Fix page count auto-detection. commit e7c6a55606b5c46b449d76588968b4d8caae903f upstream. Devices with compatible="pmbus" field have zero initial page count, and pmbus_clear_faults() being called before the page count auto- detection does not actually clear faults because it depends on the page count. Non-cleared faults in its turn may fail the subsequent page count auto-detection. This patch fixes this problem by calling pmbus_clear_fault_page() for currently set page and calling pmbus_clear_faults() after the page count was detected. Cc: stable@vger.kernel.org Signed-off-by: Dmitry Bazhenov Signed-off-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- drivers/hwmon/pmbus/pmbus.c | 2 ++ drivers/hwmon/pmbus/pmbus_core.c | 5 ++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/hwmon/pmbus/pmbus.c b/drivers/hwmon/pmbus/pmbus.c index 7718e58dbda54..7688dab32f6e6 100644 --- a/drivers/hwmon/pmbus/pmbus.c +++ b/drivers/hwmon/pmbus/pmbus.c @@ -118,6 +118,8 @@ static int pmbus_identify(struct i2c_client *client, } else { info->pages = 1; } + + pmbus_clear_faults(client); } if (pmbus_check_byte_register(client, 0, PMBUS_VOUT_MODE)) { diff --git a/drivers/hwmon/pmbus/pmbus_core.c b/drivers/hwmon/pmbus/pmbus_core.c index a139940cd991a..924f3ca41c65a 100644 --- a/drivers/hwmon/pmbus/pmbus_core.c +++ b/drivers/hwmon/pmbus/pmbus_core.c @@ -1802,7 +1802,10 @@ static int pmbus_init_common(struct i2c_client *client, struct pmbus_data *data, if (ret >= 0 && (ret & PB_CAPABILITY_ERROR_CHECK)) client->flags |= I2C_CLIENT_PEC; - pmbus_clear_faults(client); + if (data->info->pages) + pmbus_clear_faults(client); + else + pmbus_clear_fault_page(client, -1); if (info->identify) { ret = (*info->identify)(client, info); -- GitLab From 0035cb9d7366487205aba8cc0a2c03dd9dd75742 Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Sat, 6 Oct 2018 17:09:35 +0800 Subject: [PATCH 1263/2269] jffs2: free jffs2_sb_info through jffs2_kill_sb() commit 92e2921f7eee63450a5f953f4b15dc6210219430 upstream. When an invalid mount option is passed to jffs2, jffs2_parse_options() will fail and jffs2_sb_info will be freed, but then jffs2_sb_info will be used (use-after-free) and freeed (double-free) in jffs2_kill_sb(). Fix it by removing the buggy invocation of kfree() when getting invalid mount options. Fixes: 92abc475d8de ("jffs2: implement mount option parsing and compression overriding") Cc: stable@kernel.org Signed-off-by: Hou Tao Reviewed-by: Richard Weinberger Signed-off-by: Boris Brezillon Signed-off-by: Greg Kroah-Hartman --- fs/jffs2/super.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c index 33e01de576d26..bc00cc385b772 100644 --- a/fs/jffs2/super.c +++ b/fs/jffs2/super.c @@ -285,10 +285,8 @@ static int jffs2_fill_super(struct super_block *sb, void *data, int silent) sb->s_fs_info = c; ret = jffs2_parse_options(c, data); - if (ret) { - kfree(c); + if (ret) return -EINVAL; - } /* Initialize JFFS2 superblock locks, the further initialization will * be done later */ -- GitLab From f721267b1f28c13efab07bc80c28753e9fbec00e Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 15 Oct 2018 23:21:05 +0200 Subject: [PATCH 1264/2269] cpufreq: conservative: Take limits changes into account properly commit da5e79bc70b84971d2b3a55fb252e34e51d81d48 upstream. If the policy limits change between invocations of cs_dbs_update(), the requested frequency value stored in dbs_info may not be updated and the function may use a stale value of it next time. Moreover, if idle periods are takem into account by cs_dbs_update(), the requested frequency value stored in dbs_info may be below the min policy limit, which is incorrect. To fix these problems, always update the requested frequency value in dbs_info along with the local copy of it when the previous requested frequency is beyond the policy limits and avoid decreasing the requested frequency below the min policy limit when taking idle periods into account. Fixes: abb6627910a1 (cpufreq: conservative: Fix next frequency selection) Fixes: 00bfe05889e9 (cpufreq: conservative: Decrease frequency faster for deferred updates) Reported-by: Waldemar Rymarkiewicz Cc: All applicable Signed-off-by: Rafael J. Wysocki Acked-by: Waldemar Rymarkiewicz Acked-by: Viresh Kumar Signed-off-by: Greg Kroah-Hartman --- drivers/cpufreq/cpufreq_conservative.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index f20f20a77d4d3..4268f87e99fcf 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -80,8 +80,10 @@ static unsigned int cs_dbs_update(struct cpufreq_policy *policy) * changed in the meantime, so fall back to current frequency in that * case. */ - if (requested_freq > policy->max || requested_freq < policy->min) + if (requested_freq > policy->max || requested_freq < policy->min) { requested_freq = policy->cur; + dbs_info->requested_freq = requested_freq; + } freq_step = get_freq_step(cs_tuners, policy); @@ -92,7 +94,7 @@ static unsigned int cs_dbs_update(struct cpufreq_policy *policy) if (policy_dbs->idle_periods < UINT_MAX) { unsigned int freq_steps = policy_dbs->idle_periods * freq_step; - if (requested_freq > freq_steps) + if (requested_freq > policy->min + freq_steps) requested_freq -= freq_steps; else requested_freq = policy->min; -- GitLab From e3e48da87330397de4e50b9958e5694a6764e38a Mon Sep 17 00:00:00 2001 From: "Maciej S. Szmigiero" Date: Sun, 9 Sep 2018 01:21:06 +0200 Subject: [PATCH 1265/2269] pcmcia: Implement CLKRUN protocol disabling for Ricoh bridges commit 95691e3eddc41da2d1cd3cca51fecdfb46bd85bc upstream. Currently, "disable_clkrun" yenta_socket module parameter is only implemented for TI CardBus bridges. Add also an implementation for Ricoh bridges that have the necessary setting documented in publicly available datasheets. Tested on a RL5C476II with a Sunrich C-160 CardBus NIC that doesn't work correctly unless the CLKRUN protocol is disabled. Let's also make it clear in its description that the "disable_clkrun" module parameter only works on these two previously mentioned brands of CardBus bridges. Signed-off-by: Maciej S. Szmigiero Cc: stable@vger.kernel.org Signed-off-by: Dominik Brodowski Signed-off-by: Greg Kroah-Hartman --- drivers/pcmcia/ricoh.h | 35 +++++++++++++++++++++++++++++++++++ drivers/pcmcia/yenta_socket.c | 3 ++- 2 files changed, 37 insertions(+), 1 deletion(-) diff --git a/drivers/pcmcia/ricoh.h b/drivers/pcmcia/ricoh.h index 01098c841f877..8ac7b138c0948 100644 --- a/drivers/pcmcia/ricoh.h +++ b/drivers/pcmcia/ricoh.h @@ -119,6 +119,10 @@ #define RL5C4XX_MISC_CONTROL 0x2F /* 8 bit */ #define RL5C4XX_ZV_ENABLE 0x08 +/* Misc Control 3 Register */ +#define RL5C4XX_MISC3 0x00A2 /* 16 bit */ +#define RL5C47X_MISC3_CB_CLKRUN_DIS BIT(1) + #ifdef __YENTA_H #define rl_misc(socket) ((socket)->private[0]) @@ -156,6 +160,35 @@ static void ricoh_set_zv(struct yenta_socket *socket) } } +static void ricoh_set_clkrun(struct yenta_socket *socket, bool quiet) +{ + u16 misc3; + + /* + * RL5C475II likely has this setting, too, however no datasheet + * is publicly available for this chip + */ + if (socket->dev->device != PCI_DEVICE_ID_RICOH_RL5C476 && + socket->dev->device != PCI_DEVICE_ID_RICOH_RL5C478) + return; + + if (socket->dev->revision < 0x80) + return; + + misc3 = config_readw(socket, RL5C4XX_MISC3); + if (misc3 & RL5C47X_MISC3_CB_CLKRUN_DIS) { + if (!quiet) + dev_dbg(&socket->dev->dev, + "CLKRUN feature already disabled\n"); + } else if (disable_clkrun) { + if (!quiet) + dev_info(&socket->dev->dev, + "Disabling CLKRUN feature\n"); + misc3 |= RL5C47X_MISC3_CB_CLKRUN_DIS; + config_writew(socket, RL5C4XX_MISC3, misc3); + } +} + static void ricoh_save_state(struct yenta_socket *socket) { rl_misc(socket) = config_readw(socket, RL5C4XX_MISC); @@ -172,6 +205,7 @@ static void ricoh_restore_state(struct yenta_socket *socket) config_writew(socket, RL5C4XX_16BIT_IO_0, rl_io(socket)); config_writew(socket, RL5C4XX_16BIT_MEM_0, rl_mem(socket)); config_writew(socket, RL5C4XX_CONFIG, rl_config(socket)); + ricoh_set_clkrun(socket, true); } @@ -197,6 +231,7 @@ static int ricoh_override(struct yenta_socket *socket) config_writew(socket, RL5C4XX_CONFIG, config); ricoh_set_zv(socket); + ricoh_set_clkrun(socket, false); return 0; } diff --git a/drivers/pcmcia/yenta_socket.c b/drivers/pcmcia/yenta_socket.c index 5d6d9b1549bc4..5034422a1d969 100644 --- a/drivers/pcmcia/yenta_socket.c +++ b/drivers/pcmcia/yenta_socket.c @@ -26,7 +26,8 @@ static bool disable_clkrun; module_param(disable_clkrun, bool, 0444); -MODULE_PARM_DESC(disable_clkrun, "If PC card doesn't function properly, please try this option"); +MODULE_PARM_DESC(disable_clkrun, + "If PC card doesn't function properly, please try this option (TI and Ricoh bridges only)"); static bool isa_probe = 1; module_param(isa_probe, bool, 0444); -- GitLab From bbde82fdb6d083d6e4c2b33c65e8752f12ce03e1 Mon Sep 17 00:00:00 2001 From: Jan Glauber Date: Thu, 11 Oct 2018 12:13:01 +0200 Subject: [PATCH 1266/2269] ipmi: Fix timer race with module unload commit 0711e8c1b4572d076264e71b0002d223f2666ed7 upstream. Please note that below oops is from an older kernel, but the same race seems to be present in the upstream kernel too. ---8<--- The following panic was encountered during removing the ipmi_ssif module: [ 526.352555] Unable to handle kernel paging request at virtual address ffff000006923090 [ 526.360464] Mem abort info: [ 526.363257] ESR = 0x86000007 [ 526.366304] Exception class = IABT (current EL), IL = 32 bits [ 526.372221] SET = 0, FnV = 0 [ 526.375269] EA = 0, S1PTW = 0 [ 526.378405] swapper pgtable: 4k pages, 48-bit VAs, pgd = 000000008ae60416 [ 526.385185] [ffff000006923090] *pgd=000000bffcffe803, *pud=000000bffcffd803, *pmd=0000009f4731a003, *pte=0000000000000000 [ 526.396141] Internal error: Oops: 86000007 [#1] SMP [ 526.401008] Modules linked in: nls_iso8859_1 ipmi_devintf joydev input_leds ipmi_msghandler shpchp sch_fq_codel ib_iser rdma_cm iw_cm ib_cm ib_core iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi ip_tables x_tables autofs4 btrfs zstd_compress raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx xor raid6_pq libcrc32c raid1 raid0 multipath linear i2c_smbus hid_generic usbhid uas hid usb_storage ast aes_ce_blk i2c_algo_bit aes_ce_cipher qede ttm crc32_ce ptp crct10dif_ce drm_kms_helper ghash_ce syscopyarea sha2_ce sysfillrect sysimgblt pps_core fb_sys_fops sha256_arm64 sha1_ce mpt3sas qed drm raid_class ahci scsi_transport_sas libahci gpio_xlp i2c_xlp9xx aes_neon_bs aes_neon_blk crypto_simd cryptd aes_arm64 [last unloaded: ipmi_ssif] [ 526.468085] CPU: 125 PID: 0 Comm: swapper/125 Not tainted 4.15.0-35-generic #38~lp1775396+build.1 [ 526.476942] Hardware name: To be filled by O.E.M. Saber/Saber, BIOS 0ACKL022 08/14/2018 [ 526.484932] pstate: 00400009 (nzcv daif +PAN -UAO) [ 526.489713] pc : 0xffff000006923090 [ 526.493198] lr : call_timer_fn+0x34/0x178 [ 526.497194] sp : ffff000009b0bdd0 [ 526.500496] x29: ffff000009b0bdd0 x28: 0000000000000082 [ 526.505796] x27: 0000000000000002 x26: ffff000009515188 [ 526.511096] x25: ffff000009515180 x24: ffff0000090f1018 [ 526.516396] x23: ffff000009519660 x22: dead000000000200 [ 526.521696] x21: ffff000006923090 x20: 0000000000000100 [ 526.526995] x19: ffff809eeb466a40 x18: 0000000000000000 [ 526.532295] x17: 000000000000000e x16: 0000000000000007 [ 526.537594] x15: 0000000000000000 x14: 071c71c71c71c71c [ 526.542894] x13: 0000000000000000 x12: 0000000000000000 [ 526.548193] x11: 0000000000000001 x10: ffff000009b0be88 [ 526.553493] x9 : 0000000000000000 x8 : 0000000000000005 [ 526.558793] x7 : ffff80befc1f8528 x6 : 0000000000000020 [ 526.564092] x5 : 0000000000000040 x4 : 0000000020001b20 [ 526.569392] x3 : 0000000000000000 x2 : ffff809eeb466a40 [ 526.574692] x1 : ffff000006923090 x0 : ffff809eeb466a40 [ 526.579992] Process swapper/125 (pid: 0, stack limit = 0x000000002eb50acc) [ 526.586854] Call trace: [ 526.589289] 0xffff000006923090 [ 526.592419] expire_timers+0xc8/0x130 [ 526.596070] run_timer_softirq+0xec/0x1b0 [ 526.600070] __do_softirq+0x134/0x328 [ 526.603726] irq_exit+0xc8/0xe0 [ 526.606857] __handle_domain_irq+0x6c/0xc0 [ 526.610941] gic_handle_irq+0x84/0x188 [ 526.614679] el1_irq+0xe8/0x180 [ 526.617822] cpuidle_enter_state+0xa0/0x328 [ 526.621993] cpuidle_enter+0x34/0x48 [ 526.625564] call_cpuidle+0x44/0x70 [ 526.629040] do_idle+0x1b8/0x1f0 [ 526.632256] cpu_startup_entry+0x2c/0x30 [ 526.636174] secondary_start_kernel+0x11c/0x130 [ 526.640694] Code: bad PC value [ 526.643800] ---[ end trace d020b0b8417c2498 ]--- [ 526.648404] Kernel panic - not syncing: Fatal exception in interrupt [ 526.654778] SMP: stopping secondary CPUs [ 526.658734] Kernel Offset: disabled [ 526.662211] CPU features: 0x5800c38 [ 526.665688] Memory Limit: none [ 526.668768] ---[ end Kernel panic - not syncing: Fatal exception in interrupt Prevent mod_timer from arming a timer that was already removed by del_timer during module unload. Signed-off-by: Jan Glauber Cc: # 3.19 Signed-off-by: Corey Minyard Signed-off-by: Greg Kroah-Hartman --- drivers/char/ipmi/ipmi_ssif.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/char/ipmi/ipmi_ssif.c b/drivers/char/ipmi/ipmi_ssif.c index 932678617dfa7..0904ab442d31d 100644 --- a/drivers/char/ipmi/ipmi_ssif.c +++ b/drivers/char/ipmi/ipmi_ssif.c @@ -621,8 +621,9 @@ static void msg_done_handler(struct ssif_info *ssif_info, int result, flags = ipmi_ssif_lock_cond(ssif_info, &oflags); ssif_info->waiting_alert = true; ssif_info->rtc_us_timer = SSIF_MSG_USEC; - mod_timer(&ssif_info->retry_timer, - jiffies + SSIF_MSG_JIFFIES); + if (!ssif_info->stopping) + mod_timer(&ssif_info->retry_timer, + jiffies + SSIF_MSG_JIFFIES); ipmi_ssif_unlock_cond(ssif_info, flags); return; } @@ -954,8 +955,9 @@ static void msg_written_handler(struct ssif_info *ssif_info, int result, ssif_info->waiting_alert = true; ssif_info->retries_left = SSIF_RECV_RETRIES; ssif_info->rtc_us_timer = SSIF_MSG_PART_USEC; - mod_timer(&ssif_info->retry_timer, - jiffies + SSIF_MSG_PART_JIFFIES); + if (!ssif_info->stopping) + mod_timer(&ssif_info->retry_timer, + jiffies + SSIF_MSG_PART_JIFFIES); ipmi_ssif_unlock_cond(ssif_info, flags); } } -- GitLab From 472e300014bc0b3dde10b7ff224197c678a6d3d9 Mon Sep 17 00:00:00 2001 From: John David Anglin Date: Sat, 6 Oct 2018 13:11:30 -0400 Subject: [PATCH 1267/2269] parisc: Fix address in HPMC IVA commit 1138b6718ff74d2a934459643e3754423d23b5e2 upstream. Helge noticed that the address of the os_hpmc handler was not being correctly calculated in the hpmc macro. As a result, PDCE_CHECK would fail to call os_hpmc: e800009802e00000 0000000000000000 CC_ERR_CHECK_HPMC 37000f7302e00000 8040004000000000 CC_ERR_CPU_CHECK_SUMMARY f600105e02e00000 fffffff0f0c00000 CC_MC_HPMC_MONARCH_SELECTED 140003b202e00000 000000000000000b CC_ERR_HPMC_STATE_ENTRY 5600100b02e00000 00000000000001a0 CC_MC_OS_HPMC_LEN_ERR 5600106402e00000 fffffff0f0438e70 CC_MC_BR_TO_OS_HPMC_FAILED e800009802e00000 0000000000000000 CC_ERR_CHECK_HPMC 37000f7302e00000 8040004000000000 CC_ERR_CPU_CHECK_SUMMARY 4000109f02e00000 0000000000000000 CC_MC_HPMC_INITIATED 4000101902e00000 0000000000000000 CC_MC_MULTIPLE_HPMCS 030010d502e00000 0000000000000000 CC_CPU_STOP The address problem can be seen by dumping the fault vector: 0000000040159000 : 40159000: 63 6f 77 73 stb r15,-2447(dp) 40159004: 20 63 61 6e ldil L%b747000,r3 40159008: 20 66 6c 79 ldil L%-1c3b3000,r3 ... 40159020: 08 00 02 40 nop 40159024: 20 6e 60 02 ldil L%15d000,r3 40159028: 34 63 00 00 ldo 0(r3),r3 4015902c: e8 60 c0 02 bv,n r0(r3) 40159030: 08 00 02 40 nop 40159034: 00 00 00 00 break 0,0 40159038: c0 00 70 00 bb,*< r0,sar,40159840 4015903c: 00 00 00 00 break 0,0 Location 40159038 should contain the physical address of os_hpmc: 000000004015d000 : 4015d000: 08 1a 02 43 copy r26,r3 4015d004: 01 c0 08 a4 mfctl iva,r4 4015d008: 48 85 00 68 ldw 34(r4),r5 This patch moves the address setup into initialize_ivt to resolve the above problem. I tested the change by dumping the HPMC entry after setup: 0000000040209020: 8000240 0000000040209024: 206a2004 0000000040209028: 34630ac0 000000004020902c: e860c002 0000000040209030: 8000240 0000000040209034: 1bdddce6 0000000040209038: 15d000 000000004020903c: 1a0 Signed-off-by: John David Anglin Cc: Signed-off-by: Helge Deller Signed-off-by: Greg Kroah-Hartman --- arch/parisc/kernel/entry.S | 2 +- arch/parisc/kernel/traps.c | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S index 1b4732e201374..843825a7e6e2a 100644 --- a/arch/parisc/kernel/entry.S +++ b/arch/parisc/kernel/entry.S @@ -185,7 +185,7 @@ bv,n 0(%r3) nop .word 0 /* checksum (will be patched) */ - .word PA(os_hpmc) /* address of handler */ + .word 0 /* address of handler */ .word 0 /* length of handler */ .endm diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c index 8453724b8009f..9a898d68f4a0d 100644 --- a/arch/parisc/kernel/traps.c +++ b/arch/parisc/kernel/traps.c @@ -836,7 +836,8 @@ void __init initialize_ivt(const void *iva) if (pdc_instr(&instr) == PDC_OK) ivap[0] = instr; - /* Compute Checksum for HPMC handler */ + /* Setup IVA and compute checksum for HPMC handler */ + ivap[6] = (u32)__pa(os_hpmc); length = os_hpmc_size; ivap[7] = length; -- GitLab From e5475f5fc5f881d547a16cc7c1249e50a48a1a80 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Fri, 12 Oct 2018 22:37:46 +0200 Subject: [PATCH 1268/2269] parisc: Fix map_pages() to not overwrite existing pte entries commit 3c229b3f2dd8133f61bb81d3cb018be92f4bba39 upstream. Fix a long-existing small nasty bug in the map_pages() implementation which leads to overwriting already written pte entries with zero, *if* map_pages() is called a second time with an end address which isn't aligned on a pmd boundry. This happens for example if we want to remap only the text segment read/write in order to run alternative patching on the code. Exiting the loop when we reach the end address fixes this. Cc: stable@vger.kernel.org Signed-off-by: Helge Deller Signed-off-by: Greg Kroah-Hartman --- arch/parisc/mm/init.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c index 13f7854e0d49c..cc700f7dda544 100644 --- a/arch/parisc/mm/init.c +++ b/arch/parisc/mm/init.c @@ -495,12 +495,8 @@ static void __init map_pages(unsigned long start_vaddr, pte = pte_mkhuge(pte); } - if (address >= end_paddr) { - if (force) - break; - else - pte_val(pte) = 0; - } + if (address >= end_paddr) + break; set_pte(pg_table, pte); -- GitLab From d665f97f859a355618653c016c824e82ec29d288 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sun, 14 Oct 2018 21:58:00 +0200 Subject: [PATCH 1269/2269] parisc: Fix exported address of os_hpmc handler commit 99a3ae51d557d8e38a7aece65678a31f9db215ee upstream. In the C-code we need to put the physical address of the hpmc handler in the interrupt vector table (IVA) in order to get HPMCs working. Since on parisc64 function pointers are indirect (in fact they are function descriptors) we instead export the address as variable and not as function. This reverts a small part of commit f39cce654f9a ("parisc: Add cfi_startproc and cfi_endproc to assembly code"). Signed-off-by: Helge Deller Cc: [4.9+] Signed-off-by: Greg Kroah-Hartman --- arch/parisc/kernel/hpmc.S | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/parisc/kernel/hpmc.S b/arch/parisc/kernel/hpmc.S index 781c3b9a3e46a..fde6541155645 100644 --- a/arch/parisc/kernel/hpmc.S +++ b/arch/parisc/kernel/hpmc.S @@ -85,7 +85,7 @@ END(hpmc_pim_data) .import intr_save, code .align 16 -ENTRY_CFI(os_hpmc) +ENTRY(os_hpmc) .os_hpmc: /* @@ -302,7 +302,6 @@ os_hpmc_6: b . nop .align 16 /* make function length multiple of 16 bytes */ -ENDPROC_CFI(os_hpmc) .os_hpmc_end: -- GitLab From f80215f62eb7f82247b51cf1f098a4ee2643d8fe Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sun, 7 Oct 2018 09:44:17 +0200 Subject: [PATCH 1270/2269] ALSA: hda - Add quirk for ASUS G751 laptop MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 11ba6111160290ccd35562f4e05cec08942a6c4c upstream. ASUS G751 requires the extra COEF initialization to make it microphone working properly. Reported-and-tested-by: Håvard Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index fe5c741fcc6a2..201d4f55b6c29 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -7515,6 +7515,7 @@ enum { ALC662_FIXUP_ASUS_Nx50, ALC668_FIXUP_ASUS_Nx51_HEADSET_MODE, ALC668_FIXUP_ASUS_Nx51, + ALC668_FIXUP_ASUS_G751, ALC891_FIXUP_HEADSET_MODE, ALC891_FIXUP_DELL_MIC_NO_PRESENCE, ALC662_FIXUP_ACER_VERITON, @@ -7784,6 +7785,14 @@ static const struct hda_fixup alc662_fixups[] = { .chained = true, .chain_id = ALC668_FIXUP_ASUS_Nx51_HEADSET_MODE, }, + [ALC668_FIXUP_ASUS_G751] = { + .type = HDA_FIXUP_VERBS, + .v.verbs = (const struct hda_verb[]) { + { 0x20, AC_VERB_SET_COEF_INDEX, 0xc3 }, + { 0x20, AC_VERB_SET_PROC_COEF, 0x4000 }, + {} + }, + }, [ALC891_FIXUP_HEADSET_MODE] = { .type = HDA_FIXUP_FUNC, .v.func = alc_fixup_headset_mode, @@ -7857,6 +7866,7 @@ static const struct snd_pci_quirk alc662_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x11cd, "Asus N550", ALC662_FIXUP_ASUS_Nx50), SND_PCI_QUIRK(0x1043, 0x13df, "Asus N550JX", ALC662_FIXUP_BASS_1A), SND_PCI_QUIRK(0x1043, 0x129d, "Asus N750", ALC662_FIXUP_ASUS_Nx50), + SND_PCI_QUIRK(0x1043, 0x12ff, "ASUS G751", ALC668_FIXUP_ASUS_G751), SND_PCI_QUIRK(0x1043, 0x1477, "ASUS N56VZ", ALC662_FIXUP_BASS_MODE4_CHMAP), SND_PCI_QUIRK(0x1043, 0x15a7, "ASUS UX51VZH", ALC662_FIXUP_BASS_16), SND_PCI_QUIRK(0x1043, 0x177d, "ASUS N551", ALC668_FIXUP_ASUS_Nx51), -- GitLab From 2ad912e2b2c18784b52de195d403dc269aecfd8f Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 9 Oct 2018 14:20:17 +0200 Subject: [PATCH 1271/2269] ALSA: hda - Fix headphone pin config for ASUS G751 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 5b7c5e1f4c36b99d0f694f38b9ad910f520cb7ef upstream. BIOS on ASUS G751 doesn't seem to map the headphone pin (NID 0x16) correctly. Add a quirk to address it, as well as chaining to the previous fix for the microphone. Reported-by: Håvard Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 201d4f55b6c29..e625eee03c9d5 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -7515,6 +7515,7 @@ enum { ALC662_FIXUP_ASUS_Nx50, ALC668_FIXUP_ASUS_Nx51_HEADSET_MODE, ALC668_FIXUP_ASUS_Nx51, + ALC668_FIXUP_MIC_COEF, ALC668_FIXUP_ASUS_G751, ALC891_FIXUP_HEADSET_MODE, ALC891_FIXUP_DELL_MIC_NO_PRESENCE, @@ -7785,7 +7786,7 @@ static const struct hda_fixup alc662_fixups[] = { .chained = true, .chain_id = ALC668_FIXUP_ASUS_Nx51_HEADSET_MODE, }, - [ALC668_FIXUP_ASUS_G751] = { + [ALC668_FIXUP_MIC_COEF] = { .type = HDA_FIXUP_VERBS, .v.verbs = (const struct hda_verb[]) { { 0x20, AC_VERB_SET_COEF_INDEX, 0xc3 }, @@ -7793,6 +7794,15 @@ static const struct hda_fixup alc662_fixups[] = { {} }, }, + [ALC668_FIXUP_ASUS_G751] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x16, 0x0421101f }, /* HP */ + {} + }, + .chained = true, + .chain_id = ALC668_FIXUP_MIC_COEF + }, [ALC891_FIXUP_HEADSET_MODE] = { .type = HDA_FIXUP_FUNC, .v.func = alc_fixup_headset_mode, -- GitLab From 7b50b3d70994010c478f74b3ae15389c4b974272 Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Wed, 10 Oct 2018 11:57:25 +0800 Subject: [PATCH 1272/2269] ALSA: hda/realtek - Fix the problem of the front MIC on the Lenovo M715 commit d06fb562bff5d14defdacbd92449bacbaedd5cdf upstream. The front MIC on the Lenovo M715 can't record sound, after applying the ALC294_FIXUP_LENOVO_MIC_LOCATION, the problem is fixed. So add the pin configuration of this machine to the pin quirk table. Cc: Signed-off-by: Hui Wang Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index e625eee03c9d5..eb8807de3ebc0 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6629,6 +6629,12 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = { {0x1a, 0x02a11040}, {0x1b, 0x01014020}, {0x21, 0x0221101f}), + SND_HDA_PIN_QUIRK(0x10ec0235, 0x17aa, "Lenovo", ALC294_FIXUP_LENOVO_MIC_LOCATION, + {0x14, 0x90170110}, + {0x19, 0x02a11030}, + {0x1a, 0x02a11040}, + {0x1b, 0x01011020}, + {0x21, 0x0221101f}), SND_HDA_PIN_QUIRK(0x10ec0235, 0x17aa, "Lenovo", ALC294_FIXUP_LENOVO_MIC_LOCATION, {0x14, 0x90170110}, {0x19, 0x02a11020}, -- GitLab From e0404d81ab842b8d4246662c1c46bb20525dd743 Mon Sep 17 00:00:00 2001 From: Jeremy Cline Date: Thu, 11 Oct 2018 15:49:17 -0400 Subject: [PATCH 1273/2269] ALSA: hda - Add mic quirk for the Lenovo G50-30 (17aa:3905) commit e7bb6ad5685f05685dd8a6a5eda7bfcd14d5f95b upstream. The Lenovo G50-30, like other G50 models, has a Conexant codec that requires a quirk for its inverted stereo dmic. Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1249364 Reported-by: Alexander Ploumistos Tested-by: Alexander Ploumistos Cc: stable@vger.kernel.org Signed-off-by: Jeremy Cline Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_conexant.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c index 16197ad4512a4..0cc0ced1f2ed1 100644 --- a/sound/pci/hda/patch_conexant.c +++ b/sound/pci/hda/patch_conexant.c @@ -981,6 +981,7 @@ static const struct snd_pci_quirk cxt5066_fixups[] = { SND_PCI_QUIRK(0x17aa, 0x21da, "Lenovo X220", CXT_PINCFG_LENOVO_TP410), SND_PCI_QUIRK(0x17aa, 0x21db, "Lenovo X220-tablet", CXT_PINCFG_LENOVO_TP410), SND_PCI_QUIRK(0x17aa, 0x38af, "Lenovo IdeaPad Z560", CXT_FIXUP_MUTE_LED_EAPD), + SND_PCI_QUIRK(0x17aa, 0x3905, "Lenovo G50-30", CXT_FIXUP_STEREO_DMIC), SND_PCI_QUIRK(0x17aa, 0x390b, "Lenovo G50-80", CXT_FIXUP_STEREO_DMIC), SND_PCI_QUIRK(0x17aa, 0x3975, "Lenovo U300s", CXT_FIXUP_STEREO_DMIC), SND_PCI_QUIRK(0x17aa, 0x3977, "Lenovo IdeaPad U310", CXT_FIXUP_STEREO_DMIC), -- GitLab From a436f6b2db64cd9e853ee6b54fba0499d8f80f93 Mon Sep 17 00:00:00 2001 From: Alex Stanoev Date: Sun, 28 Oct 2018 16:55:12 +0000 Subject: [PATCH 1274/2269] ALSA: ca0106: Disable IZD on SB0570 DAC to fix audio pops commit ac237c28d5ac1b241d58b1b7b4b9fa10efb22fb5 upstream. The Creative Audigy SE (SB0570) card currently exhibits an audible pop whenever playback is stopped or resumed, or during silent periods of an audio stream. Initialise the IZD bit to the 0 to eliminate these pops. The Infinite Zero Detection (IZD) feature on the DAC causes the output to be shunted to Vcap after 2048 samples of silence. This discharges the AC coupling capacitor through the output and causes the aforementioned pop/click noise. The behaviour of the IZD bit is described on page 15 of the WM8768GEDS datasheet: "With IZD=1, applying MUTE for 1024 consecutive input samples will cause all outputs to be connected directly to VCAP. This also happens if 2048 consecutive zero input samples are applied to all 6 channels, and IZD=0. It will be removed as soon as any channel receives a non-zero input". I believe the second sentence might be referring to IZD=1 instead of IZD=0 given the observed behaviour of the card. This change should make the DAC initialisation consistent with Creative's Windows driver, as this popping persists when initialising the card in Linux and soft rebooting into Windows, but is not present on a cold boot to Windows. Signed-off-by: Alex Stanoev Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/ca0106/ca0106.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pci/ca0106/ca0106.h b/sound/pci/ca0106/ca0106.h index 04402c14cb239..9847b669cf3cf 100644 --- a/sound/pci/ca0106/ca0106.h +++ b/sound/pci/ca0106/ca0106.h @@ -582,7 +582,7 @@ #define SPI_PL_BIT_R_R (2<<7) /* right channel = right */ #define SPI_PL_BIT_R_C (3<<7) /* right channel = (L+R)/2 */ #define SPI_IZD_REG 2 -#define SPI_IZD_BIT (1<<4) /* infinite zero detect */ +#define SPI_IZD_BIT (0<<4) /* infinite zero detect */ #define SPI_FMT_REG 3 #define SPI_FMT_BIT_RJ (0<<0) /* right justified mode */ -- GitLab From 8a13906ae519b3ed95cd0fb73f1098b46362f6c4 Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Tue, 25 Sep 2018 14:38:55 +0200 Subject: [PATCH 1275/2269] x86/speculation: Enable cross-hyperthread spectre v2 STIBP mitigation commit 53c613fe6349994f023245519265999eed75957f upstream. STIBP is a feature provided by certain Intel ucodes / CPUs. This feature (once enabled) prevents cross-hyperthread control of decisions made by indirect branch predictors. Enable this feature if - the CPU is vulnerable to spectre v2 - the CPU supports SMT and has SMT siblings online - spectre_v2 mitigation autoselection is enabled (default) After some previous discussion, this leaves STIBP on all the time, as wrmsr on crossing kernel boundary is a no-no. This could perhaps later be a bit more optimized (like disabling it in NOHZ, experiment with disabling it in idle, etc) if needed. Note that the synchronization of the mask manipulation via newly added spec_ctrl_mutex is currently not strictly needed, as the only updater is already being serialized by cpu_add_remove_lock, but let's make this a little bit more future-proof. Signed-off-by: Jiri Kosina Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: "WoodhouseDavid" Cc: Andi Kleen Cc: Tim Chen Cc: "SchauflerCasey" Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/nycvar.YFH.7.76.1809251438240.15880@cbobk.fhfr.pm Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 57 ++++++++++++++++++++++++++++++++++---- kernel/cpu.c | 11 +++++++- 2 files changed, 61 insertions(+), 7 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 3e435f88621d2..90fcf16249eaf 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -34,12 +34,10 @@ static void __init spectre_v2_select_mitigation(void); static void __init ssb_select_mitigation(void); static void __init l1tf_select_mitigation(void); -/* - * Our boot-time value of the SPEC_CTRL MSR. We read it once so that any - * writes to SPEC_CTRL contain whatever reserved bits have been set. - */ -u64 __ro_after_init x86_spec_ctrl_base; +/* The base value of the SPEC_CTRL MSR that always has to be preserved. */ +u64 x86_spec_ctrl_base; EXPORT_SYMBOL_GPL(x86_spec_ctrl_base); +static DEFINE_MUTEX(spec_ctrl_mutex); /* * The vendor and possibly platform specific bits which can be modified in @@ -322,6 +320,46 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) return cmd; } +static bool stibp_needed(void) +{ + if (spectre_v2_enabled == SPECTRE_V2_NONE) + return false; + + if (!boot_cpu_has(X86_FEATURE_STIBP)) + return false; + + return true; +} + +static void update_stibp_msr(void *info) +{ + wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); +} + +void arch_smt_update(void) +{ + u64 mask; + + if (!stibp_needed()) + return; + + mutex_lock(&spec_ctrl_mutex); + mask = x86_spec_ctrl_base; + if (cpu_smt_control == CPU_SMT_ENABLED) + mask |= SPEC_CTRL_STIBP; + else + mask &= ~SPEC_CTRL_STIBP; + + if (mask != x86_spec_ctrl_base) { + pr_info("Spectre v2 cross-process SMT mitigation: %s STIBP\n", + cpu_smt_control == CPU_SMT_ENABLED ? + "Enabling" : "Disabling"); + x86_spec_ctrl_base = mask; + on_each_cpu(update_stibp_msr, NULL, 1); + } + mutex_unlock(&spec_ctrl_mutex); +} + static void __init spectre_v2_select_mitigation(void) { enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline(); @@ -406,6 +444,9 @@ retpoline_auto: setup_force_cpu_cap(X86_FEATURE_USE_IBRS_FW); pr_info("Enabling Restricted Speculation for firmware calls\n"); } + + /* Enable STIBP if appropriate */ + arch_smt_update(); } #undef pr_fmt @@ -798,6 +839,8 @@ static ssize_t l1tf_show_state(char *buf) static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr, char *buf, unsigned int bug) { + int ret; + if (!boot_cpu_has_bug(bug)) return sprintf(buf, "Not affected\n"); @@ -812,10 +855,12 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr return sprintf(buf, "Mitigation: __user pointer sanitization\n"); case X86_BUG_SPECTRE_V2: - return sprintf(buf, "%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], + ret = sprintf(buf, "%s%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "", boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "", + (x86_spec_ctrl_base & SPEC_CTRL_STIBP) ? ", STIBP" : "", spectre_v2_module_string()); + return ret; case X86_BUG_SPEC_STORE_BYPASS: return sprintf(buf, "%s\n", ssb_strings[ssb_mode]); diff --git a/kernel/cpu.c b/kernel/cpu.c index f3f389e333430..90cf6a04e08a2 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -2045,6 +2045,12 @@ static void cpuhp_online_cpu_device(unsigned int cpu) kobject_uevent(&dev->kobj, KOBJ_ONLINE); } +/* + * Architectures that need SMT-specific errata handling during SMT hotplug + * should override this. + */ +void __weak arch_smt_update(void) { }; + static int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval) { int cpu, ret = 0; @@ -2071,8 +2077,10 @@ static int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval) */ cpuhp_offline_cpu_device(cpu); } - if (!ret) + if (!ret) { cpu_smt_control = ctrlval; + arch_smt_update(); + } cpu_maps_update_done(); return ret; } @@ -2083,6 +2091,7 @@ static int cpuhp_smt_enable(void) cpu_maps_update_begin(); cpu_smt_control = CPU_SMT_ENABLED; + arch_smt_update(); for_each_present_cpu(cpu) { /* Skip online CPUs and CPUs on offline nodes */ if (cpu_online(cpu) || !node_online(cpu_to_node(cpu))) -- GitLab From ed2df6687223bd1d7f82b19cbd52d79c4263ff0f Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Wed, 10 Oct 2018 08:14:54 +0200 Subject: [PATCH 1276/2269] x86/xen: Fix boot loader version reported for PVH guests commit 357d291ce035d1b757568058f3c9898c60d125b1 upstream. The boot loader version reported via sysfs is wrong in case of the kernel being booted via the Xen PVH boot entry. it should be 2.12 (0x020c), but it is reported to be 2.18 (0x0212). As the current way to set the version is error prone use the more readable variant (2 << 8) | 12. Signed-off-by: Juergen Gross Cc: # 4.12 Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: boris.ostrovsky@oracle.com Cc: bp@alien8.de Cc: corbet@lwn.net Cc: linux-doc@vger.kernel.org Cc: xen-devel@lists.xenproject.org Link: http://lkml.kernel.org/r/20181010061456.22238-2-jgross@suse.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/xen/enlighten_pvh.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/xen/enlighten_pvh.c b/arch/x86/xen/enlighten_pvh.c index 7bd3ee08393e6..d6d7b29b3be0e 100644 --- a/arch/x86/xen/enlighten_pvh.c +++ b/arch/x86/xen/enlighten_pvh.c @@ -76,7 +76,7 @@ static void __init init_pvh_bootparams(void) * Version 2.12 supports Xen entry point but we will use default x86/PC * environment (i.e. hardware_subarch 0). */ - pvh_bootparams.hdr.version = 0x212; + pvh_bootparams.hdr.version = (2 << 8) | 12; pvh_bootparams.hdr.type_of_loader = (9 << 4) | 0; /* Xen loader */ } -- GitLab From f2e58b0414523cac77c52ab1f7b4f470e6e69ada Mon Sep 17 00:00:00 2001 From: He Zhe Date: Tue, 14 Aug 2018 23:33:42 +0800 Subject: [PATCH 1277/2269] x86/corruption-check: Fix panic in memory_corruption_check() when boot option without value is provided commit ccde460b9ae5c2bd5e4742af0a7f623c2daad566 upstream. memory_corruption_check[{_period|_size}]()'s handlers do not check input argument before passing it to kstrtoul() or simple_strtoull(). The argument would be a NULL pointer if each of the kernel parameters, without its value, is set in command line and thus cause the following panic. PANIC: early exception 0xe3 IP 10:ffffffff73587c22 error 0 cr2 0x0 [ 0.000000] CPU: 0 PID: 0 Comm: swapper Not tainted 4.18-rc8+ #2 [ 0.000000] RIP: 0010:kstrtoull+0x2/0x10 ... [ 0.000000] Call Trace [ 0.000000] ? set_corruption_check+0x21/0x49 [ 0.000000] ? do_early_param+0x4d/0x82 [ 0.000000] ? parse_args+0x212/0x330 [ 0.000000] ? rdinit_setup+0x26/0x26 [ 0.000000] ? parse_early_options+0x20/0x23 [ 0.000000] ? rdinit_setup+0x26/0x26 [ 0.000000] ? parse_early_param+0x2d/0x39 [ 0.000000] ? setup_arch+0x2f7/0xbf4 [ 0.000000] ? start_kernel+0x5e/0x4c2 [ 0.000000] ? load_ucode_bsp+0x113/0x12f [ 0.000000] ? secondary_startup_64+0xa5/0xb0 This patch adds checks to prevent the panic. Signed-off-by: He Zhe Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: gregkh@linuxfoundation.org Cc: kstewart@linuxfoundation.org Cc: pombredanne@nexb.com Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/1534260823-87917-1-git-send-email-zhe.he@windriver.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/check.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/arch/x86/kernel/check.c b/arch/x86/kernel/check.c index 33399426793e0..cc8258a5378b0 100644 --- a/arch/x86/kernel/check.c +++ b/arch/x86/kernel/check.c @@ -31,6 +31,11 @@ static __init int set_corruption_check(char *arg) ssize_t ret; unsigned long val; + if (!arg) { + pr_err("memory_corruption_check config string not provided\n"); + return -EINVAL; + } + ret = kstrtoul(arg, 10, &val); if (ret) return ret; @@ -45,6 +50,11 @@ static __init int set_corruption_check_period(char *arg) ssize_t ret; unsigned long val; + if (!arg) { + pr_err("memory_corruption_check_period config string not provided\n"); + return -EINVAL; + } + ret = kstrtoul(arg, 10, &val); if (ret) return ret; @@ -59,6 +69,11 @@ static __init int set_corruption_check_size(char *arg) char *end; unsigned size; + if (!arg) { + pr_err("memory_corruption_check_size config string not provided\n"); + return -EINVAL; + } + size = memparse(arg, &end); if (*end == '\0') -- GitLab From b80a2dd4d7a1e51ed34c91b4e03e425dc0c21223 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Wed, 17 Oct 2018 12:34:32 +0200 Subject: [PATCH 1278/2269] x86/mm/pat: Disable preemption around __flush_tlb_all() commit f77084d96355f5fba8e2c1fb3a51a393b1570de7 upstream. The WARN_ON_ONCE(__read_cr3() != build_cr3()) in switch_mm_irqs_off() triggers every once in a while during a snapshotted system upgrade. The warning triggers since commit decab0888e6e ("x86/mm: Remove preempt_disable/enable() from __native_flush_tlb()"). The callchain is: get_page_from_freelist() -> post_alloc_hook() -> __kernel_map_pages() with CONFIG_DEBUG_PAGEALLOC enabled. Disable preemption during CR3 reset / __flush_tlb_all() and add a comment why preemption has to be disabled so it won't be removed accidentaly. Add another preemptible() check in __flush_tlb_all() to catch callers with enabled preemption when PGE is enabled, because PGE enabled does not trigger the warning in __native_flush_tlb(). Suggested by Andy Lutomirski. Fixes: decab0888e6e ("x86/mm: Remove preempt_disable/enable() from __native_flush_tlb()") Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Dave Hansen Cc: Peter Zijlstra Cc: Borislav Petkov Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181017103432.zgv46nlu3hc7k4rq@linutronix.de Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/tlbflush.h | 6 ++++++ arch/x86/mm/pageattr.c | 6 +++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 5f00ecb9d2515..2501be609b821 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -466,6 +466,12 @@ static inline void __native_flush_tlb_one_user(unsigned long addr) */ static inline void __flush_tlb_all(void) { + /* + * This is to catch users with enabled preemption and the PGE feature + * and don't trigger the warning in __native_flush_tlb(). + */ + VM_WARN_ON_ONCE(preemptible()); + if (boot_cpu_has(X86_FEATURE_PGE)) { __flush_tlb_global(); } else { diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 464f53da3a6f5..835620ab435fd 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -2037,9 +2037,13 @@ void __kernel_map_pages(struct page *page, int numpages, int enable) /* * We should perform an IPI and flush all tlbs, - * but that can deadlock->flush only current cpu: + * but that can deadlock->flush only current cpu. + * Preemption needs to be disabled around __flush_tlb_all() due to + * CR3 reload in __native_flush_tlb(). */ + preempt_disable(); __flush_tlb_all(); + preempt_enable(); arch_flush_lazy_mmu_mode(); } -- GitLab From a7051265f1e648d627e684ff903650439b89882c Mon Sep 17 00:00:00 2001 From: Sai Praneeth Date: Wed, 1 Aug 2018 11:42:25 -0700 Subject: [PATCH 1279/2269] x86/speculation: Support Enhanced IBRS on future CPUs commit 706d51681d636a0c4a5ef53395ec3b803e45ed4d upstream. Future Intel processors will support "Enhanced IBRS" which is an "always on" mode i.e. IBRS bit in SPEC_CTRL MSR is enabled once and never disabled. From the specification [1]: "With enhanced IBRS, the predicted targets of indirect branches executed cannot be controlled by software that was executed in a less privileged predictor mode or on another logical processor. As a result, software operating on a processor with enhanced IBRS need not use WRMSR to set IA32_SPEC_CTRL.IBRS after every transition to a more privileged predictor mode. Software can isolate predictor modes effectively simply by setting the bit once. Software need not disable enhanced IBRS prior to entering a sleep state such as MWAIT or HLT." If Enhanced IBRS is supported by the processor then use it as the preferred spectre v2 mitigation mechanism instead of Retpoline. Intel's Retpoline white paper [2] states: "Retpoline is known to be an effective branch target injection (Spectre variant 2) mitigation on Intel processors belonging to family 6 (enumerated by the CPUID instruction) that do not have support for enhanced IBRS. On processors that support enhanced IBRS, it should be used for mitigation instead of retpoline." The reason why Enhanced IBRS is the recommended mitigation on processors which support it is that these processors also support CET which provides a defense against ROP attacks. Retpoline is very similar to ROP techniques and might trigger false positives in the CET defense. If Enhanced IBRS is selected as the mitigation technique for spectre v2, the IBRS bit in SPEC_CTRL MSR is set once at boot time and never cleared. Kernel also has to make sure that IBRS bit remains set after VMEXIT because the guest might have cleared the bit. This is already covered by the existing x86_spec_ctrl_set_guest() and x86_spec_ctrl_restore_host() speculation control functions. Enhanced IBRS still requires IBPB for full mitigation. [1] Speculative-Execution-Side-Channel-Mitigations.pdf [2] Retpoline-A-Branch-Target-Injection-Mitigation.pdf Both documents are available at: https://bugzilla.kernel.org/show_bug.cgi?id=199511 Originally-by: David Woodhouse Signed-off-by: Sai Praneeth Prakhya Signed-off-by: Thomas Gleixner Cc: Tim C Chen Cc: Dave Hansen Cc: Ravi Shankar Link: https://lkml.kernel.org/r/1533148945-24095-1-git-send-email-sai.praneeth.prakhya@intel.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/include/asm/nospec-branch.h | 1 + arch/x86/kernel/cpu/bugs.c | 20 ++++++++++++++++++-- arch/x86/kernel/cpu/common.c | 3 +++ 4 files changed, 23 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 8418462298e71..673d6e988196b 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -220,6 +220,7 @@ #define X86_FEATURE_STIBP ( 7*32+27) /* Single Thread Indirect Branch Predictors */ #define X86_FEATURE_ZEN ( 7*32+28) /* "" CPU is AMD family 0x17 (Zen) */ #define X86_FEATURE_L1TF_PTEINV ( 7*32+29) /* "" L1TF workaround PTE inversion */ +#define X86_FEATURE_IBRS_ENHANCED ( 7*32+30) /* Enhanced IBRS */ /* Virtualization flags: Linux defined, word 8 */ #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 8b38df98548e8..1b4132161c1fe 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -215,6 +215,7 @@ enum spectre_v2_mitigation { SPECTRE_V2_RETPOLINE_GENERIC, SPECTRE_V2_RETPOLINE_AMD, SPECTRE_V2_IBRS, + SPECTRE_V2_IBRS_ENHANCED, }; /* The Speculative Store Bypass disable variants */ diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 90fcf16249eaf..aa6e7f75bccc9 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -138,6 +138,7 @@ static const char *spectre_v2_strings[] = { [SPECTRE_V2_RETPOLINE_MINIMAL_AMD] = "Vulnerable: Minimal AMD ASM retpoline", [SPECTRE_V2_RETPOLINE_GENERIC] = "Mitigation: Full generic retpoline", [SPECTRE_V2_RETPOLINE_AMD] = "Mitigation: Full AMD retpoline", + [SPECTRE_V2_IBRS_ENHANCED] = "Mitigation: Enhanced IBRS", }; #undef pr_fmt @@ -379,6 +380,13 @@ static void __init spectre_v2_select_mitigation(void) case SPECTRE_V2_CMD_FORCE: case SPECTRE_V2_CMD_AUTO: + if (boot_cpu_has(X86_FEATURE_IBRS_ENHANCED)) { + mode = SPECTRE_V2_IBRS_ENHANCED; + /* Force it so VMEXIT will restore correctly */ + x86_spec_ctrl_base |= SPEC_CTRL_IBRS; + wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); + goto specv2_set_mode; + } if (IS_ENABLED(CONFIG_RETPOLINE)) goto retpoline_auto; break; @@ -416,6 +424,7 @@ retpoline_auto: setup_force_cpu_cap(X86_FEATURE_RETPOLINE); } +specv2_set_mode: spectre_v2_enabled = mode; pr_info("%s\n", spectre_v2_strings[mode]); @@ -438,9 +447,16 @@ retpoline_auto: /* * Retpoline means the kernel is safe because it has no indirect - * branches. But firmware isn't, so use IBRS to protect that. + * branches. Enhanced IBRS protects firmware too, so, enable restricted + * speculation around firmware calls only when Enhanced IBRS isn't + * supported. + * + * Use "mode" to check Enhanced IBRS instead of boot_cpu_has(), because + * the user might select retpoline on the kernel command line and if + * the CPU supports Enhanced IBRS, kernel might un-intentionally not + * enable IBRS around firmware calls. */ - if (boot_cpu_has(X86_FEATURE_IBRS)) { + if (boot_cpu_has(X86_FEATURE_IBRS) && mode != SPECTRE_V2_IBRS_ENHANCED) { setup_force_cpu_cap(X86_FEATURE_USE_IBRS_FW); pr_info("Enabling Restricted Speculation for firmware calls\n"); } diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 7d2a7890a823e..96643e2c75b8b 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -967,6 +967,9 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) setup_force_cpu_bug(X86_BUG_SPECTRE_V1); setup_force_cpu_bug(X86_BUG_SPECTRE_V2); + if (ia32_cap & ARCH_CAP_IBRS_ALL) + setup_force_cpu_cap(X86_FEATURE_IBRS_ENHANCED); + if (x86_match_cpu(cpu_no_meltdown)) return; -- GitLab From 43c7313ec7cbe3a31643a3d2b3c3e2d20312dc8b Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Wed, 5 Sep 2018 12:02:15 +0200 Subject: [PATCH 1280/2269] ARM: dts: exynos: Disable pull control for MAX8997 interrupts on Origen commit f5e758b8358f6c27e8a351ddf0b441a64cdabb94 upstream. PMIC_IRQB and PMIC_KEYINB lines on Exynos4210-based Origen board have external pull-up resistors, so disable any pull control for those lines in respective pin controller node. This fixes support for MAX8997 interrupts and enables operation of wakeup from MAX8997 RTC alarm. Signed-off-by: Marek Szyprowski Fixes: 17419726aaa1 ("ARM: dts: add max8997 device node for exynos4210-origen board") Cc: Signed-off-by: Krzysztof Kozlowski Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/exynos4210-origen.dts | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/arm/boot/dts/exynos4210-origen.dts b/arch/arm/boot/dts/exynos4210-origen.dts index 084fcc5574ef9..e4876186d5cde 100644 --- a/arch/arm/boot/dts/exynos4210-origen.dts +++ b/arch/arm/boot/dts/exynos4210-origen.dts @@ -152,6 +152,8 @@ reg = <0x66>; interrupt-parent = <&gpx0>; interrupts = <4 IRQ_TYPE_NONE>, <3 IRQ_TYPE_NONE>; + pinctrl-names = "default"; + pinctrl-0 = <&max8997_irq>; max8997,pmic-buck1-dvs-voltage = <1350000>; max8997,pmic-buck2-dvs-voltage = <1100000>; @@ -289,6 +291,13 @@ }; }; +&pinctrl_1 { + max8997_irq: max8997-irq { + samsung,pins = "gpx0-3", "gpx0-4"; + samsung,pin-pud = ; + }; +}; + &sdhci_0 { bus-width = <4>; pinctrl-0 = <&sd0_clk &sd0_cmd &sd0_bus4 &sd0_cd>; -- GitLab From 9c9cd35cb3e63f71af7c95a918c1586b21f5c3ee Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 5 Oct 2018 19:38:46 -0700 Subject: [PATCH 1281/2269] bpf: do not blindly change rlimit in reuseport net selftest [ Upstream commit 262f9d811c7608f1e74258ceecfe1fa213bdf912 ] If the current process has unlimited RLIMIT_MEMLOCK, we should should leave it as is. Fixes: 941ff6f11c02 ("bpf: fix rlimit in reuseport net selftest") Signed-off-by: John Sperbeck Signed-off-by: Eric Dumazet Acked-by: Daniel Borkmann Signed-off-by: Daniel Borkmann Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/net/reuseport_bpf.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/net/reuseport_bpf.c b/tools/testing/selftests/net/reuseport_bpf.c index cad14cd0ea922..b5277106df1fd 100644 --- a/tools/testing/selftests/net/reuseport_bpf.c +++ b/tools/testing/selftests/net/reuseport_bpf.c @@ -437,14 +437,19 @@ void enable_fastopen(void) } } -static struct rlimit rlim_old, rlim_new; +static struct rlimit rlim_old; static __attribute__((constructor)) void main_ctor(void) { getrlimit(RLIMIT_MEMLOCK, &rlim_old); - rlim_new.rlim_cur = rlim_old.rlim_cur + (1UL << 20); - rlim_new.rlim_max = rlim_old.rlim_max + (1UL << 20); - setrlimit(RLIMIT_MEMLOCK, &rlim_new); + + if (rlim_old.rlim_cur != RLIM_INFINITY) { + struct rlimit rlim_new; + + rlim_new.rlim_cur = rlim_old.rlim_cur + (1UL << 20); + rlim_new.rlim_max = rlim_old.rlim_max + (1UL << 20); + setrlimit(RLIMIT_MEMLOCK, &rlim_new); + } } static __attribute__((destructor)) void main_dtor(void) -- GitLab From 0c7cd9fe35497975c9590accd84273a4f95fb1e6 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 3 Oct 2018 09:20:46 +0200 Subject: [PATCH 1282/2269] Revert "perf tools: Fix PMU term format max value calculation" [ Upstream commit 1b9caa10b31dda0866f4028e4bfb923fb6e4072f ] This reverts commit ac0e2cd555373ae6f8f3a3ad3fbbf5b6d1e7aaaa. Michael reported an issue with oversized terms values assignment and I noticed there was actually a misunderstanding of the max value check in the past. The above commit's changelog says: If bit 21 is set, there is parsing issues as below. $ perf stat -a -e uncore_qpi_0/event=0x200002,umask=0x8/ event syntax error: '..pi_0/event=0x200002,umask=0x8/' \___ value too big for format, maximum is 511 But there's no issue there, because the event value is distributed along the value defined by the format. Even if the format defines separated bit, the value is treated as a continual number, which should follow the format definition. In above case it's 9-bit value with last bit separated: $ cat uncore_qpi_0/format/event config:0-7,21 Hence the value 0x200002 is correctly reported as format violation, because it exceeds 9 bits. It should have been 0x102 instead, which sets the 9th bit - the bit 21 of the format. $ perf stat -vv -a -e uncore_qpi_0/event=0x102,umask=0x8/ Using CPUID GenuineIntel-6-2D ... ------------------------------------------------------------ perf_event_attr: type 10 size 112 config 0x200802 sample_type IDENTIFIER ... Reported-by: Michael Petlan Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Kan Liang Cc: Namhyung Kim Cc: Peter Zijlstra Fixes: ac0e2cd55537 ("perf tools: Fix PMU term format max value calculation") Link: http://lkml.kernel.org/r/20181003072046.29276-1-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/pmu.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index d87d458996b7a..dceef4725d33e 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -754,13 +754,14 @@ static void pmu_format_value(unsigned long *format, __u64 value, __u64 *v, static __u64 pmu_format_max_value(const unsigned long *format) { - __u64 w = 0; - int fbit; - - for_each_set_bit(fbit, format, PERF_PMU_FORMAT_BITS) - w |= (1ULL << fbit); + int w; - return w; + w = bitmap_weight(format, PERF_PMU_FORMAT_BITS); + if (!w) + return 0; + if (w < 64) + return (1ULL << w) - 1; + return -1; } /* -- GitLab From 11896963b7b5977616b2010e89d532559950de73 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 10 Oct 2018 18:02:21 +0200 Subject: [PATCH 1283/2269] xfrm: policy: use hlist rcu variants on insert [ Upstream commit 9dffff200fd178f11dd50eb1fd8ccd0650c9284e ] bydst table/list lookups use rcu, so insertions must use rcu versions. Fixes: a7c44247f704e ("xfrm: policy: make xfrm_policy_lookup_bytype lockless") Signed-off-by: Florian Westphal Signed-off-by: Steffen Klassert Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/xfrm/xfrm_policy.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 37c32e73aaef3..70ec57b887f63 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -626,9 +626,9 @@ static void xfrm_hash_rebuild(struct work_struct *work) break; } if (newpos) - hlist_add_behind(&policy->bydst, newpos); + hlist_add_behind_rcu(&policy->bydst, newpos); else - hlist_add_head(&policy->bydst, chain); + hlist_add_head_rcu(&policy->bydst, chain); } spin_unlock_bh(&net->xfrm.xfrm_policy_lock); @@ -767,9 +767,9 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) break; } if (newpos) - hlist_add_behind(&policy->bydst, newpos); + hlist_add_behind_rcu(&policy->bydst, newpos); else - hlist_add_head(&policy->bydst, chain); + hlist_add_head_rcu(&policy->bydst, chain); __xfrm_policy_link(policy, dir); /* After previous checking, family can either be AF_INET or AF_INET6 */ -- GitLab From 9eedfdf17234f4cf800cc1814054c65ebd9712bb Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 10 Oct 2018 10:03:39 +0200 Subject: [PATCH 1284/2269] perf vendor events intel: Fix wrong filter_band* values for uncore events [ Upstream commit 94aafb74cee0002e2f2eb6dc5376f54d5951ab4d ] Michael reported that he could not stat following event: $ perf stat -e unc_p_freq_ge_1200mhz_cycles -a -- ls event syntax error: '..e_1200mhz_cycles' \___ value too big for format, maximum is 255 Run 'perf list' for a list of valid events The event is unwrapped into: uncore_pcu/event=0xb,filter_band0=1200/ where filter_band0 format says it's one byte only: # cat uncore_pcu/format/filter_band0 config1:0-7 while JSON files specifies bigger number: "Filter": "filter_band0=1200", all the filter_band* formats show 1 byte width: # cat uncore_pcu/format/filter_band1 config1:8-15 # cat uncore_pcu/format/filter_band2 config1:16-23 # cat uncore_pcu/format/filter_band3 config1:24-31 The reason of the issue is that filter_band* values are supposed to be in 100Mhz units.. it's stated in the JSON help for the events, like: filter_band3=XXX, with XXX in 100Mhz units This patch divides the filter_band* values by 100, plus there's couple of changes that actually change the number completely, like: - "Filter": "edge=1,filter_band2=4000", + "Filter": "edge=1,filter_band2=30", Reported-by: Michael Petlan Signed-off-by: Jiri Olsa Acked-by: Andi Kleen Cc: Alexander Shishkin Cc: Kan Liang Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20181010080339.GB15790@krava Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- .../arch/x86/ivytown/uncore-power.json | 16 ++++++++-------- .../arch/x86/jaketown/uncore-power.json | 16 ++++++++-------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/tools/perf/pmu-events/arch/x86/ivytown/uncore-power.json b/tools/perf/pmu-events/arch/x86/ivytown/uncore-power.json index d40498f2cb1e9..635c09fda1d94 100644 --- a/tools/perf/pmu-events/arch/x86/ivytown/uncore-power.json +++ b/tools/perf/pmu-events/arch/x86/ivytown/uncore-power.json @@ -188,7 +188,7 @@ "Counter": "0,1,2,3", "EventCode": "0xb", "EventName": "UNC_P_FREQ_GE_1200MHZ_CYCLES", - "Filter": "filter_band0=1200", + "Filter": "filter_band0=12", "MetricExpr": "(UNC_P_FREQ_GE_1200MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", "MetricName": "freq_ge_1200mhz_cycles %", "PerPkg": "1", @@ -199,7 +199,7 @@ "Counter": "0,1,2,3", "EventCode": "0xc", "EventName": "UNC_P_FREQ_GE_2000MHZ_CYCLES", - "Filter": "filter_band1=2000", + "Filter": "filter_band1=20", "MetricExpr": "(UNC_P_FREQ_GE_2000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", "MetricName": "freq_ge_2000mhz_cycles %", "PerPkg": "1", @@ -210,7 +210,7 @@ "Counter": "0,1,2,3", "EventCode": "0xd", "EventName": "UNC_P_FREQ_GE_3000MHZ_CYCLES", - "Filter": "filter_band2=3000", + "Filter": "filter_band2=30", "MetricExpr": "(UNC_P_FREQ_GE_3000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", "MetricName": "freq_ge_3000mhz_cycles %", "PerPkg": "1", @@ -221,7 +221,7 @@ "Counter": "0,1,2,3", "EventCode": "0xe", "EventName": "UNC_P_FREQ_GE_4000MHZ_CYCLES", - "Filter": "filter_band3=4000", + "Filter": "filter_band3=40", "MetricExpr": "(UNC_P_FREQ_GE_4000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", "MetricName": "freq_ge_4000mhz_cycles %", "PerPkg": "1", @@ -232,7 +232,7 @@ "Counter": "0,1,2,3", "EventCode": "0xb", "EventName": "UNC_P_FREQ_GE_1200MHZ_TRANSITIONS", - "Filter": "edge=1,filter_band0=1200", + "Filter": "edge=1,filter_band0=12", "MetricExpr": "(UNC_P_FREQ_GE_1200MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", "MetricName": "freq_ge_1200mhz_cycles %", "PerPkg": "1", @@ -243,7 +243,7 @@ "Counter": "0,1,2,3", "EventCode": "0xc", "EventName": "UNC_P_FREQ_GE_2000MHZ_TRANSITIONS", - "Filter": "edge=1,filter_band1=2000", + "Filter": "edge=1,filter_band1=20", "MetricExpr": "(UNC_P_FREQ_GE_2000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", "MetricName": "freq_ge_2000mhz_cycles %", "PerPkg": "1", @@ -254,7 +254,7 @@ "Counter": "0,1,2,3", "EventCode": "0xd", "EventName": "UNC_P_FREQ_GE_3000MHZ_TRANSITIONS", - "Filter": "edge=1,filter_band2=4000", + "Filter": "edge=1,filter_band2=30", "MetricExpr": "(UNC_P_FREQ_GE_3000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", "MetricName": "freq_ge_3000mhz_cycles %", "PerPkg": "1", @@ -265,7 +265,7 @@ "Counter": "0,1,2,3", "EventCode": "0xe", "EventName": "UNC_P_FREQ_GE_4000MHZ_TRANSITIONS", - "Filter": "edge=1,filter_band3=4000", + "Filter": "edge=1,filter_band3=40", "MetricExpr": "(UNC_P_FREQ_GE_4000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", "MetricName": "freq_ge_4000mhz_cycles %", "PerPkg": "1", diff --git a/tools/perf/pmu-events/arch/x86/jaketown/uncore-power.json b/tools/perf/pmu-events/arch/x86/jaketown/uncore-power.json index 16034bfd06dd9..8755693d86c6f 100644 --- a/tools/perf/pmu-events/arch/x86/jaketown/uncore-power.json +++ b/tools/perf/pmu-events/arch/x86/jaketown/uncore-power.json @@ -187,7 +187,7 @@ "Counter": "0,1,2,3", "EventCode": "0xb", "EventName": "UNC_P_FREQ_GE_1200MHZ_CYCLES", - "Filter": "filter_band0=1200", + "Filter": "filter_band0=12", "MetricExpr": "(UNC_P_FREQ_GE_1200MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", "MetricName": "freq_ge_1200mhz_cycles %", "PerPkg": "1", @@ -198,7 +198,7 @@ "Counter": "0,1,2,3", "EventCode": "0xc", "EventName": "UNC_P_FREQ_GE_2000MHZ_CYCLES", - "Filter": "filter_band1=2000", + "Filter": "filter_band1=20", "MetricExpr": "(UNC_P_FREQ_GE_2000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", "MetricName": "freq_ge_2000mhz_cycles %", "PerPkg": "1", @@ -209,7 +209,7 @@ "Counter": "0,1,2,3", "EventCode": "0xd", "EventName": "UNC_P_FREQ_GE_3000MHZ_CYCLES", - "Filter": "filter_band2=3000", + "Filter": "filter_band2=30", "MetricExpr": "(UNC_P_FREQ_GE_3000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", "MetricName": "freq_ge_3000mhz_cycles %", "PerPkg": "1", @@ -220,7 +220,7 @@ "Counter": "0,1,2,3", "EventCode": "0xe", "EventName": "UNC_P_FREQ_GE_4000MHZ_CYCLES", - "Filter": "filter_band3=4000", + "Filter": "filter_band3=40", "MetricExpr": "(UNC_P_FREQ_GE_4000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", "MetricName": "freq_ge_4000mhz_cycles %", "PerPkg": "1", @@ -231,7 +231,7 @@ "Counter": "0,1,2,3", "EventCode": "0xb", "EventName": "UNC_P_FREQ_GE_1200MHZ_TRANSITIONS", - "Filter": "edge=1,filter_band0=1200", + "Filter": "edge=1,filter_band0=12", "MetricExpr": "(UNC_P_FREQ_GE_1200MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", "MetricName": "freq_ge_1200mhz_cycles %", "PerPkg": "1", @@ -242,7 +242,7 @@ "Counter": "0,1,2,3", "EventCode": "0xc", "EventName": "UNC_P_FREQ_GE_2000MHZ_TRANSITIONS", - "Filter": "edge=1,filter_band1=2000", + "Filter": "edge=1,filter_band1=20", "MetricExpr": "(UNC_P_FREQ_GE_2000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", "MetricName": "freq_ge_2000mhz_cycles %", "PerPkg": "1", @@ -253,7 +253,7 @@ "Counter": "0,1,2,3", "EventCode": "0xd", "EventName": "UNC_P_FREQ_GE_3000MHZ_TRANSITIONS", - "Filter": "edge=1,filter_band2=4000", + "Filter": "edge=1,filter_band2=30", "MetricExpr": "(UNC_P_FREQ_GE_3000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", "MetricName": "freq_ge_3000mhz_cycles %", "PerPkg": "1", @@ -264,7 +264,7 @@ "Counter": "0,1,2,3", "EventCode": "0xe", "EventName": "UNC_P_FREQ_GE_4000MHZ_TRANSITIONS", - "Filter": "edge=1,filter_band3=4000", + "Filter": "edge=1,filter_band3=40", "MetricExpr": "(UNC_P_FREQ_GE_4000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", "MetricName": "freq_ge_4000mhz_cycles %", "PerPkg": "1", -- GitLab From 9d388b0a09f5e9861a50ab71cafbdac45a2a5d25 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 12 Oct 2018 10:31:58 -0700 Subject: [PATCH 1285/2269] sparc: Fix single-pcr perf event counter management. [ Upstream commit cfdc3170d214046b9509183fe9b9544dc644d40b ] It is important to clear the hw->state value for non-stopped events when they are added into the PMU. Otherwise when the event is scheduled out, we won't read the counter because HES_UPTODATE is still set. This breaks 'perf stat' and similar use cases, causing all the events to show zero. This worked for multi-pcr because we make explicit sparc_pmu_start() calls in calculate_multiple_pcrs(). calculate_single_pcr() doesn't do this because the idea there is to accumulate all of the counter settings into the single pcr value. So we have to add explicit hw->state handling there. Like x86, we use the PERF_HES_ARCH bit to track truly stopped events so that we don't accidently start them on a reload. Related to all of this, sparc_pmu_start() is missing a userpage update so add it. Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/sparc/kernel/perf_event.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index 5c1f547583128..8a569b09d2c2c 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -927,6 +927,8 @@ static void read_in_all_counters(struct cpu_hw_events *cpuc) sparc_perf_event_update(cp, &cp->hw, cpuc->current_idx[i]); cpuc->current_idx[i] = PIC_NO_INDEX; + if (cp->hw.state & PERF_HES_STOPPED) + cp->hw.state |= PERF_HES_ARCH; } } } @@ -959,10 +961,12 @@ static void calculate_single_pcr(struct cpu_hw_events *cpuc) enc = perf_event_get_enc(cpuc->events[i]); cpuc->pcr[0] &= ~mask_for_index(idx); - if (hwc->state & PERF_HES_STOPPED) + if (hwc->state & PERF_HES_ARCH) { cpuc->pcr[0] |= nop_for_index(idx); - else + } else { cpuc->pcr[0] |= event_encoding(enc, idx); + hwc->state = 0; + } } out: cpuc->pcr[0] |= cpuc->event[0]->hw.config_base; @@ -988,6 +992,9 @@ static void calculate_multiple_pcrs(struct cpu_hw_events *cpuc) cpuc->current_idx[i] = idx; + if (cp->hw.state & PERF_HES_ARCH) + continue; + sparc_pmu_start(cp, PERF_EF_RELOAD); } out: @@ -1079,6 +1086,8 @@ static void sparc_pmu_start(struct perf_event *event, int flags) event->hw.state = 0; sparc_pmu_enable_event(cpuc, &event->hw, idx); + + perf_event_update_userpage(event); } static void sparc_pmu_stop(struct perf_event *event, int flags) @@ -1371,9 +1380,9 @@ static int sparc_pmu_add(struct perf_event *event, int ef_flags) cpuc->events[n0] = event->hw.event_base; cpuc->current_idx[n0] = PIC_NO_INDEX; - event->hw.state = PERF_HES_UPTODATE; + event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED; if (!(ef_flags & PERF_EF_START)) - event->hw.state |= PERF_HES_STOPPED; + event->hw.state |= PERF_HES_ARCH; /* * If group events scheduling transaction was started, -- GitLab From c37b554f7d71fb92fa58a4c0a8476b54a24ca4e2 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 12 Oct 2018 10:33:20 -0700 Subject: [PATCH 1286/2269] sparc: Throttle perf events properly. [ Upstream commit 455adb3174d2c8518cef1a61140c211f6ac224d2 ] Like x86 and arm, call perf_sample_event_took() in perf event NMI interrupt handler. Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/sparc/kernel/perf_event.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index 8a569b09d2c2c..eceb0215bdeea 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -1612,6 +1613,8 @@ static int __kprobes perf_event_nmi_handler(struct notifier_block *self, struct perf_sample_data data; struct cpu_hw_events *cpuc; struct pt_regs *regs; + u64 finish_clock; + u64 start_clock; int i; if (!atomic_read(&active_events)) @@ -1625,6 +1628,8 @@ static int __kprobes perf_event_nmi_handler(struct notifier_block *self, return NOTIFY_DONE; } + start_clock = sched_clock(); + regs = args->regs; cpuc = this_cpu_ptr(&cpu_hw_events); @@ -1663,6 +1668,10 @@ static int __kprobes perf_event_nmi_handler(struct notifier_block *self, sparc_pmu_stop(event, 0); } + finish_clock = sched_clock(); + + perf_sample_event_took(finish_clock - start_clock); + return NOTIFY_STOP; } -- GitLab From f26f0ff7dd8afb9057e8cef81344a2e71bf3e4e0 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 14 Oct 2018 20:19:31 -0700 Subject: [PATCH 1287/2269] sparc64: Make proc_id signed. [ Upstream commit b3e1eb8e7ac9aaa283989496651d99267c4cad6c ] So that when it is unset, ie. '-1', userspace can see it properly. Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/sparc/include/asm/cpudata_64.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/sparc/include/asm/cpudata_64.h b/arch/sparc/include/asm/cpudata_64.h index 666d6b5c04404..9c3fc03abe9ae 100644 --- a/arch/sparc/include/asm/cpudata_64.h +++ b/arch/sparc/include/asm/cpudata_64.h @@ -28,7 +28,7 @@ typedef struct { unsigned short sock_id; /* physical package */ unsigned short core_id; unsigned short max_cache_id; /* groupings of highest shared cache */ - unsigned short proc_id; /* strand (aka HW thread) id */ + signed short proc_id; /* strand (aka HW thread) id */ } cpuinfo_sparc; DECLARE_PER_CPU(cpuinfo_sparc, __cpu_data); -- GitLab From e5a02efefbd20efca39becab8221cd509a854efa Mon Sep 17 00:00:00 2001 From: Song Muchun Date: Sun, 14 Oct 2018 19:26:12 +0800 Subject: [PATCH 1288/2269] sched/fair: Fix the min_vruntime update logic in dequeue_entity() [ Upstream commit 9845c49cc9bbb317a0bc9e9cf78d8e09d54c9af0 ] The comment and the code around the update_min_vruntime() call in dequeue_entity() are not in agreement. >From commit: b60205c7c558 ("sched/fair: Fix min_vruntime tracking") I think that we want to update min_vruntime when a task is sleeping/migrating. So, the check is inverted there - fix it. Signed-off-by: Song Muchun Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: b60205c7c558 ("sched/fair: Fix min_vruntime tracking") Link: http://lkml.kernel.org/r/20181014112612.2614-1-smuchun@gmail.com Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- kernel/sched/fair.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 19bfa21f7197f..2d4d79420e36e 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -3825,7 +3825,7 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) * put back on, and if we advance min_vruntime, we'll be placed back * further than we started -- ie. we'll be penalized. */ - if ((flags & (DEQUEUE_SAVE | DEQUEUE_MOVE)) == DEQUEUE_SAVE) + if ((flags & (DEQUEUE_SAVE | DEQUEUE_MOVE)) != DEQUEUE_SAVE) update_min_vruntime(cfs_rq); } -- GitLab From faf96991f88265dd55bf3207109dc5106e570d0c Mon Sep 17 00:00:00 2001 From: Jarod Wilson Date: Thu, 6 Sep 2018 18:18:12 -0400 Subject: [PATCH 1289/2269] perf tools: Fix use of alternatives to find JDIR [ Upstream commit 36b8d4628d3cc8f5a748e508cce8673bc00fc63c ] When a build is run from something like a cron job, the user's $PATH is rather minimal, of note, not including /usr/sbin in my own case. Because of that, an automated rpm package build ultimately fails to find libperf-jvmti.so, because somewhere within the build, this happens... /bin/sh: alternatives: command not found /bin/sh: alternatives: command not found Makefile.config:849: No openjdk development package found, please install JDK package, e.g. openjdk-8-jdk, java-1.8.0-openjdk-devel ...and while the build continues, libperf-jvmti.so isn't built, and things fall down when rpm tries to find all the %files specified. Exact same system builds everything just fine when the job is launched from a login shell instead of a cron job, since alternatives is in $PATH, so openjdk is actually found. The test required to get into this section of code actually specifies the full path, as does a block just above it, so let's do that here too. Signed-off-by: Jarod Wilson Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Cc: William Cohen Fixes: d4dfdf00d43e ("perf jvmti: Plug compilation into perf build") Link: http://lkml.kernel.org/r/20180906221812.11167-1-jarod@redhat.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/perf/Makefile.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 63f534a0902f2..f362ee46506ad 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -795,7 +795,7 @@ ifndef NO_JVMTI JDIR=$(shell /usr/sbin/update-java-alternatives -l | head -1 | awk '{print $$3}') else ifneq (,$(wildcard /usr/sbin/alternatives)) - JDIR=$(shell alternatives --display java | tail -1 | cut -d' ' -f 5 | sed 's%/jre/bin/java.%%g') + JDIR=$(shell /usr/sbin/alternatives --display java | tail -1 | cut -d' ' -f 5 | sed 's%/jre/bin/java.%%g') endif endif ifndef JDIR -- GitLab From 1309de40f2e0e47cc05c7b4e7a3637c5e71a89af Mon Sep 17 00:00:00 2001 From: David Miller Date: Thu, 11 Oct 2018 22:46:55 -0700 Subject: [PATCH 1290/2269] perf cpu_map: Align cpu map synthesized events properly. [ Upstream commit 0ed149cf5239cc6e7e65bf00f769e8f1e91076c0 ] The size of the resulting cpu map can be smaller than a multiple of sizeof(u64), resulting in SIGBUS on cpus like Sparc as the next event will not be aligned properly. Signed-off-by: David S. Miller Cc: Jiri Olsa Cc: Kan Liang Fixes: 6c872901af07 ("perf cpu_map: Add cpu_map event synthesize function") Link: http://lkml.kernel.org/r/20181011.224655.716771175766946817.davem@davemloft.net Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/event.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index fc690fecbfd66..a19e840db54a0 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -951,6 +951,7 @@ void *cpu_map_data__alloc(struct cpu_map *map, size_t *size, u16 *type, int *max } *size += sizeof(struct cpu_map_data); + *size = PERF_ALIGN(*size, sizeof(u64)); return zalloc(*size); } -- GitLab From bd79c781f6bf8697b6579b428e311432cb2e70fe Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 16 Oct 2018 22:25:24 +0200 Subject: [PATCH 1291/2269] x86/fpu: Remove second definition of fpu in __fpu__restore_sig() [ Upstream commit 6aa676761d4c1acfa31320e55fa1f83f3fcbbc7a ] Commit: c5bedc6847c3b ("x86/fpu: Get rid of PF_USED_MATH usage, convert it to fpu->fpstate_active") introduced the 'fpu' variable at top of __restore_xstate_sig(), which now shadows the other definition: arch/x86/kernel/fpu/signal.c:318:28: warning: symbol 'fpu' shadows an earlier one arch/x86/kernel/fpu/signal.c:271:20: originally declared here Remove the shadowed definition of 'fpu', as the two definitions are the same. Signed-off-by: Sebastian Andrzej Siewior Reviewed-by: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: c5bedc6847c3b ("x86/fpu: Get rid of PF_USED_MATH usage, convert it to fpu->fpstate_active") Link: http://lkml.kernel.org/r/20181016202525.29437-3-bigeasy@linutronix.de Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/fpu/signal.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index 23f1691670b66..61a949d84dfa5 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -314,7 +314,6 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) * thread's fpu state, reconstruct fxstate from the fsave * header. Validate and sanitize the copied state. */ - struct fpu *fpu = &tsk->thread.fpu; struct user_i387_ia32_struct env; int err = 0; -- GitLab From 60b7367caab51871b41ed03bbdffde93011e3606 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Fri, 12 Oct 2018 19:14:58 -0700 Subject: [PATCH 1292/2269] net: qla3xxx: Remove overflowing shift statement [ Upstream commit 8c3bf9b62b667456a57aefcf1689e826df146159 ] Clang currently warns: drivers/net/ethernet/qlogic/qla3xxx.c:384:24: warning: signed shift result (0xF00000000) requires 37 bits to represent, but 'int' only has 32 bits [-Wshift-overflow] ((ISP_NVRAM_MASK << 16) | qdev->eeprom_cmd_data)); ~~~~~~~~~~~~~~ ^ ~~ 1 warning generated. The warning is certainly accurate since ISP_NVRAM_MASK is defined as (0x000F << 16) which is then shifted by 16, resulting in 64424509440, well above UINT_MAX. Given that this is the only location in this driver where ISP_NVRAM_MASK is shifted again, it seems likely that ISP_NVRAM_MASK was originally defined without a shift and during the move of the shift to the definition, this statement wasn't properly removed (since ISP_NVRAM_MASK is used in the statenent right above this). Only the maintainers can confirm this since this statment has been here since the driver was first added to the kernel. Link: https://github.com/ClangBuiltLinux/linux/issues/127 Signed-off-by: Nathan Chancellor Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/qlogic/qla3xxx.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qla3xxx.c b/drivers/net/ethernet/qlogic/qla3xxx.c index 2991179c2fd0f..080d00520362f 100644 --- a/drivers/net/ethernet/qlogic/qla3xxx.c +++ b/drivers/net/ethernet/qlogic/qla3xxx.c @@ -380,8 +380,6 @@ static void fm93c56a_select(struct ql3_adapter *qdev) qdev->eeprom_cmd_data = AUBURN_EEPROM_CS_1; ql_write_nvram_reg(qdev, spir, ISP_NVRAM_MASK | qdev->eeprom_cmd_data); - ql_write_nvram_reg(qdev, spir, - ((ISP_NVRAM_MASK << 16) | qdev->eeprom_cmd_data)); } /* -- GitLab From 5832fa5bd0324847ef5a256f36dd1e80caa2b024 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 18 Oct 2018 22:13:02 +0900 Subject: [PATCH 1293/2269] selftests: ftrace: Add synthetic event syntax testcase [ Upstream commit ba0e41ca81b935b958006c7120466e2217357827 ] Add a testcase to check the syntax and field types for synthetic_events interface. Link: http://lkml.kernel.org/r/153986838264.18251.16627517536956299922.stgit@devbox Acked-by: Shuah Khan Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- .../trigger-synthetic-event-syntax.tc | 80 +++++++++++++++++++ 1 file changed, 80 insertions(+) create mode 100644 tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-syntax.tc diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-syntax.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-syntax.tc new file mode 100644 index 0000000000000..88e6c3f430066 --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-syntax.tc @@ -0,0 +1,80 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# description: event trigger - test synthetic_events syntax parser + +do_reset() { + reset_trigger + echo > set_event + clear_trace +} + +fail() { #msg + do_reset + echo $1 + exit_fail +} + +if [ ! -f set_event ]; then + echo "event tracing is not supported" + exit_unsupported +fi + +if [ ! -f synthetic_events ]; then + echo "synthetic event is not supported" + exit_unsupported +fi + +reset_tracer +do_reset + +echo "Test synthetic_events syntax parser" + +echo > synthetic_events + +# synthetic event must have a field +! echo "myevent" >> synthetic_events +echo "myevent u64 var1" >> synthetic_events + +# synthetic event must be found in synthetic_events +grep "myevent[[:space:]]u64 var1" synthetic_events + +# it is not possible to add same name event +! echo "myevent u64 var2" >> synthetic_events + +# Non-append open will cleanup all events and add new one +echo "myevent u64 var2" > synthetic_events + +# multiple fields with different spaces +echo "myevent u64 var1; u64 var2;" > synthetic_events +grep "myevent[[:space:]]u64 var1; u64 var2" synthetic_events +echo "myevent u64 var1 ; u64 var2 ;" > synthetic_events +grep "myevent[[:space:]]u64 var1; u64 var2" synthetic_events +echo "myevent u64 var1 ;u64 var2" > synthetic_events +grep "myevent[[:space:]]u64 var1; u64 var2" synthetic_events + +# test field types +echo "myevent u32 var" > synthetic_events +echo "myevent u16 var" > synthetic_events +echo "myevent u8 var" > synthetic_events +echo "myevent s64 var" > synthetic_events +echo "myevent s32 var" > synthetic_events +echo "myevent s16 var" > synthetic_events +echo "myevent s8 var" > synthetic_events + +echo "myevent char var" > synthetic_events +echo "myevent int var" > synthetic_events +echo "myevent long var" > synthetic_events +echo "myevent pid_t var" > synthetic_events + +echo "myevent unsigned char var" > synthetic_events +echo "myevent unsigned int var" > synthetic_events +echo "myevent unsigned long var" > synthetic_events +grep "myevent[[:space:]]unsigned long var" synthetic_events + +# test string type +echo "myevent char var[10]" > synthetic_events +grep "myevent[[:space:]]char\[10\] var" synthetic_events + +do_reset + +exit 0 -- GitLab From fd83130bdbe48a0d4e6e650f37073d80b80d7adc Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Fri, 19 Oct 2018 21:15:26 +0200 Subject: [PATCH 1294/2269] i2c: rcar: cleanup DMA for all kinds of failure [ Upstream commit 31d86033a0749a0463ea654130b2de5c163154f1 ] DMA needs to be cleaned up not only on timeout, but on all errors where it has been setup before. Fixes: 73e8b0528346 ("i2c: rcar: add DMA support") Signed-off-by: Wolfram Sang Signed-off-by: Wolfram Sang Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/busses/i2c-rcar.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-rcar.c b/drivers/i2c/busses/i2c-rcar.c index 7f044df1ea070..3415733a93645 100644 --- a/drivers/i2c/busses/i2c-rcar.c +++ b/drivers/i2c/busses/i2c-rcar.c @@ -761,8 +761,12 @@ static int rcar_i2c_master_xfer(struct i2c_adapter *adap, time_left = wait_event_timeout(priv->wait, priv->flags & ID_DONE, num * adap->timeout); - if (!time_left) { + + /* cleanup DMA if it couldn't complete properly due to an error */ + if (priv->dma_direction != DMA_NONE) rcar_i2c_cleanup_dma(priv); + + if (!time_left) { rcar_i2c_init(priv); ret = -ETIMEDOUT; } else if (priv->flags & ID_NACK) { -- GitLab From 5cf2ab06e8d0d7c9ca15f51eeed67103a973725f Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Thu, 18 Oct 2018 21:45:17 -0400 Subject: [PATCH 1295/2269] locking/lockdep: Fix debug_locks off performance problem [ Upstream commit 9506a7425b094d2f1d9c877ed5a78f416669269b ] It was found that when debug_locks was turned off because of a problem found by the lockdep code, the system performance could drop quite significantly when the lock_stat code was also configured into the kernel. For instance, parallel kernel build time on a 4-socket x86-64 server nearly doubled. Further analysis into the cause of the slowdown traced back to the frequent call to debug_locks_off() from the __lock_acquired() function probably due to some inconsistent lockdep states with debug_locks off. The debug_locks_off() function did an unconditional atomic xchg to write a 0 value into debug_locks which had already been set to 0. This led to severe cacheline contention in the cacheline that held debug_locks. As debug_locks is being referenced in quite a few different places in the kernel, this greatly slow down the system performance. To prevent that trashing of debug_locks cacheline, lock_acquired() and lock_contended() now checks the state of debug_locks before proceeding. The debug_locks_off() function is also modified to check debug_locks before calling __debug_locks_off(). Signed-off-by: Waiman Long Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Will Deacon Link: http://lkml.kernel.org/r/1539913518-15598-1-git-send-email-longman@redhat.com Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- kernel/locking/lockdep.c | 4 ++-- lib/debug_locks.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index d7c155048ea9a..bf694c709b96f 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -4215,7 +4215,7 @@ void lock_contended(struct lockdep_map *lock, unsigned long ip) { unsigned long flags; - if (unlikely(!lock_stat)) + if (unlikely(!lock_stat || !debug_locks)) return; if (unlikely(current->lockdep_recursion)) @@ -4235,7 +4235,7 @@ void lock_acquired(struct lockdep_map *lock, unsigned long ip) { unsigned long flags; - if (unlikely(!lock_stat)) + if (unlikely(!lock_stat || !debug_locks)) return; if (unlikely(current->lockdep_recursion)) diff --git a/lib/debug_locks.c b/lib/debug_locks.c index 96c4c633d95e6..124fdf238b3d9 100644 --- a/lib/debug_locks.c +++ b/lib/debug_locks.c @@ -37,7 +37,7 @@ EXPORT_SYMBOL_GPL(debug_locks_silent); */ int debug_locks_off(void) { - if (__debug_locks_off()) { + if (debug_locks && __debug_locks_off()) { if (!debug_locks_silent) { console_verbose(); return 1; -- GitLab From 7084b74ffc246e781c77a963321f34b661ea49cc Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 11 Oct 2018 12:20:49 -0700 Subject: [PATCH 1296/2269] ataflop: fix error handling during setup [ Upstream commit 71327f547ee3a46ec5c39fdbbd268401b2578d0e ] Move queue allocation next to disk allocation to fix a couple of issues: - If add_disk() hasn't been called, we should clear disk->queue before calling put_disk(). - If we fail to allocate a request queue, we still need to put all of the disks, not just the ones that we allocated queues for. Signed-off-by: Omar Sandoval Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/block/ataflop.c | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c index 92da886180aa1..1dacc42e2dcf4 100644 --- a/drivers/block/ataflop.c +++ b/drivers/block/ataflop.c @@ -1935,6 +1935,11 @@ static int __init atari_floppy_init (void) unit[i].disk = alloc_disk(1); if (!unit[i].disk) goto Enomem; + + unit[i].disk->queue = blk_init_queue(do_fd_request, + &ataflop_lock); + if (!unit[i].disk->queue) + goto Enomem; } if (UseTrackbuffer < 0) @@ -1966,10 +1971,6 @@ static int __init atari_floppy_init (void) sprintf(unit[i].disk->disk_name, "fd%d", i); unit[i].disk->fops = &floppy_fops; unit[i].disk->private_data = &unit[i]; - unit[i].disk->queue = blk_init_queue(do_fd_request, - &ataflop_lock); - if (!unit[i].disk->queue) - goto Enomem; set_capacity(unit[i].disk, MAX_DISK_SIZE * 2); add_disk(unit[i].disk); } @@ -1984,13 +1985,17 @@ static int __init atari_floppy_init (void) return 0; Enomem: - while (i--) { - struct request_queue *q = unit[i].disk->queue; + do { + struct gendisk *disk = unit[i].disk; - put_disk(unit[i].disk); - if (q) - blk_cleanup_queue(q); - } + if (disk) { + if (disk->queue) { + blk_cleanup_queue(disk->queue); + disk->queue = NULL; + } + put_disk(unit[i].disk); + } + } while (i--); unregister_blkdev(FLOPPY_MAJOR, "fd"); return -ENOMEM; -- GitLab From 8ac8e0fecd5e36caf0bdb34ccb9e6dc4853c62b3 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 11 Oct 2018 12:20:41 -0700 Subject: [PATCH 1297/2269] swim: fix cleanup on setup error [ Upstream commit 1448a2a5360ae06f25e2edc61ae070dff5c0beb4 ] If we fail to allocate the request queue for a disk, we still need to free that disk, not just the previous ones. Additionally, we need to cleanup the previous request queues. Signed-off-by: Omar Sandoval Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/block/swim.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/block/swim.c b/drivers/block/swim.c index e88d50f75a4a3..58e308145e952 100644 --- a/drivers/block/swim.c +++ b/drivers/block/swim.c @@ -887,8 +887,17 @@ static int swim_floppy_init(struct swim_priv *swd) exit_put_disks: unregister_blkdev(FLOPPY_MAJOR, "fd"); - while (drive--) - put_disk(swd->unit[drive].disk); + do { + struct gendisk *disk = swd->unit[drive].disk; + + if (disk) { + if (disk->queue) { + blk_cleanup_queue(disk->queue); + disk->queue = NULL; + } + put_disk(disk); + } + } while (drive--); return err; } -- GitLab From fd160e8b0f078fc46bd9857b2a73b7fc4ff63544 Mon Sep 17 00:00:00 2001 From: Ryan C Goodfellow Date: Fri, 12 Oct 2018 11:09:01 -0700 Subject: [PATCH 1298/2269] nfp: devlink port split support for 1x100G CXP NIC [ Upstream commit 5948185b97fa1f83d7855e638a72982a1073ebf5 ] This commit makes it possible to use devlink to split the 100G CXP Netronome into two 40G interfaces. Currently when you ask for 2 interfaces, the math in src/nfp_devlink.c:nfp_devlink_port_split calculates that you want 5 lanes per port because for some reason eth_port.port_lanes=10 (shouldn't this be 12 for CXP?). What we really want when asking for 2 breakout interfaces is 4 lanes per port. This commit makes that happen by calculating based on 8 lanes if 10 are present. Signed-off-by: Ryan C Goodfellow Reviewed-by: Jakub Kicinski Reviewed-by: Greg Weeks Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- .../net/ethernet/netronome/nfp/nfp_devlink.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_devlink.c b/drivers/net/ethernet/netronome/nfp/nfp_devlink.c index 6c9f29c2e9754..90a6c4fbc113e 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_devlink.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_devlink.c @@ -96,6 +96,7 @@ nfp_devlink_port_split(struct devlink *devlink, unsigned int port_index, { struct nfp_pf *pf = devlink_priv(devlink); struct nfp_eth_table_port eth_port; + unsigned int lanes; int ret; if (count < 2) @@ -114,8 +115,12 @@ nfp_devlink_port_split(struct devlink *devlink, unsigned int port_index, goto out; } - ret = nfp_devlink_set_lanes(pf, eth_port.index, - eth_port.port_lanes / count); + /* Special case the 100G CXP -> 2x40G split */ + lanes = eth_port.port_lanes / count; + if (eth_port.lanes == 10 && count == 2) + lanes = 8 / count; + + ret = nfp_devlink_set_lanes(pf, eth_port.index, lanes); out: mutex_unlock(&pf->lock); @@ -127,6 +132,7 @@ nfp_devlink_port_unsplit(struct devlink *devlink, unsigned int port_index) { struct nfp_pf *pf = devlink_priv(devlink); struct nfp_eth_table_port eth_port; + unsigned int lanes; int ret; mutex_lock(&pf->lock); @@ -142,7 +148,12 @@ nfp_devlink_port_unsplit(struct devlink *devlink, unsigned int port_index) goto out; } - ret = nfp_devlink_set_lanes(pf, eth_port.index, eth_port.port_lanes); + /* Special case the 100G CXP -> 2x40G unsplit */ + lanes = eth_port.port_lanes; + if (eth_port.port_lanes == 8) + lanes = 10; + + ret = nfp_devlink_set_lanes(pf, eth_port.index, lanes); out: mutex_unlock(&pf->lock); -- GitLab From 11d9f6e44a38a7096e39f8ed293746be29cb7715 Mon Sep 17 00:00:00 2001 From: Serhey Popovych Date: Tue, 9 Oct 2018 21:21:01 +0300 Subject: [PATCH 1299/2269] tun: Consistently configure generic netdev params via rtnetlink [ Upstream commit df52eab23d703142c766ac00bdb8db19d71238d0 ] Configuring generic network device parameters on tun will fail in presence of IFLA_INFO_KIND attribute in IFLA_LINKINFO nested attribute since tun_validate() always return failure. This can be visualized with following ip-link(8) command sequences: # ip link set dev tun0 group 100 # ip link set dev tun0 group 100 type tun RTNETLINK answers: Invalid argument with contrast to dummy and veth drivers: # ip link set dev dummy0 group 100 # ip link set dev dummy0 type dummy # ip link set dev veth0 group 100 # ip link set dev veth0 group 100 type veth Fix by returning zero in tun_validate() when @data is NULL that is always in case since rtnl_link_ops->maxtype is zero in tun driver. Fixes: f019a7a594d9 ("tun: Implement ip link del tunXXX") Signed-off-by: Serhey Popovych Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/tun.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index e0baea2dfd3cb..7f8c7e3aa3569 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1814,6 +1814,8 @@ static void tun_setup(struct net_device *dev) static int tun_validate(struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { + if (!data) + return 0; return -EINVAL; } -- GitLab From ec5c7c48925ca01c19c47133a3e3c35de0a4aaaf Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Tue, 2 Oct 2018 10:57:52 +0200 Subject: [PATCH 1300/2269] s390/sthyi: Fix machine name validity indication [ Upstream commit b5130dc2224d1881f24224c0590c6d97f2168d6a ] When running as a level 3 guest with no host provided sthyi support sclp_ocf_cpc_name_copy() will only return zeroes. Zeroes are not a valid group name, so let's not indicate that the group name field is valid. Also the group name is not dependent on stsi, let's not return based on stsi before setting it. Fixes: 95ca2cb57985 ("KVM: s390: Add sthyi emulation") Signed-off-by: Janosch Frank Signed-off-by: Martin Schwidefsky Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/s390/kvm/sthyi.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/s390/kvm/sthyi.c b/arch/s390/kvm/sthyi.c index 395926b8c1ed4..ffba4617d1087 100644 --- a/arch/s390/kvm/sthyi.c +++ b/arch/s390/kvm/sthyi.c @@ -174,17 +174,19 @@ static void fill_hdr(struct sthyi_sctns *sctns) static void fill_stsi_mac(struct sthyi_sctns *sctns, struct sysinfo_1_1_1 *sysinfo) { + sclp_ocf_cpc_name_copy(sctns->mac.infmname); + if (*(u64 *)sctns->mac.infmname != 0) + sctns->mac.infmval1 |= MAC_NAME_VLD; + if (stsi(sysinfo, 1, 1, 1)) return; - sclp_ocf_cpc_name_copy(sctns->mac.infmname); - memcpy(sctns->mac.infmtype, sysinfo->type, sizeof(sctns->mac.infmtype)); memcpy(sctns->mac.infmmanu, sysinfo->manufacturer, sizeof(sctns->mac.infmmanu)); memcpy(sctns->mac.infmpman, sysinfo->plant, sizeof(sctns->mac.infmpman)); memcpy(sctns->mac.infmseq, sysinfo->sequence, sizeof(sctns->mac.infmseq)); - sctns->mac.infmval1 |= MAC_ID_VLD | MAC_NAME_VLD; + sctns->mac.infmval1 |= MAC_ID_VLD; } static void fill_stsi_par(struct sthyi_sctns *sctns, -- GitLab From d16cd14ea370d87824744644819b21f4ea9aa481 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Fri, 21 Sep 2018 12:10:48 +0200 Subject: [PATCH 1301/2269] hwmon: (pwm-fan) Set fan speed to 0 on suspend [ Upstream commit 95dcd64bc5a27080beaa344edfe5bdcca3d2e7dc ] Technically this is not required because disabling the PWM should be enough. However, when support for atomic operations was implemented in the PWM subsystem, only actual changes to the PWM channel are applied during pwm_config(), which means that during after resume from suspend the old settings won't be applied. One possible solution is for the PWM driver to implement its own PM operations such that settings from before suspend get applied on resume. This has the disadvantage of completely ignoring any particular ordering requirements that PWM user drivers might have, so it is best to leave it up to the user drivers to apply the settings that they want at the appropriate time. Another way to solve this would be to read back the current state of the PWM at the time of resume. That way, in case the configuration was lost during suspend, applying the old settings in PWM user drivers would actually get them applied because they differ from the current settings. However, not all PWM drivers support reading the hardware state, and not all hardware may support it. The best workaround at this point seems to be to let PWM user drivers tell the PWM subsystem that the PWM is turned off by, in addition to disabling it, also setting the duty cycle to 0. This causes the resume operation to apply a configuration that is different from the current configuration, resulting in the proper state from before suspend getting restored. Signed-off-by: Thierry Reding Signed-off-by: Guenter Roeck Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/hwmon/pwm-fan.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/hwmon/pwm-fan.c b/drivers/hwmon/pwm-fan.c index 70cc0d134f3cd..ca250e7ac511a 100644 --- a/drivers/hwmon/pwm-fan.c +++ b/drivers/hwmon/pwm-fan.c @@ -290,9 +290,19 @@ static int pwm_fan_remove(struct platform_device *pdev) static int pwm_fan_suspend(struct device *dev) { struct pwm_fan_ctx *ctx = dev_get_drvdata(dev); + struct pwm_args args; + int ret; + + pwm_get_args(ctx->pwm, &args); + + if (ctx->pwm_value) { + ret = pwm_config(ctx->pwm, 0, args.period); + if (ret < 0) + return ret; - if (ctx->pwm_value) pwm_disable(ctx->pwm); + } + return 0; } -- GitLab From 47f29328600d0d3b761f7c1d968aaf1cdc208169 Mon Sep 17 00:00:00 2001 From: Jia-Ju Bai Date: Tue, 9 Oct 2018 13:12:00 +0200 Subject: [PATCH 1302/2269] lightnvm: pblk: fix two sleep-in-atomic-context bugs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 7325b4bbe5952e3e939f15de812f2ee0c0d33ca9 ] The driver may sleep with holding a spinlock. The function call paths (from bottom to top) in Linux-4.16 are: [FUNC] nvm_dev_dma_alloc(GFP_KERNEL) drivers/lightnvm/pblk-core.c, 754: nvm_dev_dma_alloc in pblk_line_submit_smeta_io drivers/lightnvm/pblk-core.c, 1048: pblk_line_submit_smeta_io in pblk_line_init_bb drivers/lightnvm/pblk-core.c, 1434: pblk_line_init_bb in pblk_line_replace_data drivers/lightnvm/pblk-recovery.c, 980: pblk_line_replace_data in pblk_recov_l2p drivers/lightnvm/pblk-recovery.c, 976: spin_lock in pblk_recov_l2p [FUNC] bio_map_kern(GFP_KERNEL) drivers/lightnvm/pblk-core.c, 762: bio_map_kern in pblk_line_submit_smeta_io drivers/lightnvm/pblk-core.c, 1048: pblk_line_submit_smeta_io in pblk_line_init_bb drivers/lightnvm/pblk-core.c, 1434: pblk_line_init_bb in pblk_line_replace_data drivers/lightnvm/pblk-recovery.c, 980: pblk_line_replace_data in pblk_recov_l2p drivers/lightnvm/pblk-recovery.c, 976: spin_lock in pblk_recov_l2p To fix these bugs, the call to pblk_line_replace_data() is moved out of the spinlock protection. These bugs are found by my static analysis tool DSAC. Signed-off-by: Jia-Ju Bai Reviewed-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/lightnvm/pblk-recovery.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/lightnvm/pblk-recovery.c b/drivers/lightnvm/pblk-recovery.c index cb556e06673e7..5d0912bf9eab2 100644 --- a/drivers/lightnvm/pblk-recovery.c +++ b/drivers/lightnvm/pblk-recovery.c @@ -1001,12 +1001,14 @@ next: } } - spin_lock(&l_mg->free_lock); if (!open_lines) { + spin_lock(&l_mg->free_lock); WARN_ON_ONCE(!test_and_clear_bit(meta_line, &l_mg->meta_bitmap)); + spin_unlock(&l_mg->free_lock); pblk_line_replace_data(pblk); } else { + spin_lock(&l_mg->free_lock); /* Allocate next line for preparation */ l_mg->data_next = pblk_line_get(pblk); if (l_mg->data_next) { @@ -1014,8 +1016,8 @@ next: l_mg->data_next->type = PBLK_LINETYPE_DATA; is_next = 1; } + spin_unlock(&l_mg->free_lock); } - spin_unlock(&l_mg->free_lock); if (is_next) { pblk_line_erase(pblk, l_mg->data_next); -- GitLab From 8515f4ea7289440a5c46be6af9d8668449956e9a Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Mon, 8 Oct 2018 11:08:47 -0700 Subject: [PATCH 1303/2269] spi: spi-ep93xx: Use dma_data_direction for ep93xx_spi_dma_{finish,prepare} [ Upstream commit a1108c7b2efb892350ba6a0e932dfd45622f4e2b ] Clang warns when one enumerated type is implicitly converted to another. drivers/spi/spi-ep93xx.c:342:62: warning: implicit conversion from enumeration type 'enum dma_transfer_direction' to different enumeration type 'enum dma_data_direction' [-Wenum-conversion] nents = dma_map_sg(chan->device->dev, sgt->sgl, sgt->nents, dir); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~ ./include/linux/dma-mapping.h:428:58: note: expanded from macro 'dma_map_sg' #define dma_map_sg(d, s, n, r) dma_map_sg_attrs(d, s, n, r, 0) ~~~~~~~~~~~~~~~~ ^ drivers/spi/spi-ep93xx.c:348:57: warning: implicit conversion from enumeration type 'enum dma_transfer_direction' to different enumeration type 'enum dma_data_direction' [-Wenum-conversion] dma_unmap_sg(chan->device->dev, sgt->sgl, sgt->nents, dir); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~ ./include/linux/dma-mapping.h:429:62: note: expanded from macro 'dma_unmap_sg' #define dma_unmap_sg(d, s, n, r) dma_unmap_sg_attrs(d, s, n, r, 0) ~~~~~~~~~~~~~~~~~~ ^ drivers/spi/spi-ep93xx.c:377:56: warning: implicit conversion from enumeration type 'enum dma_transfer_direction' to different enumeration type 'enum dma_data_direction' [-Wenum-conversion] dma_unmap_sg(chan->device->dev, sgt->sgl, sgt->nents, dir); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~ ./include/linux/dma-mapping.h:429:62: note: expanded from macro 'dma_unmap_sg' #define dma_unmap_sg(d, s, n, r) dma_unmap_sg_attrs(d, s, n, r, 0) ~~~~~~~~~~~~~~~~~~ ^ 3 warnings generated. dma_{,un}map_sg expect an enum of type dma_data_direction but this driver uses dma_transfer_direction for everything. Convert the driver to use dma_data_direction for these two functions. There are two places that strictly require an enum of type dma_transfer_direction: the direction member in struct dma_slave_config and the direction parameter in dmaengine_prep_slave_sg. To avoid using an explicit cast, add a simple function, ep93xx_dma_data_to_trans_dir, to safely map between the two types because they are not 1 to 1 in meaning. Signed-off-by: Nathan Chancellor Reviewed-by: Nick Desaulniers Reviewed-by: Mika Westerberg Signed-off-by: Mark Brown Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi-ep93xx.c | 36 +++++++++++++++++++++++++----------- 1 file changed, 25 insertions(+), 11 deletions(-) diff --git a/drivers/spi/spi-ep93xx.c b/drivers/spi/spi-ep93xx.c index e5cc07357746a..ce28c910ee48b 100644 --- a/drivers/spi/spi-ep93xx.c +++ b/drivers/spi/spi-ep93xx.c @@ -246,6 +246,19 @@ static int ep93xx_spi_read_write(struct spi_master *master) return -EINPROGRESS; } +static enum dma_transfer_direction +ep93xx_dma_data_to_trans_dir(enum dma_data_direction dir) +{ + switch (dir) { + case DMA_TO_DEVICE: + return DMA_MEM_TO_DEV; + case DMA_FROM_DEVICE: + return DMA_DEV_TO_MEM; + default: + return DMA_TRANS_NONE; + } +} + /** * ep93xx_spi_dma_prepare() - prepares a DMA transfer * @master: SPI master @@ -257,7 +270,7 @@ static int ep93xx_spi_read_write(struct spi_master *master) */ static struct dma_async_tx_descriptor * ep93xx_spi_dma_prepare(struct spi_master *master, - enum dma_transfer_direction dir) + enum dma_data_direction dir) { struct ep93xx_spi *espi = spi_master_get_devdata(master); struct spi_transfer *xfer = master->cur_msg->state; @@ -277,9 +290,9 @@ ep93xx_spi_dma_prepare(struct spi_master *master, buswidth = DMA_SLAVE_BUSWIDTH_1_BYTE; memset(&conf, 0, sizeof(conf)); - conf.direction = dir; + conf.direction = ep93xx_dma_data_to_trans_dir(dir); - if (dir == DMA_DEV_TO_MEM) { + if (dir == DMA_FROM_DEVICE) { chan = espi->dma_rx; buf = xfer->rx_buf; sgt = &espi->rx_sgt; @@ -343,7 +356,8 @@ ep93xx_spi_dma_prepare(struct spi_master *master, if (!nents) return ERR_PTR(-ENOMEM); - txd = dmaengine_prep_slave_sg(chan, sgt->sgl, nents, dir, DMA_CTRL_ACK); + txd = dmaengine_prep_slave_sg(chan, sgt->sgl, nents, conf.direction, + DMA_CTRL_ACK); if (!txd) { dma_unmap_sg(chan->device->dev, sgt->sgl, sgt->nents, dir); return ERR_PTR(-ENOMEM); @@ -360,13 +374,13 @@ ep93xx_spi_dma_prepare(struct spi_master *master, * unmapped. */ static void ep93xx_spi_dma_finish(struct spi_master *master, - enum dma_transfer_direction dir) + enum dma_data_direction dir) { struct ep93xx_spi *espi = spi_master_get_devdata(master); struct dma_chan *chan; struct sg_table *sgt; - if (dir == DMA_DEV_TO_MEM) { + if (dir == DMA_FROM_DEVICE) { chan = espi->dma_rx; sgt = &espi->rx_sgt; } else { @@ -381,8 +395,8 @@ static void ep93xx_spi_dma_callback(void *callback_param) { struct spi_master *master = callback_param; - ep93xx_spi_dma_finish(master, DMA_MEM_TO_DEV); - ep93xx_spi_dma_finish(master, DMA_DEV_TO_MEM); + ep93xx_spi_dma_finish(master, DMA_TO_DEVICE); + ep93xx_spi_dma_finish(master, DMA_FROM_DEVICE); spi_finalize_current_transfer(master); } @@ -392,15 +406,15 @@ static int ep93xx_spi_dma_transfer(struct spi_master *master) struct ep93xx_spi *espi = spi_master_get_devdata(master); struct dma_async_tx_descriptor *rxd, *txd; - rxd = ep93xx_spi_dma_prepare(master, DMA_DEV_TO_MEM); + rxd = ep93xx_spi_dma_prepare(master, DMA_FROM_DEVICE); if (IS_ERR(rxd)) { dev_err(&master->dev, "DMA RX failed: %ld\n", PTR_ERR(rxd)); return PTR_ERR(rxd); } - txd = ep93xx_spi_dma_prepare(master, DMA_MEM_TO_DEV); + txd = ep93xx_spi_dma_prepare(master, DMA_TO_DEVICE); if (IS_ERR(txd)) { - ep93xx_spi_dma_finish(master, DMA_DEV_TO_MEM); + ep93xx_spi_dma_finish(master, DMA_FROM_DEVICE); dev_err(&master->dev, "DMA TX failed: %ld\n", PTR_ERR(txd)); return PTR_ERR(txd); } -- GitLab From 52ff94ce5134464db22720973e7773af0969b96f Mon Sep 17 00:00:00 2001 From: Sanskriti Sharma Date: Tue, 2 Oct 2018 10:29:14 -0400 Subject: [PATCH 1304/2269] perf tools: Free temporary 'sys' string in read_event_files() [ Upstream commit 1e44224fb0528b4c0cc176bde2bb31e9127eb14b ] For each system in a given pevent, read_event_files() reads in a temporary 'sys' string. Be sure to free this string before moving onto to the next system and/or leaving read_event_files(). Fixes the following coverity complaints: Error: RESOURCE_LEAK (CWE-772): tools/perf/util/trace-event-read.c:343: overwrite_var: Overwriting "sys" in "sys = read_string()" leaks the storage that "sys" points to. tools/perf/util/trace-event-read.c:353: leaked_storage: Variable "sys" going out of scope leaks the storage it points to. Signed-off-by: Sanskriti Sharma Reviewed-by: Jiri Olsa Cc: Joe Lawrence Link: http://lkml.kernel.org/r/1538490554-8161-6-git-send-email-sansharm@redhat.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/trace-event-read.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/trace-event-read.c b/tools/perf/util/trace-event-read.c index 8a9a677f75768..6bfd690d63d9f 100644 --- a/tools/perf/util/trace-event-read.c +++ b/tools/perf/util/trace-event-read.c @@ -350,9 +350,12 @@ static int read_event_files(struct pevent *pevent) for (x=0; x < count; x++) { size = read8(pevent); ret = read_event_file(pevent, sys, size); - if (ret) + if (ret) { + free(sys); return ret; + } } + free(sys); } return 0; } -- GitLab From 2e8e70e56265496274e0c19464fffe896009bfc0 Mon Sep 17 00:00:00 2001 From: Sanskriti Sharma Date: Tue, 2 Oct 2018 10:29:11 -0400 Subject: [PATCH 1305/2269] perf tools: Cleanup trace-event-info 'tdata' leak [ Upstream commit faedbf3fd19f2511a39397f76359e4cc6ee93072 ] Free tracing_data structure in tracing_data_get() error paths. Fixes the following coverity complaint: Error: RESOURCE_LEAK (CWE-772): leaked_storage: Variable "tdata" going out of scope leaks the storage Signed-off-by: Sanskriti Sharma Reviewed-by: Jiri Olsa Cc: Joe Lawrence Link: http://lkml.kernel.org/r/1538490554-8161-3-git-send-email-sansharm@redhat.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/trace-event-info.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/perf/util/trace-event-info.c b/tools/perf/util/trace-event-info.c index 8f3b7ef221f2f..71a5b48637075 100644 --- a/tools/perf/util/trace-event-info.c +++ b/tools/perf/util/trace-event-info.c @@ -533,12 +533,14 @@ struct tracing_data *tracing_data_get(struct list_head *pattrs, "/tmp/perf-XXXXXX"); if (!mkstemp(tdata->temp_file)) { pr_debug("Can't make temp file"); + free(tdata); return NULL; } temp_fd = open(tdata->temp_file, O_RDWR); if (temp_fd < 0) { pr_debug("Can't read '%s'", tdata->temp_file); + free(tdata); return NULL; } -- GitLab From 1ab8d2dbc1a07743d0e48f63ec0d0179e6b3c900 Mon Sep 17 00:00:00 2001 From: Sanskriti Sharma Date: Tue, 2 Oct 2018 10:29:10 -0400 Subject: [PATCH 1306/2269] perf strbuf: Match va_{add,copy} with va_end [ Upstream commit ce49d8436cffa9b7a6a5f110879d53e89dbc6746 ] Ensure that all code paths in strbuf_addv() call va_end() on the ap_saved copy that was made. Fixes the following coverity complaint: Error: VARARGS (CWE-237): [#def683] tools/perf/util/strbuf.c:106: missing_va_end: va_end was not called for "ap_saved". Signed-off-by: Sanskriti Sharma Reviewed-by: Jiri Olsa Cc: Joe Lawrence Link: http://lkml.kernel.org/r/1538490554-8161-2-git-send-email-sansharm@redhat.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/strbuf.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/strbuf.c b/tools/perf/util/strbuf.c index 3d1cf5bf7f184..9005fbe0780ed 100644 --- a/tools/perf/util/strbuf.c +++ b/tools/perf/util/strbuf.c @@ -98,19 +98,25 @@ static int strbuf_addv(struct strbuf *sb, const char *fmt, va_list ap) va_copy(ap_saved, ap); len = vsnprintf(sb->buf + sb->len, sb->alloc - sb->len, fmt, ap); - if (len < 0) + if (len < 0) { + va_end(ap_saved); return len; + } if (len > strbuf_avail(sb)) { ret = strbuf_grow(sb, len); - if (ret) + if (ret) { + va_end(ap_saved); return ret; + } len = vsnprintf(sb->buf + sb->len, sb->alloc - sb->len, fmt, ap_saved); va_end(ap_saved); if (len > strbuf_avail(sb)) { pr_debug("this should not happen, your vsnprintf is broken"); + va_end(ap_saved); return -EINVAL; } } + va_end(ap_saved); return strbuf_setlen(sb, sb->len + len); } -- GitLab From b5d5f109c6d28c34c4f5469199073d4836567a17 Mon Sep 17 00:00:00 2001 From: Prarit Bhargava Date: Mon, 8 Oct 2018 11:06:19 -0400 Subject: [PATCH 1307/2269] cpupower: Fix coredump on VMWare [ Upstream commit f69ffc5d3db8f1f03fd6d1df5930f9a1fbd787b6 ] cpupower crashes on VMWare guests. The guests have the AMD PStateDef MSR (0xC0010064 + state number) set to zero. As a result fid and did are zero and the crash occurs because of a divide by zero (cof = fid/did). This can be prevented by checking the enable bit in the PStateDef MSR before calculating cof. By doing this the value of pstate[i] remains zero and the value can be tested before displaying the active Pstates. Check the enable bit in the PstateDef register for all supported families and only print out enabled Pstates. Signed-off-by: Prarit Bhargava Cc: Shuah Khan Cc: Stafford Horne Signed-off-by: Shuah Khan (Samsung OSG) Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/power/cpupower/utils/cpufreq-info.c | 2 ++ tools/power/cpupower/utils/helpers/amd.c | 5 +++++ 2 files changed, 7 insertions(+) diff --git a/tools/power/cpupower/utils/cpufreq-info.c b/tools/power/cpupower/utils/cpufreq-info.c index 3e701f0e9c143..5853faa9daf3d 100644 --- a/tools/power/cpupower/utils/cpufreq-info.c +++ b/tools/power/cpupower/utils/cpufreq-info.c @@ -202,6 +202,8 @@ static int get_boost_mode(unsigned int cpu) printf(_(" Boost States: %d\n"), b_states); printf(_(" Total States: %d\n"), pstate_no); for (i = 0; i < pstate_no; i++) { + if (!pstates[i]) + continue; if (i < b_states) printf(_(" Pstate-Pb%d: %luMHz (boost state)" "\n"), i, pstates[i]); diff --git a/tools/power/cpupower/utils/helpers/amd.c b/tools/power/cpupower/utils/helpers/amd.c index bb41cdd0df6bf..58d23997424d9 100644 --- a/tools/power/cpupower/utils/helpers/amd.c +++ b/tools/power/cpupower/utils/helpers/amd.c @@ -119,6 +119,11 @@ int decode_pstates(unsigned int cpu, unsigned int cpu_family, } if (read_msr(cpu, MSR_AMD_PSTATE + i, &pstate.val)) return -1; + if ((cpu_family == 0x17) && (!pstate.fam17h_bits.en)) + continue; + else if (!pstate.bits.en) + continue; + pstates[i] = get_cof(cpu_family, pstate); } *no = i; -- GitLab From f349beaa4c8f3d928a720e58fceefce4c02f0740 Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Sun, 23 Sep 2018 14:39:24 -0600 Subject: [PATCH 1308/2269] mmc: sdhci-pci-o2micro: Add quirk for O2 Micro dev 0x8620 rev 0x01 [ Upstream commit 5169894982bb67486d93cc1e10151712bb86bcb6 ] This device reports SDHCI_CLOCK_INT_STABLE even though it's not ready to take SDHCI_CLOCK_CARD_EN. The symptom is that reading SDHCI_CLOCK_CONTROL after enabling the clock shows absence of the bit from the register (e.g. expecting 0x0000fa07 = 0x0000fa03 | SDHCI_CLOCK_CARD_EN but only observed the first operand). mmc1: Timeout waiting for hardware cmd interrupt. mmc1: sdhci: ============ SDHCI REGISTER DUMP =========== mmc1: sdhci: Sys addr: 0x00000000 | Version: 0x00000603 mmc1: sdhci: Blk size: 0x00000000 | Blk cnt: 0x00000000 mmc1: sdhci: Argument: 0x00000000 | Trn mode: 0x00000000 mmc1: sdhci: Present: 0x01ff0001 | Host ctl: 0x00000001 mmc1: sdhci: Power: 0x0000000f | Blk gap: 0x00000000 mmc1: sdhci: Wake-up: 0x00000000 | Clock: 0x0000fa03 mmc1: sdhci: Timeout: 0x00000000 | Int stat: 0x00000000 mmc1: sdhci: Int enab: 0x00ff0083 | Sig enab: 0x00ff0083 mmc1: sdhci: AC12 err: 0x00000000 | Slot int: 0x00000000 mmc1: sdhci: Caps: 0x25fcc8bf | Caps_1: 0x00002077 mmc1: sdhci: Cmd: 0x00000000 | Max curr: 0x005800c8 mmc1: sdhci: Resp[0]: 0x00000000 | Resp[1]: 0x00000000 mmc1: sdhci: Resp[2]: 0x00000000 | Resp[3]: 0x00000000 mmc1: sdhci: Host ctl2: 0x00000008 mmc1: sdhci: ADMA Err: 0x00000000 | ADMA Ptr: 0x00000000 mmc1: sdhci: ============================================ The problem happens during wakeup from S3. Adding a delay quirk after power up reliably fixes the problem. Signed-off-by: Yu Zhao Signed-off-by: Ulf Hansson Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci-pci-o2micro.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/mmc/host/sdhci-pci-o2micro.c b/drivers/mmc/host/sdhci-pci-o2micro.c index 14273ca006419..44a809a20d3af 100644 --- a/drivers/mmc/host/sdhci-pci-o2micro.c +++ b/drivers/mmc/host/sdhci-pci-o2micro.c @@ -334,6 +334,9 @@ int sdhci_pci_o2_probe(struct sdhci_pci_chip *chip) pci_write_config_byte(chip->pdev, O2_SD_LOCK_WP, scratch); break; case PCI_DEVICE_ID_O2_SEABIRD0: + if (chip->pdev->revision == 0x01) + chip->quirks |= SDHCI_QUIRK_DELAY_AFTER_POWER; + /* fall through */ case PCI_DEVICE_ID_O2_SEABIRD1: /* UnLock WP */ ret = pci_read_config_byte(chip->pdev, -- GitLab From 709c3305511b234c57710461a67d96fa202b6d03 Mon Sep 17 00:00:00 2001 From: Shaul Triebitz Date: Wed, 6 Jun 2018 17:20:58 +0300 Subject: [PATCH 1309/2269] iwlwifi: pcie: avoid empty free RB queue [ Upstream commit 868a1e863f95183f00809363fefba6d4f5bcd116 ] If all free RB queues are empty, the driver will never restock the free RB queue. That's because the restocking happens in the Rx flow, and if the free queue is empty there will be no Rx. Although there's a background worker (a.k.a. allocator) allocating memory for RBs so that the Rx handler can restock them, the worker may run only after the free queue has become empty (and then it is too late for restocking as explained above). There is a solution for that called 'emergency': If the number of used RB's reaches half the amount of all RB's, the Rx handler will not wait for the allocator but immediately allocate memory for the used RB's and restock the free queue. But, since the used RB's is per queue, it may happen that the used RB's are spread between the queues such that the emergency check will fail for each of the queues (and still run out of RBs, causing the above symptom). To fix it, move to emergency mode if the sum of *all* used RBs (for all Rx queues) reaches half the amount of all RB's Signed-off-by: Shaul Triebitz Signed-off-by: Luca Coelho Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/intel/iwlwifi/pcie/rx.c | 32 +++++++++++++------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/rx.c b/drivers/net/wireless/intel/iwlwifi/pcie/rx.c index ca99c3cf41c21..5a15362ef6719 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/rx.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/rx.c @@ -1049,6 +1049,14 @@ void iwl_pcie_rx_free(struct iwl_trans *trans) kfree(trans_pcie->rxq); } +static void iwl_pcie_rx_move_to_allocator(struct iwl_rxq *rxq, + struct iwl_rb_allocator *rba) +{ + spin_lock(&rba->lock); + list_splice_tail_init(&rxq->rx_used, &rba->rbd_empty); + spin_unlock(&rba->lock); +} + /* * iwl_pcie_rx_reuse_rbd - Recycle used RBDs * @@ -1080,9 +1088,7 @@ static void iwl_pcie_rx_reuse_rbd(struct iwl_trans *trans, if ((rxq->used_count % RX_CLAIM_REQ_ALLOC) == RX_POST_REQ_ALLOC) { /* Move the 2 RBDs to the allocator ownership. Allocator has another 6 from pool for the request completion*/ - spin_lock(&rba->lock); - list_splice_tail_init(&rxq->rx_used, &rba->rbd_empty); - spin_unlock(&rba->lock); + iwl_pcie_rx_move_to_allocator(rxq, rba); atomic_inc(&rba->req_pending); queue_work(rba->alloc_wq, &rba->rx_alloc); @@ -1260,10 +1266,18 @@ restart: IWL_DEBUG_RX(trans, "Q %d: HW = SW = %d\n", rxq->id, r); while (i != r) { + struct iwl_rb_allocator *rba = &trans_pcie->rba; struct iwl_rx_mem_buffer *rxb; - - if (unlikely(rxq->used_count == rxq->queue_size / 2)) + /* number of RBDs still waiting for page allocation */ + u32 rb_pending_alloc = + atomic_read(&trans_pcie->rba.req_pending) * + RX_CLAIM_REQ_ALLOC; + + if (unlikely(rb_pending_alloc >= rxq->queue_size / 2 && + !emergency)) { + iwl_pcie_rx_move_to_allocator(rxq, rba); emergency = true; + } if (trans->cfg->mq_rx_supported) { /* @@ -1306,17 +1320,13 @@ restart: iwl_pcie_rx_allocator_get(trans, rxq); if (rxq->used_count % RX_CLAIM_REQ_ALLOC == 0 && !emergency) { - struct iwl_rb_allocator *rba = &trans_pcie->rba; - /* Add the remaining empty RBDs for allocator use */ - spin_lock(&rba->lock); - list_splice_tail_init(&rxq->rx_used, &rba->rbd_empty); - spin_unlock(&rba->lock); + iwl_pcie_rx_move_to_allocator(rxq, rba); } else if (emergency) { count++; if (count == 8) { count = 0; - if (rxq->used_count < rxq->queue_size / 3) + if (rb_pending_alloc < rxq->queue_size / 3) emergency = false; rxq->read = i; -- GitLab From 371075886a394e8c8e9cc5177e4cadb0f9e2ae17 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Wed, 13 Jun 2018 11:49:20 +0300 Subject: [PATCH 1310/2269] iwlwifi: mvm: clear HW_RESTART_REQUESTED when stopping the interface [ Upstream commit 155f7e0441cd121b1e673d465a35e99f4b9b2f0b ] Fix a bug that happens in the following scenario: 1) suspend without WoWLAN 2) mac80211 calls drv_stop because of the suspend 3) __iwl_mvm_mac_stop deallocates the aux station 4) during drv_stop the firmware crashes 5) iwlmvm: * sets IWL_MVM_STATUS_HW_RESTART_REQUESTED * asks mac80211 to kick the restart flow 6) mac80211 puts the restart worker into a freezable queue which means that the worker will not run for now since the workqueue is already frozen 7) ... 8) resume 9) mac80211 runs ieee80211_reconfig as part of the resume 10) mac80211 detects that a restart flow has been requested and that we are now resuming from suspend and cancels the restart worker 11) mac80211 calls drv_start() 12) __iwl_mvm_mac_start checks that IWL_MVM_STATUS_HW_RESTART_REQUESTED clears it, sets IWL_MVM_STATUS_IN_HW_RESTART and calls iwl_mvm_restart_cleanup() 13) iwl_fw_error_dump gets called and accesses the device to get debug data 14) iwl_mvm_up adds the aux station 15) iwl_mvm_add_aux_sta() allocates an internal station for the aux station 16) iwl_mvm_allocate_int_sta() tests IWL_MVM_STATUS_IN_HW_RESTART and doesn't really allocate a station ID for the aux station 17) a new queue is added for the aux station Note that steps from 5 to 9 aren't really part of the problem but were described for the sake of completeness. Once the iwl_mvm_mac_stop() is called, the device is not accessible, meaning that step 12) can't succeed and we'll see the following: drivers/net/wireless/intel/iwlwifi/pcie/trans.c:2122 iwl_trans_pcie_grab_nic_access+0xc0/0x1d6 [iwlwifi]() Timeout waiting for hardware access (CSR_GP_CNTRL 0x080403d8) Call Trace: [] iwl_trans_pcie_grab_nic_access+0xc0/0x1d6 [iwlwifi] [] iwl_trans_pcie_dump_regs+0x3fd/0x3fd [iwlwifi] [] iwl_fw_error_dump+0x4f5/0xe8b [iwlwifi] [] __iwl_mvm_mac_start+0x5a/0x21a [iwlmvm] [] iwl_mvm_mac_start+0xd4/0x103 [iwlmvm] [] drv_start+0xa1/0xc5 [iwl7000_mac80211] [] ieee80211_reconfig+0x145/0xf50 [mac80211] [] ieee80211_resume+0x62/0x66 [mac80211] [] wiphy_resume+0xa9/0xc6 [cfg80211] The station id of the aux station is set to 0xff in step 3 and because we don't really allocate a new station id for the auxliary station (as explained in 16), we end up sending a command to the firmware asking to connect the queue to station id 0xff. This makes the firmware crash with the following information: 0x00002093 | ADVANCED_SYSASSERT 0x000002F0 | trm_hw_status0 0x00000000 | trm_hw_status1 0x00000B38 | branchlink2 0x0001978C | interruptlink1 0x00000000 | interruptlink2 0xFF080501 | data1 0xDEADBEEF | data2 0xDEADBEEF | data3 Firmware error during reconfiguration - reprobe! FW error in SYNC CMD SCD_QUEUE_CFG Fix this by clearing IWL_MVM_STATUS_HW_RESTART_REQUESTED in iwl_mvm_mac_stop(). We won't be able to collect debug data anyway and when we will brought up again, we will have a clean state from the firmware perspective. Since we won't have IWL_MVM_STATUS_IN_HW_RESTART set in step 12) we won't get to the 2093 ASSERT either. Fixes: bf8b286f86fc ("iwlwifi: mvm: defer setting IWL_MVM_STATUS_IN_HW_RESTART") Signed-off-by: Emmanuel Grumbach Signed-off-by: Luca Coelho Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index db1fab9aa1c65..80a653950e86b 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -1225,12 +1225,15 @@ void __iwl_mvm_mac_stop(struct iwl_mvm *mvm) iwl_mvm_del_aux_sta(mvm); /* - * Clear IN_HW_RESTART flag when stopping the hw (as restart_complete() - * won't be called in this case). + * Clear IN_HW_RESTART and HW_RESTART_REQUESTED flag when stopping the + * hw (as restart_complete() won't be called in this case) and mac80211 + * won't execute the restart. * But make sure to cleanup interfaces that have gone down before/during * HW restart was requested. */ - if (test_and_clear_bit(IWL_MVM_STATUS_IN_HW_RESTART, &mvm->status)) + if (test_and_clear_bit(IWL_MVM_STATUS_IN_HW_RESTART, &mvm->status) || + test_and_clear_bit(IWL_MVM_STATUS_HW_RESTART_REQUESTED, + &mvm->status)) ieee80211_iterate_interfaces(mvm->hw, 0, iwl_mvm_cleanup_iterator, mvm); -- GitLab From 0d7a51786faf11362a9d9b3841f9d9ffe5b65c24 Mon Sep 17 00:00:00 2001 From: Lubomir Rintel Date: Thu, 4 Oct 2018 18:08:08 +0200 Subject: [PATCH 1311/2269] x86/olpc: Indicate that legacy PC XO-1 platform should not register RTC [ Upstream commit d92116b800fb79a72ad26121f5011f6aa3ad94c2 ] On OLPC XO-1, the RTC is discovered via device tree from the arch initcall. Don't let the PC platform register another one from its device initcall, it's not going to work: sysfs: cannot create duplicate filename '/devices/platform/rtc_cmos' CPU: 0 PID: 1 Comm: swapper Not tainted 4.19.0-rc6 #12 Hardware name: OLPC XO/XO, BIOS OLPC Ver 1.00.01 06/11/2014 Call Trace: dump_stack+0x16/0x18 sysfs_warn_dup+0x46/0x58 sysfs_create_dir_ns+0x76/0x9b kobject_add_internal+0xed/0x209 ? __schedule+0x3fa/0x447 kobject_add+0x5b/0x66 device_add+0x298/0x535 ? insert_resource_conflict+0x2a/0x3e platform_device_add+0x14d/0x192 ? io_delay_init+0x19/0x19 platform_device_register+0x1c/0x1f add_rtc_cmos+0x16/0x31 do_one_initcall+0x78/0x14a ? do_early_param+0x75/0x75 kernel_init_freeable+0x152/0x1e0 ? rest_init+0xa2/0xa2 kernel_init+0x8/0xd5 ret_from_fork+0x2e/0x38 kobject_add_internal failed for rtc_cmos with -EEXIST, don't try to register things with the same name in the same directory. platform rtc_cmos: registered platform RTC device (no PNP device found) Signed-off-by: Lubomir Rintel Signed-off-by: Borislav Petkov Acked-by: Thomas Gleixner CC: "H. Peter Anvin" CC: Ingo Molnar CC: x86-ml Link: http://lkml.kernel.org/r/20181004160808.307738-1-lkundrak@v3.sk Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/x86/platform/olpc/olpc-xo1-rtc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/platform/olpc/olpc-xo1-rtc.c b/arch/x86/platform/olpc/olpc-xo1-rtc.c index a2b4efddd61a5..8e7ddd7e313a4 100644 --- a/arch/x86/platform/olpc/olpc-xo1-rtc.c +++ b/arch/x86/platform/olpc/olpc-xo1-rtc.c @@ -16,6 +16,7 @@ #include #include +#include static void rtc_wake_on(struct device *dev) { @@ -75,6 +76,8 @@ static int __init xo1_rtc_init(void) if (r) return r; + x86_platform.legacy.rtc = 0; + device_init_wakeup(&xo1_rtc_device.dev, 1); return 0; } -- GitLab From 85e20b47dec298b569de62cce4b1762f647b28e7 Mon Sep 17 00:00:00 2001 From: Dou Liyang Date: Fri, 24 Aug 2018 10:51:26 +0800 Subject: [PATCH 1312/2269] ACPI / processor: Fix the return value of acpi_processor_ids_walk() [ Upstream commit d0381bf4f80c571dde1244fe5b85dc35e8b3f546 ] ACPI driver should make sure all the processor IDs in their ACPI Namespace are unique. the driver performs a depth-first walk of the namespace tree and calls the acpi_processor_ids_walk() to check the duplicate IDs. But, the acpi_processor_ids_walk() mistakes the return value. If a processor is checked, it returns true which causes the walk break immediately, and other processors will never be checked. Repace the value with AE_OK which is the standard acpi_status value. And don't abort the namespace walk even on error. Fixes: 8c8cb30f49b8 (acpi/processor: Implement DEVICE operator for processor enumeration) Signed-off-by: Dou Liyang Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/acpi_processor.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/acpi/acpi_processor.c b/drivers/acpi/acpi_processor.c index 86c10599d9f83..ccf07674a2a09 100644 --- a/drivers/acpi/acpi_processor.c +++ b/drivers/acpi/acpi_processor.c @@ -642,7 +642,7 @@ static acpi_status __init acpi_processor_ids_walk(acpi_handle handle, status = acpi_get_type(handle, &acpi_type); if (ACPI_FAILURE(status)) - return false; + return status; switch (acpi_type) { case ACPI_TYPE_PROCESSOR: @@ -662,11 +662,12 @@ static acpi_status __init acpi_processor_ids_walk(acpi_handle handle, } processor_validated_ids_update(uid); - return true; + return AE_OK; err: + /* Exit on error, but don't abort the namespace walk */ acpi_handle_info(handle, "Invalid processor object\n"); - return false; + return AE_OK; } -- GitLab From bd8dfdcc2249b3bcfd33bdb547e392c4aca134f7 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 3 Oct 2018 15:35:21 +0530 Subject: [PATCH 1313/2269] cpufreq: dt: Try freeing static OPPs only if we have added them [ Upstream commit 51c99dd2c06b234575661fa1e0a1dea6c3ef566f ] We can not call dev_pm_opp_of_cpumask_remove_table() freely anymore since the latest OPP core updates as that uses reference counting to free resources. There are cases where no static OPPs are added (using DT) for a platform and trying to remove the OPP table may end up decrementing refcount which is already zero and hence generating warnings. Lets track if we were able to add static OPPs or not and then only remove the table based on that. Some reshuffling of code is also done to do that. Reported-by: Niklas Cassel Tested-by: Niklas Cassel Signed-off-by: Viresh Kumar Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/cpufreq/cpufreq-dt.c | 34 +++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c index d83ab94d041a8..ca6ee9f389b6c 100644 --- a/drivers/cpufreq/cpufreq-dt.c +++ b/drivers/cpufreq/cpufreq-dt.c @@ -32,6 +32,7 @@ struct private_data { struct device *cpu_dev; struct thermal_cooling_device *cdev; const char *reg_name; + bool have_static_opps; }; static struct freq_attr *cpufreq_dt_attr[] = { @@ -196,6 +197,15 @@ static int cpufreq_init(struct cpufreq_policy *policy) } } + priv = kzalloc(sizeof(*priv), GFP_KERNEL); + if (!priv) { + ret = -ENOMEM; + goto out_put_regulator; + } + + priv->reg_name = name; + priv->opp_table = opp_table; + /* * Initialize OPP tables for all policy->cpus. They will be shared by * all CPUs which have marked their CPUs shared with OPP bindings. @@ -206,7 +216,8 @@ static int cpufreq_init(struct cpufreq_policy *policy) * * OPPs might be populated at runtime, don't check for error here */ - dev_pm_opp_of_cpumask_add_table(policy->cpus); + if (!dev_pm_opp_of_cpumask_add_table(policy->cpus)) + priv->have_static_opps = true; /* * But we need OPP table to function so if it is not there let's @@ -232,19 +243,10 @@ static int cpufreq_init(struct cpufreq_policy *policy) __func__, ret); } - priv = kzalloc(sizeof(*priv), GFP_KERNEL); - if (!priv) { - ret = -ENOMEM; - goto out_free_opp; - } - - priv->reg_name = name; - priv->opp_table = opp_table; - ret = dev_pm_opp_init_cpufreq_table(cpu_dev, &freq_table); if (ret) { dev_err(cpu_dev, "failed to init cpufreq table: %d\n", ret); - goto out_free_priv; + goto out_free_opp; } priv->cpu_dev = cpu_dev; @@ -280,10 +282,11 @@ static int cpufreq_init(struct cpufreq_policy *policy) out_free_cpufreq_table: dev_pm_opp_free_cpufreq_table(cpu_dev, &freq_table); -out_free_priv: - kfree(priv); out_free_opp: - dev_pm_opp_of_cpumask_remove_table(policy->cpus); + if (priv->have_static_opps) + dev_pm_opp_of_cpumask_remove_table(policy->cpus); + kfree(priv); +out_put_regulator: if (name) dev_pm_opp_put_regulators(opp_table); out_put_clk: @@ -298,7 +301,8 @@ static int cpufreq_exit(struct cpufreq_policy *policy) cpufreq_cooling_unregister(priv->cdev); dev_pm_opp_free_cpufreq_table(priv->cpu_dev, &policy->freq_table); - dev_pm_opp_of_cpumask_remove_table(policy->related_cpus); + if (priv->have_static_opps) + dev_pm_opp_of_cpumask_remove_table(policy->related_cpus); if (priv->reg_name) dev_pm_opp_put_regulators(priv->opp_table); -- GitLab From 2d2b73cf0bfcac058789e0edd4fbe83d8b26bb14 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 18 Sep 2018 08:55:55 -0500 Subject: [PATCH 1314/2269] mtd: rawnand: atmel: Fix potential NULL pointer dereference [ Upstream commit fbed20280d912449cfb40c382cb55e3d11502587 ] There is a potential execution path in which function of_find_compatible_node() returns NULL. In such a case, we end up having a NULL pointer dereference when accessing pointer *nfc_np* in function of_clk_get(). So, we better don't take any chances and fix this by null checking pointer *nfc_np* before calling of_clk_get(). Addresses-Coverity-ID: 1473052 ("Dereference null return value") Fixes: f88fc122cc34 ("mtd: nand: Cleanup/rework the atmel_nand driver") Signed-off-by: Gustavo A. R. Silva Reviewed-by: Boris Brezillon Acked-by: Tudor Ambarus Signed-off-by: Miquel Raynal Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/nand/atmel/nand-controller.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/mtd/nand/atmel/nand-controller.c b/drivers/mtd/nand/atmel/nand-controller.c index 148744418e82b..32a2f947a4541 100644 --- a/drivers/mtd/nand/atmel/nand-controller.c +++ b/drivers/mtd/nand/atmel/nand-controller.c @@ -2079,6 +2079,10 @@ atmel_hsmc_nand_controller_legacy_init(struct atmel_hsmc_nand_controller *nc) nand_np = dev->of_node; nfc_np = of_find_compatible_node(dev->of_node, NULL, "atmel,sama5d3-nfc"); + if (!nfc_np) { + dev_err(dev, "Could not find device node for sama5d3-nfc\n"); + return -ENODEV; + } nc->clk = of_clk_get(nfc_np, 0); if (IS_ERR(nc->clk)) { -- GitLab From b66d0bb19f24702ca3b9ac8dc6f570d5be90ec7d Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 5 Sep 2018 15:34:42 +0100 Subject: [PATCH 1315/2269] signal: Introduce COMPAT_SIGMINSTKSZ for use in compat_sys_sigaltstack [ Upstream commit 22839869f21ab3850fbbac9b425ccc4c0023926f ] The sigaltstack(2) system call fails with -ENOMEM if the new alternative signal stack is found to be smaller than SIGMINSTKSZ. On architectures such as arm64, where the native value for SIGMINSTKSZ is larger than the compat value, this can result in an unexpected error being reported to a compat task. See, for example: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=904385 This patch fixes the problem by extending do_sigaltstack to take the minimum signal stack size as an additional parameter, allowing the native and compat system call entry code to pass in their respective values. COMPAT_SIGMINSTKSZ is just defined as SIGMINSTKSZ if it has not been defined by the architecture. Cc: Arnd Bergmann Cc: Dominik Brodowski Cc: "Eric W. Biederman" Cc: Andrew Morton Cc: Al Viro Cc: Oleg Nesterov Reported-by: Steve McIntyre Tested-by: Steve McIntyre <93sam@debian.org> Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- include/linux/compat.h | 3 +++ kernel/signal.c | 14 +++++++++----- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/include/linux/compat.h b/include/linux/compat.h index 3e838a828459d..23909d12f7292 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -68,6 +68,9 @@ typedef struct compat_sigaltstack { compat_size_t ss_size; } compat_stack_t; #endif +#ifndef COMPAT_MINSIGSTKSZ +#define COMPAT_MINSIGSTKSZ MINSIGSTKSZ +#endif #define compat_jiffies_to_clock_t(x) \ (((unsigned long)(x) * COMPAT_USER_HZ) / HZ) diff --git a/kernel/signal.c b/kernel/signal.c index 4439ba9dc5d9e..b74acbec98762 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -3215,7 +3215,8 @@ int do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact) } static int -do_sigaltstack (const stack_t *ss, stack_t *oss, unsigned long sp) +do_sigaltstack (const stack_t *ss, stack_t *oss, unsigned long sp, + size_t min_ss_size) { struct task_struct *t = current; @@ -3245,7 +3246,7 @@ do_sigaltstack (const stack_t *ss, stack_t *oss, unsigned long sp) ss_size = 0; ss_sp = NULL; } else { - if (unlikely(ss_size < MINSIGSTKSZ)) + if (unlikely(ss_size < min_ss_size)) return -ENOMEM; } @@ -3263,7 +3264,8 @@ SYSCALL_DEFINE2(sigaltstack,const stack_t __user *,uss, stack_t __user *,uoss) if (uss && copy_from_user(&new, uss, sizeof(stack_t))) return -EFAULT; err = do_sigaltstack(uss ? &new : NULL, uoss ? &old : NULL, - current_user_stack_pointer()); + current_user_stack_pointer(), + MINSIGSTKSZ); if (!err && uoss && copy_to_user(uoss, &old, sizeof(stack_t))) err = -EFAULT; return err; @@ -3274,7 +3276,8 @@ int restore_altstack(const stack_t __user *uss) stack_t new; if (copy_from_user(&new, uss, sizeof(stack_t))) return -EFAULT; - (void)do_sigaltstack(&new, NULL, current_user_stack_pointer()); + (void)do_sigaltstack(&new, NULL, current_user_stack_pointer(), + MINSIGSTKSZ); /* squash all but EFAULT for now */ return 0; } @@ -3309,7 +3312,8 @@ COMPAT_SYSCALL_DEFINE2(sigaltstack, uss.ss_size = uss32.ss_size; } ret = do_sigaltstack(uss_ptr ? &uss : NULL, &uoss, - compat_user_stack_pointer()); + compat_user_stack_pointer(), + COMPAT_MINSIGSTKSZ); if (ret >= 0 && uoss_ptr) { compat_stack_t old; memset(&old, 0, sizeof(old)); -- GitLab From 71ef2fef4a394fb40bf0087f2ddaa5d035355d30 Mon Sep 17 00:00:00 2001 From: Christian Hewitt Date: Tue, 4 Sep 2018 21:50:57 +0400 Subject: [PATCH 1316/2269] Bluetooth: btbcm: Add entry for BCM4335C0 UART bluetooth [ Upstream commit a357ea098c9605f60d92a66a9073f56ce25726da ] This patch adds the device ID for the AMPAK AP6335 combo module used in the 1st generation WeTek Hub Android/LibreELEC HTPC box. The WiFI chip identifies itself as BCM4339, while Bluetooth identifies itself as BCM4335 (rev C0): ``` [ 4.864248] Bluetooth: hci0: BCM: chip id 86 [ 4.866388] Bluetooth: hci0: BCM: features 0x2f [ 4.889317] Bluetooth: hci0: BCM4335C0 [ 4.889332] Bluetooth: hci0: BCM4335C0 (003.001.009) build 0000 [ 9.778383] Bluetooth: hci0: BCM4335C0 (003.001.009) build 0268 ``` Output from hciconfig: ``` hci0: Type: Primary Bus: UART BD Address: 43:39:00:00:1F:AC ACL MTU: 1021:8 SCO MTU: 64:1 UP RUNNING RX bytes:7567 acl:234 sco:0 events:386 errors:0 TX bytes:53844 acl:77 sco:0 commands:304 errors:0 Features: 0xbf 0xfe 0xcf 0xfe 0xdb 0xff 0x7b 0x87 Packet type: DM1 DM3 DM5 DH1 DH3 DH5 HV1 HV2 HV3 Link policy: RSWITCH SNIFF Link mode: SLAVE ACCEPT Name: 'HUB' Class: 0x0c0000 Service Classes: Rendering, Capturing Device Class: Miscellaneous, HCI Version: 4.0 (0x6) Revision: 0x10c LMP Version: 4.0 (0x6) Subversion: 0x6109 Manufacturer: Broadcom Corporation (15) ``` Signed-off-by: Christian Hewitt Signed-off-by: Marcel Holtmann Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/btbcm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/bluetooth/btbcm.c b/drivers/bluetooth/btbcm.c index cc4bdefa6648b..67315cb28826b 100644 --- a/drivers/bluetooth/btbcm.c +++ b/drivers/bluetooth/btbcm.c @@ -325,6 +325,7 @@ static const struct { { 0x4103, "BCM4330B1" }, /* 002.001.003 */ { 0x410e, "BCM43341B0" }, /* 002.001.014 */ { 0x4406, "BCM4324B3" }, /* 002.004.006 */ + { 0x6109, "BCM4335C0" }, /* 003.001.009 */ { 0x610c, "BCM4354" }, /* 003.001.012 */ { 0x2209, "BCM43430A1" }, /* 001.002.009 */ { } -- GitLab From fcc506cab03fee33a12ab0558bf6c53b897f9339 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Sun, 16 Sep 2018 16:22:47 +0100 Subject: [PATCH 1317/2269] x86: boot: Fix EFI stub alignment [ Upstream commit 9c1442a9d039a1a3302fa93e9a11001c5f23b624 ] We currently align the end of the compressed image to a multiple of 16. However, the PE-COFF header included in the EFI stub says that the file alignment is 32 bytes, and when adding an EFI signature to the file it must first be padded to this alignment. sbsigntool commands warn about this: warning: file-aligned section .text extends beyond end of file warning: checksum areas are greater than image size. Invalid section table? Worse, pesign -at least when creating a detached signature- uses the hash of the unpadded file, resulting in an invalid signature if padding is required. Avoid both these problems by increasing alignment to 32 bytes when CONFIG_EFI_STUB is enabled. Signed-off-by: Ben Hutchings Signed-off-by: Ard Biesheuvel Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/tools/build.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/x86/boot/tools/build.c b/arch/x86/boot/tools/build.c index d4e6cd4577e5d..bf0e824003584 100644 --- a/arch/x86/boot/tools/build.c +++ b/arch/x86/boot/tools/build.c @@ -391,6 +391,13 @@ int main(int argc, char ** argv) die("Unable to mmap '%s': %m", argv[2]); /* Number of 16-byte paragraphs, including space for a 4-byte CRC */ sys_size = (sz + 15 + 4) / 16; +#ifdef CONFIG_EFI_STUB + /* + * COFF requires minimum 32-byte alignment of sections, and + * adding a signature is problematic without that alignment. + */ + sys_size = (sys_size + 1) & ~1; +#endif /* Patch the setup code with the appropriate size parameters */ buf[0x1f1] = setup_sectors-1; -- GitLab From 5c27d0536adc684122587ce39467d450ecdf23b0 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Thu, 20 Sep 2018 01:58:18 +0000 Subject: [PATCH 1318/2269] pinctrl: qcom: spmi-mpp: Fix err handling of pmic_mpp_set_mux [ Upstream commit 69f8455f6cc78fa6cdf80d0105d7a748106271dc ] 'ret' should be returned while pmic_mpp_write_mode_ctl fails. Fixes: 0e948042c420 ("pinctrl: qcom: spmi-mpp: Implement support for sink mode") Signed-off-by: YueHaibing Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/qcom/pinctrl-spmi-mpp.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/pinctrl/qcom/pinctrl-spmi-mpp.c b/drivers/pinctrl/qcom/pinctrl-spmi-mpp.c index 6556dbeae65ef..418bc40df7ca0 100644 --- a/drivers/pinctrl/qcom/pinctrl-spmi-mpp.c +++ b/drivers/pinctrl/qcom/pinctrl-spmi-mpp.c @@ -319,6 +319,8 @@ static int pmic_mpp_set_mux(struct pinctrl_dev *pctldev, unsigned function, pad->function = function; ret = pmic_mpp_write_mode_ctl(state, pad); + if (ret < 0) + return ret; val = pad->is_enabled << PMIC_MPP_REG_MASTER_EN_SHIFT; -- GitLab From 4484de1283a7808c9a7ba55dc3841a44771a5db0 Mon Sep 17 00:00:00 2001 From: Arend van Spriel Date: Wed, 5 Sep 2018 09:48:58 +0200 Subject: [PATCH 1319/2269] brcmfmac: fix for proper support of 160MHz bandwidth [ Upstream commit 330994e8e8ec5d0b269a5265e6032b37e29aa336 ] Decoding of firmware channel information was not complete for 160MHz support. This resulted in the following warning: WARNING: CPU: 2 PID: 2222 at .../broadcom/brcm80211/brcmutil/d11.c:196 brcmu_d11ac_decchspec+0x2e/0x100 [brcmutil] Modules linked in: brcmfmac(O) brcmutil(O) sha256_generic cfg80211 ... CPU: 2 PID: 2222 Comm: kworker/2:0 Tainted: G O 4.17.0-wt-testing-x64-00002-gf1bed50 #1 Hardware name: Dell Inc. Latitude E6410/07XJP9, BIOS A07 02/15/2011 Workqueue: events request_firmware_work_func RIP: 0010:brcmu_d11ac_decchspec+0x2e/0x100 [brcmutil] RSP: 0018:ffffc90000047bd0 EFLAGS: 00010206 RAX: 000000000000e832 RBX: ffff8801146fe910 RCX: ffff8801146fd3c0 RDX: 0000000000002800 RSI: 0000000000000070 RDI: ffffc90000047c30 RBP: ffffc90000047bd0 R08: 0000000000000000 R09: ffffffffa0798c80 R10: ffff88012bca55e0 R11: ffff880110a4ea00 R12: ffff8801146f8000 R13: ffffc90000047c30 R14: ffff8801146fe930 R15: ffff8801138e02e0 FS: 0000000000000000(0000) GS:ffff88012bc80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f18ce8b8070 CR3: 000000000200a003 CR4: 00000000000206e0 Call Trace: brcmf_setup_wiphybands+0x212/0x780 [brcmfmac] brcmf_cfg80211_attach+0xae2/0x11a0 [brcmfmac] brcmf_attach+0x1fc/0x4b0 [brcmfmac] ? __kmalloc+0x13c/0x1c0 brcmf_pcie_setup+0x99b/0xe00 [brcmfmac] brcmf_fw_request_done+0x16a/0x1f0 [brcmfmac] request_firmware_work_func+0x36/0x60 process_one_work+0x146/0x350 worker_thread+0x4a/0x3b0 kthread+0x102/0x140 ? process_one_work+0x350/0x350 ? kthread_bind+0x20/0x20 ret_from_fork+0x35/0x40 Code: 66 90 0f b7 07 55 48 89 e5 89 c2 88 47 02 88 47 03 66 81 e2 00 38 66 81 fa 00 18 74 6e 66 81 fa 00 20 74 39 66 81 fa 00 10 74 14 <0f> 0b 66 25 00 c0 74 20 66 3d 00 c0 75 20 c6 47 04 01 5d c3 66 ---[ end trace 550c46682415b26d ]--- brcmfmac: brcmf_construct_chaninfo: Ignoring unexpected firmware channel 50 This patch adds the missing stuff to properly handle this. Reviewed-by: Hante Meuleman Reviewed-by: Pieter-Paul Giesberts Reviewed-by: Franky Lin Signed-off-by: Arend van Spriel Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- .../broadcom/brcm80211/brcmutil/d11.c | 34 ++++++++++++++++++- .../broadcom/brcm80211/include/brcmu_wifi.h | 2 ++ 2 files changed, 35 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmutil/d11.c b/drivers/net/wireless/broadcom/brcm80211/brcmutil/d11.c index d8b79cb72b58d..e7584b842dce4 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmutil/d11.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmutil/d11.c @@ -77,6 +77,8 @@ static u16 d11ac_bw(enum brcmu_chan_bw bw) return BRCMU_CHSPEC_D11AC_BW_40; case BRCMU_CHAN_BW_80: return BRCMU_CHSPEC_D11AC_BW_80; + case BRCMU_CHAN_BW_160: + return BRCMU_CHSPEC_D11AC_BW_160; default: WARN_ON(1); } @@ -190,8 +192,38 @@ static void brcmu_d11ac_decchspec(struct brcmu_chan *ch) break; } break; - case BRCMU_CHSPEC_D11AC_BW_8080: case BRCMU_CHSPEC_D11AC_BW_160: + switch (ch->sb) { + case BRCMU_CHAN_SB_LLL: + ch->control_ch_num -= CH_70MHZ_APART; + break; + case BRCMU_CHAN_SB_LLU: + ch->control_ch_num -= CH_50MHZ_APART; + break; + case BRCMU_CHAN_SB_LUL: + ch->control_ch_num -= CH_30MHZ_APART; + break; + case BRCMU_CHAN_SB_LUU: + ch->control_ch_num -= CH_10MHZ_APART; + break; + case BRCMU_CHAN_SB_ULL: + ch->control_ch_num += CH_10MHZ_APART; + break; + case BRCMU_CHAN_SB_ULU: + ch->control_ch_num += CH_30MHZ_APART; + break; + case BRCMU_CHAN_SB_UUL: + ch->control_ch_num += CH_50MHZ_APART; + break; + case BRCMU_CHAN_SB_UUU: + ch->control_ch_num += CH_70MHZ_APART; + break; + default: + WARN_ON_ONCE(1); + break; + } + break; + case BRCMU_CHSPEC_D11AC_BW_8080: default: WARN_ON_ONCE(1); break; diff --git a/drivers/net/wireless/broadcom/brcm80211/include/brcmu_wifi.h b/drivers/net/wireless/broadcom/brcm80211/include/brcmu_wifi.h index 7b9a77981df16..75b2a0438cfa7 100644 --- a/drivers/net/wireless/broadcom/brcm80211/include/brcmu_wifi.h +++ b/drivers/net/wireless/broadcom/brcm80211/include/brcmu_wifi.h @@ -29,6 +29,8 @@ #define CH_UPPER_SB 0x01 #define CH_LOWER_SB 0x02 #define CH_EWA_VALID 0x04 +#define CH_70MHZ_APART 14 +#define CH_50MHZ_APART 10 #define CH_30MHZ_APART 6 #define CH_20MHZ_APART 4 #define CH_10MHZ_APART 2 -- GitLab From 4c2f34ab8315299f8a4e43cb70bf981ef296d7bf Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Wed, 19 Sep 2018 11:39:31 +0200 Subject: [PATCH 1320/2269] net: phy: phylink: ensure the carrier is off when starting phylink [ Upstream commit aeeb2e8fdefdd5d257a1446351c70cb3df540199 ] Phylink made an assumption about the carrier state being down when calling phylink_start(). If this assumption isn't satisfied, the internal phylink state could misbehave and a net device could end up not being functional. This patch fixes this by explicitly calling netif_carrier_off() in phylink_start(). Signed-off-by: Antoine Tenart Acked-by: Russell King Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/phy/phylink.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index 79f28b9186c69..70ce7da26d1ff 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -747,6 +747,9 @@ void phylink_start(struct phylink *pl) phylink_an_mode_str(pl->link_an_mode), phy_modes(pl->link_config.interface)); + /* Always set the carrier off */ + netif_carrier_off(pl->netdev); + /* Apply the link configuration to the MAC when starting. This allows * a fixed-link to start with the correct parameters, and also * ensures that we set the appropriate advertisment for Serdes links. -- GitLab From d4d73f1fd8ec1f7c0581f177154a36b5acebb4f1 Mon Sep 17 00:00:00 2001 From: Paolo Valente Date: Fri, 14 Sep 2018 16:23:07 +0200 Subject: [PATCH 1321/2269] block, bfq: correctly charge and reset entity service in all cases [ Upstream commit cbeb869a3d1110450186b738199963c5e68c2a71 ] BFQ schedules entities (which represent either per-process queues or groups of queues) as a function of their timestamps. In particular, as a function of their (virtual) finish times. The finish time of an entity is computed as a function of the budget assigned to the entity, assuming, tentatively, that the entity, once in service, will receive an amount of service equal to its budget. Then, when the entity is expired because it finishes to be served, this finish time is updated as a function of the actual service received by the entity. This allows the entity to be correctly charged with only the service received, and then to be correctly re-scheduled. Yet an entity may receive service also while not being the entity in service (in the scheduling environment of its parent entity), for several reasons. If the entity remains with no backlog while receiving this 'unofficial' service, then it is expired. Also on such an expiration, the finish time of the entity should be updated to account for only the service actually received by the entity. Unfortunately, such an update is not performed for an entity expiring without being the entity in service. In a similar vein, the service counter of the entity in service is reset when the entity is expired, to be ready to be used for next service cycle. This reset too should be performed also in case an entity is expired because it remains empty after receiving service while not being the entity in service. But in this case the reset is not performed. This commit performs the above update of the finish time and reset of the service received, also for an entity expiring while not being the entity in service. Signed-off-by: Paolo Valente Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- block/bfq-wf2q.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/block/bfq-wf2q.c b/block/bfq-wf2q.c index 414ba686a8475..c1727604ad14a 100644 --- a/block/bfq-wf2q.c +++ b/block/bfq-wf2q.c @@ -1172,10 +1172,17 @@ bool __bfq_deactivate_entity(struct bfq_entity *entity, bool ins_into_idle_tree) st = bfq_entity_service_tree(entity); is_in_service = entity == sd->in_service_entity; - if (is_in_service) { - bfq_calc_finish(entity, entity->service); + bfq_calc_finish(entity, entity->service); + + if (is_in_service) sd->in_service_entity = NULL; - } + else + /* + * Non in-service entity: nobody will take care of + * resetting its service counter on expiration. Do it + * now. + */ + entity->service = 0; if (entity->tree == &st->active) bfq_active_extract(st, entity); -- GitLab From 0e0b860fffa41ecca0004da3bed908b7c6fbfcfb Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 11 Sep 2018 19:20:40 +0900 Subject: [PATCH 1322/2269] kprobes: Return error if we fail to reuse kprobe instead of BUG_ON() [ Upstream commit 819319fc93461c07b9cdb3064f154bd8cfd48172 ] Make reuse_unused_kprobe() to return error code if it fails to reuse unused kprobe for optprobe instead of calling BUG_ON(). Signed-off-by: Masami Hiramatsu Cc: Anil S Keshavamurthy Cc: David S . Miller Cc: Linus Torvalds Cc: Naveen N . Rao Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/153666124040.21306.14150398706331307654.stgit@devbox Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- kernel/kprobes.c | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 5c90765d37e77..5cbad4fb91071 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -700,9 +700,10 @@ static void unoptimize_kprobe(struct kprobe *p, bool force) } /* Cancel unoptimizing for reusing */ -static void reuse_unused_kprobe(struct kprobe *ap) +static int reuse_unused_kprobe(struct kprobe *ap) { struct optimized_kprobe *op; + int ret; BUG_ON(!kprobe_unused(ap)); /* @@ -716,8 +717,12 @@ static void reuse_unused_kprobe(struct kprobe *ap) /* Enable the probe again */ ap->flags &= ~KPROBE_FLAG_DISABLED; /* Optimize it again (remove from op->list) */ - BUG_ON(!kprobe_optready(ap)); + ret = kprobe_optready(ap); + if (ret) + return ret; + optimize_kprobe(ap); + return 0; } /* Remove optimized instructions */ @@ -942,11 +947,16 @@ static void __disarm_kprobe(struct kprobe *p, bool reopt) #define kprobe_disarmed(p) kprobe_disabled(p) #define wait_for_kprobe_optimizer() do {} while (0) -/* There should be no unused kprobes can be reused without optimization */ -static void reuse_unused_kprobe(struct kprobe *ap) +static int reuse_unused_kprobe(struct kprobe *ap) { + /* + * If the optimized kprobe is NOT supported, the aggr kprobe is + * released at the same time that the last aggregated kprobe is + * unregistered. + * Thus there should be no chance to reuse unused kprobe. + */ printk(KERN_ERR "Error: There should be no unused kprobe here.\n"); - BUG_ON(kprobe_unused(ap)); + return -EINVAL; } static void free_aggr_kprobe(struct kprobe *p) @@ -1320,9 +1330,12 @@ static int register_aggr_kprobe(struct kprobe *orig_p, struct kprobe *p) goto out; } init_aggr_kprobe(ap, orig_p); - } else if (kprobe_unused(ap)) + } else if (kprobe_unused(ap)) { /* This probe is going to die. Rescue it */ - reuse_unused_kprobe(ap); + ret = reuse_unused_kprobe(ap); + if (ret) + goto out; + } if (kprobe_gone(ap)) { /* -- GitLab From 2a9dc99f9b03cb6d76b8f4d2f447a349371d10ae Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 27 Aug 2018 09:45:44 +0200 Subject: [PATCH 1323/2269] ACPI / LPSS: Add alternative ACPI HIDs for Cherry Trail DMA controllers [ Upstream commit 240714061c58e6b1abfb3322398a7634151c06cb ] Bay and Cherry Trail DSTDs represent a different set of devices depending on which OS the device think it is booting. One set of decices for Windows and another set of devices for Android which targets the Android-x86 Linux kernel fork (which e.g. used to have its own display driver instead of using the i915 driver). Which set of devices we are actually going to get is out of our control, this is controlled by the ACPI OSID variable, which gets either set through an EFI setup option, or sometimes is autodetected. So we need to support both. This commit adds support for the 80862286 and 808622C0 ACPI HIDs which we get for the first resp. second DMA controller on Cherry Trail devices when OSID is set to Android. Signed-off-by: Hans de Goede Reviewed-by: Andy Shevchenko Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/acpi_lpss.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/acpi/acpi_lpss.c b/drivers/acpi/acpi_lpss.c index 75c3cb377b98b..a56d3f352765b 100644 --- a/drivers/acpi/acpi_lpss.c +++ b/drivers/acpi/acpi_lpss.c @@ -326,9 +326,11 @@ static const struct acpi_device_id acpi_lpss_device_ids[] = { { "INT33FC", }, /* Braswell LPSS devices */ + { "80862286", LPSS_ADDR(lpss_dma_desc) }, { "80862288", LPSS_ADDR(bsw_pwm_dev_desc) }, { "8086228A", LPSS_ADDR(bsw_uart_dev_desc) }, { "8086228E", LPSS_ADDR(bsw_spi_dev_desc) }, + { "808622C0", LPSS_ADDR(lpss_dma_desc) }, { "808622C1", LPSS_ADDR(bsw_i2c_dev_desc) }, /* Broadwell LPSS devices */ -- GitLab From 6b196dd5bcd92ea5217278c2d1a4bc64ddbb3036 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Thu, 30 Aug 2018 17:58:52 -0700 Subject: [PATCH 1324/2269] pinctrl: qcom: spmi-mpp: Fix drive strength setting [ Upstream commit 89c68b102f13f123aaef22b292526d6b92501334 ] It looks like we parse the drive strength setting here, but never actually write it into the hardware to update it. Parse the setting and then write it at the end of the pinconf setting function so that it actually sticks in the hardware. Fixes: 0e948042c420 ("pinctrl: qcom: spmi-mpp: Implement support for sink mode") Cc: Doug Anderson Signed-off-by: Stephen Boyd Reviewed-by: Bjorn Andersson Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/qcom/pinctrl-spmi-mpp.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/pinctrl/qcom/pinctrl-spmi-mpp.c b/drivers/pinctrl/qcom/pinctrl-spmi-mpp.c index 418bc40df7ca0..7577f133d326d 100644 --- a/drivers/pinctrl/qcom/pinctrl-spmi-mpp.c +++ b/drivers/pinctrl/qcom/pinctrl-spmi-mpp.c @@ -457,7 +457,7 @@ static int pmic_mpp_config_set(struct pinctrl_dev *pctldev, unsigned int pin, pad->dtest = arg; break; case PIN_CONFIG_DRIVE_STRENGTH: - arg = pad->drive_strength; + pad->drive_strength = arg; break; case PMIC_MPP_CONF_AMUX_ROUTE: if (arg >= PMIC_MPP_AMUX_ROUTE_ABUS4) @@ -504,6 +504,10 @@ static int pmic_mpp_config_set(struct pinctrl_dev *pctldev, unsigned int pin, if (ret < 0) return ret; + ret = pmic_mpp_write(state, pad, PMIC_MPP_REG_SINK_CTL, pad->drive_strength); + if (ret < 0) + return ret; + val = pad->is_enabled << PMIC_MPP_REG_MASTER_EN_SHIFT; return pmic_mpp_write(state, pad, PMIC_MPP_REG_EN_CTL, val); -- GitLab From 49d44c0a0b2d03fdb60fd217d6234e1987cb3040 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Thu, 30 Aug 2018 08:23:39 -0700 Subject: [PATCH 1325/2269] pinctrl: spmi-mpp: Fix pmic_mpp_config_get() to be compliant [ Upstream commit 0d5b476f8f57fcb06c45fe27681ac47254f63fd2 ] If you look at "pinconf-groups" in debugfs for ssbi-mpp you'll notice it looks like nonsense. The problem is fairly well described in commit 1cf86bc21257 ("pinctrl: qcom: spmi-gpio: Fix pmic_gpio_config_get() to be compliant") and commit 05e0c828955c ("pinctrl: msm: Fix msm_config_group_get() to be compliant"), but it was pointed out that ssbi-mpp has the same problem. Let's fix it there too. NOTE: in case it's helpful to someone reading this, the way to tell whether to do the -EINVAL or not is to look at the PCONFDUMP for a given attribute. If the last element (has_arg) is false then you need to do the -EINVAL trick. ALSO NOTE: it seems unlikely that the values returned when we try to get PIN_CONFIG_BIAS_PULL_UP will actually be printed since "has_arg" is false for that one, but I guess it's still fine to return different values so I kept doing that. It seems like another driver (ssbi-gpio) uses a custom attribute (PM8XXX_QCOM_PULL_UP_STRENGTH) for something similar so maybe a future change should do that here too. Fixes: cfb24f6ebd38 ("pinctrl: Qualcomm SPMI PMIC MPP pin controller driver") Signed-off-by: Douglas Anderson Reviewed-by: Stephen Boyd Reviewed-by: Bjorn Andersson Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/qcom/pinctrl-spmi-mpp.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/pinctrl/qcom/pinctrl-spmi-mpp.c b/drivers/pinctrl/qcom/pinctrl-spmi-mpp.c index 7577f133d326d..ac251c62bc666 100644 --- a/drivers/pinctrl/qcom/pinctrl-spmi-mpp.c +++ b/drivers/pinctrl/qcom/pinctrl-spmi-mpp.c @@ -345,13 +345,12 @@ static int pmic_mpp_config_get(struct pinctrl_dev *pctldev, switch (param) { case PIN_CONFIG_BIAS_DISABLE: - arg = pad->pullup == PMIC_MPP_PULL_UP_OPEN; + if (pad->pullup != PMIC_MPP_PULL_UP_OPEN) + return -EINVAL; + arg = 1; break; case PIN_CONFIG_BIAS_PULL_UP: switch (pad->pullup) { - case PMIC_MPP_PULL_UP_OPEN: - arg = 0; - break; case PMIC_MPP_PULL_UP_0P6KOHM: arg = 600; break; @@ -366,13 +365,17 @@ static int pmic_mpp_config_get(struct pinctrl_dev *pctldev, } break; case PIN_CONFIG_BIAS_HIGH_IMPEDANCE: - arg = !pad->is_enabled; + if (pad->is_enabled) + return -EINVAL; + arg = 1; break; case PIN_CONFIG_POWER_SOURCE: arg = pad->power_source; break; case PIN_CONFIG_INPUT_ENABLE: - arg = pad->input_enabled; + if (!pad->input_enabled) + return -EINVAL; + arg = 1; break; case PIN_CONFIG_OUTPUT: arg = pad->out_value; @@ -384,7 +387,9 @@ static int pmic_mpp_config_get(struct pinctrl_dev *pctldev, arg = pad->amux_input; break; case PMIC_MPP_CONF_PAIRED: - arg = pad->paired; + if (!pad->paired) + return -EINVAL; + arg = 1; break; case PIN_CONFIG_DRIVE_STRENGTH: arg = pad->drive_strength; -- GitLab From b7111ec0bdf1e3a833965ab6df7258066f95c7ec Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Thu, 30 Aug 2018 08:23:38 -0700 Subject: [PATCH 1326/2269] pinctrl: ssbi-gpio: Fix pm8xxx_pin_config_get() to be compliant [ Upstream commit b432414b996d32a1bd9afe2bd595bd5729c1477f ] If you look at "pinconf-groups" in debugfs for ssbi-gpio you'll notice it looks like nonsense. The problem is fairly well described in commit 1cf86bc21257 ("pinctrl: qcom: spmi-gpio: Fix pmic_gpio_config_get() to be compliant") and commit 05e0c828955c ("pinctrl: msm: Fix msm_config_group_get() to be compliant"), but it was pointed out that ssbi-gpio has the same problem. Let's fix it there too. Fixes: b4c45fe974bc ("pinctrl: qcom: ssbi: Family A gpio & mpp drivers") Signed-off-by: Douglas Anderson Reviewed-by: Stephen Boyd Reviewed-by: Bjorn Andersson Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c | 28 ++++++++++++++++++------ 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c b/drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c index f53e32a9d8fce..0e153bae322ee 100644 --- a/drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c +++ b/drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c @@ -260,22 +260,32 @@ static int pm8xxx_pin_config_get(struct pinctrl_dev *pctldev, switch (param) { case PIN_CONFIG_BIAS_DISABLE: - arg = pin->bias == PM8XXX_GPIO_BIAS_NP; + if (pin->bias != PM8XXX_GPIO_BIAS_NP) + return -EINVAL; + arg = 1; break; case PIN_CONFIG_BIAS_PULL_DOWN: - arg = pin->bias == PM8XXX_GPIO_BIAS_PD; + if (pin->bias != PM8XXX_GPIO_BIAS_PD) + return -EINVAL; + arg = 1; break; case PIN_CONFIG_BIAS_PULL_UP: - arg = pin->bias <= PM8XXX_GPIO_BIAS_PU_1P5_30; + if (pin->bias > PM8XXX_GPIO_BIAS_PU_1P5_30) + return -EINVAL; + arg = 1; break; case PM8XXX_QCOM_PULL_UP_STRENGTH: arg = pin->pull_up_strength; break; case PIN_CONFIG_BIAS_HIGH_IMPEDANCE: - arg = pin->disable; + if (!pin->disable) + return -EINVAL; + arg = 1; break; case PIN_CONFIG_INPUT_ENABLE: - arg = pin->mode == PM8XXX_GPIO_MODE_INPUT; + if (pin->mode != PM8XXX_GPIO_MODE_INPUT) + return -EINVAL; + arg = 1; break; case PIN_CONFIG_OUTPUT: if (pin->mode & PM8XXX_GPIO_MODE_OUTPUT) @@ -290,10 +300,14 @@ static int pm8xxx_pin_config_get(struct pinctrl_dev *pctldev, arg = pin->output_strength; break; case PIN_CONFIG_DRIVE_PUSH_PULL: - arg = !pin->open_drain; + if (pin->open_drain) + return -EINVAL; + arg = 1; break; case PIN_CONFIG_DRIVE_OPEN_DRAIN: - arg = pin->open_drain; + if (!pin->open_drain) + return -EINVAL; + arg = 1; break; default: return -EINVAL; -- GitLab From 189f98295600fc5bfc33cf0aa05392cf7f074650 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Sun, 2 Sep 2018 18:13:14 +0200 Subject: [PATCH 1327/2269] net: dsa: mv88e6xxx: Fix writing to a PHY page. [ Upstream commit c309b158090d788e96ee597444965cb79b040484 ] After changing to the needed page, actually write the value to the register! Fixes: 09cb7dfd3f14 ("net: dsa: mv88e6xxx: describe PHY page and SerDes") Signed-off-by: Andrew Lunn Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/dsa/mv88e6xxx/phy.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/dsa/mv88e6xxx/phy.c b/drivers/net/dsa/mv88e6xxx/phy.c index 436668bd50dc8..e53ce9610feee 100644 --- a/drivers/net/dsa/mv88e6xxx/phy.c +++ b/drivers/net/dsa/mv88e6xxx/phy.c @@ -110,6 +110,9 @@ int mv88e6xxx_phy_page_write(struct mv88e6xxx_chip *chip, int phy, err = mv88e6xxx_phy_page_get(chip, phy, page); if (!err) { err = mv88e6xxx_phy_write(chip, phy, MV88E6XXX_PHY_PAGE, page); + if (!err) + err = mv88e6xxx_phy_write(chip, phy, reg, val); + mv88e6xxx_phy_page_put(chip, phy); } -- GitLab From 8692ff190ca02538538a2d020294fb2feccfc507 Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Tue, 1 May 2018 14:54:22 +0300 Subject: [PATCH 1328/2269] iwlwifi: mvm: fix BAR seq ctrl reporting [ Upstream commit 941ab4eb66c10bc5c7234e83a7a858b2806ed151 ] There is a bug in FW where the sequence control may be incorrect, and the driver overrides it with the value of the ieee80211 header. However, in BAR there is no sequence control in the header, which result with arbitrary sequence. This access to an unknown location is bad and it makes the logs very confusing - so fix it. Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/intel/iwlwifi/mvm/tx.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c index 6c014c2739227..62a6e293cf121 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c @@ -1345,6 +1345,7 @@ static void iwl_mvm_rx_tx_cmd_single(struct iwl_mvm *mvm, while (!skb_queue_empty(&skbs)) { struct sk_buff *skb = __skb_dequeue(&skbs); struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + struct ieee80211_hdr *hdr = (void *)skb->data; bool flushed = false; skb_freed++; @@ -1389,11 +1390,11 @@ static void iwl_mvm_rx_tx_cmd_single(struct iwl_mvm *mvm, info->flags |= IEEE80211_TX_STAT_AMPDU_NO_BACK; info->flags &= ~IEEE80211_TX_CTL_AMPDU; - /* W/A FW bug: seq_ctl is wrong when the status isn't success */ - if (status != TX_STATUS_SUCCESS) { - struct ieee80211_hdr *hdr = (void *)skb->data; + /* W/A FW bug: seq_ctl is wrong upon failure / BAR frame */ + if (ieee80211_is_back_req(hdr->frame_control)) + seq_ctl = 0; + else if (status != TX_STATUS_SUCCESS) seq_ctl = le16_to_cpu(hdr->seq_ctrl); - } if (unlikely(!seq_ctl)) { struct ieee80211_hdr *hdr = (void *)skb->data; -- GitLab From ca18ed4416bb83d91e6759ae6d9cfd84e2b11442 Mon Sep 17 00:00:00 2001 From: Sebastian Basierski Date: Thu, 9 Aug 2018 11:45:40 +0200 Subject: [PATCH 1329/2269] ixgbevf: VF2VF TCP RSS [ Upstream commit 7fb94bd58dd6650a0158e68d414e185077d8b57a ] While VF2VF with RSS communication, RSS Type were wrongly recognized and RSS hash was not calculated as it should be. Packets was distributed on various queues by accident. This commit fixes that behaviour and causes proper RSS Type recognition. Signed-off-by: Sebastian Basierski Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 90be4385bf368..e238f6e85ab66 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -3427,6 +3427,10 @@ static void ixgbevf_tx_csum(struct ixgbevf_ring *tx_ring, skb_checksum_help(skb); goto no_csum; } + + if (first->protocol == htons(ETH_P_IP)) + type_tucmd |= IXGBE_ADVTXD_TUCMD_IPV4; + /* update TX checksum flag */ first->tx_flags |= IXGBE_TX_FLAGS_CSUM; vlan_macip_lens = skb_checksum_start_offset(skb) - -- GitLab From ef5d27e1ed9c97c0670d0aa1f433f68e29de5bfb Mon Sep 17 00:00:00 2001 From: Martin Willi Date: Wed, 22 Aug 2018 09:39:52 +0200 Subject: [PATCH 1330/2269] ath10k: schedule hardware restart if WMI command times out [ Upstream commit a9911937e7d332761e8c4fcbc7ba0426bdc3956f ] When running in AP mode, ath10k sometimes suffers from TX credit starvation. The issue is hard to reproduce and shows up once in a few days, but has been repeatedly seen with QCA9882 and a large range of firmwares, including 10.2.4.70.67. Once the module is in this state, TX credits are never replenished, which results in "SWBA overrun" errors, as no beacons can be sent. Even worse, WMI commands run in a timeout while holding the conf mutex for three seconds each, making any further operations slow and the whole system unresponsive. The firmware/driver never recovers from that state automatically, and triggering TX flush or warm restarts won't work over WMI. So issue a hardware restart if a WMI command times out due to missing TX credits. This implies a connectivity outage of about 1.4s in AP mode, but brings back the interface and the whole system to a usable state. WMI command timeouts have not been seen in absent of this specific issue, so taking such drastic actions seems legitimate. Signed-off-by: Martin Willi Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/ath10k/wmi.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/wireless/ath/ath10k/wmi.c b/drivers/net/wireless/ath/ath10k/wmi.c index 2ab5311659ea9..8cb47858eb004 100644 --- a/drivers/net/wireless/ath/ath10k/wmi.c +++ b/drivers/net/wireless/ath/ath10k/wmi.c @@ -1852,6 +1852,12 @@ int ath10k_wmi_cmd_send(struct ath10k *ar, struct sk_buff *skb, u32 cmd_id) if (ret) dev_kfree_skb_any(skb); + if (ret == -EAGAIN) { + ath10k_warn(ar, "wmi command %d timeout, restarting hardware\n", + cmd_id); + queue_work(ar->workqueue, &ar->restart_work); + } + return ret; } -- GitLab From 0a76f5c5b1014e7d59f87cb054f332ff8b1deea9 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 12 Oct 2018 09:20:17 +0200 Subject: [PATCH 1331/2269] thermal: da9062/61: Prevent hardware access during system suspend [ Upstream commit 760eea43f8c6d48684f1f34b8a02fddc1456e849 ] The workqueue used for monitoring the hardware may run while the device is already suspended. Fix this by using the freezable system workqueue instead, cfr. commit 51e20d0e3a60cf46 ("thermal: Prevent polling from happening during system suspend"). Fixes: 608567aac3206ae8 ("thermal: da9062/61: Thermal junction temperature monitoring driver") Signed-off-by: Geert Uytterhoeven Acked-by: Steve Twiss Signed-off-by: Eduardo Valentin Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/thermal/da9062-thermal.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/thermal/da9062-thermal.c b/drivers/thermal/da9062-thermal.c index dd8dd947b7f07..01b0cb9944577 100644 --- a/drivers/thermal/da9062-thermal.c +++ b/drivers/thermal/da9062-thermal.c @@ -106,7 +106,7 @@ static void da9062_thermal_poll_on(struct work_struct *work) THERMAL_EVENT_UNSPECIFIED); delay = msecs_to_jiffies(thermal->zone->passive_delay); - schedule_delayed_work(&thermal->work, delay); + queue_delayed_work(system_freezable_wq, &thermal->work, delay); return; } @@ -125,7 +125,7 @@ static irqreturn_t da9062_thermal_irq_handler(int irq, void *data) struct da9062_thermal *thermal = data; disable_irq_nosync(thermal->irq); - schedule_delayed_work(&thermal->work, 0); + queue_delayed_work(system_freezable_wq, &thermal->work, 0); return IRQ_HANDLED; } -- GitLab From 09d43edcdaef8961f01ae499b836e3cb39e47f83 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Thu, 18 Oct 2018 10:56:17 +0200 Subject: [PATCH 1332/2269] cgroup, netclassid: add a preemption point to write_classid [ Upstream commit a90e90b7d55e789c71d85b946ffb5c1ab2f137ca ] We have seen a customer complaining about soft lockups on !PREEMPT kernel config with 4.4 based kernel [1072141.435366] NMI watchdog: BUG: soft lockup - CPU#21 stuck for 22s! [systemd:1] [1072141.444090] Modules linked in: mpt3sas raid_class binfmt_misc af_packet 8021q garp mrp stp llc xfs libcrc32c bonding iscsi_ibft iscsi_boot_sysfs msr ext4 crc16 jbd2 mbcache cdc_ether usbnet mii joydev hid_generic usbhid intel_rapl x86_pkg_temp_thermal intel_powerclamp coretemp crct10dif_pclmul crc32_pclmul ghash_clmulni_intel ipmi_ssif mgag200 i2c_algo_bit ttm ipmi_devintf drbg ixgbe drm_kms_helper vxlan ansi_cprng ip6_udp_tunnel drm aesni_intel udp_tunnel aes_x86_64 iTCO_wdt syscopyarea ptp xhci_pci lrw iTCO_vendor_support pps_core gf128mul ehci_pci glue_helper sysfillrect mdio pcspkr sb_edac ablk_helper cryptd ehci_hcd sysimgblt xhci_hcd fb_sys_fops edac_core mei_me lpc_ich ses usbcore enclosure dca mfd_core ipmi_si mei i2c_i801 scsi_transport_sas usb_common ipmi_msghandler shpchp fjes wmi processor button acpi_pad btrfs xor raid6_pq sd_mod crc32c_intel megaraid_sas sg dm_multipath dm_mod scsi_dh_rdac scsi_dh_emc scsi_dh_alua scsi_mod md_mod autofs4 [1072141.444146] Supported: Yes [1072141.444149] CPU: 21 PID: 1 Comm: systemd Not tainted 4.4.121-92.80-default #1 [1072141.444150] Hardware name: LENOVO Lenovo System x3650 M5 -[5462P4U]- -[5462P4U]-/01GR451, BIOS -[TCE136H-2.70]- 06/13/2018 [1072141.444151] task: ffff880191bd0040 ti: ffff880191bd4000 task.ti: ffff880191bd4000 [1072141.444153] RIP: 0010:[] [] update_classid_sock+0x29/0x40 [1072141.444157] RSP: 0018:ffff880191bd7d58 EFLAGS: 00000286 [1072141.444158] RAX: ffff883b177cb7c0 RBX: 0000000000000000 RCX: 0000000000000000 [1072141.444159] RDX: 00000000000009c7 RSI: ffff880191bd7d5c RDI: ffff8822e29bb200 [1072141.444160] RBP: ffff883a72230980 R08: 0000000000000101 R09: 0000000000000000 [1072141.444161] R10: 0000000000000008 R11: f000000000000000 R12: ffffffff815229d0 [1072141.444162] R13: 0000000000000000 R14: ffff881fd0a47ac0 R15: ffff880191bd7f28 [1072141.444163] FS: 00007f3e2f1eb8c0(0000) GS:ffff882000340000(0000) knlGS:0000000000000000 [1072141.444164] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1072141.444165] CR2: 00007f3e2f200000 CR3: 0000001ffea4e000 CR4: 00000000001606f0 [1072141.444166] Stack: [1072141.444166] ffffffa800000246 00000000000009c7 ffffffff8121d583 ffff8818312a05c0 [1072141.444168] ffff8818312a1100 ffff880197c3b280 ffff881861422858 ffffffffffffffea [1072141.444170] ffffffff81522b1c ffffffff81d0ca20 ffff8817fa17b950 ffff883fdd8121e0 [1072141.444171] Call Trace: [1072141.444179] [] iterate_fd+0x53/0x80 [1072141.444182] [] write_classid+0x4c/0x80 [1072141.444187] [] cgroup_file_write+0x9b/0x100 [1072141.444193] [] kernfs_fop_write+0x11b/0x150 [1072141.444198] [] __vfs_write+0x26/0x100 [1072141.444201] [] vfs_write+0x9d/0x190 [1072141.444203] [] SyS_write+0x42/0xa0 [1072141.444207] [] entry_SYSCALL_64_fastpath+0x1e/0xca [1072141.445490] DWARF2 unwinder stuck at entry_SYSCALL_64_fastpath+0x1e/0xca If a cgroup has many tasks with many open file descriptors then we would end up in a large loop without any rescheduling point throught the operation. Add cond_resched once per task. Signed-off-by: Michal Hocko Signed-off-by: Tejun Heo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/core/netclassid_cgroup.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c index 5e4f04004a49a..7bf8335986152 100644 --- a/net/core/netclassid_cgroup.c +++ b/net/core/netclassid_cgroup.c @@ -106,6 +106,7 @@ static int write_classid(struct cgroup_subsys_state *css, struct cftype *cft, iterate_fd(p->files, 0, update_classid_sock, (void *)(unsigned long)cs->classid); task_unlock(p); + cond_resched(); } css_task_iter_end(&it); -- GitLab From 94dc21c6a549aa67c8403142496c1531b91ded33 Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Tue, 16 Oct 2018 16:31:25 +1100 Subject: [PATCH 1333/2269] scsi: esp_scsi: Track residual for PIO transfers [ Upstream commit fd47d919d0c336e7c22862b51ee94927ffea227a ] If a target disconnects during a PIO data transfer the command may fail when the target reconnects: scsi host1: DMA length is zero! scsi host1: cur adr[04380000] len[00000000] The scsi bus is then reset. This happens because the residual reached zero before the transfer was completed. The usual residual calculation relies on the Transfer Count registers. That works for DMA transfers but not for PIO transfers. Fix the problem by storing the PIO transfer residual and using that to correctly calculate bytes_sent. Fixes: 6fe07aaffbf0 ("[SCSI] m68k: new mac_esp scsi driver") Tested-by: Stan Johnson Signed-off-by: Finn Thain Tested-by: Michael Schmitz Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/esp_scsi.c | 1 + drivers/scsi/esp_scsi.h | 2 ++ drivers/scsi/mac_esp.c | 2 ++ 3 files changed, 5 insertions(+) diff --git a/drivers/scsi/esp_scsi.c b/drivers/scsi/esp_scsi.c index c3fc34b9964df..9e5d3f7d29ae7 100644 --- a/drivers/scsi/esp_scsi.c +++ b/drivers/scsi/esp_scsi.c @@ -1338,6 +1338,7 @@ static int esp_data_bytes_sent(struct esp *esp, struct esp_cmd_entry *ent, bytes_sent = esp->data_dma_len; bytes_sent -= ecount; + bytes_sent -= esp->send_cmd_residual; /* * The am53c974 has a DMA 'pecularity'. The doc states: diff --git a/drivers/scsi/esp_scsi.h b/drivers/scsi/esp_scsi.h index 8163dca2071bf..a77772777a309 100644 --- a/drivers/scsi/esp_scsi.h +++ b/drivers/scsi/esp_scsi.h @@ -540,6 +540,8 @@ struct esp { void *dma; int dmarev; + + u32 send_cmd_residual; }; /* A front-end driver for the ESP chip should do the following in diff --git a/drivers/scsi/mac_esp.c b/drivers/scsi/mac_esp.c index eb551f3cc471d..71879f2207e0e 100644 --- a/drivers/scsi/mac_esp.c +++ b/drivers/scsi/mac_esp.c @@ -427,6 +427,8 @@ static void mac_esp_send_pio_cmd(struct esp *esp, u32 addr, u32 esp_count, scsi_esp_cmd(esp, ESP_CMD_TI); } } + + esp->send_cmd_residual = esp_count; } static int mac_esp_irq_pending(struct esp *esp) -- GitLab From be5658a1dce1664c9b1b414a2ce1a14eceea590a Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 6 Sep 2018 10:19:24 +0100 Subject: [PATCH 1334/2269] UAPI: ndctl: Fix g++-unsupported initialisation in headers [ Upstream commit 9607871f37dc3e717639694b8d0dc738f2a68efc ] The following code in the linux/ndctl header file: static inline const char *nvdimm_bus_cmd_name(unsigned cmd) { static const char * const names[] = { [ND_CMD_ARS_CAP] = "ars_cap", [ND_CMD_ARS_START] = "ars_start", [ND_CMD_ARS_STATUS] = "ars_status", [ND_CMD_CLEAR_ERROR] = "clear_error", [ND_CMD_CALL] = "cmd_call", }; if (cmd < ARRAY_SIZE(names) && names[cmd]) return names[cmd]; return "unknown"; } is broken in a number of ways: (1) ARRAY_SIZE() is not generally defined. (2) g++ does not support "non-trivial" array initialisers fully yet. (3) Every file that calls this function will acquire a copy of names[]. The same goes for nvdimm_cmd_name(). Fix all three by converting to a switch statement where each case returns a string. That way if cmd is a constant, the compiler can trivially reduce it and, if not, the compiler can use a shared lookup table if it thinks that is more efficient. A better way would be to remove these functions and their arrays from the header entirely. Signed-off-by: David Howells Signed-off-by: Dan Williams Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/ndctl.h | 48 +++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 27 deletions(-) diff --git a/include/uapi/linux/ndctl.h b/include/uapi/linux/ndctl.h index 3f03567631cb0..145f242c7c909 100644 --- a/include/uapi/linux/ndctl.h +++ b/include/uapi/linux/ndctl.h @@ -176,37 +176,31 @@ enum { static inline const char *nvdimm_bus_cmd_name(unsigned cmd) { - static const char * const names[] = { - [ND_CMD_ARS_CAP] = "ars_cap", - [ND_CMD_ARS_START] = "ars_start", - [ND_CMD_ARS_STATUS] = "ars_status", - [ND_CMD_CLEAR_ERROR] = "clear_error", - [ND_CMD_CALL] = "cmd_call", - }; - - if (cmd < ARRAY_SIZE(names) && names[cmd]) - return names[cmd]; - return "unknown"; + switch (cmd) { + case ND_CMD_ARS_CAP: return "ars_cap"; + case ND_CMD_ARS_START: return "ars_start"; + case ND_CMD_ARS_STATUS: return "ars_status"; + case ND_CMD_CLEAR_ERROR: return "clear_error"; + case ND_CMD_CALL: return "cmd_call"; + default: return "unknown"; + } } static inline const char *nvdimm_cmd_name(unsigned cmd) { - static const char * const names[] = { - [ND_CMD_SMART] = "smart", - [ND_CMD_SMART_THRESHOLD] = "smart_thresh", - [ND_CMD_DIMM_FLAGS] = "flags", - [ND_CMD_GET_CONFIG_SIZE] = "get_size", - [ND_CMD_GET_CONFIG_DATA] = "get_data", - [ND_CMD_SET_CONFIG_DATA] = "set_data", - [ND_CMD_VENDOR_EFFECT_LOG_SIZE] = "effect_size", - [ND_CMD_VENDOR_EFFECT_LOG] = "effect_log", - [ND_CMD_VENDOR] = "vendor", - [ND_CMD_CALL] = "cmd_call", - }; - - if (cmd < ARRAY_SIZE(names) && names[cmd]) - return names[cmd]; - return "unknown"; + switch (cmd) { + case ND_CMD_SMART: return "smart"; + case ND_CMD_SMART_THRESHOLD: return "smart_thresh"; + case ND_CMD_DIMM_FLAGS: return "flags"; + case ND_CMD_GET_CONFIG_SIZE: return "get_size"; + case ND_CMD_GET_CONFIG_DATA: return "get_data"; + case ND_CMD_SET_CONFIG_DATA: return "set_data"; + case ND_CMD_VENDOR_EFFECT_LOG_SIZE: return "effect_size"; + case ND_CMD_VENDOR_EFFECT_LOG: return "effect_log"; + case ND_CMD_VENDOR: return "vendor"; + case ND_CMD_CALL: return "cmd_call"; + default: return "unknown"; + } } #define ND_IOCTL 'N' -- GitLab From 7d6c6f839e989f0488e0c4a5e2b789bc3cbd0078 Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Fri, 21 Sep 2018 10:36:17 -0700 Subject: [PATCH 1335/2269] KVM: nVMX: Clear reserved bits of #DB exit qualification [ Upstream commit cfb634fe3052aefc4e1360fa322018c9a0b49755 ] According to volume 3 of the SDM, bits 63:15 and 12:4 of the exit qualification field for debug exceptions are reserved (cleared to 0). However, the SDM is incorrect about bit 16 (corresponding to DR6.RTM). This bit should be set if a debug exception (#DB) or a breakpoint exception (#BP) occurred inside an RTM region while advanced debugging of RTM transactional regions was enabled. Note that this is the opposite of DR6.RTM, which "indicates (when clear) that a debug exception (#DB) or breakpoint exception (#BP) occurred inside an RTM region while advanced debugging of RTM transactional regions was enabled." There is still an issue with stale DR6 bits potentially being misreported for the current debug exception. DR6 should not have been modified before vectoring the #DB exception, and the "new DR6 bits" should be available somewhere, but it was and they aren't. Fixes: b96fb439774e1 ("KVM: nVMX: fixes to nested virt interrupt injection") Signed-off-by: Jim Mattson Reviewed-by: Sean Christopherson Signed-off-by: Paolo Bonzini Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/vmx.c | 7 +++++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 4015b88383ce9..367cdd263a5ce 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -174,6 +174,7 @@ enum { #define DR6_BD (1 << 13) #define DR6_BS (1 << 14) +#define DR6_BT (1 << 15) #define DR6_RTM (1 << 16) #define DR6_FIXED_1 0xfffe0ff0 #define DR6_INIT 0xffff0ff0 diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index fd46d890296c9..ec588cf4fe953 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2733,10 +2733,13 @@ static int nested_vmx_check_exception(struct kvm_vcpu *vcpu, unsigned long *exit } } else { if (vmcs12->exception_bitmap & (1u << nr)) { - if (nr == DB_VECTOR) + if (nr == DB_VECTOR) { *exit_qual = vcpu->arch.dr6; - else + *exit_qual &= ~(DR6_FIXED_1 | DR6_BT); + *exit_qual ^= DR6_RTM; + } else { *exit_qual = 0; + } return 1; } } -- GitLab From 4877db2eab42c7f451f15a1d47a7832b76cd6b98 Mon Sep 17 00:00:00 2001 From: Wenwen Wang Date: Sat, 6 Oct 2018 13:34:21 -0500 Subject: [PATCH 1336/2269] scsi: megaraid_sas: fix a missing-check bug [ Upstream commit 47db7873136a9c57c45390a53b57019cf73c8259 ] In megasas_mgmt_compat_ioctl_fw(), to handle the structure compat_megasas_iocpacket 'cioc', a user-space structure megasas_iocpacket 'ioc' is allocated before megasas_mgmt_ioctl_fw() is invoked to handle the packet. Since the two data structures have different fields, the data is copied from 'cioc' to 'ioc' field by field. In the copy process, 'sense_ptr' is prepared if the field 'sense_len' is not null, because it will be used in megasas_mgmt_ioctl_fw(). To prepare 'sense_ptr', the user-space data 'ioc->sense_off' and 'cioc->sense_off' are copied and saved to kernel-space variables 'local_sense_off' and 'user_sense_off' respectively. Given that 'ioc->sense_off' is also copied from 'cioc->sense_off', 'local_sense_off' and 'user_sense_off' should have the same value. However, 'cioc' is in the user space and a malicious user can race to change the value of 'cioc->sense_off' after it is copied to 'ioc->sense_off' but before it is copied to 'user_sense_off'. By doing so, the attacker can inject different values into 'local_sense_off' and 'user_sense_off'. This can cause undefined behavior in the following execution, because the two variables are supposed to be same. This patch enforces a check on the two kernel variables 'local_sense_off' and 'user_sense_off' to make sure they are the same after the copy. In case they are not, an error code EINVAL will be returned. Signed-off-by: Wenwen Wang Acked-by: Sumit Saxena Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/megaraid/megaraid_sas_base.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index d55c365be2388..d0abee3e6ed9e 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -7361,6 +7361,9 @@ static int megasas_mgmt_compat_ioctl_fw(struct file *file, unsigned long arg) get_user(user_sense_off, &cioc->sense_off)) return -EFAULT; + if (local_sense_off != user_sense_off) + return -EINVAL; + if (local_sense_len) { void __user **sense_ioc_ptr = (void __user **)((u8 *)((unsigned long)&ioc->frame.raw) + local_sense_off); -- GitLab From 670ebdf0c41d366ca024f4d1016248e166f8b046 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Sun, 7 Oct 2018 12:12:40 +0300 Subject: [PATCH 1337/2269] RDMA/core: Do not expose unsupported counters MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 0f6ef65d1c6ec8deb5d0f11f86631ec4cfe8f22e ] If the provider driver (such as rdma_rxe) doesn't support pma counters, avoid exposing its directory similar to optional hw_counters directory. If core fails to read the PMA counter, return an error so that user can retry later if needed. Fixes: 35c4cbb17811 ("IB/core: Create get_perf_mad function in sysfs.c") Reported-by: Holger Hoffstätte Tested-by: Holger Hoffstätte Signed-off-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/core/sysfs.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index 0a1e96c25ca36..f75f99476ad07 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -489,7 +489,7 @@ static ssize_t show_pma_counter(struct ib_port *p, struct port_attribute *attr, ret = get_perf_mad(p->ibdev, p->port_num, tab_attr->attr_id, &data, 40 + offset / 8, sizeof(data)); if (ret < 0) - return sprintf(buf, "N/A (no PMA)\n"); + return ret; switch (width) { case 4: @@ -1012,10 +1012,12 @@ static int add_port(struct ib_device *device, int port_num, goto err_put; } - p->pma_table = get_counter_table(device, port_num); - ret = sysfs_create_group(&p->kobj, p->pma_table); - if (ret) - goto err_put_gid_attrs; + if (device->process_mad) { + p->pma_table = get_counter_table(device, port_num); + ret = sysfs_create_group(&p->kobj, p->pma_table); + if (ret) + goto err_put_gid_attrs; + } p->gid_group.name = "gids"; p->gid_group.attrs = alloc_group_attrs(show_port_gid, attr.gid_tbl_len); @@ -1128,7 +1130,8 @@ err_free_gid: p->gid_group.attrs = NULL; err_remove_pma: - sysfs_remove_group(&p->kobj, p->pma_table); + if (p->pma_table) + sysfs_remove_group(&p->kobj, p->pma_table); err_put_gid_attrs: kobject_put(&p->gid_attr_group->kobj); @@ -1240,7 +1243,9 @@ static void free_port_list_attributes(struct ib_device *device) kfree(port->hw_stats); free_hsag(&port->kobj, port->hw_stats_ag); } - sysfs_remove_group(p, port->pma_table); + + if (port->pma_table) + sysfs_remove_group(p, port->pma_table); sysfs_remove_group(p, &port->pkey_group); sysfs_remove_group(p, &port->gid_group); sysfs_remove_group(&port->gid_attr_group->kobj, -- GitLab From b020347269610f3ac05d27c3851e9aa8d1257141 Mon Sep 17 00:00:00 2001 From: Denis Drozdov Date: Thu, 11 Oct 2018 22:33:57 +0300 Subject: [PATCH 1338/2269] IB/ipoib: Clear IPCB before icmp_send [ Upstream commit 4d6e4d12da2c308f8f976d3955c45ee62539ac98 ] IPCB should be cleared before icmp_send, since it may contain data from previous layers and the data could be misinterpreted as ip header options, which later caused the ihl to be set to an invalid value and resulted in the following stack corruption: [ 1083.031512] ib0: packet len 57824 (> 2048) too long to send, dropping [ 1083.031843] ib0: packet len 37904 (> 2048) too long to send, dropping [ 1083.032004] ib0: packet len 4040 (> 2048) too long to send, dropping [ 1083.032253] ib0: packet len 63800 (> 2048) too long to send, dropping [ 1083.032481] ib0: packet len 23960 (> 2048) too long to send, dropping [ 1083.033149] ib0: packet len 63800 (> 2048) too long to send, dropping [ 1083.033439] ib0: packet len 63800 (> 2048) too long to send, dropping [ 1083.033700] ib0: packet len 63800 (> 2048) too long to send, dropping [ 1083.034124] ib0: packet len 63800 (> 2048) too long to send, dropping [ 1083.034387] ================================================================== [ 1083.034602] BUG: KASAN: stack-out-of-bounds in __ip_options_echo+0xf08/0x1310 [ 1083.034798] Write of size 4 at addr ffff880353457c5f by task kworker/u16:0/7 [ 1083.034990] [ 1083.035104] CPU: 7 PID: 7 Comm: kworker/u16:0 Tainted: G O 4.19.0-rc5+ #1 [ 1083.035316] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu2 04/01/2014 [ 1083.035573] Workqueue: ipoib_wq ipoib_cm_skb_reap [ib_ipoib] [ 1083.035750] Call Trace: [ 1083.035888] dump_stack+0x9a/0xeb [ 1083.036031] print_address_description+0xe3/0x2e0 [ 1083.036213] kasan_report+0x18a/0x2e0 [ 1083.036356] ? __ip_options_echo+0xf08/0x1310 [ 1083.036522] __ip_options_echo+0xf08/0x1310 [ 1083.036688] icmp_send+0x7b9/0x1cd0 [ 1083.036843] ? icmp_route_lookup.constprop.9+0x1070/0x1070 [ 1083.037018] ? netif_schedule_queue+0x5/0x200 [ 1083.037180] ? debug_show_all_locks+0x310/0x310 [ 1083.037341] ? rcu_dynticks_curr_cpu_in_eqs+0x85/0x120 [ 1083.037519] ? debug_locks_off+0x11/0x80 [ 1083.037673] ? debug_check_no_obj_freed+0x207/0x4c6 [ 1083.037841] ? check_flags.part.27+0x450/0x450 [ 1083.037995] ? debug_check_no_obj_freed+0xc3/0x4c6 [ 1083.038169] ? debug_locks_off+0x11/0x80 [ 1083.038318] ? skb_dequeue+0x10e/0x1a0 [ 1083.038476] ? ipoib_cm_skb_reap+0x2b5/0x650 [ib_ipoib] [ 1083.038642] ? netif_schedule_queue+0xa8/0x200 [ 1083.038820] ? ipoib_cm_skb_reap+0x544/0x650 [ib_ipoib] [ 1083.038996] ipoib_cm_skb_reap+0x544/0x650 [ib_ipoib] [ 1083.039174] process_one_work+0x912/0x1830 [ 1083.039336] ? wq_pool_ids_show+0x310/0x310 [ 1083.039491] ? lock_acquire+0x145/0x3a0 [ 1083.042312] worker_thread+0x87/0xbb0 [ 1083.045099] ? process_one_work+0x1830/0x1830 [ 1083.047865] kthread+0x322/0x3e0 [ 1083.050624] ? kthread_create_worker_on_cpu+0xc0/0xc0 [ 1083.053354] ret_from_fork+0x3a/0x50 For instance __ip_options_echo is failing to proceed with invalid srr and optlen passed from another layer via IPCB [ 762.139568] IPv4: __ip_options_echo rr=0 ts=0 srr=43 cipso=0 [ 762.139720] IPv4: ip_options_build: IPCB 00000000f3cd969e opt 000000002ccb3533 [ 762.139838] IPv4: __ip_options_echo in srr: optlen 197 soffset 84 [ 762.139852] IPv4: ip_options_build srr=0 is_frag=0 rr_needaddr=0 ts_needaddr=0 ts_needtime=0 rr=0 ts=0 [ 762.140269] ================================================================== [ 762.140713] IPv4: __ip_options_echo rr=0 ts=0 srr=0 cipso=0 [ 762.141078] BUG: KASAN: stack-out-of-bounds in __ip_options_echo+0x12ec/0x1680 [ 762.141087] Write of size 4 at addr ffff880353457c7f by task kworker/u16:0/7 Signed-off-by: Denis Drozdov Reviewed-by: Erez Shitrit Reviewed-by: Feras Daoud Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/ulp/ipoib/ipoib_cm.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 9939f32d01548..0e85b3445c078 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -1427,11 +1427,15 @@ static void ipoib_cm_skb_reap(struct work_struct *work) spin_unlock_irqrestore(&priv->lock, flags); netif_tx_unlock_bh(dev); - if (skb->protocol == htons(ETH_P_IP)) + if (skb->protocol == htons(ETH_P_IP)) { + memset(IPCB(skb), 0, sizeof(*IPCB(skb))); icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); + } #if IS_ENABLED(CONFIG_IPV6) - else if (skb->protocol == htons(ETH_P_IPV6)) + else if (skb->protocol == htons(ETH_P_IPV6)) { + memset(IP6CB(skb), 0, sizeof(*IP6CB(skb))); icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); + } #endif dev_kfree_skb_any(skb); -- GitLab From 88018bc071bcfd70b151ff236b6029cc3a9ff7c6 Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Mon, 8 Oct 2018 03:27:53 -0700 Subject: [PATCH 1339/2269] RDMA/bnxt_re: Fix recursive lock warning in debug kernel [ Upstream commit d455f29f6d76a5f94881ca1289aaa1e90617ff5d ] Fix possible recursive lock warning. Its a false warning as the locks are part of two differnt HW Queue data structure - cmdq and creq. Debug kernel is throwing the following warning and stack trace. [ 783.914967] ============================================ [ 783.914970] WARNING: possible recursive locking detected [ 783.914973] 4.19.0-rc2+ #33 Not tainted [ 783.914976] -------------------------------------------- [ 783.914979] swapper/2/0 is trying to acquire lock: [ 783.914982] 000000002aa3949d (&(&hwq->lock)->rlock){..-.}, at: bnxt_qplib_service_creq+0x232/0x350 [bnxt_re] [ 783.914999] but task is already holding lock: [ 783.915002] 00000000be73920d (&(&hwq->lock)->rlock){..-.}, at: bnxt_qplib_service_creq+0x2a/0x350 [bnxt_re] [ 783.915013] other info that might help us debug this: [ 783.915016] Possible unsafe locking scenario: [ 783.915019] CPU0 [ 783.915021] ---- [ 783.915034] lock(&(&hwq->lock)->rlock); [ 783.915035] lock(&(&hwq->lock)->rlock); [ 783.915037] *** DEADLOCK *** [ 783.915038] May be due to missing lock nesting notation [ 783.915039] 1 lock held by swapper/2/0: [ 783.915040] #0: 00000000be73920d (&(&hwq->lock)->rlock){..-.}, at: bnxt_qplib_service_creq+0x2a/0x350 [bnxt_re] [ 783.915044] stack backtrace: [ 783.915046] CPU: 2 PID: 0 Comm: swapper/2 Not tainted 4.19.0-rc2+ #33 [ 783.915047] Hardware name: Dell Inc. PowerEdge R730/0599V5, BIOS 1.0.4 08/28/2014 [ 783.915048] Call Trace: [ 783.915049] [ 783.915054] dump_stack+0x90/0xe3 [ 783.915058] __lock_acquire+0x106c/0x1080 [ 783.915061] ? sched_clock+0x5/0x10 [ 783.915063] lock_acquire+0xbd/0x1a0 [ 783.915065] ? bnxt_qplib_service_creq+0x232/0x350 [bnxt_re] [ 783.915069] _raw_spin_lock_irqsave+0x4a/0x90 [ 783.915071] ? bnxt_qplib_service_creq+0x232/0x350 [bnxt_re] [ 783.915073] bnxt_qplib_service_creq+0x232/0x350 [bnxt_re] [ 783.915078] tasklet_action_common.isra.17+0x197/0x1b0 [ 783.915081] __do_softirq+0xcb/0x3a6 [ 783.915084] irq_exit+0xe9/0x100 [ 783.915085] do_IRQ+0x6a/0x120 [ 783.915087] common_interrupt+0xf/0xf [ 783.915088] Use nested notation for the spin_lock to avoid this warning. Signed-off-by: Selvin Xavier Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/bnxt_re/qplib_rcfw.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c index 8d91733009a47..ad74988837c90 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c @@ -311,8 +311,17 @@ static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw, bnxt_qplib_release_cq_locks(qp, &flags); break; default: - /* Command Response */ - spin_lock_irqsave(&cmdq->lock, flags); + /* + * Command Response + * cmdq->lock needs to be acquired to synchronie + * the command send and completion reaping. This function + * is always called with creq->lock held. Using + * the nested variant of spin_lock. + * + */ + + spin_lock_irqsave_nested(&cmdq->lock, flags, + SINGLE_DEPTH_NESTING); cookie = le16_to_cpu(qp_event->cookie); mcookie = qp_event->cookie; blocked = cookie & RCFW_CMD_IS_BLOCKING; -- GitLab From 07335466dd5aa91f0e4b60b3254bd032f98d4fd8 Mon Sep 17 00:00:00 2001 From: "Tudor.Ambarus@microchip.com" Date: Mon, 15 Oct 2018 09:00:54 +0000 Subject: [PATCH 1340/2269] usb: host: ohci-at91: fix request of irq for optional gpio [ Upstream commit 325b9313ec3be56c8e2fe03f977fee19cec75820 ] atmel,oc-gpio is optional. Request its irq only when atmel,oc is set in device tree. devm_gpiod_get_index_optional returns NULL if -ENOENT. Check its return value for NULL before error, because it is more probable that atmel,oc is not set. This fixes the following errors on boards where atmel,oc is not set in device tree: [ 0.960000] at91_ohci 500000.ohci: failed to request gpio "overcurrent" IRQ [ 0.960000] at91_ohci 500000.ohci: failed to request gpio "overcurrent" IRQ [ 0.970000] at91_ohci 500000.ohci: failed to request gpio "overcurrent" IRQ Signed-off-by: Tudor Ambarus Acked-by: Nicolas Ferre Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/ohci-at91.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/host/ohci-at91.c b/drivers/usb/host/ohci-at91.c index 5302f988e7e67..e0ebd3d513c68 100644 --- a/drivers/usb/host/ohci-at91.c +++ b/drivers/usb/host/ohci-at91.c @@ -550,6 +550,8 @@ static int ohci_hcd_at91_drv_probe(struct platform_device *pdev) pdata->overcurrent_pin[i] = devm_gpiod_get_index_optional(&pdev->dev, "atmel,oc", i, GPIOD_IN); + if (!pdata->overcurrent_pin[i]) + continue; if (IS_ERR(pdata->overcurrent_pin[i])) { err = PTR_ERR(pdata->overcurrent_pin[i]); dev_err(&pdev->dev, "unable to claim gpio \"overcurrent\": %d\n", err); -- GitLab From 38ee346ac1fee2f3da3bfb6ae0e0c07fb85c9f77 Mon Sep 17 00:00:00 2001 From: Honghui Zhang Date: Mon, 15 Oct 2018 16:08:52 +0800 Subject: [PATCH 1341/2269] PCI: mediatek: Fix mtk_pcie_find_port() endpoint/port matching logic [ Upstream commit 074d6f32689ce05a084b6fa3db38445745bf11cc ] The Mediatek's host controller has two slots, each with its own control registers. The host driver needs to identify what slot is connected to what port in order to access the device's configuration space. Current code retrieving slot connected to a given endpoint device. Assuming each slot is connected to one endpoint device as below: host bridge bus 0 --> __________|_______ | | | | slot 0 slot 1 bus 1 -->| bus 2 --> | | | EP 0 EP 1 During PCI enumeration, system software will scan all the PCI devices on every bus starting from devfn 0. Using PCI_SLOT(devfn) for matching an endpoint to its slot is erroneous in that the devfn does not contain the hierarchical bus numbering in it. In order to match an endpoint with its slot (and related port), the PCI tree must be walked up to the root bus (where the root ports are situated) and then the PCI_SLOT(devfn) matching logic can be correctly applied for matching. This patch fixes the mtk_pcie_find_port() slot matching logic by adding appropriate PCI tree walking code to retrieve the slot/port a given endpoint is connected to. Signed-off-by: Honghui Zhang [lorenzo.pieralisi@arm.com: rewrote the commit log] Signed-off-by: Lorenzo Pieralisi Acked-by: Ryder Lee Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/pci/host/pcie-mediatek.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/pci/host/pcie-mediatek.c b/drivers/pci/host/pcie-mediatek.c index db93efdf1d635..c896bb9ef9685 100644 --- a/drivers/pci/host/pcie-mediatek.c +++ b/drivers/pci/host/pcie-mediatek.c @@ -333,6 +333,17 @@ static struct mtk_pcie_port *mtk_pcie_find_port(struct pci_bus *bus, { struct mtk_pcie *pcie = bus->sysdata; struct mtk_pcie_port *port; + struct pci_dev *dev = NULL; + + /* + * Walk the bus hierarchy to get the devfn value + * of the port in the root bus. + */ + while (bus && bus->number) { + dev = bus->self; + bus = dev->bus; + devfn = dev->devfn; + } list_for_each_entry(port, &pcie->ports, list) if (port->slot == PCI_SLOT(devfn)) -- GitLab From 6057caad215b163aca43c7e0346e2dbc052cc10a Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Thu, 30 Aug 2018 16:40:05 +0200 Subject: [PATCH 1342/2269] tpm: suppress transmit cmd error logs when TPM 1.2 is disabled/deactivated [ Upstream commit 0d6d0d62d9505a9816716aa484ebd0b04c795063 ] For TPM 1.2 chips the system setup utility allows to set the TPM device in one of the following states: * Active: Security chip is functional * Inactive: Security chip is visible, but is not functional * Disabled: Security chip is hidden and is not functional When choosing the "Inactive" state, the TPM 1.2 device is enumerated and registered, but sending TPM commands fail with either TPM_DEACTIVATED or TPM_DISABLED depending if the firmware deactivated or disabled the TPM. Since these TPM 1.2 error codes don't have special treatment, inactivating the TPM leads to a very noisy kernel log buffer that shows messages like the following: tpm_tis 00:05: 1.2 TPM (device-id 0x0, rev-id 78) tpm tpm0: A TPM error (6) occurred attempting to read a pcr value tpm tpm0: TPM is disabled/deactivated (0x6) tpm tpm0: A TPM error (6) occurred attempting get random tpm tpm0: A TPM error (6) occurred attempting to read a pcr value ima: No TPM chip found, activating TPM-bypass! (rc=6) tpm tpm0: A TPM error (6) occurred attempting get random tpm tpm0: A TPM error (6) occurred attempting get random tpm tpm0: A TPM error (6) occurred attempting get random tpm tpm0: A TPM error (6) occurred attempting get random Let's just suppress error log messages for the TPM_{DEACTIVATED,DISABLED} return codes, since this is expected when the TPM 1.2 is set to Inactive. In that case the kernel log is cleaner and less confusing for users, i.e: tpm_tis 00:05: 1.2 TPM (device-id 0x0, rev-id 78) tpm tpm0: TPM is disabled/deactivated (0x6) ima: No TPM chip found, activating TPM-bypass! (rc=6) Reported-by: Hans de Goede Signed-off-by: Javier Martinez Canillas Reviewed-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/char/tpm/tpm-interface.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/char/tpm/tpm-interface.c b/drivers/char/tpm/tpm-interface.c index 89d5915b1a3fc..6e93df272c20a 100644 --- a/drivers/char/tpm/tpm-interface.c +++ b/drivers/char/tpm/tpm-interface.c @@ -653,7 +653,8 @@ ssize_t tpm_transmit_cmd(struct tpm_chip *chip, struct tpm_space *space, return len; err = be32_to_cpu(header->return_code); - if (err != 0 && desc) + if (err != 0 && err != TPM_ERR_DISABLED && err != TPM_ERR_DEACTIVATED + && desc) dev_err(&chip->dev, "A TPM error (%d) occurred %s\n", err, desc); if (err) -- GitLab From 4f4850858f8584a45652ac83fe21feaf7b45c87c Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Sun, 23 Sep 2018 21:10:44 +0000 Subject: [PATCH 1343/2269] Drivers: hv: vmbus: Use cpumask_var_t for on-stack cpu mask MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 25355252607ca288f329ee033f387764883393f6 ] A cpumask structure on the stack can cause a warning with CONFIG_NR_CPUS=8192 (e.g. Ubuntu 16.04 and 18.04 use this): drivers/hv//channel_mgmt.c: In function ‘init_vp_index’: drivers/hv//channel_mgmt.c:702:1: warning: the frame size of 1032 bytes is larger than 1024 bytes [-Wframe-larger-than=] Nowadays it looks most distros enable CONFIG_CPUMASK_OFFSTACK=y, and hence we can work around the warning by using cpumask_var_t. Signed-off-by: Dexuan Cui Cc: K. Y. Srinivasan Cc: Haiyang Zhang Cc: Stephen Hemminger Cc: Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/hv/channel_mgmt.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c index 1700b4e7758d4..752c52f7353d8 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -599,16 +599,18 @@ static void init_vp_index(struct vmbus_channel *channel, u16 dev_type) bool perf_chn = vmbus_devs[dev_type].perf_device; struct vmbus_channel *primary = channel->primary_channel; int next_node; - struct cpumask available_mask; + cpumask_var_t available_mask; struct cpumask *alloced_mask; if ((vmbus_proto_version == VERSION_WS2008) || - (vmbus_proto_version == VERSION_WIN7) || (!perf_chn)) { + (vmbus_proto_version == VERSION_WIN7) || (!perf_chn) || + !alloc_cpumask_var(&available_mask, GFP_KERNEL)) { /* * Prior to win8, all channel interrupts are * delivered on cpu 0. * Also if the channel is not a performance critical * channel, bind it to cpu 0. + * In case alloc_cpumask_var() fails, bind it to cpu 0. */ channel->numa_node = 0; channel->target_cpu = 0; @@ -646,7 +648,7 @@ static void init_vp_index(struct vmbus_channel *channel, u16 dev_type) cpumask_clear(alloced_mask); } - cpumask_xor(&available_mask, alloced_mask, + cpumask_xor(available_mask, alloced_mask, cpumask_of_node(primary->numa_node)); cur_cpu = -1; @@ -664,10 +666,10 @@ static void init_vp_index(struct vmbus_channel *channel, u16 dev_type) } while (true) { - cur_cpu = cpumask_next(cur_cpu, &available_mask); + cur_cpu = cpumask_next(cur_cpu, available_mask); if (cur_cpu >= nr_cpu_ids) { cur_cpu = -1; - cpumask_copy(&available_mask, + cpumask_copy(available_mask, cpumask_of_node(primary->numa_node)); continue; } @@ -697,6 +699,8 @@ static void init_vp_index(struct vmbus_channel *channel, u16 dev_type) channel->target_cpu = cur_cpu; channel->target_vp = hv_cpu_number_to_vp_number(cur_cpu); + + free_cpumask_var(available_mask); } static void vmbus_wait_for_unload(void) -- GitLab From a9ecbed15e83db8b2ff245c3b6138225e6f9783b Mon Sep 17 00:00:00 2001 From: Jorgen Hansen Date: Fri, 21 Sep 2018 00:31:05 -0700 Subject: [PATCH 1344/2269] VMCI: Resource wildcard match fixed [ Upstream commit 11924ba5e671d6caef1516923e2bd8c72929a3fe ] When adding a VMCI resource, the check for an existing entry would ignore that the new entry could be a wildcard. This could result in multiple resource entries that would match a given handle. One disastrous outcome of this is that the refcounting used to ensure that delayed callbacks for VMCI datagrams have run before the datagram is destroyed can be wrong, since the refcount could be increased on the duplicate entry. This in turn leads to a use after free bug. This issue was discovered by Hangbin Liu using KASAN and syzkaller. Fixes: bc63dedb7d46 ("VMCI: resource object implementation") Reported-by: Hangbin Liu Reviewed-by: Adit Ranadive Reviewed-by: Vishnu Dasa Signed-off-by: Jorgen Hansen Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/misc/vmw_vmci/vmci_driver.c | 2 +- drivers/misc/vmw_vmci/vmci_resource.c | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/misc/vmw_vmci/vmci_driver.c b/drivers/misc/vmw_vmci/vmci_driver.c index d7eaf1eb11e7f..003bfba407588 100644 --- a/drivers/misc/vmw_vmci/vmci_driver.c +++ b/drivers/misc/vmw_vmci/vmci_driver.c @@ -113,5 +113,5 @@ module_exit(vmci_drv_exit); MODULE_AUTHOR("VMware, Inc."); MODULE_DESCRIPTION("VMware Virtual Machine Communication Interface."); -MODULE_VERSION("1.1.5.0-k"); +MODULE_VERSION("1.1.6.0-k"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/misc/vmw_vmci/vmci_resource.c b/drivers/misc/vmw_vmci/vmci_resource.c index 1ab6e8737a5f0..da1ee2e1ba991 100644 --- a/drivers/misc/vmw_vmci/vmci_resource.c +++ b/drivers/misc/vmw_vmci/vmci_resource.c @@ -57,7 +57,8 @@ static struct vmci_resource *vmci_resource_lookup(struct vmci_handle handle, if (r->type == type && rid == handle.resource && - (cid == handle.context || cid == VMCI_INVALID_ID)) { + (cid == handle.context || cid == VMCI_INVALID_ID || + handle.context == VMCI_INVALID_ID)) { resource = r; break; } -- GitLab From e105738583b02017b92ddfd2676f8bc408b88781 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Thu, 27 Sep 2018 16:54:13 -0500 Subject: [PATCH 1345/2269] PCI / ACPI: Enable wake automatically for power managed bridges [ Upstream commit 6299cf9ec3985cac70bede8a855b5087b81a6640 ] We enable power management automatically for bridges where pci_bridge_d3_possible() returns true. However, these bridges may have ACPI methods such as _DSW that need to be called before D3 entry. For example in Lenovo Thinkpad X1 Carbon 6th _DSW method is used to prepare D3cold for the PCIe root port hosting Thunderbolt chain. Because wake is not enabled _DSW method is never called and the port does not enter D3cold properly consuming more power than necessary. Users can work this around by writing "enabled" to "wakeup" sysfs file under the device in question but that is not something an ordinary user is expected to do. Since we already automatically enable power management for PCIe ports with ->bridge_d3 set extend that to enable wake for them as well, assuming the port has any ACPI wakeup related objects implemented in the namespace (adev->wakeup.flags.valid is true). This ensures the necessary ACPI methods get called at appropriate times and allows the root port in Thinkpad X1 Carbon 6th to go into D3cold. Signed-off-by: Mika Westerberg Signed-off-by: Bjorn Helgaas Reviewed-by: Rafael J. Wysocki Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/pci/pci-acpi.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c index 4708eb9df71b0..a3cedf8de8630 100644 --- a/drivers/pci/pci-acpi.c +++ b/drivers/pci/pci-acpi.c @@ -738,19 +738,33 @@ static void pci_acpi_setup(struct device *dev) return; device_set_wakeup_capable(dev, true); + /* + * For bridges that can do D3 we enable wake automatically (as + * we do for the power management itself in that case). The + * reason is that the bridge may have additional methods such as + * _DSW that need to be called. + */ + if (pci_dev->bridge_d3) + device_wakeup_enable(dev); + acpi_pci_wakeup(pci_dev, false); } static void pci_acpi_cleanup(struct device *dev) { struct acpi_device *adev = ACPI_COMPANION(dev); + struct pci_dev *pci_dev = to_pci_dev(dev); if (!adev) return; pci_acpi_remove_pm_notifier(adev); - if (adev->wakeup.flags.valid) + if (adev->wakeup.flags.valid) { + if (pci_dev->bridge_d3) + device_wakeup_disable(dev); + device_set_wakeup_capable(dev, false); + } } static bool pci_acpi_bus_match(struct device *dev) -- GitLab From 19216bf2820e8b8f98e76baf6964a86faed8027e Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Mon, 10 Sep 2018 22:12:49 +0200 Subject: [PATCH 1346/2269] usb: gadget: udc: atmel: handle at91sam9rl PMC [ Upstream commit bb80e4fa57eb75ebd64ae9be4155da6d12c1a997 ] The at91sam9rl PMC is not quite the same as the at91sam9g45 one and now has its own compatible string. Add support for that. Fixes: 217bace8e548 ("ARM: dts: fix PMC compatible") Acked-by: Cristian Birsan Signed-off-by: Alexandre Belloni Signed-off-by: Felipe Balbi Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/atmel_usba_udc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/gadget/udc/atmel_usba_udc.c b/drivers/usb/gadget/udc/atmel_usba_udc.c index a884c022df7a5..cb66f982c313b 100644 --- a/drivers/usb/gadget/udc/atmel_usba_udc.c +++ b/drivers/usb/gadget/udc/atmel_usba_udc.c @@ -2071,6 +2071,8 @@ static struct usba_ep * atmel_udc_of_init(struct platform_device *pdev, udc->errata = match->data; udc->pmc = syscon_regmap_lookup_by_compatible("atmel,at91sam9g45-pmc"); + if (IS_ERR(udc->pmc)) + udc->pmc = syscon_regmap_lookup_by_compatible("atmel,at91sam9rl-pmc"); if (IS_ERR(udc->pmc)) udc->pmc = syscon_regmap_lookup_by_compatible("atmel,at91sam9x5-pmc"); if (udc->errata && IS_ERR(udc->pmc)) -- GitLab From e75ca3babf06fd59af2e807415f0016d7793d3c9 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Tue, 2 Oct 2018 01:34:44 -0400 Subject: [PATCH 1347/2269] ext4: fix argument checking in EXT4_IOC_MOVE_EXT [ Upstream commit f18b2b83a727a3db208308057d2c7945f368e625 ] If the starting block number of either the source or destination file exceeds the EOF, EXT4_IOC_MOVE_EXT should return EINVAL. Also fixed the helper function mext_check_coverage() so that if the logical block is beyond EOF, make it return immediately, instead of looping until the block number wraps all the away around. This takes long enough that if there are multiple threads trying to do pound on an the same inode doing non-sensical things, it can end up triggering the kernel's soft lockup detector. Reported-by: syzbot+c61979f6f2cba5cb3c06@syzkaller.appspotmail.com Signed-off-by: Theodore Ts'o Cc: stable@kernel.org Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/ext4/move_extent.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index 9bb36909ec920..cd8d481e0c485 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c @@ -526,9 +526,13 @@ mext_check_arguments(struct inode *orig_inode, orig_inode->i_ino, donor_inode->i_ino); return -EINVAL; } - if (orig_eof < orig_start + *len - 1) + if (orig_eof <= orig_start) + *len = 0; + else if (orig_eof < orig_start + *len - 1) *len = orig_eof - orig_start; - if (donor_eof < donor_start + *len - 1) + if (donor_eof <= donor_start) + *len = 0; + else if (donor_eof < donor_start + *len - 1) *len = donor_eof - donor_start; if (!*len) { ext4_debug("ext4 move extent: len should not be 0 " -- GitLab From 442e2ba5724121abc355712715d027b8453e33c9 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Mon, 1 Oct 2018 18:36:36 -0700 Subject: [PATCH 1348/2269] MD: fix invalid stored role for a disk [ Upstream commit d595567dc4f0c1d90685ec1e2e296e2cad2643ac ] If we change the number of array's device after device is removed from array, then add the device back to array, we can see that device is added as active role instead of spare which we expected. Please see the below link for details: https://marc.info/?l=linux-raid&m=153736982015076&w=2 This is caused by that we prefer to use device's previous role which is recorded by saved_raid_disk, but we should respect the new number of conf->raid_disks since it could be changed after device is removed. Reported-by: Gioh Kim Tested-by: Gioh Kim Acked-by: Guoqing Jiang Signed-off-by: Shaohua Li Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/md/md.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/md/md.c b/drivers/md/md.c index 5599712d478e6..4c1a6abed6069 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -1766,6 +1766,10 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev) } else set_bit(In_sync, &rdev->flags); rdev->raid_disk = role; + if (role >= mddev->raid_disks) { + rdev->saved_raid_disk = -1; + rdev->raid_disk = -1; + } break; } if (sb->devflags & WriteMostly1) -- GitLab From 34068ae1fa0a91f48ceea69d063ae6f6701d083d Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 25 Sep 2018 15:35:59 +0800 Subject: [PATCH 1349/2269] f2fs: fix to recover inode's i_flags during POR [ Upstream commit 19c73a691ccf6fb2f12d4e9cf9830023966cec88 ] Testcase to reproduce this bug: 1. mkfs.f2fs /dev/sdd 2. mount -t f2fs /dev/sdd /mnt/f2fs 3. touch /mnt/f2fs/file 4. sync 5. chattr +A /mnt/f2fs/file 6. xfs_io -f /mnt/f2fs/file -c "fsync" 7. godown /mnt/f2fs 8. umount /mnt/f2fs 9. mount -t f2fs /dev/sdd /mnt/f2fs 10. lsattr /mnt/f2fs/file -----------------N- /mnt/f2fs/file But actually, we expect the corrct result is: -------A---------N- /mnt/f2fs/file The reason is we didn't recover inode.i_flags field during mount, fix it. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/f2fs/recovery.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 9626758bc7624..765fadf954afc 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -210,6 +210,7 @@ static void recover_inode(struct inode *inode, struct page *page) inode->i_mtime.tv_nsec = le32_to_cpu(raw->i_mtime_nsec); F2FS_I(inode)->i_advise = raw->i_advise; + F2FS_I(inode)->i_flags = le32_to_cpu(raw->i_flags); if (file_enc_name(inode)) name = ""; -- GitLab From 8cc76433d8c9dd666c19b1aaefd5c55a31115f89 Mon Sep 17 00:00:00 2001 From: Tonghao Zhang Date: Mon, 24 Sep 2018 07:00:41 -0700 Subject: [PATCH 1350/2269] PCI/MSI: Warn and return error if driver enables MSI/MSI-X twice [ Upstream commit 4c1ef72e9b71a19fb405ebfcd37c0a5e16fa44ca ] It is a serious driver defect to enable MSI or MSI-X more than once. Doing so may panic the kernel as in the stack trace below: Call Trace: sysfs_add_one+0xa5/0xd0 create_dir+0x7c/0xe0 sysfs_create_subdir+0x1c/0x20 internal_create_group+0x6d/0x290 sysfs_create_groups+0x4a/0xa0 populate_msi_sysfs+0x1cd/0x210 pci_enable_msix+0x31c/0x3e0 igbuio_pci_open+0x72/0x300 [igb_uio] uio_open+0xcc/0x120 [uio] chrdev_open+0xa1/0x1e0 [...] do_sys_open+0xf3/0x1f0 SyS_open+0x1e/0x20 system_call_fastpath+0x16/0x1b ---[ end trace 11042e2848880209 ]--- Kernel panic - not syncing: stack-protector: Kernel stack is corrupted in: ffffffffa056b4fa We want to keep the WARN_ON() and stack trace so the driver can be fixed, but we can avoid the kernel panic by returning an error. We may still get warnings like this: Call Trace: pci_enable_msix+0x3c9/0x3e0 igbuio_pci_open+0x72/0x300 [igb_uio] uio_open+0xcc/0x120 [uio] chrdev_open+0xa1/0x1e0 [...] do_sys_open+0xf3/0x1f0 SyS_open+0x1e/0x20 system_call_fastpath+0x16/0x1b ------------[ cut here ]------------ WARNING: at fs/sysfs/dir.c:526 sysfs_add_one+0xa5/0xd0() sysfs: cannot create duplicate filename '/devices/pci0000:00/0000:00:03.0/0000:01:00.1/msi_irqs' Signed-off-by: Tonghao Zhang [bhelgaas: changelog, fix patch whitespace, remove !!] Signed-off-by: Bjorn Helgaas Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/pci/msi.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index 496ed9130600e..536e9a5cd2b14 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -958,7 +958,6 @@ static int __pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries, } } } - WARN_ON(!!dev->msix_enabled); /* Check whether driver already requested for MSI irq */ if (dev->msi_enabled) { @@ -1028,8 +1027,6 @@ static int __pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec, if (!pci_msi_supported(dev, minvec)) return -EINVAL; - WARN_ON(!!dev->msi_enabled); - /* Check whether driver already requested MSI-X irqs */ if (dev->msix_enabled) { dev_info(&dev->dev, @@ -1040,6 +1037,9 @@ static int __pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec, if (maxvec < minvec) return -ERANGE; + if (WARN_ON_ONCE(dev->msi_enabled)) + return -EINVAL; + nvec = pci_msi_vec_count(dev); if (nvec < 0) return nvec; @@ -1088,6 +1088,9 @@ static int __pci_enable_msix_range(struct pci_dev *dev, if (maxvec < minvec) return -ERANGE; + if (WARN_ON_ONCE(dev->msix_enabled)) + return -EINVAL; + for (;;) { if (affd) { nvec = irq_calc_affinity_vectors(minvec, nvec, affd); -- GitLab From 0d20b616732e103070f2ffcd061ec0812754808a Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Thu, 20 Sep 2018 13:17:46 -0600 Subject: [PATCH 1351/2269] coresight: etb10: Fix handling of perf mode [ Upstream commit 987d1e8dcd370d96029a3d76a0031b043c4a69ae ] If the ETB is already enabled in sysfs mode, the ETB reports success even if a perf mode is requested. Fix this by checking the requested mode. Cc: Mathieu Poirier Signed-off-by: Suzuki K Poulose Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/hwtracing/coresight/coresight-etb10.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/hwtracing/coresight/coresight-etb10.c b/drivers/hwtracing/coresight/coresight-etb10.c index 56ecd7aff5eb9..d14a9cb7959a5 100644 --- a/drivers/hwtracing/coresight/coresight-etb10.c +++ b/drivers/hwtracing/coresight/coresight-etb10.c @@ -155,6 +155,10 @@ static int etb_enable(struct coresight_device *csdev, u32 mode) if (val == CS_MODE_PERF) return -EBUSY; + /* Don't let perf disturb sysFS sessions */ + if (val == CS_MODE_SYSFS && mode == CS_MODE_PERF) + return -EBUSY; + /* Nothing to do, the tracer is already enabled. */ if (val == CS_MODE_SYSFS) goto out; -- GitLab From 00de8eac10fdcc2d3fff63bf2cfa122a15c72aee Mon Sep 17 00:00:00 2001 From: Vignesh R Date: Tue, 25 Sep 2018 14:00:24 +0530 Subject: [PATCH 1352/2269] PCI: dwc: pci-dra7xx: Enable errata i870 for both EP and RC mode [ Upstream commit 726d75a6d243bf6730da3216f3592503f6f0f588 ] Errata i870 is applicable in both EP and RC mode. Therefore rename function dra7xx_pcie_ep_unaligned_memaccess(), that implements errata workaround, to dra7xx_pcie_unaligned_memaccess() and call it for both RC and EP. Make sure driver probe does not fail in case the workaround is not applied for RC mode in order to maintain DT backward compatibility. Reported-by: Chris Welch Signed-off-by: Vignesh R [lorenzo.pieralisi@arm.com: reworded the log] Signed-off-by: Lorenzo Pieralisi Acked-by: Kishon Vijay Abraham I Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/pci/dwc/pci-dra7xx.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/pci/dwc/pci-dra7xx.c b/drivers/pci/dwc/pci-dra7xx.c index 362607f727ee6..06eae132aff78 100644 --- a/drivers/pci/dwc/pci-dra7xx.c +++ b/drivers/pci/dwc/pci-dra7xx.c @@ -546,7 +546,7 @@ static const struct of_device_id of_dra7xx_pcie_match[] = { }; /* - * dra7xx_pcie_ep_unaligned_memaccess: workaround for AM572x/AM571x Errata i870 + * dra7xx_pcie_unaligned_memaccess: workaround for AM572x/AM571x Errata i870 * @dra7xx: the dra7xx device where the workaround should be applied * * Access to the PCIe slave port that are not 32-bit aligned will result @@ -556,7 +556,7 @@ static const struct of_device_id of_dra7xx_pcie_match[] = { * * To avoid this issue set PCIE_SS1_AXI2OCP_LEGACY_MODE_ENABLE to 1. */ -static int dra7xx_pcie_ep_unaligned_memaccess(struct device *dev) +static int dra7xx_pcie_unaligned_memaccess(struct device *dev) { int ret; struct device_node *np = dev->of_node; @@ -707,6 +707,11 @@ static int __init dra7xx_pcie_probe(struct platform_device *pdev) case DW_PCIE_RC_TYPE: dra7xx_pcie_writel(dra7xx, PCIECTRL_TI_CONF_DEVICE_TYPE, DEVICE_TYPE_RC); + + ret = dra7xx_pcie_unaligned_memaccess(dev); + if (ret) + dev_err(dev, "WA for Errata i870 not applied\n"); + ret = dra7xx_add_pcie_port(dra7xx, pdev); if (ret < 0) goto err_gpio; @@ -715,7 +720,7 @@ static int __init dra7xx_pcie_probe(struct platform_device *pdev) dra7xx_pcie_writel(dra7xx, PCIECTRL_TI_CONF_DEVICE_TYPE, DEVICE_TYPE_EP); - ret = dra7xx_pcie_ep_unaligned_memaccess(dev); + ret = dra7xx_pcie_unaligned_memaccess(dev); if (ret) goto err_gpio; -- GitLab From 6a4e231f265052b0da714076143fe2f2425eee89 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Horia=20Geant=C4=83?= Date: Wed, 12 Sep 2018 11:59:30 +0300 Subject: [PATCH 1353/2269] crypto: caam - fix implicit casts in endianness helpers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit aae733a3f46f5ef338fbdde26e14cbb205a23de0 ] Fix the following sparse endianness warnings: drivers/crypto/caam/regs.h:95:1: sparse: incorrect type in return expression (different base types) @@ expected unsigned int @@ got restricted __le32unsigned int @@ drivers/crypto/caam/regs.h:95:1: expected unsigned int drivers/crypto/caam/regs.h:95:1: got restricted __le32 [usertype] drivers/crypto/caam/regs.h:95:1: sparse: incorrect type in return expression (different base types) @@ expected unsigned int @@ got restricted __be32unsigned int @@ drivers/crypto/caam/regs.h:95:1: expected unsigned int drivers/crypto/caam/regs.h:95:1: got restricted __be32 [usertype] drivers/crypto/caam/regs.h:92:1: sparse: cast to restricted __le32 drivers/crypto/caam/regs.h:92:1: sparse: cast to restricted __be32 Fixes: 261ea058f016 ("crypto: caam - handle core endianness != caam endianness") Reported-by: kbuild test robot Signed-off-by: Horia Geantă Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/caam/regs.h | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/drivers/crypto/caam/regs.h b/drivers/crypto/caam/regs.h index fee363865d88e..e5513cc59ec3e 100644 --- a/drivers/crypto/caam/regs.h +++ b/drivers/crypto/caam/regs.h @@ -70,22 +70,22 @@ extern bool caam_little_end; extern bool caam_imx; -#define caam_to_cpu(len) \ -static inline u##len caam##len ## _to_cpu(u##len val) \ -{ \ - if (caam_little_end) \ - return le##len ## _to_cpu(val); \ - else \ - return be##len ## _to_cpu(val); \ +#define caam_to_cpu(len) \ +static inline u##len caam##len ## _to_cpu(u##len val) \ +{ \ + if (caam_little_end) \ + return le##len ## _to_cpu((__force __le##len)val); \ + else \ + return be##len ## _to_cpu((__force __be##len)val); \ } -#define cpu_to_caam(len) \ -static inline u##len cpu_to_caam##len(u##len val) \ -{ \ - if (caam_little_end) \ - return cpu_to_le##len(val); \ - else \ - return cpu_to_be##len(val); \ +#define cpu_to_caam(len) \ +static inline u##len cpu_to_caam##len(u##len val) \ +{ \ + if (caam_little_end) \ + return (__force u##len)cpu_to_le##len(val); \ + else \ + return (__force u##len)cpu_to_be##len(val); \ } caam_to_cpu(16) -- GitLab From 7987dfa6057678f5864309496c4c26662d93385a Mon Sep 17 00:00:00 2001 From: Loic Poulain Date: Tue, 4 Sep 2018 17:18:57 +0200 Subject: [PATCH 1354/2269] usb: chipidea: Prevent unbalanced IRQ disable [ Upstream commit 8b97d73c4d72a2abf58f8e49062a7ee1e5f1334e ] The ChipIdea IRQ is disabled before scheduling the otg work and re-enabled on otg work completion. However if the job is already scheduled we have to undo the effect of disable_irq int order to balance the IRQ disable-depth value. Fixes: be6b0c1bd0be ("usb: chipidea: using one inline function to cover queue work operations") Signed-off-by: Loic Poulain Signed-off-by: Peter Chen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/usb/chipidea/otg.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/chipidea/otg.h b/drivers/usb/chipidea/otg.h index 9ecb598e48f04..a5557c70034a3 100644 --- a/drivers/usb/chipidea/otg.h +++ b/drivers/usb/chipidea/otg.h @@ -20,7 +20,8 @@ void ci_handle_vbus_change(struct ci_hdrc *ci); static inline void ci_otg_queue_work(struct ci_hdrc *ci) { disable_irq_nosync(ci->irq); - queue_work(ci->wq, &ci->work); + if (queue_work(ci->wq, &ci->work) == false) + enable_irq(ci->irq); } #endif /* __DRIVERS_USB_CHIPIDEA_OTG_H */ -- GitLab From 24e249524e9b19e1e8027904ed3dcbd5e34e9dea Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Fri, 14 Sep 2018 14:53:32 -0400 Subject: [PATCH 1355/2269] driver/dma/ioat: Call del_timer_sync() without holding prep_lock [ Upstream commit cfb03be6c7e8a1591285849c361d67b09f5149f7 ] The following lockdep splat was observed: [ 1222.241750] ====================================================== [ 1222.271301] WARNING: possible circular locking dependency detected [ 1222.301060] 4.16.0-10.el8+5.x86_64+debug #1 Not tainted [ 1222.326659] ------------------------------------------------------ [ 1222.356565] systemd-shutdow/1 is trying to acquire lock: [ 1222.382660] ((&ioat_chan->timer)){+.-.}, at: [<00000000f71e1a28>] del_timer_sync+0x5/0xf0 [ 1222.422928] [ 1222.422928] but task is already holding lock: [ 1222.451743] (&(&ioat_chan->prep_lock)->rlock){+.-.}, at: [<000000008ea98b12>] ioat_shutdown+0x86/0x100 [ioatdma] : [ 1223.524987] Chain exists of: [ 1223.524987] (&ioat_chan->timer) --> &(&ioat_chan->cleanup_lock)->rlock --> &(&ioat_chan->prep_lock)->rlock [ 1223.524987] [ 1223.594082] Possible unsafe locking scenario: [ 1223.594082] [ 1223.622630] CPU0 CPU1 [ 1223.645080] ---- ---- [ 1223.667404] lock(&(&ioat_chan->prep_lock)->rlock); [ 1223.691535] lock(&(&ioat_chan->cleanup_lock)->rlock); [ 1223.728657] lock(&(&ioat_chan->prep_lock)->rlock); [ 1223.765122] lock((&ioat_chan->timer)); [ 1223.784095] [ 1223.784095] *** DEADLOCK *** [ 1223.784095] [ 1223.813492] 4 locks held by systemd-shutdow/1: [ 1223.834677] #0: (reboot_mutex){+.+.}, at: [<0000000056d33456>] SYSC_reboot+0x10f/0x300 [ 1223.873310] #1: (&dev->mutex){....}, at: [<00000000258dfdd7>] device_shutdown+0x1c8/0x660 [ 1223.913604] #2: (&dev->mutex){....}, at: [<0000000068331147>] device_shutdown+0x1d6/0x660 [ 1223.954000] #3: (&(&ioat_chan->prep_lock)->rlock){+.-.}, at: [<000000008ea98b12>] ioat_shutdown+0x86/0x100 [ioatdma] In the ioat_shutdown() function: spin_lock_bh(&ioat_chan->prep_lock); set_bit(IOAT_CHAN_DOWN, &ioat_chan->state); del_timer_sync(&ioat_chan->timer); spin_unlock_bh(&ioat_chan->prep_lock); According to the synchronization rule for the del_timer_sync() function, the caller must not hold locks which would prevent completion of the timer's handler. The timer structure has its own lock that manages its synchronization. Setting the IOAT_CHAN_DOWN bit should prevent other CPUs from trying to use that device anyway, there is probably no need to call del_timer_sync() while holding the prep_lock. So the del_timer_sync() call is now moved outside of the prep_lock critical section to prevent the circular lock dependency. Signed-off-by: Waiman Long Reviewed-by: Dave Jiang Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/dma/ioat/init.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/dma/ioat/init.c b/drivers/dma/ioat/init.c index 854deb0da07ce..68680e4151eaa 100644 --- a/drivers/dma/ioat/init.c +++ b/drivers/dma/ioat/init.c @@ -1205,8 +1205,15 @@ static void ioat_shutdown(struct pci_dev *pdev) spin_lock_bh(&ioat_chan->prep_lock); set_bit(IOAT_CHAN_DOWN, &ioat_chan->state); - del_timer_sync(&ioat_chan->timer); spin_unlock_bh(&ioat_chan->prep_lock); + /* + * Synchronization rule for del_timer_sync(): + * - The caller must not hold locks which would prevent + * completion of the timer's handler. + * So prep_lock cannot be held before calling it. + */ + del_timer_sync(&ioat_chan->timer); + /* this should quiesce then reset */ ioat_reset_hw(ioat_chan); } -- GitLab From adfbc0d10727027a17fb1784548245cda2fae30e Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Thu, 16 Aug 2018 09:39:41 +0200 Subject: [PATCH 1356/2269] uio: ensure class is registered before devices [ Upstream commit ae61cf5b9913027c6953a79ed3894da4f47061bd ] When both uio and the uio drivers are built in the kernel, it is possible for a driver to register devices before the uio class is registered. This may result in a NULL pointer dereference later on in get_device_parent() when accessing the class glue_dirs spinlock. The trace looks like that: Unable to handle kernel NULL pointer dereference at virtual address 00000140 [...] [] _raw_spin_lock+0x14/0x48 [] device_add+0x154/0x6a0 [] device_create_groups_vargs+0x120/0x128 [] device_create+0x54/0x60 [] __uio_register_device+0x120/0x4a8 [] jaguar2_pci_probe+0x2d4/0x558 [] local_pci_probe+0x3c/0xb8 [] pci_device_probe+0x11c/0x180 [] driver_probe_device+0x22c/0x2d8 [] __driver_attach+0xbc/0xc0 [] bus_for_each_dev+0x4c/0x98 [] driver_attach+0x20/0x28 [] bus_add_driver+0x1b8/0x228 [] driver_register+0x60/0xf8 [] __pci_register_driver+0x40/0x48 Return EPROBE_DEFER in that case so the driver can register the device later. Signed-off-by: Alexandre Belloni Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/uio/uio.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/uio/uio.c b/drivers/uio/uio.c index 41784798c789b..0a730136646d5 100644 --- a/drivers/uio/uio.c +++ b/drivers/uio/uio.c @@ -249,6 +249,8 @@ static struct class uio_class = { .dev_groups = uio_groups, }; +bool uio_class_registered; + /* * device functions */ @@ -780,6 +782,9 @@ static int init_uio_class(void) printk(KERN_ERR "class_register failed for uio\n"); goto err_class_register; } + + uio_class_registered = true; + return 0; err_class_register: @@ -790,6 +795,7 @@ exit: static void release_uio_class(void) { + uio_class_registered = false; class_unregister(&uio_class); uio_major_cleanup(); } @@ -809,6 +815,9 @@ int __uio_register_device(struct module *owner, struct uio_device *idev; int ret = 0; + if (!uio_class_registered) + return -EPROBE_DEFER; + if (!parent || !info || !info->name || !info->version) return -EINVAL; -- GitLab From 7f4b23ed2cdb5ba7a6edf002ea9cf0f5f684dca5 Mon Sep 17 00:00:00 2001 From: James Smart Date: Mon, 10 Sep 2018 10:30:45 -0700 Subject: [PATCH 1357/2269] scsi: lpfc: Correct soft lockup when running mds diagnostics [ Upstream commit 0ef01a2d95fd62bb4f536e7ce4d5e8e74b97a244 ] When running an mds diagnostic that passes frames with the switch, soft lockups are detected. The driver is in a CQE processing loop and has sufficient amount of traffic that it never exits the ring processing routine, thus the "lockup". Cap the number of elements in the work processing routine to 64 elements. This ensures that the cpu will be given up and the handler reschedule to process additional items. Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/lpfc/lpfc_sli.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index dc83498024dc3..24b6e56f6e97e 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -3585,6 +3585,7 @@ lpfc_sli_handle_slow_ring_event_s4(struct lpfc_hba *phba, struct hbq_dmabuf *dmabuf; struct lpfc_cq_event *cq_event; unsigned long iflag; + int count = 0; spin_lock_irqsave(&phba->hbalock, iflag); phba->hba_flag &= ~HBA_SP_QUEUE_EVT; @@ -3606,16 +3607,22 @@ lpfc_sli_handle_slow_ring_event_s4(struct lpfc_hba *phba, if (irspiocbq) lpfc_sli_sp_handle_rspiocb(phba, pring, irspiocbq); + count++; break; case CQE_CODE_RECEIVE: case CQE_CODE_RECEIVE_V1: dmabuf = container_of(cq_event, struct hbq_dmabuf, cq_event); lpfc_sli4_handle_received_buffer(phba, dmabuf); + count++; break; default: break; } + + /* Limit the number of events to 64 to avoid soft lockups */ + if (count == 64) + break; } } -- GitLab From 5115854bb3a8bb7c04a191c3e8778faf04a2ec96 Mon Sep 17 00:00:00 2001 From: James Smart Date: Mon, 10 Sep 2018 10:30:44 -0700 Subject: [PATCH 1358/2269] scsi: lpfc: Correct race with abort on completion path [ Upstream commit ca7fb76e091f889cfda1287c07a9358f73832b39 ] On io completion, the driver is taking an adapter wide lock and nulling the scsi command back pointer. The nulling of the back pointer is to signify the io was completed and the scsi_done() routine was called. However, the routine makes no check to see if the abort routine had done the same thing and possibly nulled the pointer. Thus it may doubly-complete the io. Make the following mods: - Check to make sure forward progress (call scsi_done()) only happens if the command pointer was non-null. - As the taking of the lock, which is adapter wide, is very costly on a system under load, null the pointer using an xchg operation rather than under lock. Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/lpfc/lpfc_scsi.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c index 1a6f122bb25db..4ade13d72deb3 100644 --- a/drivers/scsi/lpfc/lpfc_scsi.c +++ b/drivers/scsi/lpfc/lpfc_scsi.c @@ -4149,9 +4149,17 @@ lpfc_scsi_cmd_iocb_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pIocbIn, lpfc_scsi_unprep_dma_buf(phba, lpfc_cmd); - spin_lock_irqsave(&phba->hbalock, flags); - lpfc_cmd->pCmd = NULL; - spin_unlock_irqrestore(&phba->hbalock, flags); + /* If pCmd was set to NULL from abort path, do not call scsi_done */ + if (xchg(&lpfc_cmd->pCmd, NULL) == NULL) { + lpfc_printf_vlog(vport, KERN_INFO, LOG_FCP, + "0711 FCP cmd already NULL, sid: 0x%06x, " + "did: 0x%06x, oxid: 0x%04x\n", + vport->fc_myDID, + (pnode) ? pnode->nlp_DID : 0, + phba->sli_rev == LPFC_SLI_REV4 ? + lpfc_cmd->cur_iocbq.sli4_xritag : 0xffff); + return; + } /* The sdev is not guaranteed to be valid post scsi_done upcall. */ cmd->scsi_done(cmd); -- GitLab From 5ae1c65ae960cdc0b2b9a5a614d54f7cfa33e31a Mon Sep 17 00:00:00 2001 From: Yunlei He Date: Tue, 26 Jun 2018 13:12:43 +0800 Subject: [PATCH 1359/2269] f2fs: report error if quota off error during umount [ Upstream commit cda9cc595f0bb6ffa51a4efc4b6533dfa4039b4c ] Now, we depend on fsck to ensure quota file data is ok, so we scan whole partition if checkpoint without umount flag. It's same for quota off error case, which may make quota file data inconsistent. generic/019 reports below error: __quota_error: 1160 callbacks suppressed Quota error (device zram1): write_blk: dquota write failed Quota error (device zram1): qtree_write_dquot: Error -28 occurred while creating quota Quota error (device zram1): write_blk: dquota write failed Quota error (device zram1): qtree_write_dquot: Error -28 occurred while creating quota Quota error (device zram1): write_blk: dquota write failed Quota error (device zram1): qtree_write_dquot: Error -28 occurred while creating quota Quota error (device zram1): write_blk: dquota write failed Quota error (device zram1): qtree_write_dquot: Error -28 occurred while creating quota Quota error (device zram1): write_blk: dquota write failed Quota error (device zram1): qtree_write_dquot: Error -28 occurred while creating quota VFS: Busy inodes after unmount of zram1. Self-destruct in 5 seconds. Have a nice day... If we failed in below path due to fail to write dquot block, we will miss to release quota inode, fix it. - f2fs_put_super - f2fs_quota_off_umount - f2fs_quota_off - f2fs_quota_sync <-- failed - dquot_quota_off <-- missed to call Signed-off-by: Yunlei He Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/f2fs/super.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index eae35909fa51a..7cda685296b27 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1488,7 +1488,9 @@ static int f2fs_quota_off(struct super_block *sb, int type) if (!inode || !igrab(inode)) return dquot_quota_off(sb, type); - f2fs_quota_sync(sb, type); + err = f2fs_quota_sync(sb, type); + if (err) + goto out_put; err = dquot_quota_off(sb, type); if (err) @@ -1507,9 +1509,20 @@ out_put: void f2fs_quota_off_umount(struct super_block *sb) { int type; + int err; - for (type = 0; type < MAXQUOTAS; type++) - f2fs_quota_off(sb, type); + for (type = 0; type < MAXQUOTAS; type++) { + err = f2fs_quota_off(sb, type); + if (err) { + int ret = dquot_quota_off(sb, type); + + f2fs_msg(sb, KERN_ERR, + "Fail to turn off disk quota " + "(type: %d, err: %d, ret:%d), Please " + "run fsck to fix it.", type, err, ret); + set_sbi_flag(F2FS_SB(sb), SBI_NEED_FSCK); + } + } } int f2fs_get_projid(struct inode *inode, kprojid_t *projid) -- GitLab From 06bd97b79584764d3ff4dbd42d4ca87d2cbe5a46 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 3 Sep 2018 20:02:46 +0200 Subject: [PATCH 1360/2269] signal: Always deliver the kernel's SIGKILL and SIGSTOP to a pid namespace init [ Upstream commit 3597dfe01d12f570bc739da67f857fd222a3ea66 ] Instead of playing whack-a-mole and changing SEND_SIG_PRIV to SEND_SIG_FORCED throughout the kernel to ensure a pid namespace init gets signals sent by the kernel, stop allowing a pid namespace init to ignore SIGKILL or SIGSTOP sent by the kernel. A pid namespace init is only supposed to be able to ignore signals sent from itself and children with SIG_DFL. Fixes: 921cf9f63089 ("signals: protect cinit from unblocked SIG_DFL signals") Reviewed-by: Thomas Gleixner Signed-off-by: "Eric W. Biederman" Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- kernel/signal.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/signal.c b/kernel/signal.c index b74acbec98762..7181215e9b64e 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1003,7 +1003,7 @@ static int __send_signal(int sig, struct siginfo *info, struct task_struct *t, result = TRACE_SIGNAL_IGNORED; if (!prepare_signal(sig, t, - from_ancestor_ns || (info == SEND_SIG_FORCED))) + from_ancestor_ns || (info == SEND_SIG_PRIV) || (info == SEND_SIG_FORCED))) goto ret; pending = group ? &t->signal->shared_pending : &t->pending; -- GitLab From 64b48a5cbea969bb7bd2f779dab52c2753c44e46 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Sun, 9 Sep 2018 22:48:58 +0200 Subject: [PATCH 1361/2269] mfd: menelaus: Fix possible race condition and leak [ Upstream commit 9612f8f503804d2fd2f63aa6ba1e58bba4612d96 ] The IRQ work is added before the struct rtc is allocated and registered, but this struct is used in the IRQ handler. This may lead to a NULL pointer dereference. Switch to devm_rtc_allocate_device/rtc_register_device to allocate the rtc before calling menelaus_add_irq_work. Also, this solves a possible leak as the RTC is never released. Signed-off-by: Alexandre Belloni Signed-off-by: Lee Jones Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/mfd/menelaus.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/mfd/menelaus.c b/drivers/mfd/menelaus.c index 29b7164a823bd..d28ebe7ecd211 100644 --- a/drivers/mfd/menelaus.c +++ b/drivers/mfd/menelaus.c @@ -1094,6 +1094,7 @@ static void menelaus_rtc_alarm_work(struct menelaus_chip *m) static inline void menelaus_rtc_init(struct menelaus_chip *m) { int alarm = (m->client->irq > 0); + int err; /* assume 32KDETEN pin is pulled high */ if (!(menelaus_read_reg(MENELAUS_OSC_CTRL) & 0x80)) { @@ -1101,6 +1102,12 @@ static inline void menelaus_rtc_init(struct menelaus_chip *m) return; } + m->rtc = devm_rtc_allocate_device(&m->client->dev); + if (IS_ERR(m->rtc)) + return; + + m->rtc->ops = &menelaus_rtc_ops; + /* support RTC alarm; it can issue wakeups */ if (alarm) { if (menelaus_add_irq_work(MENELAUS_RTCALM_IRQ, @@ -1125,10 +1132,8 @@ static inline void menelaus_rtc_init(struct menelaus_chip *m) menelaus_write_reg(MENELAUS_RTC_CTRL, m->rtc_control); } - m->rtc = rtc_device_register(DRIVER_NAME, - &m->client->dev, - &menelaus_rtc_ops, THIS_MODULE); - if (IS_ERR(m->rtc)) { + err = rtc_register_device(m->rtc); + if (err) { if (alarm) { menelaus_remove_irq_work(MENELAUS_RTCALM_IRQ); device_init_wakeup(&m->client->dev, 0); -- GitLab From 0bc6f0649b539b20ebf2096ead11246027ce9bef Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Wed, 29 Aug 2018 23:32:44 +0200 Subject: [PATCH 1362/2269] dmaengine: dma-jz4780: Return error if not probed from DT [ Upstream commit 54f919a04cf221bc1601d1193682d4379dacacbd ] The driver calls clk_get() with the clock name set to NULL, which means that the driver could only work when probed from devicetree. From now on, we explicitly require the driver to be probed from devicetree. Signed-off-by: Paul Cercueil Tested-by: Mathieu Malaterre Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/dma/dma-jz4780.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/dma/dma-jz4780.c b/drivers/dma/dma-jz4780.c index 7373b7a555ec3..803cfb4523b08 100644 --- a/drivers/dma/dma-jz4780.c +++ b/drivers/dma/dma-jz4780.c @@ -754,6 +754,11 @@ static int jz4780_dma_probe(struct platform_device *pdev) struct resource *res; int i, ret; + if (!dev->of_node) { + dev_err(dev, "This driver must be probed from devicetree\n"); + return -EINVAL; + } + jzdma = devm_kzalloc(dev, sizeof(*jzdma), GFP_KERNEL); if (!jzdma) return -ENOMEM; -- GitLab From ab5ed779fe0dcf3e81484409530267880f661d95 Mon Sep 17 00:00:00 2001 From: Vijay Immanuel Date: Tue, 12 Jun 2018 18:20:49 -0700 Subject: [PATCH 1363/2269] IB/rxe: fix for duplicate request processing and ack psns [ Upstream commit b97db58557f4aa6d9903f8e1deea6b3d1ed0ba43 ] Don't reset the resp opcode for a replayed read response. The resp opcode could be in the middle of a write or send sequence, when the duplicate read request was received. An example sequence is as follows: - Receive read request for 12KB PSN 20. Transmit read response first, middle and last with PSNs 20,21,22. - Receive write first PSN 23. At this point the resp psn is 24 and resp opcode is write first. - The sender notices that PSN 20 is dropped and retransmits. Receive read request for 12KB PSN 20. Transmit read response first, middle and last with PSNs 20,21,22. The resp opcode is set to -1, the resp psn remains 24. - Receive write first PSN 23. This is processed by duplicate_request(). The resp opcode remains -1 and resp psn remains 24. - Receive write middle PSN 24. check_op_seq() reports a missing first error since the resp opcode is -1. When sending an ack for a duplicate send or write request, use the psn of the previous ack sent. Do not use the psn of a read response for the ack. An example sequence is as follows: - Receive write PSN 30. Transmit ACK for PSN 30. - Receive read request 4KB PSN 31. Transmit read response with PSN 31. The resp psn is now 32. - The sender notices that PSN 30 is dropped and retransmits. Receive write PSN 30. duplicate_request() sends an ACK with PSN 31. That is incorrect since PSN 31 was a read request. Signed-off-by: Vijay Immanuel Signed-off-by: Doug Ledford Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/sw/rxe/rxe_resp.c | 8 ++++++-- drivers/infiniband/sw/rxe/rxe_verbs.h | 2 ++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index bd43c1c7a42fd..4d84b010b3ee2 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -683,6 +683,7 @@ static enum resp_states read_reply(struct rxe_qp *qp, rxe_advance_resp_resource(qp); res->type = RXE_READ_MASK; + res->replay = 0; res->read.va = qp->resp.va; res->read.va_org = qp->resp.va; @@ -753,7 +754,8 @@ static enum resp_states read_reply(struct rxe_qp *qp, state = RESPST_DONE; } else { qp->resp.res = NULL; - qp->resp.opcode = -1; + if (!res->replay) + qp->resp.opcode = -1; if (psn_compare(res->cur_psn, qp->resp.psn) >= 0) qp->resp.psn = res->cur_psn; state = RESPST_CLEANUP; @@ -815,6 +817,7 @@ static enum resp_states execute(struct rxe_qp *qp, struct rxe_pkt_info *pkt) /* next expected psn, read handles this separately */ qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK; + qp->resp.ack_psn = qp->resp.psn; qp->resp.opcode = pkt->opcode; qp->resp.status = IB_WC_SUCCESS; @@ -1071,7 +1074,7 @@ static enum resp_states duplicate_request(struct rxe_qp *qp, struct rxe_pkt_info *pkt) { enum resp_states rc; - u32 prev_psn = (qp->resp.psn - 1) & BTH_PSN_MASK; + u32 prev_psn = (qp->resp.ack_psn - 1) & BTH_PSN_MASK; if (pkt->mask & RXE_SEND_MASK || pkt->mask & RXE_WRITE_MASK) { @@ -1114,6 +1117,7 @@ static enum resp_states duplicate_request(struct rxe_qp *qp, res->state = (pkt->psn == res->first_psn) ? rdatm_res_state_new : rdatm_res_state_replay; + res->replay = 1; /* Reset the resource, except length. */ res->read.va_org = iova; diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h index 1019f5e7dbddd..59f6a24db064b 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.h +++ b/drivers/infiniband/sw/rxe/rxe_verbs.h @@ -173,6 +173,7 @@ enum rdatm_res_state { struct resp_res { int type; + int replay; u32 first_psn; u32 last_psn; u32 cur_psn; @@ -197,6 +198,7 @@ struct rxe_resp_info { enum rxe_qp_state state; u32 msn; u32 psn; + u32 ack_psn; int opcode; int drop_msg; int goto_error; -- GitLab From edadd638264f3782d0d166125a7a5515523dbe3c Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sat, 11 Aug 2018 23:33:34 +0200 Subject: [PATCH 1364/2269] ALSA: hda: Check the non-cached stream buffers more explicitly [ Upstream commit 78c9be61c3a5cd9e2439fd27a5ffad73a81958c7 ] Introduce a new flag, uc_buffer, to indicate that the controller requires the non-cached pages for stream buffers, either as a chip-specific requirement or specified via snoop=0 option. This improves the code-readability. Also, this patch fixes the incorrect behavior for C-Media chip where the stream buffers were never handled as non-cached due to the check of driver_type even if you pass snoop=0 option. Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/hda_controller.h | 1 + sound/pci/hda/hda_intel.c | 11 ++++++++--- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/sound/pci/hda/hda_controller.h b/sound/pci/hda/hda_controller.h index a68e75b00ea3b..53c3cd28bc995 100644 --- a/sound/pci/hda/hda_controller.h +++ b/sound/pci/hda/hda_controller.h @@ -160,6 +160,7 @@ struct azx { unsigned int msi:1; unsigned int probing:1; /* codec probing phase */ unsigned int snoop:1; + unsigned int uc_buffer:1; /* non-cached pages for stream buffers */ unsigned int align_buffer_size:1; unsigned int region_requested:1; unsigned int disabled:1; /* disabled by vga_switcheroo */ diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 873d9824fbcff..4e38905bc47db 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -410,7 +410,7 @@ static void __mark_pages_wc(struct azx *chip, struct snd_dma_buffer *dmab, bool #ifdef CONFIG_SND_DMA_SGBUF if (dmab->dev.type == SNDRV_DMA_TYPE_DEV_SG) { struct snd_sg_buf *sgbuf = dmab->private_data; - if (chip->driver_type == AZX_DRIVER_CMEDIA) + if (!chip->uc_buffer) return; /* deal with only CORB/RIRB buffers */ if (on) set_pages_array_wc(sgbuf->page_table, sgbuf->pages); @@ -1634,6 +1634,7 @@ static void azx_check_snoop_available(struct azx *chip) dev_info(chip->card->dev, "Force to %s mode by module option\n", snoop ? "snoop" : "non-snoop"); chip->snoop = snoop; + chip->uc_buffer = !snoop; return; } @@ -1654,8 +1655,12 @@ static void azx_check_snoop_available(struct azx *chip) snoop = false; chip->snoop = snoop; - if (!snoop) + if (!snoop) { dev_info(chip->card->dev, "Force to non-snoop mode\n"); + /* C-Media requires non-cached pages only for CORB/RIRB */ + if (chip->driver_type != AZX_DRIVER_CMEDIA) + chip->uc_buffer = true; + } } static void azx_probe_work(struct work_struct *work) @@ -2094,7 +2099,7 @@ static void pcm_mmap_prepare(struct snd_pcm_substream *substream, #ifdef CONFIG_X86 struct azx_pcm *apcm = snd_pcm_substream_chip(substream); struct azx *chip = apcm->chip; - if (!azx_snoop(chip) && chip->driver_type != AZX_DRIVER_CMEDIA) + if (chip->uc_buffer) area->vm_page_prot = pgprot_writecombine(area->vm_page_prot); #endif } -- GitLab From bb61032295732b08a714678e0ccea732dabfc49e Mon Sep 17 00:00:00 2001 From: Prarit Bhargava Date: Mon, 8 Oct 2018 11:06:18 -0400 Subject: [PATCH 1365/2269] cpupower: Fix AMD Family 0x17 msr_pstate size [ Upstream commit 8c22e2f695920ebd94f9a53bcf2a65eb36d4dba1 ] The msr_pstate data is only 63 bits long and should be 64 bits. Add in the missing bit from res1 for AMD Family 0x17. Reference: https://www.amd.com/system/files/TechDocs/54945_PPR_Family_17h_Models_00h-0Fh.pdf, page 138. Signed-off-by: Prarit Bhargava Cc: Shuah Khan Cc: Stafford Horne Signed-off-by: Shuah Khan (Samsung OSG) Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/power/cpupower/utils/helpers/amd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/power/cpupower/utils/helpers/amd.c b/tools/power/cpupower/utils/helpers/amd.c index 58d23997424d9..9607ada5b29a6 100644 --- a/tools/power/cpupower/utils/helpers/amd.c +++ b/tools/power/cpupower/utils/helpers/amd.c @@ -33,7 +33,7 @@ union msr_pstate { unsigned vid:8; unsigned iddval:8; unsigned idddiv:2; - unsigned res1:30; + unsigned res1:31; unsigned en:1; } fam17h_bits; unsigned long long val; -- GitLab From a3ab5e3c80106fa6b131e0ad0959d5a8376ce589 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 16 Oct 2018 19:30:13 -0700 Subject: [PATCH 1366/2269] Revert "f2fs: fix to clear PG_checked flag in set_page_dirty()" commit 164a63fa6b384e30ceb96ed80bc7dc3379bc0960 upstream. This reverts commit 66110abc4c931f879d70e83e1281f891699364bf. If we clear the cold data flag out of the writeback flow, we can miscount -1 by end_io, which incurs a deadlock caused by all I/Os being blocked during heavy GC. Balancing F2FS Async: - IO (CP: 1, Data: -1, Flush: ( 0 0 1), Discard: ( ... GC thread: IRQ - move_data_page() - set_page_dirty() - clear_cold_data() - f2fs_write_end_io() - type = WB_DATA_TYPE(page); here, we get wrong type - dec_page_count(sbi, type); - f2fs_wait_on_page_writeback() Cc: Reported-and-Tested-by: Park Ju Hyung Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Greg Kroah-Hartman --- fs/f2fs/data.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index e10bd73f0723e..85142e5df88b0 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -2190,10 +2190,6 @@ static int f2fs_set_data_page_dirty(struct page *page) if (!PageUptodate(page)) SetPageUptodate(page); - /* don't remain PG_checked flag which was set during GC */ - if (is_cold_data(page)) - clear_cold_data(page); - if (f2fs_is_atomic_file(inode) && !f2fs_is_commit_atomic_write(inode)) { if (!IS_ATOMIC_WRITTEN_PAGE(page)) { register_inmem_page(inode, page); -- GitLab From 2cc5dcf7a9e074c5bdfdefea1801aa0a657750d6 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 22 Oct 2018 09:12:51 +0800 Subject: [PATCH 1367/2269] f2fs: fix to account IO correctly commit 4c58ed076875f36dae0f240da1e25e99e5d4afb8 upstream. Below race can cause reversed reference on dirty count, fix it by relocating __submit_bio() and inc_page_count(). Thread A Thread B - f2fs_inplace_write_data - f2fs_submit_page_bio - __submit_bio - f2fs_write_end_io - dec_page_count - inc_page_count Cc: Fixes: d1b3e72d5490 ("f2fs: submit bio of in-place-update pages") Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Greg Kroah-Hartman --- fs/f2fs/data.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 85142e5df88b0..6fbb6d75318a3 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -381,10 +381,10 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio) } bio_set_op_attrs(bio, fio->op, fio->op_flags); - __submit_bio(fio->sbi, bio, fio->type); - if (!is_read_io(fio->op)) inc_page_count(fio->sbi, WB_DATA_TYPE(fio->page)); + + __submit_bio(fio->sbi, bio, fio->type); return 0; } -- GitLab From b4438856ea1e6337c1c11fee6b612ffafee055cb Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 9 Feb 2018 14:28:01 +0530 Subject: [PATCH 1368/2269] ARM: dts: exynos: Remove "cooling-{min|max}-level" for CPU nodes commit cd6f55457eb449a388e793abd676e3a5b73510bc upstream. The "cooling-min-level" and "cooling-max-level" properties are not parsed by any part of the kernel currently and the max cooling state of a CPU cooling device is found by referring to the cpufreq table instead. Remove the unused properties from the CPU nodes. Signed-off-by: Viresh Kumar Signed-off-by: Krzysztof Kozlowski Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/exynos4210.dtsi | 2 -- arch/arm/boot/dts/exynos4412.dtsi | 2 -- arch/arm/boot/dts/exynos5250.dtsi | 2 -- arch/arm/boot/dts/exynos5420-cpus.dtsi | 16 ---------------- arch/arm/boot/dts/exynos5422-cpus.dtsi | 16 ---------------- 5 files changed, 38 deletions(-) diff --git a/arch/arm/boot/dts/exynos4210.dtsi b/arch/arm/boot/dts/exynos4210.dtsi index 768fb075b1fd1..4e8325b173ead 100644 --- a/arch/arm/boot/dts/exynos4210.dtsi +++ b/arch/arm/boot/dts/exynos4210.dtsi @@ -52,8 +52,6 @@ 400000 975000 200000 950000 >; - cooling-min-level = <4>; - cooling-max-level = <2>; #cooling-cells = <2>; /* min followed by max */ }; diff --git a/arch/arm/boot/dts/exynos4412.dtsi b/arch/arm/boot/dts/exynos4412.dtsi index 7ff03a7e8fb93..1a35e6336e53b 100644 --- a/arch/arm/boot/dts/exynos4412.dtsi +++ b/arch/arm/boot/dts/exynos4412.dtsi @@ -45,8 +45,6 @@ clocks = <&clock CLK_ARM_CLK>; clock-names = "cpu"; operating-points-v2 = <&cpu0_opp_table>; - cooling-min-level = <13>; - cooling-max-level = <7>; #cooling-cells = <2>; /* min followed by max */ }; diff --git a/arch/arm/boot/dts/exynos5250.dtsi b/arch/arm/boot/dts/exynos5250.dtsi index 35b1949a3e3cf..d7fc0e0164472 100644 --- a/arch/arm/boot/dts/exynos5250.dtsi +++ b/arch/arm/boot/dts/exynos5250.dtsi @@ -80,8 +80,6 @@ 300000 937500 200000 925000 >; - cooling-min-level = <15>; - cooling-max-level = <9>; #cooling-cells = <2>; /* min followed by max */ }; cpu@1 { diff --git a/arch/arm/boot/dts/exynos5420-cpus.dtsi b/arch/arm/boot/dts/exynos5420-cpus.dtsi index 5c052d7ff5546..7e6b55561b1d7 100644 --- a/arch/arm/boot/dts/exynos5420-cpus.dtsi +++ b/arch/arm/boot/dts/exynos5420-cpus.dtsi @@ -33,8 +33,6 @@ clock-frequency = <1800000000>; cci-control-port = <&cci_control1>; operating-points-v2 = <&cluster_a15_opp_table>; - cooling-min-level = <0>; - cooling-max-level = <11>; #cooling-cells = <2>; /* min followed by max */ }; @@ -45,8 +43,6 @@ clock-frequency = <1800000000>; cci-control-port = <&cci_control1>; operating-points-v2 = <&cluster_a15_opp_table>; - cooling-min-level = <0>; - cooling-max-level = <11>; #cooling-cells = <2>; /* min followed by max */ }; @@ -57,8 +53,6 @@ clock-frequency = <1800000000>; cci-control-port = <&cci_control1>; operating-points-v2 = <&cluster_a15_opp_table>; - cooling-min-level = <0>; - cooling-max-level = <11>; #cooling-cells = <2>; /* min followed by max */ }; @@ -69,8 +63,6 @@ clock-frequency = <1800000000>; cci-control-port = <&cci_control1>; operating-points-v2 = <&cluster_a15_opp_table>; - cooling-min-level = <0>; - cooling-max-level = <11>; #cooling-cells = <2>; /* min followed by max */ }; @@ -82,8 +74,6 @@ clock-frequency = <1000000000>; cci-control-port = <&cci_control0>; operating-points-v2 = <&cluster_a7_opp_table>; - cooling-min-level = <0>; - cooling-max-level = <7>; #cooling-cells = <2>; /* min followed by max */ }; @@ -94,8 +84,6 @@ clock-frequency = <1000000000>; cci-control-port = <&cci_control0>; operating-points-v2 = <&cluster_a7_opp_table>; - cooling-min-level = <0>; - cooling-max-level = <7>; #cooling-cells = <2>; /* min followed by max */ }; @@ -106,8 +94,6 @@ clock-frequency = <1000000000>; cci-control-port = <&cci_control0>; operating-points-v2 = <&cluster_a7_opp_table>; - cooling-min-level = <0>; - cooling-max-level = <7>; #cooling-cells = <2>; /* min followed by max */ }; @@ -118,8 +104,6 @@ clock-frequency = <1000000000>; cci-control-port = <&cci_control0>; operating-points-v2 = <&cluster_a7_opp_table>; - cooling-min-level = <0>; - cooling-max-level = <7>; #cooling-cells = <2>; /* min followed by max */ }; }; diff --git a/arch/arm/boot/dts/exynos5422-cpus.dtsi b/arch/arm/boot/dts/exynos5422-cpus.dtsi index bf3c6f1ec4ee3..c8afdf821a779 100644 --- a/arch/arm/boot/dts/exynos5422-cpus.dtsi +++ b/arch/arm/boot/dts/exynos5422-cpus.dtsi @@ -32,8 +32,6 @@ clock-frequency = <1000000000>; cci-control-port = <&cci_control0>; operating-points-v2 = <&cluster_a7_opp_table>; - cooling-min-level = <0>; - cooling-max-level = <11>; #cooling-cells = <2>; /* min followed by max */ }; @@ -44,8 +42,6 @@ clock-frequency = <1000000000>; cci-control-port = <&cci_control0>; operating-points-v2 = <&cluster_a7_opp_table>; - cooling-min-level = <0>; - cooling-max-level = <11>; #cooling-cells = <2>; /* min followed by max */ }; @@ -56,8 +52,6 @@ clock-frequency = <1000000000>; cci-control-port = <&cci_control0>; operating-points-v2 = <&cluster_a7_opp_table>; - cooling-min-level = <0>; - cooling-max-level = <11>; #cooling-cells = <2>; /* min followed by max */ }; @@ -68,8 +62,6 @@ clock-frequency = <1000000000>; cci-control-port = <&cci_control0>; operating-points-v2 = <&cluster_a7_opp_table>; - cooling-min-level = <0>; - cooling-max-level = <11>; #cooling-cells = <2>; /* min followed by max */ }; @@ -81,8 +73,6 @@ clock-frequency = <1800000000>; cci-control-port = <&cci_control1>; operating-points-v2 = <&cluster_a15_opp_table>; - cooling-min-level = <0>; - cooling-max-level = <15>; #cooling-cells = <2>; /* min followed by max */ }; @@ -93,8 +83,6 @@ clock-frequency = <1800000000>; cci-control-port = <&cci_control1>; operating-points-v2 = <&cluster_a15_opp_table>; - cooling-min-level = <0>; - cooling-max-level = <15>; #cooling-cells = <2>; /* min followed by max */ }; @@ -105,8 +93,6 @@ clock-frequency = <1800000000>; cci-control-port = <&cci_control1>; operating-points-v2 = <&cluster_a15_opp_table>; - cooling-min-level = <0>; - cooling-max-level = <15>; #cooling-cells = <2>; /* min followed by max */ }; @@ -117,8 +103,6 @@ clock-frequency = <1800000000>; cci-control-port = <&cci_control1>; operating-points-v2 = <&cluster_a15_opp_table>; - cooling-min-level = <0>; - cooling-max-level = <15>; #cooling-cells = <2>; /* min followed by max */ }; }; -- GitLab From 4b93b6d7e3f2b6532525c12af06a014a6b71f19f Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 25 May 2018 16:01:53 +0530 Subject: [PATCH 1369/2269] arm: dts: exynos: Add missing cooling device properties for CPUs commit 672f33198bee21ee91e6af2cb8f67cfc8bc97ec1 upstream. The cooling device properties, like "#cooling-cells" and "dynamic-power-coefficient", should either be present for all the CPUs of a cluster or none. If these are present only for a subset of CPUs of a cluster then things will start falling apart as soon as the CPUs are brought online in a different order. For example, this will happen because the operating system looks for such properties in the CPU node it is trying to bring up, so that it can register a cooling device. Add such missing properties. Fix other missing properties (clocks, OPP, clock latency) as well to make it all work. Signed-off-by: Viresh Kumar Signed-off-by: Krzysztof Kozlowski Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/exynos3250.dtsi | 16 ++++++++++++++++ arch/arm/boot/dts/exynos4210.dtsi | 13 +++++++++++++ arch/arm/boot/dts/exynos5250.dtsi | 23 +++++++++++++++++++++++ 3 files changed, 52 insertions(+) diff --git a/arch/arm/boot/dts/exynos3250.dtsi b/arch/arm/boot/dts/exynos3250.dtsi index 590ee442d0ae1..3ed3d1a0fd405 100644 --- a/arch/arm/boot/dts/exynos3250.dtsi +++ b/arch/arm/boot/dts/exynos3250.dtsi @@ -82,6 +82,22 @@ compatible = "arm,cortex-a7"; reg = <1>; clock-frequency = <1000000000>; + clocks = <&cmu CLK_ARM_CLK>; + clock-names = "cpu"; + #cooling-cells = <2>; + + operating-points = < + 1000000 1150000 + 900000 1112500 + 800000 1075000 + 700000 1037500 + 600000 1000000 + 500000 962500 + 400000 925000 + 300000 887500 + 200000 850000 + 100000 850000 + >; }; }; diff --git a/arch/arm/boot/dts/exynos4210.dtsi b/arch/arm/boot/dts/exynos4210.dtsi index 4e8325b173ead..27e17471ab7a6 100644 --- a/arch/arm/boot/dts/exynos4210.dtsi +++ b/arch/arm/boot/dts/exynos4210.dtsi @@ -59,6 +59,19 @@ device_type = "cpu"; compatible = "arm,cortex-a9"; reg = <0x901>; + clocks = <&clock CLK_ARM_CLK>; + clock-names = "cpu"; + clock-latency = <160000>; + + operating-points = < + 1200000 1250000 + 1000000 1150000 + 800000 1075000 + 500000 975000 + 400000 975000 + 200000 950000 + >; + #cooling-cells = <2>; /* min followed by max */ }; }; diff --git a/arch/arm/boot/dts/exynos5250.dtsi b/arch/arm/boot/dts/exynos5250.dtsi index d7fc0e0164472..b1ed7c57c51ee 100644 --- a/arch/arm/boot/dts/exynos5250.dtsi +++ b/arch/arm/boot/dts/exynos5250.dtsi @@ -87,6 +87,29 @@ compatible = "arm,cortex-a15"; reg = <1>; clock-frequency = <1700000000>; + clocks = <&clock CLK_ARM_CLK>; + clock-names = "cpu"; + clock-latency = <140000>; + + operating-points = < + 1700000 1300000 + 1600000 1250000 + 1500000 1225000 + 1400000 1200000 + 1300000 1150000 + 1200000 1125000 + 1100000 1100000 + 1000000 1075000 + 900000 1050000 + 800000 1025000 + 700000 1012500 + 600000 1000000 + 500000 975000 + 400000 950000 + 300000 937500 + 200000 925000 + >; + #cooling-cells = <2>; /* min followed by max */ }; }; -- GitLab From 8f9121b4aa0ff10407b5ec91afb4b33317e9f461 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Tue, 7 Aug 2018 12:48:48 +0200 Subject: [PATCH 1370/2269] ARM: dts: exynos: Convert exynos5250.dtsi to opp-v2 bindings commit eb9e16d8573e243f8175647f851eb5085dbe97a4 upstream. Convert Exynos5250 to OPP-v2 bindings. This is a preparation to add proper support for suspend operation point, which cannot be marked in opp-v1. Cc: # 4.3.x: cd6f55457eb4: ARM: dts: exynos: Remove "cooling-{min|max}-level" for CPU nodes Cc: # 4.3.x: 672f33198bee: arm: dts: exynos: Add missing cooling device properties for CPUs Cc: # 4.3.x Signed-off-by: Marek Szyprowski Reviewed-by: Chanwoo Choi Acked-by: Bartlomiej Zolnierkiewicz Signed-off-by: Krzysztof Kozlowski Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/exynos5250.dtsi | 130 ++++++++++++++++++++---------- 1 file changed, 88 insertions(+), 42 deletions(-) diff --git a/arch/arm/boot/dts/exynos5250.dtsi b/arch/arm/boot/dts/exynos5250.dtsi index b1ed7c57c51ee..033e4331e7ca0 100644 --- a/arch/arm/boot/dts/exynos5250.dtsi +++ b/arch/arm/boot/dts/exynos5250.dtsi @@ -57,62 +57,108 @@ device_type = "cpu"; compatible = "arm,cortex-a15"; reg = <0>; - clock-frequency = <1700000000>; clocks = <&clock CLK_ARM_CLK>; clock-names = "cpu"; - clock-latency = <140000>; - - operating-points = < - 1700000 1300000 - 1600000 1250000 - 1500000 1225000 - 1400000 1200000 - 1300000 1150000 - 1200000 1125000 - 1100000 1100000 - 1000000 1075000 - 900000 1050000 - 800000 1025000 - 700000 1012500 - 600000 1000000 - 500000 975000 - 400000 950000 - 300000 937500 - 200000 925000 - >; + operating-points-v2 = <&cpu0_opp_table>; #cooling-cells = <2>; /* min followed by max */ }; cpu@1 { device_type = "cpu"; compatible = "arm,cortex-a15"; reg = <1>; - clock-frequency = <1700000000>; clocks = <&clock CLK_ARM_CLK>; clock-names = "cpu"; - clock-latency = <140000>; - - operating-points = < - 1700000 1300000 - 1600000 1250000 - 1500000 1225000 - 1400000 1200000 - 1300000 1150000 - 1200000 1125000 - 1100000 1100000 - 1000000 1075000 - 900000 1050000 - 800000 1025000 - 700000 1012500 - 600000 1000000 - 500000 975000 - 400000 950000 - 300000 937500 - 200000 925000 - >; + operating-points-v2 = <&cpu0_opp_table>; #cooling-cells = <2>; /* min followed by max */ }; }; + cpu0_opp_table: opp_table0 { + compatible = "operating-points-v2"; + opp-shared; + + opp-200000000 { + opp-hz = /bits/ 64 <200000000>; + opp-microvolt = <925000>; + clock-latency-ns = <140000>; + }; + opp-300000000 { + opp-hz = /bits/ 64 <300000000>; + opp-microvolt = <937500>; + clock-latency-ns = <140000>; + }; + opp-400000000 { + opp-hz = /bits/ 64 <400000000>; + opp-microvolt = <950000>; + clock-latency-ns = <140000>; + }; + opp-500000000 { + opp-hz = /bits/ 64 <500000000>; + opp-microvolt = <975000>; + clock-latency-ns = <140000>; + }; + opp-600000000 { + opp-hz = /bits/ 64 <600000000>; + opp-microvolt = <1000000>; + clock-latency-ns = <140000>; + }; + opp-700000000 { + opp-hz = /bits/ 64 <700000000>; + opp-microvolt = <1012500>; + clock-latency-ns = <140000>; + }; + opp-800000000 { + opp-hz = /bits/ 64 <800000000>; + opp-microvolt = <1025000>; + clock-latency-ns = <140000>; + }; + opp-900000000 { + opp-hz = /bits/ 64 <900000000>; + opp-microvolt = <1050000>; + clock-latency-ns = <140000>; + }; + opp-1000000000 { + opp-hz = /bits/ 64 <1000000000>; + opp-microvolt = <1075000>; + clock-latency-ns = <140000>; + }; + opp-1100000000 { + opp-hz = /bits/ 64 <1100000000>; + opp-microvolt = <1100000>; + clock-latency-ns = <140000>; + }; + opp-1200000000 { + opp-hz = /bits/ 64 <1200000000>; + opp-microvolt = <1125000>; + clock-latency-ns = <140000>; + }; + opp-1300000000 { + opp-hz = /bits/ 64 <1300000000>; + opp-microvolt = <1150000>; + clock-latency-ns = <140000>; + }; + opp-1400000000 { + opp-hz = /bits/ 64 <1400000000>; + opp-microvolt = <1200000>; + clock-latency-ns = <140000>; + }; + opp-1500000000 { + opp-hz = /bits/ 64 <1500000000>; + opp-microvolt = <1225000>; + clock-latency-ns = <140000>; + }; + opp-1600000000 { + opp-hz = /bits/ 64 <1600000000>; + opp-microvolt = <1250000>; + clock-latency-ns = <140000>; + }; + opp-1700000000 { + opp-hz = /bits/ 64 <1700000000>; + opp-microvolt = <1300000>; + clock-latency-ns = <140000>; + }; + }; + soc: soc { sysram@02020000 { compatible = "mmio-sram"; -- GitLab From 6c0bcd457b943c36c177cf1e901848ec38f5a8c5 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Tue, 7 Aug 2018 12:48:49 +0200 Subject: [PATCH 1371/2269] ARM: dts: exynos: Mark 1 GHz CPU OPP as suspend OPP on Exynos5250 commit 645b23da6f8b47f295fa87051335d41d139717a5 upstream. 1 GHz CPU OPP is the default boot value for the Exynos5250 SOC, so mark it as suspend OPP. This fixes suspend/resume on Samsung Exynos5250 Snow Chomebook, which was broken since switching to generic cpufreq-dt driver in v4.3. Cc: # 4.3.x: cd6f55457eb4: ARM: dts: exynos: Remove "cooling-{min|max}-level" for CPU nodes Cc: # 4.3.x: 672f33198bee: arm: dts: exynos: Add missing cooling device properties for CPUs Cc: # 4.3.x Signed-off-by: Marek Szyprowski Reviewed-by: Chanwoo Choi Acked-by: Bartlomiej Zolnierkiewicz Signed-off-by: Krzysztof Kozlowski Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/exynos5250.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/boot/dts/exynos5250.dtsi b/arch/arm/boot/dts/exynos5250.dtsi index 033e4331e7ca0..9f73a8bf6e1c7 100644 --- a/arch/arm/boot/dts/exynos5250.dtsi +++ b/arch/arm/boot/dts/exynos5250.dtsi @@ -121,6 +121,7 @@ opp-hz = /bits/ 64 <1000000000>; opp-microvolt = <1075000>; clock-latency-ns = <140000>; + opp-suspend; }; opp-1100000000 { opp-hz = /bits/ 64 <1100000000>; -- GitLab From 9b86c5a510fdc72a083a918a0d4d00e1cce5287a Mon Sep 17 00:00:00 2001 From: Joe Jin Date: Tue, 16 Oct 2018 15:21:16 -0700 Subject: [PATCH 1372/2269] xen-swiotlb: use actually allocated size on check physical continuous commit 7250f422da0480d8512b756640f131b9b893ccda upstream. xen_swiotlb_{alloc,free}_coherent() allocate/free memory based on the order of the pages and not size argument (bytes). This is inconsistent with range_straddles_page_boundary and memset which use the 'size' value, which may lead to not exchanging memory with Xen (range_straddles_page_boundary() returned true). And then the call to xen_swiotlb_free_coherent() would actually try to exchange the memory with Xen, leading to the kernel hitting an BUG (as the hypercall returned an error). This patch fixes it by making the 'size' variable be of the same size as the amount of memory allocated. CC: stable@vger.kernel.org Signed-off-by: Joe Jin Cc: Konrad Rzeszutek Wilk Cc: Boris Ostrovsky Cc: Christoph Helwig Cc: Dongli Zhang Cc: John Sobecki Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Greg Kroah-Hartman --- drivers/xen/swiotlb-xen.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index f98b8c135db91..95dbee89b758a 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -317,6 +317,9 @@ xen_swiotlb_alloc_coherent(struct device *hwdev, size_t size, */ flags &= ~(__GFP_DMA | __GFP_HIGHMEM); + /* Convert the size to actually allocated. */ + size = 1UL << (order + XEN_PAGE_SHIFT); + /* On ARM this function returns an ioremap'ped virtual address for * which virt_to_phys doesn't return the corresponding physical * address. In fact on ARM virt_to_phys only works for kernel direct @@ -365,6 +368,9 @@ xen_swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr, * physical address */ phys = xen_bus_to_phys(dev_addr); + /* Convert the size to actually allocated. */ + size = 1UL << (order + XEN_PAGE_SHIFT); + if (((dev_addr + size - 1 <= dma_mask)) || range_straddles_page_boundary(phys, size)) xen_destroy_contiguous_region(phys, order); -- GitLab From d079cf7527518c96ecebda09c2bbf6b6438279dc Mon Sep 17 00:00:00 2001 From: "Dr. Greg Wettstein" Date: Mon, 17 Sep 2018 18:53:33 -0400 Subject: [PATCH 1373/2269] tpm: Restore functionality to xen vtpm driver. commit e487a0f52301293152a6f8c4e217f2a11dd808e3 upstream. Functionality of the xen-tpmfront driver was lost secondary to the introduction of xenbus multi-page support in commit ccc9d90a9a8b ("xenbus_client: Extend interface to support multi-page ring"). In this commit pointer to location of where the shared page address is stored was being passed to the xenbus_grant_ring() function rather then the address of the shared page itself. This resulted in a situation where the driver would attach to the vtpm-stubdom but any attempt to send a command to the stub domain would timeout. A diagnostic finding for this regression is the following error message being generated when the xen-tpmfront driver probes for a device: <3>vtpm vtpm-0: tpm_transmit: tpm_send: error -62 <3>vtpm vtpm-0: A TPM error (-62) occurred attempting to determine the timeouts This fix is relevant to all kernels from 4.1 forward which is the release in which multi-page xenbus support was introduced. Daniel De Graaf formulated the fix by code inspection after the regression point was located. Fixes: ccc9d90a9a8b ("xenbus_client: Extend interface to support multi-page ring") Signed-off-by: Dr. Greg Wettstein Signed-off-by: Greg Kroah-Hartman [boris: Updated commit message, added Fixes tag] Signed-off-by: Boris Ostrovsky Cc: stable@vger.kernel.org # v4.1+ Reviewed-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen --- drivers/char/tpm/xen-tpmfront.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/char/tpm/xen-tpmfront.c b/drivers/char/tpm/xen-tpmfront.c index 656e8af95d528..2cffaf567d996 100644 --- a/drivers/char/tpm/xen-tpmfront.c +++ b/drivers/char/tpm/xen-tpmfront.c @@ -203,7 +203,7 @@ static int setup_ring(struct xenbus_device *dev, struct tpm_private *priv) return -ENOMEM; } - rv = xenbus_grant_ring(dev, &priv->shr, 1, &gref); + rv = xenbus_grant_ring(dev, priv->shr, 1, &gref); if (rv < 0) return rv; -- GitLab From 57cd3a09665ad99de90b9aa8fd31cec26039163e Mon Sep 17 00:00:00 2001 From: Vasilis Liaskovitis Date: Mon, 15 Oct 2018 15:25:08 +0200 Subject: [PATCH 1374/2269] xen/blkfront: avoid NULL blkfront_info dereference on device removal MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit f92898e7f32e3533bfd95be174044bc349d416ca upstream. If a block device is hot-added when we are out of grants, gnttab_grant_foreign_access fails with -ENOSPC (log message "28 granting access to ring page") in this code path: talk_to_blkback -> setup_blkring -> xenbus_grant_ring -> gnttab_grant_foreign_access and the failing path in talk_to_blkback sets the driver_data to NULL: destroy_blkring: blkif_free(info, 0); mutex_lock(&blkfront_mutex); free_info(info); mutex_unlock(&blkfront_mutex); dev_set_drvdata(&dev->dev, NULL); This results in a NULL pointer BUG when blkfront_remove and blkif_free try to access the failing device's NULL struct blkfront_info. Cc: stable@vger.kernel.org # 4.5 and later Signed-off-by: Vasilis Liaskovitis Reviewed-by: Roger Pau Monné Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/block/xen-blkfront.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 7d23225f79ed3..4b2dcb65b011f 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -2471,6 +2471,9 @@ static int blkfront_remove(struct xenbus_device *xbdev) dev_dbg(&xbdev->dev, "%s removed", xbdev->nodename); + if (!info) + return 0; + blkif_free(info, 0); mutex_lock(&info->mutex); -- GitLab From bdc96cef37441af23ce3c2661de5f1984be7ee0c Mon Sep 17 00:00:00 2001 From: Boris Ostrovsky Date: Sun, 7 Oct 2018 16:05:38 -0400 Subject: [PATCH 1375/2269] xen/balloon: Support xend-based toolstack commit 3aa6c19d2f38be9c6e9a8ad5fa8e3c9d29ee3c35 upstream. Xend-based toolstacks don't have static-max entry in xenstore. The equivalent node for those toolstacks is memory_static_max. Fixes: 5266b8e4445c (xen: fix booting ballooned down hvm guest) Signed-off-by: Boris Ostrovsky Cc: # 4.13 Reviewed-by: Juergen Gross Signed-off-by: Juergen Gross Signed-off-by: Greg Kroah-Hartman --- drivers/xen/xen-balloon.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/xen/xen-balloon.c b/drivers/xen/xen-balloon.c index 294f35ce9e46b..cf8ef8cee5a0a 100644 --- a/drivers/xen/xen-balloon.c +++ b/drivers/xen/xen-balloon.c @@ -75,12 +75,15 @@ static void watch_target(struct xenbus_watch *watch, if (!watch_fired) { watch_fired = true; - err = xenbus_scanf(XBT_NIL, "memory", "static-max", "%llu", - &static_max); - if (err != 1) - static_max = new_target; - else + + if ((xenbus_scanf(XBT_NIL, "memory", "static-max", + "%llu", &static_max) == 1) || + (xenbus_scanf(XBT_NIL, "memory", "memory_static_max", + "%llu", &static_max) == 1)) static_max >>= PAGE_SHIFT - 10; + else + static_max = new_target; + target_diff = (xen_pv_domain() || xen_initial_domain()) ? 0 : static_max - balloon_stats.target_pages; } -- GitLab From d3e63ec6468a60c25570451ea587b41025e737e0 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Mon, 1 Oct 2018 07:57:42 +0200 Subject: [PATCH 1376/2269] xen: fix race in xen_qlock_wait() commit 2ac2a7d4d9ff4e01e36f9c3d116582f6f655ab47 upstream. In the following situation a vcpu waiting for a lock might not be woken up from xen_poll_irq(): CPU 1: CPU 2: CPU 3: takes a spinlock tries to get lock -> xen_qlock_wait() frees the lock -> xen_qlock_kick(cpu2) -> xen_clear_irq_pending() takes lock again tries to get lock -> *lock = _Q_SLOW_VAL -> *lock == _Q_SLOW_VAL ? -> xen_poll_irq() frees the lock -> xen_qlock_kick(cpu3) And cpu 2 will sleep forever. This can be avoided easily by modifying xen_qlock_wait() to call xen_poll_irq() only if the related irq was not pending and to call xen_clear_irq_pending() only if it was pending. Cc: stable@vger.kernel.org Cc: Waiman.Long@hp.com Cc: peterz@infradead.org Signed-off-by: Juergen Gross Reviewed-by: Jan Beulich Signed-off-by: Juergen Gross Signed-off-by: Greg Kroah-Hartman --- arch/x86/xen/spinlock.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c index 08324c64005db..e730d8e759cad 100644 --- a/arch/x86/xen/spinlock.c +++ b/arch/x86/xen/spinlock.c @@ -46,17 +46,12 @@ static void xen_qlock_wait(u8 *byte, u8 val) if (irq == -1) return; - /* clear pending */ - xen_clear_irq_pending(irq); - barrier(); + /* If irq pending already clear it and return. */ + if (xen_test_irq_pending(irq)) { + xen_clear_irq_pending(irq); + return; + } - /* - * We check the byte value after clearing pending IRQ to make sure - * that we won't miss a wakeup event because of the clearing. - * - * The sync_clear_bit() call in xen_clear_irq_pending() is atomic. - * So it is effectively a memory barrier for x86. - */ if (READ_ONCE(*byte) != val) return; -- GitLab From cbc3fb7b48dc47d4c14cda0f40721af5b2054c0d Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Mon, 1 Oct 2018 07:57:42 +0200 Subject: [PATCH 1377/2269] xen: make xen_qlock_wait() nestable commit a856531951dc8094359dfdac21d59cee5969c18e upstream. xen_qlock_wait() isn't safe for nested calls due to interrupts. A call of xen_qlock_kick() might be ignored in case a deeper nesting level was active right before the call of xen_poll_irq(): CPU 1: CPU 2: spin_lock(lock1) spin_lock(lock1) -> xen_qlock_wait() -> xen_clear_irq_pending() Interrupt happens spin_unlock(lock1) -> xen_qlock_kick(CPU 2) spin_lock_irqsave(lock2) spin_lock_irqsave(lock2) -> xen_qlock_wait() -> xen_clear_irq_pending() clears kick for lock1 -> xen_poll_irq() spin_unlock_irq_restore(lock2) -> xen_qlock_kick(CPU 2) wakes up spin_unlock_irq_restore(lock2) IRET resumes in xen_qlock_wait() -> xen_poll_irq() never wakes up The solution is to disable interrupts in xen_qlock_wait() and not to poll for the irq in case xen_qlock_wait() is called in nmi context. Cc: stable@vger.kernel.org Cc: Waiman.Long@hp.com Cc: peterz@infradead.org Signed-off-by: Juergen Gross Reviewed-by: Jan Beulich Signed-off-by: Juergen Gross Signed-off-by: Greg Kroah-Hartman --- arch/x86/xen/spinlock.c | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c index e730d8e759cad..3243fe1b6ea4a 100644 --- a/arch/x86/xen/spinlock.c +++ b/arch/x86/xen/spinlock.c @@ -40,29 +40,25 @@ static void xen_qlock_kick(int cpu) */ static void xen_qlock_wait(u8 *byte, u8 val) { + unsigned long flags; int irq = __this_cpu_read(lock_kicker_irq); /* If kicker interrupts not initialized yet, just spin */ - if (irq == -1) + if (irq == -1 || in_nmi()) return; - /* If irq pending already clear it and return. */ + /* Guard against reentry. */ + local_irq_save(flags); + + /* If irq pending already clear it. */ if (xen_test_irq_pending(irq)) { xen_clear_irq_pending(irq); - return; + } else if (READ_ONCE(*byte) == val) { + /* Block until irq becomes pending (or a spurious wakeup) */ + xen_poll_irq(irq); } - if (READ_ONCE(*byte) != val) - return; - - /* - * If an interrupt happens here, it will leave the wakeup irq - * pending, which will cause xen_poll_irq() to return - * immediately. - */ - - /* Block until irq becomes pending (or perhaps a spurious wakeup) */ - xen_poll_irq(irq); + local_irq_restore(flags); } static irqreturn_t dummy_handler(int irq, void *dev_id) -- GitLab From ba94ecfc74e4e74acb46afe9a7bea10d869657b9 Mon Sep 17 00:00:00 2001 From: Roger Pau Monne Date: Tue, 9 Oct 2018 12:32:37 +0200 Subject: [PATCH 1378/2269] xen/pvh: increase early stack size MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 7deecbda3026f5e2a8cc095d7ef7261a920efcf2 upstream. While booting on an AMD EPYC box the stack canary would detect stack overflows when using the current PVH early stack size (256). Switch to using the value defined by BOOT_STACK_SIZE, which prevents the stack overflow. Cc: # 4.11 Signed-off-by: Roger Pau Monné Reviewed-by: Juergen Gross Signed-off-by: Juergen Gross Signed-off-by: Greg Kroah-Hartman --- arch/x86/xen/xen-pvh.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/xen/xen-pvh.S b/arch/x86/xen/xen-pvh.S index 5d7554c025fd3..7ecbd3dde2ea0 100644 --- a/arch/x86/xen/xen-pvh.S +++ b/arch/x86/xen/xen-pvh.S @@ -178,7 +178,7 @@ canary: .fill 48, 1, 0 early_stack: - .fill 256, 1, 0 + .fill BOOT_STACK_SIZE, 1, 0 early_stack_end: ELFNOTE(Xen, XEN_ELFNOTE_PHYS32_ENTRY, -- GitLab From 9ba9232f654da32311cb5f71a85fc4e792e84d91 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Thu, 25 Oct 2018 09:54:15 +0200 Subject: [PATCH 1379/2269] xen/pvh: don't try to unplug emulated devices commit e6111161c0a02d58919d776eec94b313bb57911f upstream. A Xen PVH guest has no associated qemu device model, so trying to unplug any emulated devices is making no sense at all. Bail out early from xen_unplug_emulated_devices() when running as PVH guest. This will avoid issuing the boot message: [ 0.000000] Xen Platform PCI: unrecognised magic value Cc: # 4.11 Signed-off-by: Juergen Gross Reviewed-by: Boris Ostrovsky Signed-off-by: Juergen Gross Signed-off-by: Greg Kroah-Hartman --- arch/x86/xen/platform-pci-unplug.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/x86/xen/platform-pci-unplug.c b/arch/x86/xen/platform-pci-unplug.c index 33a783c77d969..184b369223979 100644 --- a/arch/x86/xen/platform-pci-unplug.c +++ b/arch/x86/xen/platform-pci-unplug.c @@ -146,6 +146,10 @@ void xen_unplug_emulated_devices(void) { int r; + /* PVH guests don't have emulated devices. */ + if (xen_pvh_domain()) + return; + /* user explicitly requested no unplug */ if (xen_emul_unplug & XEN_UNPLUG_NEVER) return; -- GitLab From acc14d41da801242ef7ad21c55a7f2569a1fd2f1 Mon Sep 17 00:00:00 2001 From: Lubomir Rintel Date: Sat, 6 Oct 2018 22:12:32 +0200 Subject: [PATCH 1380/2269] libertas: don't set URB_ZERO_PACKET on IN USB transfer commit 6528d88047801b80d2a5370ad46fb6eff2f509e0 upstream. The USB core gets rightfully upset: usb 1-1: BOGUS urb flags, 240 --> 200 WARNING: CPU: 0 PID: 60 at drivers/usb/core/urb.c:503 usb_submit_urb+0x2f8/0x3ed Modules linked in: CPU: 0 PID: 60 Comm: kworker/0:3 Not tainted 4.19.0-rc6-00319-g5206d00a45c7 #39 Hardware name: OLPC XO/XO, BIOS OLPC Ver 1.00.01 06/11/2014 Workqueue: events request_firmware_work_func EIP: usb_submit_urb+0x2f8/0x3ed Code: 75 06 8b 8f 80 00 00 00 8d 47 78 89 4d e4 89 55 e8 e8 35 1c f6 ff 8b 55 e8 56 52 8b 4d e4 51 50 68 e3 ce c7 c0 e8 ed 18 c6 ff <0f> 0b 83 c4 14 80 7d ef 01 74 0a 80 7d ef 03 0f 85 b8 00 00 00 8b EAX: 00000025 EBX: ce7d4980 ECX: 00000000 EDX: 00000001 ESI: 00000200 EDI: ce7d8800 EBP: ce7f5ea8 ESP: ce7f5e70 DS: 007b ES: 007b FS: 0000 GS: 00e0 SS: 0068 EFLAGS: 00210292 CR0: 80050033 CR2: 00000000 CR3: 00e80000 CR4: 00000090 Call Trace: ? if_usb_fw_timeo+0x64/0x64 __if_usb_submit_rx_urb+0x85/0xe6 ? if_usb_fw_timeo+0x64/0x64 if_usb_submit_rx_urb_fwload+0xd/0xf if_usb_prog_firmware+0xc0/0x3db ? _request_firmware+0x54/0x47b ? _request_firmware+0x89/0x47b ? if_usb_probe+0x412/0x412 lbs_fw_loaded+0x55/0xa6 ? debug_smp_processor_id+0x12/0x14 helper_firmware_cb+0x3c/0x3f request_firmware_work_func+0x37/0x6f process_one_work+0x164/0x25a worker_thread+0x1c4/0x284 kthread+0xec/0xf1 ? cancel_delayed_work_sync+0xf/0xf ? kthread_create_on_node+0x1a/0x1a ret_from_fork+0x2e/0x38 ---[ end trace 3ef1e3b2dd53852f ]--- Cc: stable@vger.kernel.org Signed-off-by: Lubomir Rintel Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/marvell/libertas/if_usb.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/wireless/marvell/libertas/if_usb.c b/drivers/net/wireless/marvell/libertas/if_usb.c index 16e54c757dd07..e4ae2b5a71c25 100644 --- a/drivers/net/wireless/marvell/libertas/if_usb.c +++ b/drivers/net/wireless/marvell/libertas/if_usb.c @@ -456,8 +456,6 @@ static int __if_usb_submit_rx_urb(struct if_usb_card *cardp, MRVDRV_ETH_RX_PACKET_BUFFER_SIZE, callbackfn, cardp); - cardp->rx_urb->transfer_flags |= URB_ZERO_PACKET; - lbs_deb_usb2(&cardp->udev->dev, "Pointer for rx_urb %p\n", cardp->rx_urb); if ((ret = usb_submit_urb(cardp->rx_urb, GFP_ATOMIC))) { lbs_deb_usbd(&cardp->udev->dev, "Submit Rx URB failed: %d\n", ret); -- GitLab From 255624a35b4ca228bac9a1f135263e28c27c4640 Mon Sep 17 00:00:00 2001 From: "Shuah Khan (Samsung OSG)" Date: Thu, 18 Oct 2018 10:19:29 -0600 Subject: [PATCH 1381/2269] usbip:vudc: BUG kmalloc-2048 (Not tainted): Poison overwritten commit e28fd56ad5273be67d0fae5bedc7e1680e729952 upstream. In rmmod path, usbip_vudc does platform_device_put() twice once from platform_device_unregister() and then from put_vudc_device(). The second put results in: BUG kmalloc-2048 (Not tainted): Poison overwritten error or BUG: KASAN: use-after-free in kobject_put+0x1e/0x230 if KASAN is enabled. [ 169.042156] calling init+0x0/0x1000 [usbip_vudc] @ 1697 [ 169.042396] ============================================================================= [ 169.043678] probe of usbip-vudc.0 returned 1 after 350 usecs [ 169.044508] BUG kmalloc-2048 (Not tainted): Poison overwritten [ 169.044509] ----------------------------------------------------------------------------- ... [ 169.057849] INFO: Freed in device_release+0x2b/0x80 age=4223 cpu=3 pid=1693 [ 169.057852] kobject_put+0x86/0x1b0 [ 169.057853] 0xffffffffc0c30a96 [ 169.057855] __x64_sys_delete_module+0x157/0x240 Fix it to call platform_device_del() instead and let put_vudc_device() do the platform_device_put(). Reported-by: Randy Dunlap Signed-off-by: Shuah Khan (Samsung OSG) Cc: Signed-off-by: Greg Kroah-Hartman --- drivers/usb/usbip/vudc_main.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/usb/usbip/vudc_main.c b/drivers/usb/usbip/vudc_main.c index 9e655714e3891..916e2eefc886b 100644 --- a/drivers/usb/usbip/vudc_main.c +++ b/drivers/usb/usbip/vudc_main.c @@ -85,6 +85,10 @@ static int __init init(void) cleanup: list_for_each_entry_safe(udc_dev, udc_dev2, &vudc_devices, dev_entry) { list_del(&udc_dev->dev_entry); + /* + * Just do platform_device_del() here, put_vudc_device() + * calls the platform_device_put() + */ platform_device_del(udc_dev->pdev); put_vudc_device(udc_dev); } @@ -101,7 +105,11 @@ static void __exit cleanup(void) list_for_each_entry_safe(udc_dev, udc_dev2, &vudc_devices, dev_entry) { list_del(&udc_dev->dev_entry); - platform_device_unregister(udc_dev->pdev); + /* + * Just do platform_device_del() here, put_vudc_device() + * calls the platform_device_put() + */ + platform_device_del(udc_dev->pdev); put_vudc_device(udc_dev); } platform_driver_unregister(&vudc_driver); -- GitLab From c459fed018c33ae79dbb892ba2a5874cc7932781 Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Tue, 2 Oct 2018 20:57:44 +0900 Subject: [PATCH 1382/2269] usb: gadget: udc: renesas_usb3: Fix b-device mode for "workaround" commit afc92514a34c7414b28047b1205a6b709103c699 upstream. If the "workaround_for_vbus" is true, the driver will not call usb_disconnect(). So, since the controller keeps some registers' value, the driver doesn't re-enumarate suitable speed after the b-device mode is disabled. To fix the issue, this patch adds usb_disconnect() calling in renesas_usb3_b_device_write() if workaround_for_vbus is true. Fixes: 43ba968b00ea ("usb: gadget: udc: renesas_usb3: add debugfs to set the b-device mode") Cc: # v4.14+ Signed-off-by: Yoshihiro Shimoda Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/renesas_usb3.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/gadget/udc/renesas_usb3.c b/drivers/usb/gadget/udc/renesas_usb3.c index 36a706f475d25..ade0723787e5e 100644 --- a/drivers/usb/gadget/udc/renesas_usb3.c +++ b/drivers/usb/gadget/udc/renesas_usb3.c @@ -2374,6 +2374,9 @@ static ssize_t renesas_usb3_b_device_write(struct file *file, else usb3->forced_b_device = false; + if (usb3->workaround_for_vbus) + usb3_disconnect(usb3); + /* Let this driver call usb3_connect() anyway */ usb3_check_id(usb3); -- GitLab From 0c38cad1d15004c2d6dc188a6f3f362a98883334 Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Sat, 13 Oct 2018 09:46:08 +0300 Subject: [PATCH 1383/2269] iwlwifi: mvm: check return value of rs_rate_from_ucode_rate() commit 3d71c3f1f50cf309bd20659422af549bc784bfff upstream. The rs_rate_from_ucode_rate() function may return -EINVAL if the rate is invalid, but none of the callsites check for the error, potentially making us access arrays with index IWL_RATE_INVALID, which is larger than the arrays, causing an out-of-bounds access. This will trigger KASAN warnings, such as the one reported in the bugzilla issue mentioned below. This fixes https://bugzilla.kernel.org/show_bug.cgi?id=200659 Cc: stable@vger.kernel.org Signed-off-by: Luca Coelho Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/intel/iwlwifi/mvm/rs.c | 24 ++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rs.c b/drivers/net/wireless/intel/iwlwifi/mvm/rs.c index 386fdee23eb0e..bd48cd0eb395d 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rs.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rs.c @@ -1226,7 +1226,11 @@ void iwl_mvm_rs_tx_status(struct iwl_mvm *mvm, struct ieee80211_sta *sta, !(info->flags & IEEE80211_TX_STAT_AMPDU)) return; - rs_rate_from_ucode_rate(tx_resp_hwrate, info->band, &tx_resp_rate); + if (rs_rate_from_ucode_rate(tx_resp_hwrate, info->band, + &tx_resp_rate)) { + WARN_ON_ONCE(1); + return; + } #ifdef CONFIG_MAC80211_DEBUGFS /* Disable last tx check if we are debugging with fixed rate but @@ -1277,7 +1281,10 @@ void iwl_mvm_rs_tx_status(struct iwl_mvm *mvm, struct ieee80211_sta *sta, */ table = &lq_sta->lq; lq_hwrate = le32_to_cpu(table->rs_table[0]); - rs_rate_from_ucode_rate(lq_hwrate, info->band, &lq_rate); + if (rs_rate_from_ucode_rate(lq_hwrate, info->band, &lq_rate)) { + WARN_ON_ONCE(1); + return; + } /* Here we actually compare this rate to the latest LQ command */ if (lq_color != LQ_FLAG_COLOR_GET(table->flags)) { @@ -1379,8 +1386,12 @@ void iwl_mvm_rs_tx_status(struct iwl_mvm *mvm, struct ieee80211_sta *sta, /* Collect data for each rate used during failed TX attempts */ for (i = 0; i <= retries; ++i) { lq_hwrate = le32_to_cpu(table->rs_table[i]); - rs_rate_from_ucode_rate(lq_hwrate, info->band, - &lq_rate); + if (rs_rate_from_ucode_rate(lq_hwrate, info->band, + &lq_rate)) { + WARN_ON_ONCE(1); + return; + } + /* * Only collect stats if retried rate is in the same RS * table as active/search. @@ -3244,7 +3255,10 @@ static void rs_build_rates_table_from_fixed(struct iwl_mvm *mvm, for (i = 0; i < num_rates; i++) lq_cmd->rs_table[i] = ucode_rate_le32; - rs_rate_from_ucode_rate(ucode_rate, band, &rate); + if (rs_rate_from_ucode_rate(ucode_rate, band, &rate)) { + WARN_ON_ONCE(1); + return; + } if (is_mimo(&rate)) lq_cmd->mimo_delim = num_rates - 1; -- GitLab From 1d982ccf0e67d9248a10a5aab1f3d96825099a19 Mon Sep 17 00:00:00 2001 From: Stefan Nuernberger Date: Mon, 17 Sep 2018 19:46:53 +0200 Subject: [PATCH 1384/2269] net/ipv4: defensive cipso option parsing commit 076ed3da0c9b2f88d9157dbe7044a45641ae369e upstream. commit 40413955ee26 ("Cipso: cipso_v4_optptr enter infinite loop") fixed a possible infinite loop in the IP option parsing of CIPSO. The fix assumes that ip_options_compile filtered out all zero length options and that no other one-byte options beside IPOPT_END and IPOPT_NOOP exist. While this assumption currently holds true, add explicit checks for zero length and invalid length options to be safe for the future. Even though ip_options_compile should have validated the options, the introduction of new one-byte options can still confuse this code without the additional checks. Signed-off-by: Stefan Nuernberger Cc: David Woodhouse Cc: Simon Veith Cc: stable@vger.kernel.org Acked-by: Paul Moore Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/cipso_ipv4.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index 82178cc69c961..777fa3b7fb13d 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -1512,7 +1512,7 @@ static int cipso_v4_parsetag_loc(const struct cipso_v4_doi *doi_def, * * Description: * Parse the packet's IP header looking for a CIPSO option. Returns a pointer - * to the start of the CIPSO option on success, NULL if one if not found. + * to the start of the CIPSO option on success, NULL if one is not found. * */ unsigned char *cipso_v4_optptr(const struct sk_buff *skb) @@ -1522,10 +1522,8 @@ unsigned char *cipso_v4_optptr(const struct sk_buff *skb) int optlen; int taglen; - for (optlen = iph->ihl*4 - sizeof(struct iphdr); optlen > 0; ) { + for (optlen = iph->ihl*4 - sizeof(struct iphdr); optlen > 1; ) { switch (optptr[0]) { - case IPOPT_CIPSO: - return optptr; case IPOPT_END: return NULL; case IPOPT_NOOP: @@ -1534,6 +1532,11 @@ unsigned char *cipso_v4_optptr(const struct sk_buff *skb) default: taglen = optptr[1]; } + if (!taglen || taglen > optlen) + return NULL; + if (optptr[0] == IPOPT_CIPSO) + return optptr; + optlen -= taglen; optptr += taglen; } -- GitLab From 1e4465dae54959cb92d1bb949df67b0b40372eac Mon Sep 17 00:00:00 2001 From: Christian Lamparter Date: Sun, 14 Oct 2018 23:28:50 +0200 Subject: [PATCH 1385/2269] dmaengine: ppc4xx: fix off-by-one build failure commit 27d8d2d7a9b7eb05c4484b74b749eaee7b50b845 upstream. There are two poly_store, but one should have been poly_show. |adma.c:4382:16: error: conflicting types for 'poly_store' | static ssize_t poly_store(struct device_driver *dev, const char *buf, | ^~~~~~~~~~ |adma.c:4363:16: note: previous definition of 'poly_store' was here | static ssize_t poly_store(struct device_driver *dev, char *buf) | ^~~~~~~~~~ CC: stable@vger.kernel.org Fixes: 13efe1a05384 ("dmaengine: ppc4xx: remove DRIVER_ATTR() usage") Signed-off-by: Christian Lamparter Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/ppc4xx/adma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/ppc4xx/adma.c b/drivers/dma/ppc4xx/adma.c index 4cf0d4d0cecfb..25610286979f6 100644 --- a/drivers/dma/ppc4xx/adma.c +++ b/drivers/dma/ppc4xx/adma.c @@ -4360,7 +4360,7 @@ static ssize_t enable_store(struct device_driver *dev, const char *buf, } static DRIVER_ATTR_RW(enable); -static ssize_t poly_store(struct device_driver *dev, char *buf) +static ssize_t poly_show(struct device_driver *dev, char *buf) { ssize_t size = 0; u32 reg; -- GitLab From 381fa28746b552875e3eccfbfdd45a7bc97f4d92 Mon Sep 17 00:00:00 2001 From: Pierre Yves MORDRET Date: Tue, 13 Mar 2018 17:42:06 +0100 Subject: [PATCH 1386/2269] dmaengine: stm32-dma: fix incomplete configuration in cyclic mode commit e57cb3b3f10d005410f09d4598cc6d62b833f2b0 upstream. When in cyclic mode, the configuration is updated after having started the DMA hardware (STM32_DMA_SCR_EN) leading to incomplete configuration of SMxAR registers. Signed-off-by: Pierre-Yves MORDRET Signed-off-by: Hugues Fruchet Signed-off-by: Vinod Koul Cc: "Joel Fernandes (Google)" Signed-off-by: Greg Kroah-Hartman --- drivers/dma/stm32-dma.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/dma/stm32-dma.c b/drivers/dma/stm32-dma.c index 786fc8fcc38ed..32192e98159b2 100644 --- a/drivers/dma/stm32-dma.c +++ b/drivers/dma/stm32-dma.c @@ -429,6 +429,8 @@ static void stm32_dma_dump_reg(struct stm32_dma_chan *chan) dev_dbg(chan2dev(chan), "SFCR: 0x%08x\n", sfcr); } +static void stm32_dma_configure_next_sg(struct stm32_dma_chan *chan); + static void stm32_dma_start_transfer(struct stm32_dma_chan *chan) { struct stm32_dma_device *dmadev = stm32_dma_get_dev(chan); @@ -471,6 +473,9 @@ static void stm32_dma_start_transfer(struct stm32_dma_chan *chan) if (status) stm32_dma_irq_clear(chan, status); + if (chan->desc->cyclic) + stm32_dma_configure_next_sg(chan); + stm32_dma_dump_reg(chan); /* Start DMA */ @@ -564,8 +569,7 @@ static void stm32_dma_issue_pending(struct dma_chan *c) if (vchan_issue_pending(&chan->vchan) && !chan->desc && !chan->busy) { dev_dbg(chan2dev(chan), "vchan %p: issued\n", &chan->vchan); stm32_dma_start_transfer(chan); - if (chan->desc->cyclic) - stm32_dma_configure_next_sg(chan); + } spin_unlock_irqrestore(&chan->vchan.lock, flags); } -- GitLab From 1490de2bb0836fc0631c04d0559fdf81545b672f Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Tue, 25 Sep 2018 13:53:02 -0700 Subject: [PATCH 1387/2269] libnvdimm: Hold reference on parent while scheduling async init commit b6eae0f61db27748606cc00dafcfd1e2c032f0a5 upstream. Unlike asynchronous initialization in the core we have not yet associated the device with the parent, and as such the device doesn't hold a reference to the parent. In order to resolve that we should be holding a reference on the parent until the asynchronous initialization has completed. Cc: Fixes: 4d88a97aa9e8 ("libnvdimm: ...base ... infrastructure") Signed-off-by: Alexander Duyck Signed-off-by: Dan Williams Signed-off-by: Greg Kroah-Hartman --- drivers/nvdimm/bus.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c index fb5ab5812a22f..a6746a1f20ae1 100644 --- a/drivers/nvdimm/bus.c +++ b/drivers/nvdimm/bus.c @@ -484,6 +484,8 @@ static void nd_async_device_register(void *d, async_cookie_t cookie) put_device(dev); } put_device(dev); + if (dev->parent) + put_device(dev->parent); } static void nd_async_device_unregister(void *d, async_cookie_t cookie) @@ -503,6 +505,8 @@ void __nd_device_register(struct device *dev) if (!dev) return; dev->bus = &nvdimm_bus_type; + if (dev->parent) + get_device(dev->parent); get_device(dev); async_schedule_domain(nd_async_device_register, dev, &nd_async_domain); -- GitLab From 75c7ba4853719d8efec51f45d3f26ff2b4991a2e Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 27 Sep 2018 15:01:55 -0700 Subject: [PATCH 1388/2269] libnvdimm, region: Fail badblocks listing for inactive regions commit 5d394eee2c102453278d81d9a7cf94c80253486a upstream. While experimenting with region driver loading the following backtrace was triggered: INFO: trying to register non-static key. the code is fine but needs lockdep annotation. turning off the locking correctness validator. [..] Call Trace: dump_stack+0x85/0xcb register_lock_class+0x571/0x580 ? __lock_acquire+0x2ba/0x1310 ? kernfs_seq_start+0x2a/0x80 __lock_acquire+0xd4/0x1310 ? dev_attr_show+0x1c/0x50 ? __lock_acquire+0x2ba/0x1310 ? kernfs_seq_start+0x2a/0x80 ? lock_acquire+0x9e/0x1a0 lock_acquire+0x9e/0x1a0 ? dev_attr_show+0x1c/0x50 badblocks_show+0x70/0x190 ? dev_attr_show+0x1c/0x50 dev_attr_show+0x1c/0x50 This results from a missing successful call to devm_init_badblocks() from nd_region_probe(). Block attempts to show badblocks while the region is not enabled. Fixes: 6a6bef90425e ("libnvdimm: add mechanism to publish badblocks...") Cc: Reviewed-by: Johannes Thumshirn Reviewed-by: Dave Jiang Signed-off-by: Dan Williams Signed-off-by: Greg Kroah-Hartman --- drivers/nvdimm/region_devs.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c index abaf38c612206..050deb56ee62a 100644 --- a/drivers/nvdimm/region_devs.c +++ b/drivers/nvdimm/region_devs.c @@ -513,10 +513,17 @@ static ssize_t region_badblocks_show(struct device *dev, struct device_attribute *attr, char *buf) { struct nd_region *nd_region = to_nd_region(dev); + ssize_t rc; - return badblocks_show(&nd_region->bb, buf, 0); -} + device_lock(dev); + if (dev->driver) + rc = badblocks_show(&nd_region->bb, buf, 0); + else + rc = -ENXIO; + device_unlock(dev); + return rc; +} static DEVICE_ATTR(badblocks, 0444, region_badblocks_show, NULL); static ssize_t resource_show(struct device *dev, -- GitLab From 83fc44a1498347d5fcb670d3b7536bea356a5c9f Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 3 Oct 2018 19:31:44 +0200 Subject: [PATCH 1389/2269] ASoC: intel: skylake: Add missing break in skl_tplg_get_token() commit 9c80c5a8831471e0a3e139aad1b0d4c0fdc50b2f upstream. skl_tplg_get_token() misses a break in the big switch() block for SKL_TKN_U8_CORE_ID entry. Spotted nicely by -Wimplicit-fallthrough compiler option. Fixes: 6277e83292a2 ("ASoC: Intel: Skylake: Parse vendor tokens to build module data") Cc: Signed-off-by: Takashi Iwai Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/intel/skylake/skl-topology.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/intel/skylake/skl-topology.c b/sound/soc/intel/skylake/skl-topology.c index 22f768ca3c73b..b45c1ae60f940 100644 --- a/sound/soc/intel/skylake/skl-topology.c +++ b/sound/soc/intel/skylake/skl-topology.c @@ -2360,6 +2360,7 @@ static int skl_tplg_get_token(struct device *dev, case SKL_TKN_U8_CORE_ID: mconfig->core_id = tkn_elem->value; + break; case SKL_TKN_U8_MOD_TYPE: mconfig->m_type = tkn_elem->value; -- GitLab From 97a063dfcbdf910bfe330c1ca53bd7c70ae1ae75 Mon Sep 17 00:00:00 2001 From: Artemy Kovalyov Date: Mon, 15 Oct 2018 14:13:35 +0300 Subject: [PATCH 1390/2269] IB/mlx5: Fix MR cache initialization commit 013c2403bf32e48119aeb13126929f81352cc7ac upstream. Schedule MR cache work only after bucket was initialized. Cc: # 4.10 Fixes: 49780d42dfc9 ("IB/mlx5: Expose MR cache for mlx5_ib") Signed-off-by: Artemy Kovalyov Reviewed-by: Majd Dibbiny Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/mlx5/mr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 9866c5d1b99fe..e88bb71056cd6 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -675,7 +675,6 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev) init_completion(&ent->compl); INIT_WORK(&ent->work, cache_work_func); INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func); - queue_work(cache->wq, &ent->work); if (i > MR_CACHE_LAST_STD_ENTRY) { mlx5_odp_init_mr_cache_entry(ent); @@ -694,6 +693,7 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev) ent->limit = dev->mdev->profile->mr_cache[i].limit; else ent->limit = 0; + queue_work(cache->wq, &ent->work); } err = mlx5_mr_cache_debugfs_init(dev); -- GitLab From cf08282805374fbf0c3c6a124f8e4687c21c6615 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 5 Oct 2018 18:44:40 -0400 Subject: [PATCH 1391/2269] jbd2: fix use after free in jbd2_log_do_checkpoint() commit ccd3c4373eacb044eb3832966299d13d2631f66f upstream. The code cleaning transaction's lists of checkpoint buffers has a bug where it increases bh refcount only after releasing journal->j_list_lock. Thus the following race is possible: CPU0 CPU1 jbd2_log_do_checkpoint() jbd2_journal_try_to_free_buffers() __journal_try_to_free_buffer(bh) ... while (transaction->t_checkpoint_io_list) ... if (buffer_locked(bh)) { <-- IO completes now, buffer gets unlocked --> spin_unlock(&journal->j_list_lock); spin_lock(&journal->j_list_lock); __jbd2_journal_remove_checkpoint(jh); spin_unlock(&journal->j_list_lock); try_to_free_buffers(page); get_bh(bh) <-- accesses freed bh Fix the problem by grabbing bh reference before unlocking journal->j_list_lock. Fixes: dc6e8d669cf5 ("jbd2: don't call get_bh() before calling __jbd2_journal_remove_checkpoint()") Fixes: be1158cc615f ("jbd2: fold __process_buffer() into jbd2_log_do_checkpoint()") Reported-by: syzbot+7f4a27091759e2fe7453@syzkaller.appspotmail.com CC: stable@vger.kernel.org Reviewed-by: Lukas Czerner Signed-off-by: Jan Kara Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/jbd2/checkpoint.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index 4055f51617eff..fe4fe155b7fbe 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c @@ -254,8 +254,8 @@ restart: bh = jh2bh(jh); if (buffer_locked(bh)) { - spin_unlock(&journal->j_list_lock); get_bh(bh); + spin_unlock(&journal->j_list_lock); wait_on_buffer(bh); /* the journal_head may have gone by now */ BUFFER_TRACE(bh, "brelse"); @@ -336,8 +336,8 @@ restart2: jh = transaction->t_checkpoint_io_list; bh = jh2bh(jh); if (buffer_locked(bh)) { - spin_unlock(&journal->j_list_lock); get_bh(bh); + spin_unlock(&journal->j_list_lock); wait_on_buffer(bh); /* the journal_head may have gone by now */ BUFFER_TRACE(bh, "brelse"); -- GitLab From 63be2065bf4d66c50d3d43935c4687d25642dbbd Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 13 Oct 2018 00:19:13 -0400 Subject: [PATCH 1392/2269] gfs2_meta: ->mount() can get NULL dev_name commit 3df629d873f8683af6f0d34dfc743f637966d483 upstream. get in sync with mount_bdev() handling of the same Reported-by: syzbot+c54f8e94e6bba03b04e9@syzkaller.appspotmail.com Cc: stable@vger.kernel.org Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- fs/gfs2/ops_fstype.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index a3711f5434052..28d6c65c8bb38 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -1352,6 +1352,9 @@ static struct dentry *gfs2_mount_meta(struct file_system_type *fs_type, struct path path; int error; + if (!dev_name || !*dev_name) + return ERR_PTR(-EINVAL); + error = kern_path(dev_name, LOOKUP_FOLLOW, &path); if (error) { pr_warn("path_lookup on %s returned error %d\n", -- GitLab From 13b63ba40348d9c43c3842632d3f371a97ca5fe4 Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Tue, 2 Oct 2018 21:18:45 -0400 Subject: [PATCH 1393/2269] ext4: initialize retries variable in ext4_da_write_inline_data_begin() commit 625ef8a3acd111d5f496d190baf99d1a815bd03e upstream. Variable retries is not initialized in ext4_da_write_inline_data_begin() which can lead to nondeterministic number of retries in case we hit ENOSPC. Initialize retries to zero as we do everywhere else. Signed-off-by: Lukas Czerner Signed-off-by: Theodore Ts'o Fixes: bc0ca9df3b2a ("ext4: retry allocation when inline->extent conversion failed") Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/inline.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index 4e1d62ba0703f..ac2e0516c16ff 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -869,7 +869,7 @@ int ext4_da_write_inline_data_begin(struct address_space *mapping, handle_t *handle; struct page *page; struct ext4_iloc iloc; - int retries; + int retries = 0; ret = ext4_get_inode_loc(inode, &iloc); if (ret) -- GitLab From 65dc3dd8c550d7b16b4901b2fe4c4cd022931a0e Mon Sep 17 00:00:00 2001 From: Wang Shilong Date: Wed, 3 Oct 2018 10:33:32 -0400 Subject: [PATCH 1394/2269] ext4: fix setattr project check in fssetxattr ioctl commit dc7ac6c4cae3b58724c2f1e21a7c05ce19ecd5a8 upstream. Currently, project quota could be changed by fssetxattr ioctl, and existed permission check inode_owner_or_capable() is obviously not enough, just think that common users could change project id of file, that could make users to break project quota easily. This patch try to follow same regular of xfs project quota: "Project Quota ID state is only allowed to change from within the init namespace. Enforce that restriction only if we are trying to change the quota ID state. Everything else is allowed in user namespaces." Besides that, check and set project id'state should be an atomic operation, protect whole operation with inode lock, ext4_ioctl_setproject() is only used for ioctl EXT4_IOC_FSSETXATTR, we have held mnt_want_write_file() before ext4_ioctl_setflags(), and ext4_ioctl_setproject() is called after ext4_ioctl_setflags(), we could share codes, so remove it inside ext4_ioctl_setproject(). Signed-off-by: Wang Shilong Signed-off-by: Theodore Ts'o Reviewed-by: Andreas Dilger Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/ioctl.c | 60 ++++++++++++++++++++++++++++++------------------- 1 file changed, 37 insertions(+), 23 deletions(-) diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 1eb68e6269313..d75b7aae3d747 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -344,19 +344,14 @@ static int ext4_ioctl_setproject(struct file *filp, __u32 projid) if (projid_eq(kprojid, EXT4_I(inode)->i_projid)) return 0; - err = mnt_want_write_file(filp); - if (err) - return err; - err = -EPERM; - inode_lock(inode); /* Is it quota file? Do not allow user to mess with it */ if (ext4_is_quota_file(inode)) - goto out_unlock; + return err; err = ext4_get_inode_loc(inode, &iloc); if (err) - goto out_unlock; + return err; raw_inode = ext4_raw_inode(&iloc); if (!EXT4_FITS_IN_INODE(raw_inode, ei, i_projid)) { @@ -364,7 +359,7 @@ static int ext4_ioctl_setproject(struct file *filp, __u32 projid) EXT4_SB(sb)->s_want_extra_isize, &iloc); if (err) - goto out_unlock; + return err; } else { brelse(iloc.bh); } @@ -374,10 +369,8 @@ static int ext4_ioctl_setproject(struct file *filp, __u32 projid) handle = ext4_journal_start(inode, EXT4_HT_QUOTA, EXT4_QUOTA_INIT_BLOCKS(sb) + EXT4_QUOTA_DEL_BLOCKS(sb) + 3); - if (IS_ERR(handle)) { - err = PTR_ERR(handle); - goto out_unlock; - } + if (IS_ERR(handle)) + return PTR_ERR(handle); err = ext4_reserve_inode_write(handle, inode, &iloc); if (err) @@ -405,9 +398,6 @@ out_dirty: err = rc; out_stop: ext4_journal_stop(handle); -out_unlock: - inode_unlock(inode); - mnt_drop_write_file(filp); return err; } #else @@ -592,6 +582,30 @@ static int ext4_ioc_getfsmap(struct super_block *sb, return 0; } +static int ext4_ioctl_check_project(struct inode *inode, struct fsxattr *fa) +{ + /* + * Project Quota ID state is only allowed to change from within the init + * namespace. Enforce that restriction only if we are trying to change + * the quota ID state. Everything else is allowed in user namespaces. + */ + if (current_user_ns() == &init_user_ns) + return 0; + + if (__kprojid_val(EXT4_I(inode)->i_projid) != fa->fsx_projid) + return -EINVAL; + + if (ext4_test_inode_flag(inode, EXT4_INODE_PROJINHERIT)) { + if (!(fa->fsx_xflags & FS_XFLAG_PROJINHERIT)) + return -EINVAL; + } else { + if (fa->fsx_xflags & FS_XFLAG_PROJINHERIT) + return -EINVAL; + } + + return 0; +} + long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { struct inode *inode = file_inode(filp); @@ -1029,19 +1043,19 @@ resizefs_out: return err; inode_lock(inode); + err = ext4_ioctl_check_project(inode, &fa); + if (err) + goto out; flags = (ei->i_flags & ~EXT4_FL_XFLAG_VISIBLE) | (flags & EXT4_FL_XFLAG_VISIBLE); err = ext4_ioctl_setflags(inode, flags); - inode_unlock(inode); - mnt_drop_write_file(filp); if (err) - return err; - + goto out; err = ext4_ioctl_setproject(filp, fa.fsx_projid); - if (err) - return err; - - return 0; +out: + inode_unlock(inode); + mnt_drop_write_file(filp); + return err; } case EXT4_IOC_SHUTDOWN: return ext4_shutdown(sb, arg); -- GitLab From d197b725e9f0ef87ba014247211b0b900d868e04 Mon Sep 17 00:00:00 2001 From: Wang Shilong Date: Wed, 3 Oct 2018 12:19:21 -0400 Subject: [PATCH 1395/2269] ext4: propagate error from dquot_initialize() in EXT4_IOC_FSSETXATTR commit 182a79e0c17147d2c2d3990a9a7b6b58a1561c7a upstream. We return most failure of dquota_initialize() except inode evict, this could make a bit sense, for example we allow file removal even quota files are broken? But it dosen't make sense to allow setting project if quota files etc are broken. Signed-off-by: Wang Shilong Signed-off-by: Theodore Ts'o Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/ioctl.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index d75b7aae3d747..b2a47058e04c5 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -364,7 +364,9 @@ static int ext4_ioctl_setproject(struct file *filp, __u32 projid) brelse(iloc.bh); } - dquot_initialize(inode); + err = dquot_initialize(inode); + if (err) + return err; handle = ext4_journal_start(inode, EXT4_HT_QUOTA, EXT4_QUOTA_INIT_BLOCKS(sb) + -- GitLab From 44dcd01ee170f776fec7bd6386d08a525d713b91 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Fri, 12 Oct 2018 09:28:09 -0400 Subject: [PATCH 1396/2269] ext4: fix use-after-free race in ext4_remount()'s error path commit 33458eaba4dfe778a426df6a19b7aad2ff9f7eec upstream. It's possible for ext4_show_quota_options() to try reading s_qf_names[i] while it is being modified by ext4_remount() --- most notably, in ext4_remount's error path when the original values of the quota file name gets restored. Reported-by: syzbot+a2872d6feea6918008a9@syzkaller.appspotmail.com Signed-off-by: Theodore Ts'o Cc: stable@kernel.org # 3.2+ Signed-off-by: Greg Kroah-Hartman --- fs/ext4/ext4.h | 3 +- fs/ext4/super.c | 73 ++++++++++++++++++++++++++++++++----------------- 2 files changed, 50 insertions(+), 26 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index c96778c39885b..c0c6562b3c440 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1421,7 +1421,8 @@ struct ext4_sb_info { u32 s_min_batch_time; struct block_device *journal_bdev; #ifdef CONFIG_QUOTA - char *s_qf_names[EXT4_MAXQUOTAS]; /* Names of quota files with journalled quota */ + /* Names of quota files with journalled quota */ + char __rcu *s_qf_names[EXT4_MAXQUOTAS]; int s_jquota_fmt; /* Format of quota to use */ #endif unsigned int s_want_extra_isize; /* New inodes should reserve # bytes */ diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 9dbd27f7b7782..46ad267ef6d6c 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -855,6 +855,18 @@ static inline void ext4_quota_off_umount(struct super_block *sb) for (type = 0; type < EXT4_MAXQUOTAS; type++) ext4_quota_off(sb, type); } + +/* + * This is a helper function which is used in the mount/remount + * codepaths (which holds s_umount) to fetch the quota file name. + */ +static inline char *get_qf_name(struct super_block *sb, + struct ext4_sb_info *sbi, + int type) +{ + return rcu_dereference_protected(sbi->s_qf_names[type], + lockdep_is_held(&sb->s_umount)); +} #else static inline void ext4_quota_off_umount(struct super_block *sb) { @@ -907,7 +919,7 @@ static void ext4_put_super(struct super_block *sb) percpu_free_rwsem(&sbi->s_journal_flag_rwsem); #ifdef CONFIG_QUOTA for (i = 0; i < EXT4_MAXQUOTAS; i++) - kfree(sbi->s_qf_names[i]); + kfree(get_qf_name(sb, sbi, i)); #endif /* Debugging code just in case the in-memory inode orphan list @@ -1473,11 +1485,10 @@ static const char deprecated_msg[] = static int set_qf_name(struct super_block *sb, int qtype, substring_t *args) { struct ext4_sb_info *sbi = EXT4_SB(sb); - char *qname; + char *qname, *old_qname = get_qf_name(sb, sbi, qtype); int ret = -1; - if (sb_any_quota_loaded(sb) && - !sbi->s_qf_names[qtype]) { + if (sb_any_quota_loaded(sb) && !old_qname) { ext4_msg(sb, KERN_ERR, "Cannot change journaled " "quota options when quota turned on"); @@ -1494,8 +1505,8 @@ static int set_qf_name(struct super_block *sb, int qtype, substring_t *args) "Not enough memory for storing quotafile name"); return -1; } - if (sbi->s_qf_names[qtype]) { - if (strcmp(sbi->s_qf_names[qtype], qname) == 0) + if (old_qname) { + if (strcmp(old_qname, qname) == 0) ret = 1; else ext4_msg(sb, KERN_ERR, @@ -1508,7 +1519,7 @@ static int set_qf_name(struct super_block *sb, int qtype, substring_t *args) "quotafile must be on filesystem root"); goto errout; } - sbi->s_qf_names[qtype] = qname; + rcu_assign_pointer(sbi->s_qf_names[qtype], qname); set_opt(sb, QUOTA); return 1; errout: @@ -1520,15 +1531,16 @@ static int clear_qf_name(struct super_block *sb, int qtype) { struct ext4_sb_info *sbi = EXT4_SB(sb); + char *old_qname = get_qf_name(sb, sbi, qtype); - if (sb_any_quota_loaded(sb) && - sbi->s_qf_names[qtype]) { + if (sb_any_quota_loaded(sb) && old_qname) { ext4_msg(sb, KERN_ERR, "Cannot change journaled quota options" " when quota turned on"); return -1; } - kfree(sbi->s_qf_names[qtype]); - sbi->s_qf_names[qtype] = NULL; + rcu_assign_pointer(sbi->s_qf_names[qtype], NULL); + synchronize_rcu(); + kfree(old_qname); return 1; } #endif @@ -1901,7 +1913,7 @@ static int parse_options(char *options, struct super_block *sb, int is_remount) { struct ext4_sb_info *sbi = EXT4_SB(sb); - char *p; + char *p, __maybe_unused *usr_qf_name, __maybe_unused *grp_qf_name; substring_t args[MAX_OPT_ARGS]; int token; @@ -1932,11 +1944,13 @@ static int parse_options(char *options, struct super_block *sb, "Cannot enable project quota enforcement."); return 0; } - if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) { - if (test_opt(sb, USRQUOTA) && sbi->s_qf_names[USRQUOTA]) + usr_qf_name = get_qf_name(sb, sbi, USRQUOTA); + grp_qf_name = get_qf_name(sb, sbi, GRPQUOTA); + if (usr_qf_name || grp_qf_name) { + if (test_opt(sb, USRQUOTA) && usr_qf_name) clear_opt(sb, USRQUOTA); - if (test_opt(sb, GRPQUOTA) && sbi->s_qf_names[GRPQUOTA]) + if (test_opt(sb, GRPQUOTA) && grp_qf_name) clear_opt(sb, GRPQUOTA); if (test_opt(sb, GRPQUOTA) || test_opt(sb, USRQUOTA)) { @@ -1970,6 +1984,7 @@ static inline void ext4_show_quota_options(struct seq_file *seq, { #if defined(CONFIG_QUOTA) struct ext4_sb_info *sbi = EXT4_SB(sb); + char *usr_qf_name, *grp_qf_name; if (sbi->s_jquota_fmt) { char *fmtname = ""; @@ -1988,11 +2003,14 @@ static inline void ext4_show_quota_options(struct seq_file *seq, seq_printf(seq, ",jqfmt=%s", fmtname); } - if (sbi->s_qf_names[USRQUOTA]) - seq_show_option(seq, "usrjquota", sbi->s_qf_names[USRQUOTA]); - - if (sbi->s_qf_names[GRPQUOTA]) - seq_show_option(seq, "grpjquota", sbi->s_qf_names[GRPQUOTA]); + rcu_read_lock(); + usr_qf_name = rcu_dereference(sbi->s_qf_names[USRQUOTA]); + grp_qf_name = rcu_dereference(sbi->s_qf_names[GRPQUOTA]); + if (usr_qf_name) + seq_show_option(seq, "usrjquota", usr_qf_name); + if (grp_qf_name) + seq_show_option(seq, "grpjquota", grp_qf_name); + rcu_read_unlock(); #endif } @@ -5038,6 +5056,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) int err = 0; #ifdef CONFIG_QUOTA int i, j; + char *to_free[EXT4_MAXQUOTAS]; #endif char *orig_data = kstrdup(data, GFP_KERNEL); @@ -5054,8 +5073,9 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) old_opts.s_jquota_fmt = sbi->s_jquota_fmt; for (i = 0; i < EXT4_MAXQUOTAS; i++) if (sbi->s_qf_names[i]) { - old_opts.s_qf_names[i] = kstrdup(sbi->s_qf_names[i], - GFP_KERNEL); + char *qf_name = get_qf_name(sb, sbi, i); + + old_opts.s_qf_names[i] = kstrdup(qf_name, GFP_KERNEL); if (!old_opts.s_qf_names[i]) { for (j = 0; j < i; j++) kfree(old_opts.s_qf_names[j]); @@ -5277,9 +5297,12 @@ restore_opts: #ifdef CONFIG_QUOTA sbi->s_jquota_fmt = old_opts.s_jquota_fmt; for (i = 0; i < EXT4_MAXQUOTAS; i++) { - kfree(sbi->s_qf_names[i]); - sbi->s_qf_names[i] = old_opts.s_qf_names[i]; + to_free[i] = get_qf_name(sb, sbi, i); + rcu_assign_pointer(sbi->s_qf_names[i], old_opts.s_qf_names[i]); } + synchronize_rcu(); + for (i = 0; i < EXT4_MAXQUOTAS; i++) + kfree(to_free[i]); #endif kfree(orig_data); return err; @@ -5469,7 +5492,7 @@ static int ext4_write_info(struct super_block *sb, int type) */ static int ext4_quota_on_mount(struct super_block *sb, int type) { - return dquot_quota_on_mount(sb, EXT4_SB(sb)->s_qf_names[type], + return dquot_quota_on_mount(sb, get_qf_name(sb, EXT4_SB(sb), type), EXT4_SB(sb)->s_jquota_fmt, type); } -- GitLab From 872f9d86659a84fca92c4908c3c0fbaebf9d007c Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Fri, 19 Oct 2018 17:01:33 -0300 Subject: [PATCH 1397/2269] HID: hiddev: fix potential Spectre v1 commit f11274396a538b31bc010f782e05c2ce3f804c13 upstream. uref->usage_index can be indirectly controlled by userspace, hence leading to a potential exploitation of the Spectre variant 1 vulnerability. This field is used as an array index by the hiddev_ioctl_usage() function, when 'cmd' is either HIDIOCGCOLLECTIONINDEX, HIDIOCGUSAGES or HIDIOCSUSAGES. For cmd == HIDIOCGCOLLECTIONINDEX case, uref->usage_index is compared to field->maxusage and then used as an index to dereference field->usage array. The same thing happens to the cmd == HIDIOC{G,S}USAGES cases, where uref->usage_index is checked against an array maximum value and then it is used as an index in an array. This is a summary of the HIDIOCGCOLLECTIONINDEX case, which matches the traditional Spectre V1 first load: copy_from_user(uref, user_arg, sizeof(*uref)) if (uref->usage_index >= field->maxusage) goto inval; i = field->usage[uref->usage_index].collection_index; return i; This patch fixes this by sanitizing field uref->usage_index before using it to index field->usage (HIDIOCGCOLLECTIONINDEX) or field->value in HIDIOC{G,S}USAGES arrays, thus, avoiding speculation in the first load. Cc: Signed-off-by: Breno Leitao v2: Contemplate cmd == HIDIOC{G,S}USAGES case Signed-off-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- drivers/hid/usbhid/hiddev.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/drivers/hid/usbhid/hiddev.c b/drivers/hid/usbhid/hiddev.c index cf307bdc3d539..89761551c15d6 100644 --- a/drivers/hid/usbhid/hiddev.c +++ b/drivers/hid/usbhid/hiddev.c @@ -512,14 +512,24 @@ static noinline int hiddev_ioctl_usage(struct hiddev *hiddev, unsigned int cmd, if (cmd == HIDIOCGCOLLECTIONINDEX) { if (uref->usage_index >= field->maxusage) goto inval; + uref->usage_index = + array_index_nospec(uref->usage_index, + field->maxusage); } else if (uref->usage_index >= field->report_count) goto inval; } - if ((cmd == HIDIOCGUSAGES || cmd == HIDIOCSUSAGES) && - (uref_multi->num_values > HID_MAX_MULTI_USAGES || - uref->usage_index + uref_multi->num_values > field->report_count)) - goto inval; + if (cmd == HIDIOCGUSAGES || cmd == HIDIOCSUSAGES) { + if (uref_multi->num_values > HID_MAX_MULTI_USAGES || + uref->usage_index + uref_multi->num_values > + field->report_count) + goto inval; + + uref->usage_index = + array_index_nospec(uref->usage_index, + field->report_count - + uref_multi->num_values); + } switch (cmd) { case HIDIOCGUSAGE: -- GitLab From 52314c7f81d8a3af54c40b213bfa3d9b060038e1 Mon Sep 17 00:00:00 2001 From: Michael Jin Date: Thu, 16 Aug 2018 15:28:40 -0400 Subject: [PATCH 1398/2269] EDAC, amd64: Add Family 17h, models 10h-2fh support commit 8960de4a5ca7980ed1e19e7ca5a774d3b7a55c38 upstream. Add new device IDs for family 17h, models 10h-2fh. This is required by amd64_edac_mod in order to properly detect PCI device functions 0 and 6. Signed-off-by: Michael Jin Reviewed-by: Yazen Ghannam Cc: Link: http://lkml.kernel.org/r/20180816192840.31166-1-mikhail.jin@gmail.com Signed-off-by: Borislav Petkov Signed-off-by: Greg Kroah-Hartman --- drivers/edac/amd64_edac.c | 14 ++++++++++++++ drivers/edac/amd64_edac.h | 3 +++ 2 files changed, 17 insertions(+) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 59ce32e405ac6..667f5ba0403c0 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -2200,6 +2200,15 @@ static struct amd64_family_type family_types[] = { .dbam_to_cs = f17_base_addr_to_cs_size, } }, + [F17_M10H_CPUS] = { + .ctl_name = "F17h_M10h", + .f0_id = PCI_DEVICE_ID_AMD_17H_M10H_DF_F0, + .f6_id = PCI_DEVICE_ID_AMD_17H_M10H_DF_F6, + .ops = { + .early_channel_count = f17_early_channel_count, + .dbam_to_cs = f17_base_addr_to_cs_size, + } + }, }; /* @@ -3188,6 +3197,11 @@ static struct amd64_family_type *per_family_init(struct amd64_pvt *pvt) break; case 0x17: + if (pvt->model >= 0x10 && pvt->model <= 0x2f) { + fam_type = &family_types[F17_M10H_CPUS]; + pvt->ops = &family_types[F17_M10H_CPUS].ops; + break; + } fam_type = &family_types[F17_CPUS]; pvt->ops = &family_types[F17_CPUS].ops; break; diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h index 1d4b74e9a037f..4242f8e39c18f 100644 --- a/drivers/edac/amd64_edac.h +++ b/drivers/edac/amd64_edac.h @@ -115,6 +115,8 @@ #define PCI_DEVICE_ID_AMD_16H_M30H_NB_F2 0x1582 #define PCI_DEVICE_ID_AMD_17H_DF_F0 0x1460 #define PCI_DEVICE_ID_AMD_17H_DF_F6 0x1466 +#define PCI_DEVICE_ID_AMD_17H_M10H_DF_F0 0x15e8 +#define PCI_DEVICE_ID_AMD_17H_M10H_DF_F6 0x15ee /* * Function 1 - Address Map @@ -281,6 +283,7 @@ enum amd_families { F16_CPUS, F16_M30H_CPUS, F17_CPUS, + F17_M10H_CPUS, NUM_FAMILIES, }; -- GitLab From 65ce0542891852aa680a5478e091ea2f9adec7d8 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Fri, 28 Sep 2018 14:39:34 -0700 Subject: [PATCH 1399/2269] EDAC, {i7core,sb,skx}_edac: Fix uncorrected error counting commit 432de7fd7630c84ad24f1c2acd1e3bb4ce3741ca upstream. The count of errors is picked up from bits 52:38 of the machine check bank status register. But this is the count of *corrected* errors. If an uncorrected error is being logged, the h/w sets this field to 0. Which means that when edac_mc_handle_error() is called, the EDAC core will carefully add zero to the appropriate uncorrected error counts. Signed-off-by: Tony Luck [ Massage commit message. ] Signed-off-by: Borislav Petkov Cc: stable@vger.kernel.org Cc: Aristeu Rozanski Cc: Mauro Carvalho Chehab Cc: Qiuxu Zhuo Cc: linux-edac Link: http://lkml.kernel.org/r/20180928213934.19890-1-tony.luck@intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/edac/i7core_edac.c | 1 + drivers/edac/sb_edac.c | 1 + drivers/edac/skx_edac.c | 1 + 3 files changed, 3 insertions(+) diff --git a/drivers/edac/i7core_edac.c b/drivers/edac/i7core_edac.c index 6c7d5f20eacbd..2054a24b41d78 100644 --- a/drivers/edac/i7core_edac.c +++ b/drivers/edac/i7core_edac.c @@ -1711,6 +1711,7 @@ static void i7core_mce_output_error(struct mem_ctl_info *mci, u32 errnum = find_first_bit(&error, 32); if (uncorrected_error) { + core_err_cnt = 1; if (ripv) tp_event = HW_EVENT_ERR_FATAL; else diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c index 0dc0d595c47cb..b0b390a1da154 100644 --- a/drivers/edac/sb_edac.c +++ b/drivers/edac/sb_edac.c @@ -2891,6 +2891,7 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci, recoverable = GET_BITFIELD(m->status, 56, 56); if (uncorrected_error) { + core_err_cnt = 1; if (ripv) { type = "FATAL"; tp_event = HW_EVENT_ERR_FATAL; diff --git a/drivers/edac/skx_edac.c b/drivers/edac/skx_edac.c index 16dea97568a1e..8547eb8656a26 100644 --- a/drivers/edac/skx_edac.c +++ b/drivers/edac/skx_edac.c @@ -895,6 +895,7 @@ static void skx_mce_output_error(struct mem_ctl_info *mci, recoverable = GET_BITFIELD(m->status, 56, 56); if (uncorrected_error) { + core_err_cnt = 1; if (ripv) { type = "FATAL"; tp_event = HW_EVENT_ERR_FATAL; -- GitLab From 074df512d4d02eaf793ffddf46ab45cdaba374e1 Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Tue, 9 Oct 2018 10:20:25 -0700 Subject: [PATCH 1400/2269] EDAC, skx_edac: Fix logical channel intermediate decoding commit 8f18973877204dc8ca4ce1004a5d28683b9a7086 upstream. The code "lchan = (lchan << 1) | ~lchan" for logical channel intermediate decoding is wrong. The wrong intermediate decoding result is {0xffffffff, 0xfffffffe}. Fix it by replacing '~' with '!'. The correct intermediate decoding result is {0x1, 0x2}. Signed-off-by: Qiuxu Zhuo Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov CC: Aristeu Rozanski CC: Mauro Carvalho Chehab CC: linux-edac Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/20181009172025.18594-1-tony.luck@intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/edac/skx_edac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/edac/skx_edac.c b/drivers/edac/skx_edac.c index 8547eb8656a26..5dafd4fa8f5ef 100644 --- a/drivers/edac/skx_edac.c +++ b/drivers/edac/skx_edac.c @@ -604,7 +604,7 @@ sad_found: break; case 2: lchan = (addr >> shift) % 2; - lchan = (lchan << 1) | ~lchan; + lchan = (lchan << 1) | !lchan; break; case 3: lchan = ((addr >> shift) % 2) << 1; -- GitLab From 255fb2e03694901f8178d9766ab3c043be6ed33b Mon Sep 17 00:00:00 2001 From: Vignesh R Date: Tue, 25 Sep 2018 10:51:51 +0530 Subject: [PATCH 1401/2269] ARM: dts: dra7: Fix up unaligned access setting for PCIe EP commit 6d0af44a82be87c13f2320821e9fbb8b8cf5a56f upstream. Bit positions of PCIE_SS1_AXI2OCP_LEGACY_MODE_ENABLE and PCIE_SS1_AXI2OCP_LEGACY_MODE_ENABLE in CTRL_CORE_SMA_SW_7 are incorrectly documented in the TRM. In fact, the bit positions are swapped. Update the DT bindings for PCIe EP to reflect the same. Fixes: d23f3839fe97 ("ARM: dts: DRA7: Add pcie1 dt node for EP mode") Cc: stable@vger.kernel.org Signed-off-by: Vignesh R Signed-off-by: Tony Lindgren Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/dra7.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/dra7.dtsi b/arch/arm/boot/dts/dra7.dtsi index a5bd8f0205e8e..0bf354024ef55 100644 --- a/arch/arm/boot/dts/dra7.dtsi +++ b/arch/arm/boot/dts/dra7.dtsi @@ -333,7 +333,7 @@ ti,hwmods = "pcie1"; phys = <&pcie1_phy>; phy-names = "pcie-phy0"; - ti,syscon-unaligned-access = <&scm_conf1 0x14 2>; + ti,syscon-unaligned-access = <&scm_conf1 0x14 1>; status = "disabled"; }; }; -- GitLab From 19e14e8816d0a079671eb917606a4b586321ec12 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Tue, 4 Sep 2018 12:34:18 -0500 Subject: [PATCH 1402/2269] PCI/ASPM: Fix link_state teardown on device removal commit aeae4f3e5c38d47bdaef50446dc0ec857307df68 upstream. Upon removal of the last device on a bus, the link_state of the bridge leading to that bus is sought to be torn down by having pci_stop_dev() call pcie_aspm_exit_link_state(). When ASPM was originally introduced by commit 7d715a6c1ae5 ("PCI: add PCI Express ASPM support"), it determined whether the device being removed is the last one by calling list_empty() on the bridge's subordinate devices list. That didn't work because the device is only removed from the list slightly later in pci_destroy_dev(). Commit 3419c75e15f8 ("PCI: properly clean up ASPM link state on device remove") attempted to fix it by calling list_is_last(), but that's not correct either because it checks whether the device is at the *end* of the list, not whether it's the last one *left* in the list. If the user removes the device which happens to be at the end of the list via sysfs but other devices are preceding the device in the list, the link_state is torn down prematurely. The real fix is to move the invocation of pcie_aspm_exit_link_state() to pci_destroy_dev() and reinstate the call to list_empty(). Remove a duplicate check for dev->bus->self because pcie_aspm_exit_link_state() already contains an identical check. Fixes: 7d715a6c1ae5 ("PCI: add PCI Express ASPM support") Signed-off-by: Lukas Wunner Signed-off-by: Bjorn Helgaas Cc: Shaohua Li Cc: stable@vger.kernel.org # v2.6.26 Signed-off-by: Greg Kroah-Hartman --- drivers/pci/pcie/aspm.c | 2 +- drivers/pci/remove.c | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c index 633e55c57b136..c0e1985e4c757 100644 --- a/drivers/pci/pcie/aspm.c +++ b/drivers/pci/pcie/aspm.c @@ -937,7 +937,7 @@ void pcie_aspm_exit_link_state(struct pci_dev *pdev) * All PCIe functions are in one slot, remove one function will remove * the whole slot, so just wait until we are the last function left. */ - if (!list_is_last(&pdev->bus_list, &parent->subordinate->devices)) + if (!list_empty(&parent->subordinate->devices)) goto out; link = parent->link_state; diff --git a/drivers/pci/remove.c b/drivers/pci/remove.c index 2fa0dbde36b7a..0911217467bcd 100644 --- a/drivers/pci/remove.c +++ b/drivers/pci/remove.c @@ -24,9 +24,6 @@ static void pci_stop_dev(struct pci_dev *dev) pci_remove_sysfs_dev_files(dev); dev->is_added = 0; } - - if (dev->bus->self) - pcie_aspm_exit_link_state(dev); } static void pci_destroy_dev(struct pci_dev *dev) @@ -40,6 +37,7 @@ static void pci_destroy_dev(struct pci_dev *dev) list_del(&dev->bus_list); up_write(&pci_bus_sem); + pcie_aspm_exit_link_state(dev); pci_bridge_d3_update(dev); pci_free_resources(dev); put_device(&dev->dev); -- GitLab From 2b216de5804f6dac761bbf6667bc707ea1aeb771 Mon Sep 17 00:00:00 2001 From: Bin Meng Date: Wed, 26 Sep 2018 08:14:01 -0700 Subject: [PATCH 1403/2269] PCI: Add Device IDs for Intel GPU "spurious interrupt" quirk commit d0c9606b31a21028fb5b753c8ad79626292accfd upstream. Add Device IDs to the Intel GPU "spurious interrupt" quirk table. For these devices, unplugging the VGA cable and plugging it in again causes spurious interrupts from the IGD. Linux eventually disables the interrupt, but of course that disables any other devices sharing the interrupt. The theory is that this is a VGA BIOS defect: it should have disabled the IGD interrupt but failed to do so. See f67fd55fa96f ("PCI: Add quirk for still enabled interrupts on Intel Sandy Bridge GPUs") and 7c82126a94e6 ("PCI: Add new ID for Intel GPU "spurious interrupt" quirk") for some history. [bhelgaas: See link below for discussion about how to fix this more generically instead of adding device IDs for every new Intel GPU. I hope this is the last patch to add device IDs.] Link: https://lore.kernel.org/linux-pci/1537974841-29928-1-git-send-email-bmeng.cn@gmail.com Signed-off-by: Bin Meng [bhelgaas: changelog] Signed-off-by: Bjorn Helgaas Cc: stable@vger.kernel.org # v3.4+ Signed-off-by: Greg Kroah-Hartman --- drivers/pci/quirks.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 35c9b2f4b293a..d442afa195ab6 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -3163,7 +3163,11 @@ static void disable_igfx_irq(struct pci_dev *dev) pci_iounmap(dev, regs); } +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0042, disable_igfx_irq); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0046, disable_igfx_irq); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x004a, disable_igfx_irq); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0102, disable_igfx_irq); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0106, disable_igfx_irq); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x010a, disable_igfx_irq); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0152, disable_igfx_irq); -- GitLab From 635c8c9ce51bcee0902c5a4d2996163e30213320 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 8 May 2018 10:00:22 -0600 Subject: [PATCH 1404/2269] PCI: vmd: White list for fast interrupt handlers commit a7f58b9ecfd3c0f63703ec10f4a592cc38dbd1b8 upstream. Devices with slow interrupt handlers are significantly harming performance when their interrupt vector is shared with a fast device. Create a class code white list for devices with known fast interrupt handlers and let all other devices share a single vector so that they don't interfere with performance. At the moment, only the NVM Express class code is on the list, but more may be added if VMD users desire to use other low-latency devices in these domains. Signed-off-by: Keith Busch [lorenzo.pieralisi@arm.com: changelog] Signed-off-by: Lorenzo Pieralisi Acked-by: Jon Derrick: Cc: "Heitke, Kenneth" Signed-off-by: Greg Kroah-Hartman --- drivers/pci/host/vmd.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/pci/host/vmd.c b/drivers/pci/host/vmd.c index 509893bc3e63e..2537b022f42d4 100644 --- a/drivers/pci/host/vmd.c +++ b/drivers/pci/host/vmd.c @@ -183,9 +183,20 @@ static struct vmd_irq_list *vmd_next_irq(struct vmd_dev *vmd, struct msi_desc *d int i, best = 1; unsigned long flags; - if (pci_is_bridge(msi_desc_to_pci_dev(desc)) || vmd->msix_count == 1) + if (vmd->msix_count == 1) return &vmd->irqs[0]; + /* + * White list for fast-interrupt handlers. All others will share the + * "slow" interrupt vector. + */ + switch (msi_desc_to_pci_dev(desc)->class) { + case PCI_CLASS_STORAGE_EXPRESS: + break; + default: + return &vmd->irqs[0]; + } + raw_spin_lock_irqsave(&list_lock, flags); for (i = 1; i < vmd->msix_count; i++) if (vmd->irqs[i].count < vmd->irqs[best].count) -- GitLab From eb7f3c513d1c898abdc66809406d62ce9ec7c50d Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 13 Sep 2018 11:28:01 +0200 Subject: [PATCH 1405/2269] signal/GenWQE: Fix sending of SIGKILL commit 0ab93e9c99f8208c0a1a7b7170c827936268c996 upstream. The genweq_add_file and genwqe_del_file by caching current without using reference counting embed the assumption that a file descriptor will never be passed from one process to another. It even embeds the assumption that the the thread that opened the file will be in existence when the process terminates. Neither of which are guaranteed to be true. Therefore replace caching the task_struct of the opener with pid of the openers thread group id. All the knowledge of the opener is used for is as the target of SIGKILL and a SIGKILL will kill the entire process group. Rename genwqe_force_sig to genwqe_terminate, remove it's unncessary signal argument, update it's ownly caller, and use kill_pid instead of force_sig. The work force_sig does in changing signal handling state is not relevant to SIGKILL sent as SEND_SIG_PRIV. The exact same processess will be killed just with less work, and less confusion. The work done by force_sig is really only needed for handling syncrhonous exceptions. It will still be possible to cause genwqe_device_remove to wait 8 seconds by passing a file descriptor to another process but the possible user after free is fixed. Fixes: eaf4722d4645 ("GenWQE Character device and DDCB queue") Cc: stable@vger.kernel.org Cc: Greg Kroah-Hartman Cc: Frank Haverkamp Cc: Joerg-Stephan Vogt Cc: Michael Jung Cc: Michael Ruettger Cc: Kleber Sacilotto de Souza Cc: Sebastian Ott Cc: Eberhard S. Amann Cc: Gabriel Krisman Bertazi Cc: Guilherme G. Piccoli Signed-off-by: "Eric W. Biederman" Signed-off-by: Greg Kroah-Hartman --- drivers/misc/genwqe/card_base.h | 2 +- drivers/misc/genwqe/card_dev.c | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/misc/genwqe/card_base.h b/drivers/misc/genwqe/card_base.h index 5813b5f250065..135e02f257c15 100644 --- a/drivers/misc/genwqe/card_base.h +++ b/drivers/misc/genwqe/card_base.h @@ -403,7 +403,7 @@ struct genwqe_file { struct file *filp; struct fasync_struct *async_queue; - struct task_struct *owner; + struct pid *opener; struct list_head list; /* entry in list of open files */ spinlock_t map_lock; /* lock for dma_mappings */ diff --git a/drivers/misc/genwqe/card_dev.c b/drivers/misc/genwqe/card_dev.c index dd4617764f147..dbd5eaa69311b 100644 --- a/drivers/misc/genwqe/card_dev.c +++ b/drivers/misc/genwqe/card_dev.c @@ -52,7 +52,7 @@ static void genwqe_add_file(struct genwqe_dev *cd, struct genwqe_file *cfile) { unsigned long flags; - cfile->owner = current; + cfile->opener = get_pid(task_tgid(current)); spin_lock_irqsave(&cd->file_lock, flags); list_add(&cfile->list, &cd->file_list); spin_unlock_irqrestore(&cd->file_lock, flags); @@ -65,6 +65,7 @@ static int genwqe_del_file(struct genwqe_dev *cd, struct genwqe_file *cfile) spin_lock_irqsave(&cd->file_lock, flags); list_del(&cfile->list); spin_unlock_irqrestore(&cd->file_lock, flags); + put_pid(cfile->opener); return 0; } @@ -275,7 +276,7 @@ static int genwqe_kill_fasync(struct genwqe_dev *cd, int sig) return files; } -static int genwqe_force_sig(struct genwqe_dev *cd, int sig) +static int genwqe_terminate(struct genwqe_dev *cd) { unsigned int files = 0; unsigned long flags; @@ -283,7 +284,7 @@ static int genwqe_force_sig(struct genwqe_dev *cd, int sig) spin_lock_irqsave(&cd->file_lock, flags); list_for_each_entry(cfile, &cd->file_list, list) { - force_sig(sig, cfile->owner); + kill_pid(cfile->opener, SIGKILL, 1); files++; } spin_unlock_irqrestore(&cd->file_lock, flags); @@ -1356,7 +1357,7 @@ static int genwqe_inform_and_stop_processes(struct genwqe_dev *cd) dev_warn(&pci_dev->dev, "[%s] send SIGKILL and wait ...\n", __func__); - rc = genwqe_force_sig(cd, SIGKILL); /* force terminate */ + rc = genwqe_terminate(cd); if (rc) { /* Give kill_timout more seconds to end processes */ for (i = 0; (i < genwqe_kill_timeout) && -- GitLab From 51f62e827191f4ba02c2a001b9e6f25605bfa649 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 10 Oct 2018 20:29:44 -0500 Subject: [PATCH 1406/2269] signal: Guard against negative signal numbers in copy_siginfo_from_user32 commit a36700589b85443e28170be59fa11c8a104130a5 upstream. While fixing an out of bounds array access in known_siginfo_layout reported by the kernel test robot it became apparent that the same bug exists in siginfo_layout and affects copy_siginfo_from_user32. The straight forward fix that makes guards against making this mistake in the future and should keep the code size small is to just take an unsigned signal number instead of a signed signal number, as I did to fix known_siginfo_layout. Cc: stable@vger.kernel.org Fixes: cc731525f26a ("signal: Remove kernel interal si_code magic") Signed-off-by: "Eric W. Biederman" Signed-off-by: Greg Kroah-Hartman --- include/linux/signal.h | 2 +- kernel/signal.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/signal.h b/include/linux/signal.h index 042968dd98f01..843bd62b1eadf 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -34,7 +34,7 @@ enum siginfo_layout { #endif }; -enum siginfo_layout siginfo_layout(int sig, int si_code); +enum siginfo_layout siginfo_layout(unsigned sig, int si_code); /* * Define some primitives to manipulate sigset_t. diff --git a/kernel/signal.c b/kernel/signal.c index 7181215e9b64e..164c36ef08258 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2700,7 +2700,7 @@ COMPAT_SYSCALL_DEFINE2(rt_sigpending, compat_sigset_t __user *, uset, } #endif -enum siginfo_layout siginfo_layout(int sig, int si_code) +enum siginfo_layout siginfo_layout(unsigned sig, int si_code) { enum siginfo_layout layout = SIL_KILL; if ((si_code > SI_USER) && (si_code < SI_KERNEL)) { -- GitLab From e86f4842f84a4494227aa4d1ab76acb68d86fb1a Mon Sep 17 00:00:00 2001 From: Ondrej Mosnacek Date: Thu, 13 Sep 2018 10:51:31 +0200 Subject: [PATCH 1407/2269] crypto: lrw - Fix out-of bounds access on counter overflow commit fbe1a850b3b1522e9fc22319ccbbcd2ab05328d2 upstream. When the LRW block counter overflows, the current implementation returns 128 as the index to the precomputed multiplication table, which has 128 entries. This patch fixes it to return the correct value (127). Fixes: 64470f1b8510 ("[CRYPTO] lrw: Liskov Rivest Wagner, a tweakable narrow block cipher mode") Cc: # 2.6.20+ Reported-by: Eric Biggers Signed-off-by: Ondrej Mosnacek Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- crypto/lrw.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/crypto/lrw.c b/crypto/lrw.c index fdba6dd6db635..886f91f2426c6 100644 --- a/crypto/lrw.c +++ b/crypto/lrw.c @@ -139,7 +139,12 @@ static inline int get_index128(be128 *block) return x + ffz(val); } - return x; + /* + * If we get here, then x == 128 and we are incrementing the counter + * from all ones to all zeros. This means we must return index 127, i.e. + * the one corresponding to key2*{ 1,...,1 }. + */ + return 127; } static int post_crypt(struct skcipher_request *req) -- GitLab From 0c8496c52a98039b6d8ca46db2f4478779b957fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Horia=20Geant=C4=83?= Date: Wed, 12 Sep 2018 16:20:48 +0300 Subject: [PATCH 1408/2269] crypto: tcrypt - fix ghash-generic speed test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 331351f89c36bf7d03561a28b6f64fa10a9f6f3a upstream. ghash is a keyed hash algorithm, thus setkey needs to be called. Otherwise the following error occurs: $ modprobe tcrypt mode=318 sec=1 testing speed of async ghash-generic (ghash-generic) tcrypt: test 0 ( 16 byte blocks, 16 bytes per update, 1 updates): tcrypt: hashing failed ret=-126 Cc: # 4.6+ Fixes: 0660511c0bee ("crypto: tcrypt - Use ahash") Tested-by: Franck Lenormand Signed-off-by: Horia Geantă Acked-by: Ard Biesheuvel Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- crypto/tcrypt.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c index e339960dcac7b..f7affe7cf0b47 100644 --- a/crypto/tcrypt.c +++ b/crypto/tcrypt.c @@ -727,6 +727,9 @@ static void test_ahash_speed_common(const char *algo, unsigned int secs, break; } + if (speed[i].klen) + crypto_ahash_setkey(tfm, tvmem[0], speed[i].klen); + pr_info("test%3u " "(%5u byte blocks,%5u bytes per update,%4u updates): ", i, speed[i].blen, speed[i].plen, speed[i].blen / speed[i].plen); -- GitLab From 0c5e357fa89d64f414014046ec14979f72de0261 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Fri, 26 Oct 2018 15:02:16 -0700 Subject: [PATCH 1409/2269] mm: /proc/pid/smaps_rollup: fix NULL pointer deref in smaps_pte_range() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit fa76da461bb0be13c8339d984dcf179151167c8f upstream. Leonardo reports an apparent regression in 4.19-rc7: BUG: unable to handle kernel NULL pointer dereference at 00000000000000f0 PGD 0 P4D 0 Oops: 0000 [#1] PREEMPT SMP PTI CPU: 3 PID: 6032 Comm: python Not tainted 4.19.0-041900rc7-lowlatency #201810071631 Hardware name: LENOVO 80UG/Toronto 4A2, BIOS 0XCN45WW 08/09/2018 RIP: 0010:smaps_pte_range+0x32d/0x540 Code: 80 00 00 00 00 74 a9 48 89 de 41 f6 40 52 40 0f 85 04 02 00 00 49 2b 30 48 c1 ee 0c 49 03 b0 98 00 00 00 49 8b 80 a0 00 00 00 <48> 8b b8 f0 00 00 00 e8 b7 ef ec ff 48 85 c0 0f 84 71 ff ff ff a8 RSP: 0018:ffffb0cbc484fb88 EFLAGS: 00010202 RAX: 0000000000000000 RBX: 0000560ddb9e9000 RCX: 0000000000000000 RDX: 0000000000000000 RSI: 0000000560ddb9e9 RDI: 0000000000000001 RBP: ffffb0cbc484fbc0 R08: ffff94a5a227a578 R09: ffff94a5a227a578 R10: 0000000000000000 R11: 0000560ddbbe7000 R12: ffffe903098ba728 R13: ffffb0cbc484fc78 R14: ffffb0cbc484fcf8 R15: ffff94a5a2e9cf48 FS: 00007f6dfb683740(0000) GS:ffff94a5aaf80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000000000f0 CR3: 000000011c118001 CR4: 00000000003606e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: __walk_page_range+0x3c2/0x6f0 walk_page_vma+0x42/0x60 smap_gather_stats+0x79/0xe0 ? gather_pte_stats+0x320/0x320 ? gather_hugetlb_stats+0x70/0x70 show_smaps_rollup+0xcd/0x1c0 seq_read+0x157/0x400 __vfs_read+0x3a/0x180 ? security_file_permission+0x93/0xc0 ? security_file_permission+0x93/0xc0 vfs_read+0x8f/0x140 ksys_read+0x55/0xc0 __x64_sys_read+0x1a/0x20 do_syscall_64+0x5a/0x110 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Decoded code matched to local compilation+disassembly points to smaps_pte_entry(): } else if (unlikely(IS_ENABLED(CONFIG_SHMEM) && mss->check_shmem_swap && pte_none(*pte))) { page = find_get_entry(vma->vm_file->f_mapping, linear_page_index(vma, addr)); Here, vma->vm_file is NULL. mss->check_shmem_swap should be false in that case, however for smaps_rollup, smap_gather_stats() can set the flag true for one vma and leave it true for subsequent vma's where it should be false. To fix, reset the check_shmem_swap flag to false. There's also related bug which sets mss->swap to shmem_swapped, which in the context of smaps_rollup overwrites any value accumulated from previous vma's. Fix that as well. Note that the report suggests a regression between 4.17.19 and 4.19-rc7, which makes the 4.19 series ending with commit 258f669e7e88 ("mm: /proc/pid/smaps_rollup: convert to single value seq_file") suspicious. But the mss was reused for rollup since 493b0e9d945f ("mm: add /proc/pid/smaps_rollup") so let's play it safe with the stable backport. Link: http://lkml.kernel.org/r/555fbd1f-4ac9-0b58-dcd4-5dc4380ff7ca@suse.cz Link: https://bugzilla.kernel.org/show_bug.cgi?id=201377 Fixes: 493b0e9d945f ("mm: add /proc/pid/smaps_rollup") Signed-off-by: Vlastimil Babka Reported-by: Leonardo Soares Müller Tested-by: Leonardo Soares Müller Cc: Greg Kroah-Hartman Cc: Daniel Colascione Cc: Alexey Dobriyan Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/proc/task_mmu.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 519522d39bdeb..2b47757c9c689 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -768,6 +768,8 @@ static int show_smap(struct seq_file *m, void *v, int is_pid) smaps_walk.private = mss; #ifdef CONFIG_SHMEM + /* In case of smaps_rollup, reset the value from previous vma */ + mss->check_shmem_swap = false; if (vma->vm_file && shmem_mapping(vma->vm_file->f_mapping)) { /* * For shared or readonly shmem mappings we know that all @@ -783,7 +785,7 @@ static int show_smap(struct seq_file *m, void *v, int is_pid) if (!shmem_swapped || (vma->vm_flags & VM_SHARED) || !(vma->vm_flags & VM_WRITE)) { - mss->swap = shmem_swapped; + mss->swap += shmem_swapped; } else { mss->check_shmem_swap = true; smaps_walk.pte_hole = smaps_pte_hole; -- GitLab From 4c6fda12ff1d187590083a5615cee5501c60142c Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 7 Sep 2018 14:33:24 -0700 Subject: [PATCH 1410/2269] ima: fix showing large 'violations' or 'runtime_measurements_count' commit 1e4c8dafbb6bf72fb5eca035b861e39c5896c2b7 upstream. The 12 character temporary buffer is not necessarily long enough to hold a 'long' value. Increase it. Signed-off-by: Eric Biggers Cc: stable@vger.kernel.org Signed-off-by: Mimi Zohar Signed-off-by: Greg Kroah-Hartman --- security/integrity/ima/ima_fs.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/security/integrity/ima/ima_fs.c b/security/integrity/ima/ima_fs.c index ad491c51e8339..2c4e83f6409e3 100644 --- a/security/integrity/ima/ima_fs.c +++ b/security/integrity/ima/ima_fs.c @@ -39,14 +39,14 @@ static int __init default_canonical_fmt_setup(char *str) __setup("ima_canonical_fmt", default_canonical_fmt_setup); static int valid_policy = 1; -#define TMPBUFLEN 12 + static ssize_t ima_show_htable_value(char __user *buf, size_t count, loff_t *ppos, atomic_long_t *val) { - char tmpbuf[TMPBUFLEN]; + char tmpbuf[32]; /* greater than largest 'long' string value */ ssize_t len; - len = scnprintf(tmpbuf, TMPBUFLEN, "%li\n", atomic_long_read(val)); + len = scnprintf(tmpbuf, sizeof(tmpbuf), "%li\n", atomic_long_read(val)); return simple_read_from_buffer(buf, count, ppos, tmpbuf, len); } -- GitLab From d197121a077a39f6c33729ae5d4bef2d1a9c1596 Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Fri, 26 Oct 2018 15:10:58 -0700 Subject: [PATCH 1411/2269] hugetlbfs: dirty pages as they are added to pagecache commit 22146c3ce98962436e401f7b7016a6f664c9ffb5 upstream. Some test systems were experiencing negative huge page reserve counts and incorrect file block counts. This was traced to /proc/sys/vm/drop_caches removing clean pages from hugetlbfs file pagecaches. When non-hugetlbfs explicit code removes the pages, the appropriate accounting is not performed. This can be recreated as follows: fallocate -l 2M /dev/hugepages/foo echo 1 > /proc/sys/vm/drop_caches fallocate -l 2M /dev/hugepages/foo grep -i huge /proc/meminfo AnonHugePages: 0 kB ShmemHugePages: 0 kB HugePages_Total: 2048 HugePages_Free: 2047 HugePages_Rsvd: 18446744073709551615 HugePages_Surp: 0 Hugepagesize: 2048 kB Hugetlb: 4194304 kB ls -lsh /dev/hugepages/foo 4.0M -rw-r--r--. 1 root root 2.0M Oct 17 20:05 /dev/hugepages/foo To address this issue, dirty pages as they are added to pagecache. This can easily be reproduced with fallocate as shown above. Read faulted pages will eventually end up being marked dirty. But there is a window where they are clean and could be impacted by code such as drop_caches. So, just dirty them all as they are added to the pagecache. Link: http://lkml.kernel.org/r/b5be45b8-5afe-56cd-9482-28384699a049@oracle.com Fixes: 6bda666a03f0 ("hugepages: fold find_or_alloc_pages into huge_no_page()") Signed-off-by: Mike Kravetz Acked-by: Mihcla Hocko Reviewed-by: Khalid Aziz Cc: Hugh Dickins Cc: Naoya Horiguchi Cc: "Aneesh Kumar K . V" Cc: Andrea Arcangeli Cc: "Kirill A . Shutemov" Cc: Davidlohr Bueso Cc: Alexander Viro Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/hugetlb.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 9801dc0250e20..e073099083ca3 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -3644,6 +3644,12 @@ int huge_add_to_page_cache(struct page *page, struct address_space *mapping, return err; ClearPagePrivate(page); + /* + * set page dirty so that it will not be removed from cache/file + * by non-hugetlbfs specific code paths. + */ + set_page_dirty(page); + spin_lock(&inode->i_lock); inode->i_blocks += blocks_per_huge_page(h); spin_unlock(&inode->i_lock); -- GitLab From 9d3cc761c5d7cbcef96758bac4e4cee53d2136c8 Mon Sep 17 00:00:00 2001 From: Ralph Campbell Date: Tue, 30 Oct 2018 15:04:11 -0700 Subject: [PATCH 1412/2269] mm/rmap: map_pte() was not handling private ZONE_DEVICE page properly MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit aab8d0520e6e7c2a61f71195e6ce7007a4843afb upstream. Private ZONE_DEVICE pages use a special pte entry and thus are not present. Properly handle this case in map_pte(), it is already handled in check_pte(), the map_pte() part was lost in some rebase most probably. Without this patch the slow migration path can not migrate back to any private ZONE_DEVICE memory to regular memory. This was found after stress testing migration back to system memory. This ultimatly can lead to the CPU constantly page fault looping on the special swap entry. Link: http://lkml.kernel.org/r/20181019160442.18723-3-jglisse@redhat.com Signed-off-by: Ralph Campbell Signed-off-by: Jérôme Glisse Reviewed-by: Balbir Singh Cc: Andrew Morton Cc: Kirill A. Shutemov Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/page_vma_mapped.c | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c index 9560156143953..e00d985a51c56 100644 --- a/mm/page_vma_mapped.c +++ b/mm/page_vma_mapped.c @@ -21,7 +21,29 @@ static bool map_pte(struct page_vma_mapped_walk *pvmw) if (!is_swap_pte(*pvmw->pte)) return false; } else { - if (!pte_present(*pvmw->pte)) + /* + * We get here when we are trying to unmap a private + * device page from the process address space. Such + * page is not CPU accessible and thus is mapped as + * a special swap entry, nonetheless it still does + * count as a valid regular mapping for the page (and + * is accounted as such in page maps count). + * + * So handle this special case as if it was a normal + * page mapping ie lock CPU page table and returns + * true. + * + * For more details on device private memory see HMM + * (include/linux/hmm.h or mm/hmm.c). + */ + if (is_swap_pte(*pvmw->pte)) { + swp_entry_t entry; + + /* Handle un-addressable ZONE_DEVICE memory */ + entry = pte_to_swp_entry(*pvmw->pte); + if (!is_device_private_entry(entry)) + return false; + } else if (!pte_present(*pvmw->pte)) return false; } } -- GitLab From f1df765432518a30b1bcf56d1d03bdd5a1821dee Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 17 Oct 2018 17:42:10 +0100 Subject: [PATCH 1413/2269] KVM: arm64: Fix caching of host MDCR_EL2 value commit da5a3ce66b8bb51b0ea8a89f42aac153903f90fb upstream. At boot time, KVM stashes the host MDCR_EL2 value, but only does this when the kernel is not running in hyp mode (i.e. is non-VHE). In these cases, the stashed value of MDCR_EL2.HPMN happens to be zero, which can lead to CONSTRAINED UNPREDICTABLE behaviour. Since we use this value to derive the MDCR_EL2 value when switching to/from a guest, after a guest have been run, the performance counters do not behave as expected. This has been observed to result in accesses via PMXEVTYPER_EL0 and PMXEVCNTR_EL0 not affecting the relevant counters, resulting in events not being counted. In these cases, only the fixed-purpose cycle counter appears to work as expected. Fix this by always stashing the host MDCR_EL2 value, regardless of VHE. Cc: Christopher Dall Cc: James Morse Cc: Will Deacon Cc: stable@vger.kernel.org Fixes: 1e947bad0b63b351 ("arm64: KVM: Skip HYP setup when already running in HYP") Tested-by: Robin Murphy Signed-off-by: Mark Rutland Signed-off-by: Marc Zyngier Signed-off-by: Greg Kroah-Hartman --- virt/kvm/arm/arm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index d5f1d83645716..ed42b8cf6f5bd 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -1148,8 +1148,6 @@ static void cpu_init_hyp_mode(void *dummy) __cpu_init_hyp_mode(pgd_ptr, hyp_stack_ptr, vector_ptr); __cpu_init_stage2(); - - kvm_arm_init_debug(); } static void cpu_hyp_reset(void) @@ -1173,6 +1171,8 @@ static void cpu_hyp_reinit(void) cpu_init_hyp_mode(NULL); } + kvm_arm_init_debug(); + if (vgic_present) kvm_vgic_init_cpu_hardware(); } -- GitLab From d6ea9f3055378e7e520264025b6cc63c88615577 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 30 Oct 2018 15:07:32 -0700 Subject: [PATCH 1414/2269] kbuild: fix kernel/bounds.c 'W=1' warning commit 6a32c2469c3fbfee8f25bcd20af647326650a6cf upstream. Building any configuration with 'make W=1' produces a warning: kernel/bounds.c:16:6: warning: no previous prototype for 'foo' [-Wmissing-prototypes] When also passing -Werror, this prevents us from building any other files. Nobody ever calls the function, but we can't make it 'static' either since we want the compiler output. Calling it 'main' instead however avoids the warning, because gcc does not insist on having a declaration for main. Link: http://lkml.kernel.org/r/20181005083313.2088252-1-arnd@arndb.de Signed-off-by: Arnd Bergmann Reported-by: Kieran Bingham Reviewed-by: Kieran Bingham Cc: David Laight Cc: Masahiro Yamada Cc: Greg Kroah-Hartman Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- kernel/bounds.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/bounds.c b/kernel/bounds.c index c373e887c0664..9795d75b09b23 100644 --- a/kernel/bounds.c +++ b/kernel/bounds.c @@ -13,7 +13,7 @@ #include #include -void foo(void) +int main(void) { /* The enum constants to put into include/generated/bounds.h */ DEFINE(NR_PAGEFLAGS, __NR_PAGEFLAGS); @@ -23,4 +23,6 @@ void foo(void) #endif DEFINE(SPINLOCK_SIZE, sizeof(spinlock_t)); /* End of constants */ + + return 0; } -- GitLab From 8b38d82b0d278d5725e02e470f3cdd46423ba7b1 Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Fri, 28 Sep 2018 11:23:40 +0200 Subject: [PATCH 1415/2269] iio: ad5064: Fix regulator handling commit 8911a43bc198877fad9f4b0246a866b26bb547ab upstream. The correct way to handle errors returned by regualtor_get() and friends is to propagate the error since that means that an regulator was specified, but something went wrong when requesting it. For handling optional regulators, e.g. when the device has an internal vref, regulator_get_optional() should be used to avoid getting the dummy regulator that the regulator core otherwise provides. Signed-off-by: Lars-Peter Clausen Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/dac/ad5064.c | 53 ++++++++++++++++++++++++++++------------ 1 file changed, 38 insertions(+), 15 deletions(-) diff --git a/drivers/iio/dac/ad5064.c b/drivers/iio/dac/ad5064.c index 3f9399c278691..efc762da2ba81 100644 --- a/drivers/iio/dac/ad5064.c +++ b/drivers/iio/dac/ad5064.c @@ -809,6 +809,40 @@ static int ad5064_set_config(struct ad5064_state *st, unsigned int val) return ad5064_write(st, cmd, 0, val, 0); } +static int ad5064_request_vref(struct ad5064_state *st, struct device *dev) +{ + unsigned int i; + int ret; + + for (i = 0; i < ad5064_num_vref(st); ++i) + st->vref_reg[i].supply = ad5064_vref_name(st, i); + + if (!st->chip_info->internal_vref) + return devm_regulator_bulk_get(dev, ad5064_num_vref(st), + st->vref_reg); + + /* + * This assumes that when the regulator has an internal VREF + * there is only one external VREF connection, which is + * currently the case for all supported devices. + */ + st->vref_reg[0].consumer = devm_regulator_get_optional(dev, "vref"); + if (!IS_ERR(st->vref_reg[0].consumer)) + return 0; + + ret = PTR_ERR(st->vref_reg[0].consumer); + if (ret != -ENODEV) + return ret; + + /* If no external regulator was supplied use the internal VREF */ + st->use_internal_vref = true; + ret = ad5064_set_config(st, AD5064_CONFIG_INT_VREF_ENABLE); + if (ret) + dev_err(dev, "Failed to enable internal vref: %d\n", ret); + + return ret; +} + static int ad5064_probe(struct device *dev, enum ad5064_type type, const char *name, ad5064_write_func write) { @@ -829,22 +863,11 @@ static int ad5064_probe(struct device *dev, enum ad5064_type type, st->dev = dev; st->write = write; - for (i = 0; i < ad5064_num_vref(st); ++i) - st->vref_reg[i].supply = ad5064_vref_name(st, i); + ret = ad5064_request_vref(st, dev); + if (ret) + return ret; - ret = devm_regulator_bulk_get(dev, ad5064_num_vref(st), - st->vref_reg); - if (ret) { - if (!st->chip_info->internal_vref) - return ret; - st->use_internal_vref = true; - ret = ad5064_set_config(st, AD5064_CONFIG_INT_VREF_ENABLE); - if (ret) { - dev_err(dev, "Failed to enable internal vref: %d\n", - ret); - return ret; - } - } else { + if (!st->use_internal_vref) { ret = regulator_bulk_enable(ad5064_num_vref(st), st->vref_reg); if (ret) return ret; -- GitLab From 39b6d86b316e5d3d338d661ab6468beef1b5bb91 Mon Sep 17 00:00:00 2001 From: Alexey Khoroshilov Date: Sat, 22 Sep 2018 00:58:02 +0300 Subject: [PATCH 1416/2269] iio: adc: imx25-gcq: Fix leak of device_node in mx25_gcq_setup_cfgs() commit d3fa21c73c391975488818b085b894c2980ea052 upstream. Leaving for_each_child_of_node loop we should release child device node, if it is not stored for future use. Found by Linux Driver Verification project (linuxtesting.org). JC: I'm not sending this as a quick fix as it's been wrong for years, but good to pick up for stable after the merge window. Signed-off-by: Alexey Khoroshilov Fixes: 6df2e98c3ea56 ("iio: adc: Add imx25-gcq ADC driver") Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/adc/fsl-imx25-gcq.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/iio/adc/fsl-imx25-gcq.c b/drivers/iio/adc/fsl-imx25-gcq.c index ea264fa9e567a..929c617db3645 100644 --- a/drivers/iio/adc/fsl-imx25-gcq.c +++ b/drivers/iio/adc/fsl-imx25-gcq.c @@ -209,12 +209,14 @@ static int mx25_gcq_setup_cfgs(struct platform_device *pdev, ret = of_property_read_u32(child, "reg", ®); if (ret) { dev_err(dev, "Failed to get reg property\n"); + of_node_put(child); return ret; } if (reg >= MX25_NUM_CFGS) { dev_err(dev, "reg value is greater than the number of available configuration registers\n"); + of_node_put(child); return -EINVAL; } @@ -228,6 +230,7 @@ static int mx25_gcq_setup_cfgs(struct platform_device *pdev, if (IS_ERR(priv->vref[refp])) { dev_err(dev, "Error, trying to use external voltage reference without a vref-%s regulator.", mx25_gcq_refp_names[refp]); + of_node_put(child); return PTR_ERR(priv->vref[refp]); } priv->channel_vref_mv[reg] = @@ -240,6 +243,7 @@ static int mx25_gcq_setup_cfgs(struct platform_device *pdev, break; default: dev_err(dev, "Invalid positive reference %d\n", refp); + of_node_put(child); return -EINVAL; } @@ -254,10 +258,12 @@ static int mx25_gcq_setup_cfgs(struct platform_device *pdev, if ((refp & MX25_ADCQ_CFG_REFP_MASK) != refp) { dev_err(dev, "Invalid fsl,adc-refp property value\n"); + of_node_put(child); return -EINVAL; } if ((refn & MX25_ADCQ_CFG_REFN_MASK) != refn) { dev_err(dev, "Invalid fsl,adc-refn property value\n"); + of_node_put(child); return -EINVAL; } -- GitLab From c4ecff9bc4daf9b0d7d77ae8211ada97ac732079 Mon Sep 17 00:00:00 2001 From: Eugen Hristev Date: Mon, 24 Sep 2018 10:51:43 +0300 Subject: [PATCH 1417/2269] iio: adc: at91: fix acking DRDY irq on simple conversions commit bc1b45326223e7e890053cf6266357adfa61942d upstream. When doing simple conversions, the driver did not acknowledge the DRDY irq. If this irq status is not acked, it will be left pending, and as soon as a trigger is enabled, the irq handler will be called, it doesn't know why this status has occurred because no channel is pending, and then it will go int a irq loop and board will hang. To avoid this situation, read the LCDR after a raw conversion is done. Fixes: 0e589d5fb ("ARM: AT91: IIO: Add AT91 ADC driver.") Cc: Maxime Ripard Signed-off-by: Eugen Hristev Acked-by: Ludovic Desroches Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/adc/at91_adc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/iio/adc/at91_adc.c b/drivers/iio/adc/at91_adc.c index 15109728cae79..8882b80419f6e 100644 --- a/drivers/iio/adc/at91_adc.c +++ b/drivers/iio/adc/at91_adc.c @@ -279,6 +279,8 @@ static void handle_adc_eoc_trigger(int irq, struct iio_dev *idev) iio_trigger_poll(idev->trig); } else { st->last_value = at91_adc_readl(st, AT91_ADC_CHAN(st, st->chnb)); + /* Needed to ACK the DRDY interruption */ + at91_adc_readl(st, AT91_ADC_LCDR); st->done = true; wake_up_interruptible(&st->wq_data_avail); } -- GitLab From 28a1aaeda80f815bccb7c0dada9a0cbe86889e6b Mon Sep 17 00:00:00 2001 From: Eugen Hristev Date: Mon, 24 Sep 2018 10:51:44 +0300 Subject: [PATCH 1418/2269] iio: adc: at91: fix wrong channel number in triggered buffer mode commit aea835f2dc8a682942b859179c49ad1841a6c8b9 upstream. When channels are registered, the hardware channel number is not the actual iio channel number. This is because the driver is probed with a certain number of accessible channels. Some pins are routed and some not, depending on the description of the board in the DT. Because of that, channels 0,1,2,3 can correspond to hardware channels 2,3,4,5 for example. In the buffered triggered case, we need to do the translation accordingly. Fixed the channel number to stop reading the wrong channel. Fixes: 0e589d5fb ("ARM: AT91: IIO: Add AT91 ADC driver.") Cc: Maxime Ripard Signed-off-by: Eugen Hristev Acked-by: Ludovic Desroches Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/adc/at91_adc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/iio/adc/at91_adc.c b/drivers/iio/adc/at91_adc.c index 8882b80419f6e..cd686179aa925 100644 --- a/drivers/iio/adc/at91_adc.c +++ b/drivers/iio/adc/at91_adc.c @@ -248,12 +248,14 @@ static irqreturn_t at91_adc_trigger_handler(int irq, void *p) struct iio_poll_func *pf = p; struct iio_dev *idev = pf->indio_dev; struct at91_adc_state *st = iio_priv(idev); + struct iio_chan_spec const *chan; int i, j = 0; for (i = 0; i < idev->masklength; i++) { if (!test_bit(i, idev->active_scan_mask)) continue; - st->buffer[j] = at91_adc_readl(st, AT91_ADC_CHAN(st, i)); + chan = idev->channels + i; + st->buffer[j] = at91_adc_readl(st, AT91_ADC_CHAN(st, chan->channel)); j++; } -- GitLab From 6838bb61b194aa17981ee242e331bd65b41661a1 Mon Sep 17 00:00:00 2001 From: Andreas Kemnade Date: Sat, 22 Sep 2018 21:20:54 +0200 Subject: [PATCH 1419/2269] w1: omap-hdq: fix missing bus unregister at removal commit a007734618fee1bf35556c04fa498d41d42c7301 upstream. The bus master was not removed after unloading the module or unbinding the driver. That lead to oopses like this [ 127.842987] Unable to handle kernel paging request at virtual address bf01d04c [ 127.850646] pgd = 70e3cd9a [ 127.853698] [bf01d04c] *pgd=8f908811, *pte=00000000, *ppte=00000000 [ 127.860412] Internal error: Oops: 80000007 [#1] PREEMPT SMP ARM [ 127.866668] Modules linked in: bq27xxx_battery overlay [last unloaded: omap_hdq] [ 127.874542] CPU: 0 PID: 1022 Comm: w1_bus_master1 Not tainted 4.19.0-rc4-00001-g2d51da718324 #12 [ 127.883819] Hardware name: Generic OMAP36xx (Flattened Device Tree) [ 127.890441] PC is at 0xbf01d04c [ 127.893798] LR is at w1_search_process_cb+0x4c/0xfc [ 127.898956] pc : [] lr : [] psr: a0070013 [ 127.905609] sp : cf885f48 ip : bf01d04c fp : ddf1e11c [ 127.911132] r10: cf8fe040 r9 : c05f8d00 r8 : cf8fe040 [ 127.916656] r7 : 000000f0 r6 : cf8fe02c r5 : cf8fe000 r4 : cf8fe01c [ 127.923553] r3 : c05f8d00 r2 : 000000f0 r1 : cf8fe000 r0 : dde1ef10 [ 127.930450] Flags: NzCv IRQs on FIQs on Mode SVC_32 ISA ARM Segment none [ 127.938018] Control: 10c5387d Table: 8f8f0019 DAC: 00000051 [ 127.944091] Process w1_bus_master1 (pid: 1022, stack limit = 0x9135699f) [ 127.951171] Stack: (0xcf885f48 to 0xcf886000) [ 127.955810] 5f40: cf8fe000 00000000 cf884000 cf8fe090 000003e8 c05f8d00 [ 127.964477] 5f60: dde5fc34 c05f9700 ddf1e100 ddf1e540 cf884000 cf8fe000 c05f9694 00000000 [ 127.973114] 5f80: dde5fc34 c01499a4 00000000 ddf1e540 c0149874 00000000 00000000 00000000 [ 127.981781] 5fa0: 00000000 00000000 00000000 c01010e8 00000000 00000000 00000000 00000000 [ 127.990447] 5fc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 [ 127.999114] 5fe0: 00000000 00000000 00000000 00000000 00000013 00000000 00000000 00000000 [ 128.007781] [] (w1_search_process_cb) from [] (w1_process+0x6c/0x118) [ 128.016479] [] (w1_process) from [] (kthread+0x130/0x148) [ 128.024047] [] (kthread) from [] (ret_from_fork+0x14/0x2c) [ 128.031677] Exception stack(0xcf885fb0 to 0xcf885ff8) [ 128.037017] 5fa0: 00000000 00000000 00000000 00000000 [ 128.045684] 5fc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 [ 128.054351] 5fe0: 00000000 00000000 00000000 00000000 00000013 00000000 [ 128.061340] Code: bad PC value [ 128.064697] ---[ end trace af066e33c0e14119 ]--- Cc: Signed-off-by: Andreas Kemnade Signed-off-by: Greg Kroah-Hartman --- drivers/w1/masters/omap_hdq.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/w1/masters/omap_hdq.c b/drivers/w1/masters/omap_hdq.c index 83fc9aab34e87..3099052e12433 100644 --- a/drivers/w1/masters/omap_hdq.c +++ b/drivers/w1/masters/omap_hdq.c @@ -763,6 +763,8 @@ static int omap_hdq_remove(struct platform_device *pdev) /* remove module dependency */ pm_runtime_disable(&pdev->dev); + w1_remove_master_device(&omap_w1_master); + return 0; } -- GitLab From 54341b9fa4cf5022170302e8f9ac6026546e24f4 Mon Sep 17 00:00:00 2001 From: Steve French Date: Sat, 15 Sep 2018 23:04:41 -0500 Subject: [PATCH 1420/2269] smb3: allow stats which track session and share reconnects to be reset commit 2c887635cd6ab3af619dc2be94e5bf8f2e172b78 upstream. Currently, "echo 0 > /proc/fs/cifs/Stats" resets all of the stats except the session and share reconnect counts. Fix it to reset those as well. CC: Stable Signed-off-by: Steve French Reviewed-by: Aurelien Aptel Signed-off-by: Greg Kroah-Hartman --- fs/cifs/cifs_debug.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c index 2565cee702e48..106a715101f9f 100644 --- a/fs/cifs/cifs_debug.c +++ b/fs/cifs/cifs_debug.c @@ -289,6 +289,9 @@ static ssize_t cifs_stats_proc_write(struct file *file, atomic_set(&totBufAllocCount, 0); atomic_set(&totSmBufAllocCount, 0); #endif /* CONFIG_CIFS_STATS2 */ + atomic_set(&tcpSesReconnectCount, 0); + atomic_set(&tconInfoReconnectCount, 0); + spin_lock(&GlobalMid_Lock); GlobalMaxActiveXid = 0; GlobalCurrentXid = 0; -- GitLab From 122f2bd8d3e0998561f0c4c51bf242c0f6267861 Mon Sep 17 00:00:00 2001 From: Steve French Date: Fri, 19 Oct 2018 00:45:21 -0500 Subject: [PATCH 1421/2269] smb3: do not attempt cifs operation in smb3 query info error path commit 1e77a8c204c9d1b655c61751b8ad0fde22421dbb upstream. If backupuid mount option is sent, we can incorrectly retry (on access denied on query info) with a cifs (FindFirst) operation on an smb3 mount which causes the server to force the session close. We set backup intent on open so no need for this fallback. See kernel bugzilla 201435 Signed-off-by: Steve French CC: Stable Reviewed-by: Ronnie Sahlberg Signed-off-by: Greg Kroah-Hartman --- fs/cifs/inode.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index d01cbca84701f..a90a637ae79ab 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -776,7 +776,15 @@ cifs_get_inode_info(struct inode **inode, const char *full_path, } else if (rc == -EREMOTE) { cifs_create_dfs_fattr(&fattr, sb); rc = 0; - } else if (rc == -EACCES && backup_cred(cifs_sb)) { + } else if ((rc == -EACCES) && backup_cred(cifs_sb) && + (strcmp(server->vals->version_string, SMB1_VERSION_STRING) + == 0)) { + /* + * For SMB2 and later the backup intent flag is already + * sent if needed on open and there is no path based + * FindFirst operation to use to retry with + */ + srchinf = kzalloc(sizeof(struct cifs_search_info), GFP_KERNEL); if (srchinf == NULL) { -- GitLab From e629461b6b2d1a115779281411b49640cd9a8db8 Mon Sep 17 00:00:00 2001 From: Steve French Date: Sun, 28 Oct 2018 13:13:23 -0500 Subject: [PATCH 1422/2269] smb3: on kerberos mount if server doesn't specify auth type use krb5 commit 926674de6705f0f1dbf29a62fd758d0977f535d6 upstream. Some servers (e.g. Azure) do not include a spnego blob in the SMB3 negotiate protocol response, so on kerberos mounts ("sec=krb5") we can fail, as we expected the server to list its supported auth types (OIDs in the spnego blob in the negprot response). Change this so that on krb5 mounts we default to trying krb5 if the server doesn't list its supported protocol mechanisms. Signed-off-by: Steve French Reviewed-by: Ronnie Sahlberg CC: Stable Signed-off-by: Greg Kroah-Hartman --- fs/cifs/cifs_spnego.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/cifs/cifs_spnego.c b/fs/cifs/cifs_spnego.c index b611fc2e8984e..7f01c6e607918 100644 --- a/fs/cifs/cifs_spnego.c +++ b/fs/cifs/cifs_spnego.c @@ -147,8 +147,10 @@ cifs_get_spnego_key(struct cifs_ses *sesInfo) sprintf(dp, ";sec=krb5"); else if (server->sec_mskerberos) sprintf(dp, ";sec=mskrb5"); - else - goto out; + else { + cifs_dbg(VFS, "unknown or missing server auth type, use krb5\n"); + sprintf(dp, ";sec=krb5"); + } dp = description + strlen(description); sprintf(dp, ";uid=0x%x", -- GitLab From cc4dcea8b0f76c01dd6b64f1f84032c768896bba Mon Sep 17 00:00:00 2001 From: He Zhe Date: Sun, 30 Sep 2018 00:45:50 +0800 Subject: [PATCH 1423/2269] printk: Fix panic caused by passing log_buf_len to command line commit 277fcdb2cfee38ccdbe07e705dbd4896ba0c9930 upstream. log_buf_len_setup does not check input argument before passing it to simple_strtoull. The argument would be a NULL pointer if "log_buf_len", without its value, is set in command line and thus causes the following panic. PANIC: early exception 0xe3 IP 10:ffffffffaaeacd0d error 0 cr2 0x0 [ 0.000000] CPU: 0 PID: 0 Comm: swapper Not tainted 4.19.0-rc4-yocto-standard+ #1 [ 0.000000] RIP: 0010:_parse_integer_fixup_radix+0xd/0x70 ... [ 0.000000] Call Trace: [ 0.000000] simple_strtoull+0x29/0x70 [ 0.000000] memparse+0x26/0x90 [ 0.000000] log_buf_len_setup+0x17/0x22 [ 0.000000] do_early_param+0x57/0x8e [ 0.000000] parse_args+0x208/0x320 [ 0.000000] ? rdinit_setup+0x30/0x30 [ 0.000000] parse_early_options+0x29/0x2d [ 0.000000] ? rdinit_setup+0x30/0x30 [ 0.000000] parse_early_param+0x36/0x4d [ 0.000000] setup_arch+0x336/0x99e [ 0.000000] start_kernel+0x6f/0x4ee [ 0.000000] x86_64_start_reservations+0x24/0x26 [ 0.000000] x86_64_start_kernel+0x6f/0x72 [ 0.000000] secondary_startup_64+0xa4/0xb0 This patch adds a check to prevent the panic. Link: http://lkml.kernel.org/r/1538239553-81805-1-git-send-email-zhe.he@windriver.com Cc: stable@vger.kernel.org Cc: rostedt@goodmis.org Cc: linux-kernel@vger.kernel.org Signed-off-by: He Zhe Reviewed-by: Sergey Senozhatsky Signed-off-by: Petr Mladek Signed-off-by: Greg Kroah-Hartman --- kernel/printk/printk.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index f0223a7d9ed1b..7161312593ddc 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -1043,7 +1043,12 @@ static void __init log_buf_len_update(unsigned size) /* save requested log_buf_len since it's too early to process it */ static int __init log_buf_len_setup(char *str) { - unsigned size = memparse(str, &str); + unsigned int size; + + if (!str) + return -EINVAL; + + size = memparse(str, &str); log_buf_len_update(size); -- GitLab From e6b8a4d76ae25fbb26b9e72f03007406329bd021 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Thu, 18 Oct 2018 15:15:05 +0200 Subject: [PATCH 1424/2269] genirq: Fix race on spurious interrupt detection commit 746a923b863a1065ef77324e1e43f19b1a3eab5c upstream. Commit 1e77d0a1ed74 ("genirq: Sanitize spurious interrupt detection of threaded irqs") made detection of spurious interrupts work for threaded handlers by: a) incrementing a counter every time the thread returns IRQ_HANDLED, and b) checking whether that counter has increased every time the thread is woken. However for oneshot interrupts, the commit unmasks the interrupt before incrementing the counter. If another interrupt occurs right after unmasking but before the counter is incremented, that interrupt is incorrectly considered spurious: time | irq_thread() | irq_thread_fn() | action->thread_fn() | irq_finalize_oneshot() | unmask_threaded_irq() /* interrupt is unmasked */ | | /* interrupt fires, incorrectly deemed spurious */ | | atomic_inc(&desc->threads_handled); /* counter is incremented */ v This is observed with a hi3110 CAN controller receiving data at high volume (from a separate machine sending with "cangen -g 0 -i -x"): The controller signals a huge number of interrupts (hundreds of millions per day) and every second there are about a dozen which are deemed spurious. In theory with high CPU load and the presence of higher priority tasks, the number of incorrectly detected spurious interrupts might increase beyond the 99,900 threshold and cause disablement of the interrupt. In practice it just increments the spurious interrupt count. But that can cause people to waste time investigating it over and over. Fix it by moving the accounting before the invocation of irq_finalize_oneshot(). [ tglx: Folded change log update ] Fixes: 1e77d0a1ed74 ("genirq: Sanitize spurious interrupt detection of threaded irqs") Signed-off-by: Lukas Wunner Signed-off-by: Thomas Gleixner Cc: Mathias Duckeck Cc: Akshay Bhat Cc: Casey Fitzpatrick Cc: stable@vger.kernel.org # v3.16+ Link: https://lkml.kernel.org/r/1dfd8bbd16163940648045495e3e9698e63b50ad.1539867047.git.lukas@wunner.de Signed-off-by: Greg Kroah-Hartman --- kernel/irq/manage.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 069311541577c..4cd85870f00e6 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -882,6 +882,9 @@ irq_forced_thread_fn(struct irq_desc *desc, struct irqaction *action) local_bh_disable(); ret = action->thread_fn(action->irq, action->dev_id); + if (ret == IRQ_HANDLED) + atomic_inc(&desc->threads_handled); + irq_finalize_oneshot(desc, action); local_bh_enable(); return ret; @@ -898,6 +901,9 @@ static irqreturn_t irq_thread_fn(struct irq_desc *desc, irqreturn_t ret; ret = action->thread_fn(action->irq, action->dev_id); + if (ret == IRQ_HANDLED) + atomic_inc(&desc->threads_handled); + irq_finalize_oneshot(desc, action); return ret; } @@ -975,8 +981,6 @@ static int irq_thread(void *data) irq_thread_check_affinity(desc, action); action_ret = handler_fn(desc, action); - if (action_ret == IRQ_HANDLED) - atomic_inc(&desc->threads_handled); if (action_ret == IRQ_WAKE_THREAD) irq_wake_secondary(desc, action); -- GitLab From 1796b3898e51d4f435f17b8ff54458f5fd541d46 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 18 Sep 2018 10:07:44 -0400 Subject: [PATCH 1425/2269] NFSv4.1: Fix the r/wsize checking commit 943cff67b842839f4f35364ba2db5c2d3f025d94 upstream. The intention of nfs4_session_set_rwsize() was to cap the r/wsize to the buffer sizes negotiated by the CREATE_SESSION. The initial code had a bug whereby we would not check the values negotiated by nfs_probe_fsinfo() (the assumption being that CREATE_SESSION will always negotiate buffer values that are sane w.r.t. the server's preferred r/wsizes) but would only check values set by the user in the 'mount' command. The code was changed in 4.11 to _always_ set the r/wsize, meaning that we now never use the server preferred r/wsizes. This is the regression that this patch fixes. Also rename the function to nfs4_session_limit_rwsize() in order to avoid future confusion. Fixes: 033853325fe3 (NFSv4.1 respect server's max size in CREATE_SESSION") Cc: stable@vger.kernel.org # v4.11+ Signed-off-by: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- fs/nfs/nfs4client.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index fb85d04fdc4c2..fed9c8005c17d 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -925,10 +925,10 @@ EXPORT_SYMBOL_GPL(nfs4_set_ds_client); /* * Session has been established, and the client marked ready. - * Set the mount rsize and wsize with negotiated fore channel - * attributes which will be bound checked in nfs_server_set_fsinfo. + * Limit the mount rsize, wsize and dtsize using negotiated fore + * channel attributes. */ -static void nfs4_session_set_rwsize(struct nfs_server *server) +static void nfs4_session_limit_rwsize(struct nfs_server *server) { #ifdef CONFIG_NFS_V4_1 struct nfs4_session *sess; @@ -941,9 +941,11 @@ static void nfs4_session_set_rwsize(struct nfs_server *server) server_resp_sz = sess->fc_attrs.max_resp_sz - nfs41_maxread_overhead; server_rqst_sz = sess->fc_attrs.max_rqst_sz - nfs41_maxwrite_overhead; - if (!server->rsize || server->rsize > server_resp_sz) + if (server->dtsize > server_resp_sz) + server->dtsize = server_resp_sz; + if (server->rsize > server_resp_sz) server->rsize = server_resp_sz; - if (!server->wsize || server->wsize > server_rqst_sz) + if (server->wsize > server_rqst_sz) server->wsize = server_rqst_sz; #endif /* CONFIG_NFS_V4_1 */ } @@ -990,12 +992,12 @@ static int nfs4_server_common_setup(struct nfs_server *server, (unsigned long long) server->fsid.minor); nfs_display_fhandle(mntfh, "Pseudo-fs root FH"); - nfs4_session_set_rwsize(server); - error = nfs_probe_fsinfo(server, mntfh, fattr); if (error < 0) goto out; + nfs4_session_limit_rwsize(server); + if (server->namelen == 0 || server->namelen > NFS4_MAXNAMLEN) server->namelen = NFS4_MAXNAMLEN; -- GitLab From eecbf2c22b3f0dca05a199a8992ac2158266dfbd Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Thu, 18 Oct 2018 15:01:48 -0400 Subject: [PATCH 1426/2269] nfs: Fix a missed page unlock after pg_doio() commit fdbd1a2e4a71adcb1ae219fcfd964930d77a7f84 upstream. We must check pg_error and call error_cleanup after any call to pg_doio. Currently, we are skipping the unlock of a page if we encounter an error in nfs_pageio_complete() before handing off the work to the RPC layer. Signed-off-by: Benjamin Coddington Cc: stable@vger.kernel.org Signed-off-by: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- fs/nfs/pagelist.c | 40 +++++++++++++++++++++------------------- 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index d0543e19098a3..37f20d7a26ed9 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -1110,6 +1110,20 @@ static int nfs_pageio_add_request_mirror(struct nfs_pageio_descriptor *desc, return ret; } +static void nfs_pageio_error_cleanup(struct nfs_pageio_descriptor *desc) +{ + u32 midx; + struct nfs_pgio_mirror *mirror; + + if (!desc->pg_error) + return; + + for (midx = 0; midx < desc->pg_mirror_count; midx++) { + mirror = &desc->pg_mirrors[midx]; + desc->pg_completion_ops->error_cleanup(&mirror->pg_list); + } +} + int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, struct nfs_page *req) { @@ -1160,25 +1174,11 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, return 1; out_failed: - /* - * We might have failed before sending any reqs over wire. - * Clean up rest of the reqs in mirror pg_list. - */ - if (desc->pg_error) { - struct nfs_pgio_mirror *mirror; - void (*func)(struct list_head *); - - /* remember fatal errors */ - if (nfs_error_is_fatal(desc->pg_error)) - nfs_context_set_write_error(req->wb_context, - desc->pg_error); - - func = desc->pg_completion_ops->error_cleanup; - for (midx = 0; midx < desc->pg_mirror_count; midx++) { - mirror = &desc->pg_mirrors[midx]; - func(&mirror->pg_list); - } - } + /* remember fatal errors */ + if (nfs_error_is_fatal(desc->pg_error)) + nfs_context_set_write_error(req->wb_context, + desc->pg_error); + nfs_pageio_error_cleanup(desc); return 0; } @@ -1250,6 +1250,8 @@ void nfs_pageio_complete(struct nfs_pageio_descriptor *desc) for (midx = 0; midx < desc->pg_mirror_count; midx++) nfs_pageio_complete_mirror(desc, midx); + if (desc->pg_error < 0) + nfs_pageio_error_cleanup(desc); if (desc->pg_ops->pg_cleanup) desc->pg_ops->pg_cleanup(desc); nfs_pageio_cleanup_mirroring(desc); -- GitLab From eecd97e13c1553dfff1c159513b6ab041f53a70a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 9 Oct 2018 15:54:15 -0400 Subject: [PATCH 1427/2269] nfsd: Fix an Oops in free_session() commit bb6ad5572c0022e17e846b382d7413cdcf8055be upstream. In call_xpt_users(), we delete the entry from the list, but we do not reinitialise it. This triggers the list poisoning when we later call unregister_xpt_user() in nfsd4_del_conns(). Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org Signed-off-by: J. Bruce Fields Signed-off-by: Greg Kroah-Hartman --- net/sunrpc/svc_xprt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index d16a8b423c20b..ea7b5a3a53f07 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -1040,7 +1040,7 @@ static void call_xpt_users(struct svc_xprt *xprt) spin_lock(&xprt->xpt_lock); while (!list_empty(&xprt->xpt_users)) { u = list_first_entry(&xprt->xpt_users, struct svc_xpt_user, list); - list_del(&u->list); + list_del_init(&u->list); u->callback(u); } spin_unlock(&xprt->xpt_lock); -- GitLab From a45ce0e1b9cd86443406462375c4004f55f7a881 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Fri, 28 Sep 2018 20:41:48 +0300 Subject: [PATCH 1428/2269] lockd: fix access beyond unterminated strings in prints commit 93f38b6fae0ea8987e22d9e6c38f8dfdccd867ee upstream. printk format used %*s instead of %.*s, so hostname_len does not limit the number of bytes accessed from hostname. Signed-off-by: Amir Goldstein Cc: stable@vger.kernel.org Signed-off-by: J. Bruce Fields Signed-off-by: Greg Kroah-Hartman --- fs/lockd/host.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/lockd/host.c b/fs/lockd/host.c index 0d4e590e05498..c4504ed9f6807 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c @@ -341,7 +341,7 @@ struct nlm_host *nlmsvc_lookup_host(const struct svc_rqst *rqstp, }; struct lockd_net *ln = net_generic(net, lockd_net_id); - dprintk("lockd: %s(host='%*s', vers=%u, proto=%s)\n", __func__, + dprintk("lockd: %s(host='%.*s', vers=%u, proto=%s)\n", __func__, (int)hostname_len, hostname, rqstp->rq_vers, (rqstp->rq_prot == IPPROTO_UDP ? "udp" : "tcp")); -- GitLab From a0a4629065e4cc4a79b91d9942afb5565d86b462 Mon Sep 17 00:00:00 2001 From: Wenwen Wang Date: Wed, 3 Oct 2018 11:43:59 -0500 Subject: [PATCH 1429/2269] dm ioctl: harden copy_params()'s copy_from_user() from malicious users commit 800a7340ab7dd667edf95e74d8e4f23a17e87076 upstream. In copy_params(), the struct 'dm_ioctl' is first copied from the user space buffer 'user' to 'param_kernel' and the field 'data_size' is checked against 'minimum_data_size' (size of 'struct dm_ioctl' payload up to its 'data' member). If the check fails, an error code EINVAL will be returned. Otherwise, param_kernel->data_size is used to do a second copy, which copies from the same user-space buffer to 'dmi'. After the second copy, only 'dmi->data_size' is checked against 'param_kernel->data_size'. Given that the buffer 'user' resides in the user space, a malicious user-space process can race to change the content in the buffer between the two copies. This way, the attacker can inject inconsistent data into 'dmi' (versus previously validated 'param_kernel'). Fix redundant copying of 'minimum_data_size' from user-space buffer by using the first copy stored in 'param_kernel'. Also remove the 'data_size' check after the second copy because it is now unnecessary. Cc: stable@vger.kernel.org Signed-off-by: Wenwen Wang Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-ioctl.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index e52676fa9832c..ca948155191ac 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -1719,8 +1719,7 @@ static void free_params(struct dm_ioctl *param, size_t param_size, int param_fla } static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl *param_kernel, - int ioctl_flags, - struct dm_ioctl **param, int *param_flags) + int ioctl_flags, struct dm_ioctl **param, int *param_flags) { struct dm_ioctl *dmi; int secure_data; @@ -1761,18 +1760,13 @@ static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl *param_kern *param_flags |= DM_PARAMS_MALLOC; - if (copy_from_user(dmi, user, param_kernel->data_size)) - goto bad; + /* Copy from param_kernel (which was already copied from user) */ + memcpy(dmi, param_kernel, minimum_data_size); -data_copied: - /* - * Abort if something changed the ioctl data while it was being copied. - */ - if (dmi->data_size != param_kernel->data_size) { - DMERR("rejecting ioctl: data size modified while processing parameters"); + if (copy_from_user(&dmi->data, (char __user *)user + minimum_data_size, + param_kernel->data_size - minimum_data_size)) goto bad; - } - +data_copied: /* Wipe the user buffer so we do not return it to userspace */ if (secure_data && clear_user(user, param_kernel->data_size)) goto bad; -- GitLab From 229cbc6123b4b1d2d46956a869e6dbf6905265a4 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Wed, 17 Oct 2018 18:05:07 +0900 Subject: [PATCH 1430/2269] dm zoned: fix metadata block ref counting commit 33c2865f8d011a2ca9f67124ddab9dc89382e9f1 upstream. Since the ref field of struct dmz_mblock is always used with the spinlock of struct dmz_metadata locked, there is no need to use an atomic_t type. Change the type of the ref field to an unsigne integer. Fixes: 3b1a94c88b79 ("dm zoned: drive-managed zoned block device target") Cc: stable@vger.kernel.org Signed-off-by: Damien Le Moal Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-zoned-metadata.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c index 70485de37b669..e1dce7f8f4fd6 100644 --- a/drivers/md/dm-zoned-metadata.c +++ b/drivers/md/dm-zoned-metadata.c @@ -99,7 +99,7 @@ struct dmz_mblock { struct rb_node node; struct list_head link; sector_t no; - atomic_t ref; + unsigned int ref; unsigned long state; struct page *page; void *data; @@ -296,7 +296,7 @@ static struct dmz_mblock *dmz_alloc_mblock(struct dmz_metadata *zmd, RB_CLEAR_NODE(&mblk->node); INIT_LIST_HEAD(&mblk->link); - atomic_set(&mblk->ref, 0); + mblk->ref = 0; mblk->state = 0; mblk->no = mblk_no; mblk->data = page_address(mblk->page); @@ -397,7 +397,7 @@ static struct dmz_mblock *dmz_fetch_mblock(struct dmz_metadata *zmd, return NULL; spin_lock(&zmd->mblk_lock); - atomic_inc(&mblk->ref); + mblk->ref++; set_bit(DMZ_META_READING, &mblk->state); dmz_insert_mblock(zmd, mblk); spin_unlock(&zmd->mblk_lock); @@ -484,7 +484,8 @@ static void dmz_release_mblock(struct dmz_metadata *zmd, spin_lock(&zmd->mblk_lock); - if (atomic_dec_and_test(&mblk->ref)) { + mblk->ref--; + if (mblk->ref == 0) { if (test_bit(DMZ_META_ERROR, &mblk->state)) { rb_erase(&mblk->node, &zmd->mblk_rbtree); dmz_free_mblock(zmd, mblk); @@ -511,7 +512,8 @@ static struct dmz_mblock *dmz_get_mblock(struct dmz_metadata *zmd, mblk = dmz_lookup_mblock(zmd, mblk_no); if (mblk) { /* Cache hit: remove block from LRU list */ - if (atomic_inc_return(&mblk->ref) == 1 && + mblk->ref++; + if (mblk->ref == 1 && !test_bit(DMZ_META_DIRTY, &mblk->state)) list_del_init(&mblk->link); } @@ -753,7 +755,7 @@ int dmz_flush_metadata(struct dmz_metadata *zmd) spin_lock(&zmd->mblk_lock); clear_bit(DMZ_META_DIRTY, &mblk->state); - if (atomic_read(&mblk->ref) == 0) + if (mblk->ref == 0) list_add_tail(&mblk->link, &zmd->mblk_lru_list); spin_unlock(&zmd->mblk_lock); } @@ -2308,7 +2310,7 @@ static void dmz_cleanup_metadata(struct dmz_metadata *zmd) mblk = list_first_entry(&zmd->mblk_dirty_list, struct dmz_mblock, link); dmz_dev_warn(zmd->dev, "mblock %llu still in dirty list (ref %u)", - (u64)mblk->no, atomic_read(&mblk->ref)); + (u64)mblk->no, mblk->ref); list_del_init(&mblk->link); rb_erase(&mblk->node, &zmd->mblk_rbtree); dmz_free_mblock(zmd, mblk); @@ -2326,8 +2328,8 @@ static void dmz_cleanup_metadata(struct dmz_metadata *zmd) root = &zmd->mblk_rbtree; rbtree_postorder_for_each_entry_safe(mblk, next, root, node) { dmz_dev_warn(zmd->dev, "mblock %llu ref %u still in rbtree", - (u64)mblk->no, atomic_read(&mblk->ref)); - atomic_set(&mblk->ref, 0); + (u64)mblk->no, mblk->ref); + mblk->ref = 0; dmz_free_mblock(zmd, mblk); } -- GitLab From 37531246704e708a5b3ee4a9498493cc4ff0c4bb Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Wed, 17 Oct 2018 18:05:08 +0900 Subject: [PATCH 1431/2269] dm zoned: fix various dmz_get_mblock() issues commit 3d4e738311327bc4ba1d55fbe2f1da3de4a475f9 upstream. dmz_fetch_mblock() called from dmz_get_mblock() has a race since the allocation of the new metadata block descriptor and its insertion in the cache rbtree with the READING state is not atomic. Two different contexts requesting the same block may end up each adding two different descriptors of the same block to the cache. Another problem for this function is that the BIO for processing the block read is allocated after the metadata block descriptor is inserted in the cache rbtree. If the BIO allocation fails, the metadata block descriptor is freed without first being removed from the rbtree. Fix the first problem by checking again if the requested block is not in the cache right before inserting the newly allocated descriptor, atomically under the mblk_lock spinlock. The second problem is fixed by simply allocating the BIO before inserting the new block in the cache. Finally, since dmz_fetch_mblock() also increments a block reference counter, rename the function to dmz_get_mblock_slow(). To be symmetric and clear, also rename dmz_lookup_mblock() to dmz_get_mblock_fast() and increment the block reference counter directly in that function rather than in dmz_get_mblock(). Fixes: 3b1a94c88b79 ("dm zoned: drive-managed zoned block device target") Cc: stable@vger.kernel.org Signed-off-by: Damien Le Moal Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-zoned-metadata.c | 66 +++++++++++++++++++++------------- 1 file changed, 42 insertions(+), 24 deletions(-) diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c index e1dce7f8f4fd6..34968ca6b84a5 100644 --- a/drivers/md/dm-zoned-metadata.c +++ b/drivers/md/dm-zoned-metadata.c @@ -339,10 +339,11 @@ static void dmz_insert_mblock(struct dmz_metadata *zmd, struct dmz_mblock *mblk) } /* - * Lookup a metadata block in the rbtree. + * Lookup a metadata block in the rbtree. If the block is found, increment + * its reference count. */ -static struct dmz_mblock *dmz_lookup_mblock(struct dmz_metadata *zmd, - sector_t mblk_no) +static struct dmz_mblock *dmz_get_mblock_fast(struct dmz_metadata *zmd, + sector_t mblk_no) { struct rb_root *root = &zmd->mblk_rbtree; struct rb_node *node = root->rb_node; @@ -350,8 +351,17 @@ static struct dmz_mblock *dmz_lookup_mblock(struct dmz_metadata *zmd, while (node) { mblk = container_of(node, struct dmz_mblock, node); - if (mblk->no == mblk_no) + if (mblk->no == mblk_no) { + /* + * If this is the first reference to the block, + * remove it from the LRU list. + */ + mblk->ref++; + if (mblk->ref == 1 && + !test_bit(DMZ_META_DIRTY, &mblk->state)) + list_del_init(&mblk->link); return mblk; + } node = (mblk->no < mblk_no) ? node->rb_left : node->rb_right; } @@ -382,32 +392,47 @@ static void dmz_mblock_bio_end_io(struct bio *bio) } /* - * Read a metadata block from disk. + * Read an uncached metadata block from disk and add it to the cache. */ -static struct dmz_mblock *dmz_fetch_mblock(struct dmz_metadata *zmd, - sector_t mblk_no) +static struct dmz_mblock *dmz_get_mblock_slow(struct dmz_metadata *zmd, + sector_t mblk_no) { - struct dmz_mblock *mblk; + struct dmz_mblock *mblk, *m; sector_t block = zmd->sb[zmd->mblk_primary].block + mblk_no; struct bio *bio; - /* Get block and insert it */ + /* Get a new block and a BIO to read it */ mblk = dmz_alloc_mblock(zmd, mblk_no); if (!mblk) return NULL; - spin_lock(&zmd->mblk_lock); - mblk->ref++; - set_bit(DMZ_META_READING, &mblk->state); - dmz_insert_mblock(zmd, mblk); - spin_unlock(&zmd->mblk_lock); - bio = bio_alloc(GFP_NOIO, 1); if (!bio) { dmz_free_mblock(zmd, mblk); return NULL; } + spin_lock(&zmd->mblk_lock); + + /* + * Make sure that another context did not start reading + * the block already. + */ + m = dmz_get_mblock_fast(zmd, mblk_no); + if (m) { + spin_unlock(&zmd->mblk_lock); + dmz_free_mblock(zmd, mblk); + bio_put(bio); + return m; + } + + mblk->ref++; + set_bit(DMZ_META_READING, &mblk->state); + dmz_insert_mblock(zmd, mblk); + + spin_unlock(&zmd->mblk_lock); + + /* Submit read BIO */ bio->bi_iter.bi_sector = dmz_blk2sect(block); bio_set_dev(bio, zmd->dev->bdev); bio->bi_private = mblk; @@ -509,19 +534,12 @@ static struct dmz_mblock *dmz_get_mblock(struct dmz_metadata *zmd, /* Check rbtree */ spin_lock(&zmd->mblk_lock); - mblk = dmz_lookup_mblock(zmd, mblk_no); - if (mblk) { - /* Cache hit: remove block from LRU list */ - mblk->ref++; - if (mblk->ref == 1 && - !test_bit(DMZ_META_DIRTY, &mblk->state)) - list_del_init(&mblk->link); - } + mblk = dmz_get_mblock_fast(zmd, mblk_no); spin_unlock(&zmd->mblk_lock); if (!mblk) { /* Cache miss: read the block from disk */ - mblk = dmz_fetch_mblock(zmd, mblk_no); + mblk = dmz_get_mblock_slow(zmd, mblk_no); if (!mblk) return ERR_PTR(-ENOMEM); } -- GitLab From dfed53c894d5578031175d11bc56e3031ce0991b Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 19 Oct 2018 06:12:50 +0000 Subject: [PATCH 1432/2269] powerpc/msi: Fix compile error on mpc83xx commit 0f99153def98134403c9149128e59d3e1786cf04 upstream. mpic_get_primary_version() is not defined when not using MPIC. The compile error log like: arch/powerpc/sysdev/built-in.o: In function `fsl_of_msi_probe': fsl_msi.c:(.text+0x150c): undefined reference to `fsl_mpic_primary_get_version' Signed-off-by: Jia Hongtao Signed-off-by: Scott Wood Reported-by: Radu Rendec Fixes: 807d38b73b6 ("powerpc/mpic: Add get_version API both for internal and external use") Cc: stable@vger.kernel.org Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/include/asm/mpic.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/powerpc/include/asm/mpic.h b/arch/powerpc/include/asm/mpic.h index fad8ddd697ac4..0abf2e7fd2226 100644 --- a/arch/powerpc/include/asm/mpic.h +++ b/arch/powerpc/include/asm/mpic.h @@ -393,7 +393,14 @@ extern struct bus_type mpic_subsys; #define MPIC_REGSET_TSI108 MPIC_REGSET(1) /* Tsi108/109 PIC */ /* Get the version of primary MPIC */ +#ifdef CONFIG_MPIC extern u32 fsl_mpic_primary_get_version(void); +#else +static inline u32 fsl_mpic_primary_get_version(void) +{ + return 0; +} +#endif /* Allocate the controller structure and setup the linux irq descs * for the range if interrupts passed in. No HW initialization is -- GitLab From d70d2e3c4ab8c29330b91b210514e32bee13ec1e Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Sat, 27 Oct 2018 01:46:34 +0300 Subject: [PATCH 1433/2269] MIPS: OCTEON: fix out of bounds array access on CN68XX commit c0fae7e2452b90c31edd2d25eb3baf0c76b400ca upstream. The maximum number of interfaces is returned by cvmx_helper_get_number_of_interfaces(), and the value is used to access interface_port_count[]. When CN68XX support was added, we forgot to increase the array size. Fix that. Fixes: 2c8c3f0201333 ("MIPS: Octeon: Support additional interfaces on CN68XX") Signed-off-by: Aaro Koskinen Signed-off-by: Paul Burton Patchwork: https://patchwork.linux-mips.org/patch/20949/ Cc: Ralf Baechle Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Cc: stable@vger.kernel.org # v4.3+ Signed-off-by: Greg Kroah-Hartman --- arch/mips/cavium-octeon/executive/cvmx-helper.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/cavium-octeon/executive/cvmx-helper.c b/arch/mips/cavium-octeon/executive/cvmx-helper.c index f24be0b5db507..c683c369bca58 100644 --- a/arch/mips/cavium-octeon/executive/cvmx-helper.c +++ b/arch/mips/cavium-octeon/executive/cvmx-helper.c @@ -67,7 +67,7 @@ void (*cvmx_override_pko_queue_priority) (int pko_port, void (*cvmx_override_ipd_port_setup) (int ipd_port); /* Port count per interface */ -static int interface_port_count[5]; +static int interface_port_count[9]; /** * Return the number of interfaces the chip has. Each interface -- GitLab From 34e0ab871c813d9f8939f30198a972d9a476c20c Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 1 Oct 2018 12:42:49 +0100 Subject: [PATCH 1434/2269] iommu/arm-smmu: Ensure that page-table updates are visible before TLBI commit 7d321bd3542500caf125249f44dc37cb4e738013 upstream. The IO-pgtable code relies on the driver TLB invalidation callbacks to ensure that all page-table updates are visible to the IOMMU page-table walker. In the case that the page-table walker is cache-coherent, we cannot rely on an implicit DSB from the DMA-mapping code, so we must ensure that we execute a DSB in our tlb_add_flush() callback prior to triggering the invalidation. Cc: Cc: Robin Murphy Fixes: 2df7a25ce4a7 ("iommu/arm-smmu: Clean up DMA API usage") Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/arm-smmu.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 2c436376f13eb..15b5856475fcd 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -475,6 +475,9 @@ static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size, bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS; void __iomem *reg = ARM_SMMU_CB(smmu_domain->smmu, cfg->cbndx); + if (smmu_domain->smmu->features & ARM_SMMU_FEAT_COHERENT_WALK) + wmb(); + if (stage1) { reg += leaf ? ARM_SMMU_CB_S1_TLBIVAL : ARM_SMMU_CB_S1_TLBIVA; @@ -516,6 +519,9 @@ static void arm_smmu_tlb_inv_vmid_nosync(unsigned long iova, size_t size, struct arm_smmu_domain *smmu_domain = cookie; void __iomem *base = ARM_SMMU_GR0(smmu_domain->smmu); + if (smmu_domain->smmu->features & ARM_SMMU_FEAT_COHERENT_WALK) + wmb(); + writel_relaxed(smmu_domain->cfg.vmid, base + ARM_SMMU_GR0_TLBIVMID); } -- GitLab From e7f6b41ad2aab1453f0ff64b014027209e8ea632 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Wed, 3 Oct 2018 13:21:07 +0100 Subject: [PATCH 1435/2269] TC: Set DMA masks for devices commit 3f2aa244ee1a0d17ed5b6c86564d2c1b24d1c96b upstream. Fix a TURBOchannel support regression with commit 205e1b7f51e4 ("dma-mapping: warn when there is no coherent_dma_mask") that caused coherent DMA allocations to produce a warning such as: defxx: v1.11 2014/07/01 Lawrence V. Stefani and others tc1: DEFTA at MMIO addr = 0x1e900000, IRQ = 20, Hardware addr = 08-00-2b-a3-a3-29 ------------[ cut here ]------------ WARNING: CPU: 0 PID: 1 at ./include/linux/dma-mapping.h:516 dfx_dev_register+0x670/0x678 Modules linked in: CPU: 0 PID: 1 Comm: swapper Not tainted 4.19.0-rc6 #2 Stack : ffffffff8009ffc0 fffffffffffffec0 0000000000000000 ffffffff80647650 0000000000000000 0000000000000000 ffffffff806f5f80 ffffffffffffffff 0000000000000000 0000000000000000 0000000000000001 ffffffff8065d4e8 98000000031b6300 ffffffff80563478 ffffffff805685b0 ffffffffffffffff 0000000000000000 ffffffff805d6720 0000000000000204 ffffffff80388df8 0000000000000000 0000000000000009 ffffffff8053efd0 ffffffff806657d0 0000000000000000 ffffffff803177f8 0000000000000000 ffffffff806d0000 9800000003078000 980000000307b9e0 000000001e900000 ffffffff80067940 0000000000000000 ffffffff805d6720 0000000000000204 ffffffff80388df8 ffffffff805176c0 ffffffff8004dc78 0000000000000000 ffffffff80067940 ... Call Trace: [] show_stack+0xa0/0x130 [] __warn+0x128/0x170 ---[ end trace b1d1e094f67f3bb2 ]--- This is because the TURBOchannel bus driver fails to set the coherent DMA mask for devices enumerated. Set the regular and coherent DMA masks for TURBOchannel devices then, observing that the bus protocol supports a 34-bit (16GiB) DMA address space, by interpreting the value presented in the address cycle across the 32 `ad' lines as a 32-bit word rather than byte address[1]. The architectural size of the TURBOchannel DMA address space exceeds the maximum amount of RAM any actual TURBOchannel system in existence may have, hence both masks are the same. This removes the warning shown above. References: [1] "TURBOchannel Hardware Specification", EK-369AA-OD-007B, Digital Equipment Corporation, January 1993, Section "DMA", pp. 1-15 -- 1-17 Signed-off-by: Maciej W. Rozycki Signed-off-by: Paul Burton Patchwork: https://patchwork.linux-mips.org/patch/20835/ Fixes: 205e1b7f51e4 ("dma-mapping: warn when there is no coherent_dma_mask") Cc: stable@vger.kernel.org # 4.16+ Cc: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- drivers/tc/tc.c | 8 +++++++- include/linux/tc.h | 1 + 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/tc/tc.c b/drivers/tc/tc.c index 3be9519654e51..cf3fad2cb8714 100644 --- a/drivers/tc/tc.c +++ b/drivers/tc/tc.c @@ -2,7 +2,7 @@ * TURBOchannel bus services. * * Copyright (c) Harald Koerfgen, 1998 - * Copyright (c) 2001, 2003, 2005, 2006 Maciej W. Rozycki + * Copyright (c) 2001, 2003, 2005, 2006, 2018 Maciej W. Rozycki * Copyright (c) 2005 James Simmons * * This file is subject to the terms and conditions of the GNU @@ -10,6 +10,7 @@ * directory of this archive for more details. */ #include +#include #include #include #include @@ -92,6 +93,11 @@ static void __init tc_bus_add_devices(struct tc_bus *tbus) tdev->dev.bus = &tc_bus_type; tdev->slot = slot; + /* TURBOchannel has 34-bit DMA addressing (16GiB space). */ + tdev->dma_mask = DMA_BIT_MASK(34); + tdev->dev.dma_mask = &tdev->dma_mask; + tdev->dev.coherent_dma_mask = DMA_BIT_MASK(34); + for (i = 0; i < 8; i++) { tdev->firmware[i] = readb(module + offset + TC_FIRM_VER + 4 * i); diff --git a/include/linux/tc.h b/include/linux/tc.h index f92511e57cdbb..a60639f379639 100644 --- a/include/linux/tc.h +++ b/include/linux/tc.h @@ -84,6 +84,7 @@ struct tc_dev { device. */ struct device dev; /* Generic device interface. */ struct resource resource; /* Address space of this device. */ + u64 dma_mask; /* DMA addressable range. */ char vendor[9]; char name[9]; char firmware[9]; -- GitLab From fdff1cf08931439f5807dcf336947c2adaeec37d Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Mon, 8 Oct 2018 15:08:27 -0400 Subject: [PATCH 1436/2269] media: v4l2-tpg: fix kernel oops when enabling HFLIP and OSD commit 250854eed5d45a73d81e4137dfd85180af6f2ec3 upstream. When the OSD is on (i.e. vivid displays text on top of the test pattern), and you enable hflip, then the driver crashes. The cause turned out to be a division of a negative number by an unsigned value. You expect that -8 / 2U would be -4, but in reality it is 2147483644 :-( Fixes: 3e14e7a82c1ef ("vivid-tpg: add hor/vert downsampling support to tpg_gen_text") Signed-off-by: Hans Verkuil Reported-by: Mauro Carvalho Chehab Cc: # for v4.1 and up Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/common/v4l2-tpg/v4l2-tpg-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/common/v4l2-tpg/v4l2-tpg-core.c b/drivers/media/common/v4l2-tpg/v4l2-tpg-core.c index a772976cfe263..a1aacd6fb96fe 100644 --- a/drivers/media/common/v4l2-tpg/v4l2-tpg-core.c +++ b/drivers/media/common/v4l2-tpg/v4l2-tpg-core.c @@ -1765,7 +1765,7 @@ typedef struct { u16 __; u8 _; } __packed x24; pos[7] = (chr & (0x01 << 0) ? fg : bg); \ } \ \ - pos += (tpg->hflip ? -8 : 8) / hdiv; \ + pos += (tpg->hflip ? -8 : 8) / (int)hdiv; \ } \ } \ } while (0) -- GitLab From 8305d98c58d238b82c0def1e4395724381c58c9e Mon Sep 17 00:00:00 2001 From: He Zhe Date: Fri, 17 Aug 2018 22:42:28 +0800 Subject: [PATCH 1437/2269] kgdboc: Passing ekgdboc to command line causes panic commit 1bd54d851f50dea6af30c3e6ff4f3e9aab5558f9 upstream. kgdboc_option_setup does not check input argument before passing it to strlen. The argument would be a NULL pointer if "ekgdboc", without its value, is set in command line and thus cause the following panic. PANIC: early exception 0xe3 IP 10:ffffffff8fbbb620 error 0 cr2 0x0 [ 0.000000] CPU: 0 PID: 0 Comm: swapper Not tainted 4.18-rc8+ #1 [ 0.000000] RIP: 0010:strlen+0x0/0x20 ... [ 0.000000] Call Trace [ 0.000000] ? kgdboc_option_setup+0x9/0xa0 [ 0.000000] ? kgdboc_early_init+0x6/0x1b [ 0.000000] ? do_early_param+0x4d/0x82 [ 0.000000] ? parse_args+0x212/0x330 [ 0.000000] ? rdinit_setup+0x26/0x26 [ 0.000000] ? parse_early_options+0x20/0x23 [ 0.000000] ? rdinit_setup+0x26/0x26 [ 0.000000] ? parse_early_param+0x2d/0x39 [ 0.000000] ? setup_arch+0x2f7/0xbf4 [ 0.000000] ? start_kernel+0x5e/0x4c2 [ 0.000000] ? load_ucode_bsp+0x113/0x12f [ 0.000000] ? secondary_startup_64+0xa5/0xb0 This patch adds a check to prevent the panic. Cc: stable@vger.kernel.org Cc: jason.wessel@windriver.com Cc: gregkh@linuxfoundation.org Cc: jslaby@suse.com Signed-off-by: He Zhe Reviewed-by: Daniel Thompson Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/kgdboc.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/tty/serial/kgdboc.c b/drivers/tty/serial/kgdboc.c index a260cde743e27..2db68dfe497df 100644 --- a/drivers/tty/serial/kgdboc.c +++ b/drivers/tty/serial/kgdboc.c @@ -133,6 +133,11 @@ static void kgdboc_unregister_kbd(void) static int kgdboc_option_setup(char *opt) { + if (!opt) { + pr_err("kgdboc: config string not provided\n"); + return -EINVAL; + } + if (strlen(opt) >= MAX_CONFIG_LEN) { printk(KERN_ERR "kgdboc: config string too long\n"); return -ENOSPC; -- GitLab From 034680fcf1e96977d0e80ff8a6ceb9c1b5326bac Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Thu, 8 Nov 2018 08:35:06 +0100 Subject: [PATCH 1438/2269] xen: fix xen_qlock_wait() commit d3132b3860f6cf35ff7609a76bbcdbb814bd027c upstream. Commit a856531951dc80 ("xen: make xen_qlock_wait() nestable") introduced a regression for Xen guests running fully virtualized (HVM or PVH mode). The Xen hypervisor wouldn't return from the poll hypercall with interrupts disabled in case of an interrupt (for PV guests it does). So instead of disabling interrupts in xen_qlock_wait() use a nesting counter to avoid calling xen_clear_irq_pending() in case xen_qlock_wait() is nested. Fixes: a856531951dc80 ("xen: make xen_qlock_wait() nestable") Cc: stable@vger.kernel.org Reported-by: Sander Eikelenboom Signed-off-by: Juergen Gross Reviewed-by: Boris Ostrovsky Tested-by: Sander Eikelenboom Signed-off-by: Juergen Gross Signed-off-by: Greg Kroah-Hartman --- arch/x86/xen/spinlock.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c index 3243fe1b6ea4a..2527540051ff0 100644 --- a/arch/x86/xen/spinlock.c +++ b/arch/x86/xen/spinlock.c @@ -9,6 +9,7 @@ #include #include #include +#include #include @@ -20,6 +21,7 @@ static DEFINE_PER_CPU(int, lock_kicker_irq) = -1; static DEFINE_PER_CPU(char *, irq_name); +static DEFINE_PER_CPU(atomic_t, xen_qlock_wait_nest); static bool xen_pvspin = true; #include @@ -40,25 +42,25 @@ static void xen_qlock_kick(int cpu) */ static void xen_qlock_wait(u8 *byte, u8 val) { - unsigned long flags; int irq = __this_cpu_read(lock_kicker_irq); + atomic_t *nest_cnt = this_cpu_ptr(&xen_qlock_wait_nest); /* If kicker interrupts not initialized yet, just spin */ if (irq == -1 || in_nmi()) return; - /* Guard against reentry. */ - local_irq_save(flags); + /* Detect reentry. */ + atomic_inc(nest_cnt); - /* If irq pending already clear it. */ - if (xen_test_irq_pending(irq)) { + /* If irq pending already and no nested call clear it. */ + if (atomic_read(nest_cnt) == 1 && xen_test_irq_pending(irq)) { xen_clear_irq_pending(irq); } else if (READ_ONCE(*byte) == val) { /* Block until irq becomes pending (or a spurious wakeup) */ xen_poll_irq(irq); } - local_irq_restore(flags); + atomic_dec(nest_cnt); } static irqreturn_t dummy_handler(int irq, void *dev_id) -- GitLab From 4e6d30de2e44fab869078194a6b1686659579a70 Mon Sep 17 00:00:00 2001 From: Manjunath Patil Date: Tue, 30 Oct 2018 09:49:21 -0700 Subject: [PATCH 1439/2269] xen-blkfront: fix kernel panic with negotiate_mq error path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 6cc4a0863c9709c512280c64e698d68443ac8053 upstream. info->nr_rings isn't adjusted in case of ENOMEM error from negotiate_mq(). This leads to kernel panic in error path. Typical call stack involving panic - #8 page_fault at ffffffff8175936f [exception RIP: blkif_free_ring+33] RIP: ffffffffa0149491 RSP: ffff8804f7673c08 RFLAGS: 00010292 ... #9 blkif_free at ffffffffa0149aaa [xen_blkfront] #10 talk_to_blkback at ffffffffa014c8cd [xen_blkfront] #11 blkback_changed at ffffffffa014ea8b [xen_blkfront] #12 xenbus_otherend_changed at ffffffff81424670 #13 backend_changed at ffffffff81426dc3 #14 xenwatch_thread at ffffffff81422f29 #15 kthread at ffffffff810abe6a #16 ret_from_fork at ffffffff81754078 Cc: stable@vger.kernel.org Fixes: 7ed8ce1c5fc7 ("xen-blkfront: move negotiate_mq to cover all cases of new VBDs") Signed-off-by: Manjunath Patil Acked-by: Roger Pau Monné Signed-off-by: Juergen Gross Signed-off-by: Greg Kroah-Hartman --- drivers/block/xen-blkfront.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 4b2dcb65b011f..32ac5f551e55f 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -1910,6 +1910,7 @@ static int negotiate_mq(struct blkfront_info *info) info->rinfo = kzalloc(sizeof(struct blkfront_ring_info) * info->nr_rings, GFP_KERNEL); if (!info->rinfo) { xenbus_dev_fatal(info->xbdev, -ENOMEM, "allocating ring_info structure"); + info->nr_rings = 0; return -ENOMEM; } -- GitLab From 1f1eb8444ba04c530c2e6f024a77e94782a6279f Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Thu, 13 Sep 2018 23:22:40 -0400 Subject: [PATCH 1440/2269] media: em28xx: use a default format if TRY_FMT fails commit f823ce2a1202d47110a7ef86b65839f0be8adc38 upstream. Follow the V4L2 spec, as warned by v4l2-compliance: warn: v4l2-test-formats.cpp(732): TRY_FMT cannot handle an invalid pixelformat. warn: v4l2-test-formats.cpp(733): This may or may not be a problem. For more information see: warn: v4l2-test-formats.cpp(734): http://www.mail-archive.com/linux-media@vger.kernel.org/msg56550.html Cc: stable@vger.kernel.org Fixes: bddcf63313c6 ("V4L/DVB (9927): em28xx: use a more standard way to specify video formats") Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/usb/em28xx/em28xx-video.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/media/usb/em28xx/em28xx-video.c b/drivers/media/usb/em28xx/em28xx-video.c index 8d253a5df0a9f..73d8ae3d9bcf8 100644 --- a/drivers/media/usb/em28xx/em28xx-video.c +++ b/drivers/media/usb/em28xx/em28xx-video.c @@ -1445,9 +1445,9 @@ static int vidioc_try_fmt_vid_cap(struct file *file, void *priv, fmt = format_by_fourcc(f->fmt.pix.pixelformat); if (!fmt) { - em28xx_videodbg("Fourcc format (%08x) invalid.\n", - f->fmt.pix.pixelformat); - return -EINVAL; + fmt = &format[0]; + em28xx_videodbg("Fourcc format (%08x) invalid. Using default (%08x).\n", + f->fmt.pix.pixelformat, fmt->fourcc); } if (dev->board.is_em2800) { -- GitLab From 3dfd975bb1c360932ec718bf83f65bad594e964c Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Thu, 13 Sep 2018 16:49:51 -0400 Subject: [PATCH 1441/2269] media: tvp5150: avoid going past array on v4l2_querymenu() commit 5c4c4505b716cb782ad7263091edc466c4d1fbd4 upstream. The parameters of v4l2_ctrl_new_std_menu_items() are tricky: instead of the number of possible values, it requires the number of the maximum value. In other words, the ARRAY_SIZE() value should be decremented, otherwise it will go past the array bounds, as warned by KASAN: [ 279.839688] BUG: KASAN: global-out-of-bounds in v4l2_querymenu+0x10d/0x180 [videodev] [ 279.839709] Read of size 8 at addr ffffffffc10a4cb0 by task v4l2-compliance/16676 [ 279.839736] CPU: 1 PID: 16676 Comm: v4l2-compliance Not tainted 4.18.0-rc2+ #120 [ 279.839741] Hardware name: /NUC5i7RYB, BIOS RYBDWi35.86A.0364.2017.0511.0949 05/11/2017 [ 279.839743] Call Trace: [ 279.839758] dump_stack+0x71/0xab [ 279.839807] ? v4l2_querymenu+0x10d/0x180 [videodev] [ 279.839817] print_address_description+0x1c9/0x270 [ 279.839863] ? v4l2_querymenu+0x10d/0x180 [videodev] [ 279.839871] kasan_report+0x237/0x360 [ 279.839918] v4l2_querymenu+0x10d/0x180 [videodev] [ 279.839964] __video_do_ioctl+0x2c8/0x590 [videodev] [ 279.840011] ? copy_overflow+0x20/0x20 [videodev] [ 279.840020] ? avc_ss_reset+0xa0/0xa0 [ 279.840028] ? check_stack_object+0x21/0x60 [ 279.840036] ? __check_object_size+0xe7/0x240 [ 279.840080] video_usercopy+0xed/0x730 [videodev] [ 279.840123] ? copy_overflow+0x20/0x20 [videodev] [ 279.840167] ? v4l_enumstd+0x40/0x40 [videodev] [ 279.840177] ? __handle_mm_fault+0x9f9/0x1ba0 [ 279.840186] ? __pmd_alloc+0x2c0/0x2c0 [ 279.840193] ? __vfs_write+0xb6/0x350 [ 279.840200] ? kernel_read+0xa0/0xa0 [ 279.840244] ? video_usercopy+0x730/0x730 [videodev] [ 279.840284] v4l2_ioctl+0xa1/0xb0 [videodev] [ 279.840295] do_vfs_ioctl+0x117/0x8a0 [ 279.840303] ? selinux_file_ioctl+0x211/0x2f0 [ 279.840313] ? ioctl_preallocate+0x120/0x120 [ 279.840319] ? selinux_capable+0x20/0x20 [ 279.840332] ksys_ioctl+0x70/0x80 [ 279.840342] __x64_sys_ioctl+0x3d/0x50 [ 279.840351] do_syscall_64+0x6d/0x1c0 [ 279.840361] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 279.840367] RIP: 0033:0x7fdfb46275d7 [ 279.840369] Code: b3 66 90 48 8b 05 b1 48 2d 00 64 c7 00 26 00 00 00 48 c7 c0 ff ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 b8 10 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 81 48 2d 00 f7 d8 64 89 01 48 [ 279.840474] RSP: 002b:00007ffee1179038 EFLAGS: 00000202 ORIG_RAX: 0000000000000010 [ 279.840483] RAX: ffffffffffffffda RBX: 00007ffee1179180 RCX: 00007fdfb46275d7 [ 279.840488] RDX: 00007ffee11790c0 RSI: 00000000c02c5625 RDI: 0000000000000003 [ 279.840493] RBP: 0000000000000002 R08: 0000000000000020 R09: 00000000009f0902 [ 279.840497] R10: 0000000000000000 R11: 0000000000000202 R12: 00007ffee117a5a0 [ 279.840501] R13: 00007ffee11790c0 R14: 0000000000000002 R15: 0000000000000000 [ 279.840515] The buggy address belongs to the variable: [ 279.840535] tvp5150_test_patterns+0x10/0xffffffffffffe360 [tvp5150] Fixes: c43875f66140 ("[media] tvp5150: replace MEDIA_ENT_F_CONN_TEST by a control") Cc: stable@vger.kernel.org Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/i2c/tvp5150.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/i2c/tvp5150.c b/drivers/media/i2c/tvp5150.c index 59b0c1fce9be7..4d3e97f97c76b 100644 --- a/drivers/media/i2c/tvp5150.c +++ b/drivers/media/i2c/tvp5150.c @@ -1530,7 +1530,7 @@ static int tvp5150_probe(struct i2c_client *c, 27000000, 1, 27000000); v4l2_ctrl_new_std_menu_items(&core->hdl, &tvp5150_ctrl_ops, V4L2_CID_TEST_PATTERN, - ARRAY_SIZE(tvp5150_test_patterns), + ARRAY_SIZE(tvp5150_test_patterns) - 1, 0, 0, tvp5150_test_patterns); sd->ctrl_handler = &core->hdl; if (core->hdl.error) { -- GitLab From a4e311ed2390394d32806d5fcca5542505f5f47f Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 14 Sep 2018 00:20:21 -0400 Subject: [PATCH 1442/2269] media: em28xx: fix input name for Terratec AV 350 commit 15644bfa195bd166d0a5ed76ae2d587f719c3dac upstream. Instead of using a register value, use an AMUX name, as otherwise VIDIOC_G_AUDIO would fail. Cc: stable@vger.kernel.org Fixes: 766ed64de554 ("V4L/DVB (11827): Add support for Terratec Grabster AV350") Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/usb/em28xx/em28xx-cards.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/media/usb/em28xx/em28xx-cards.c b/drivers/media/usb/em28xx/em28xx-cards.c index 11a59854a0a62..9747e23aad271 100644 --- a/drivers/media/usb/em28xx/em28xx-cards.c +++ b/drivers/media/usb/em28xx/em28xx-cards.c @@ -2112,13 +2112,13 @@ struct em28xx_board em28xx_boards[] = { .input = { { .type = EM28XX_VMUX_COMPOSITE, .vmux = TVP5150_COMPOSITE1, - .amux = EM28XX_AUDIO_SRC_LINE, + .amux = EM28XX_AMUX_LINE_IN, .gpio = terratec_av350_unmute_gpio, }, { .type = EM28XX_VMUX_SVIDEO, .vmux = TVP5150_SVIDEO, - .amux = EM28XX_AUDIO_SRC_LINE, + .amux = EM28XX_AMUX_LINE_IN, .gpio = terratec_av350_unmute_gpio, } }, }, -- GitLab From 453a8a459cc2cd6c975449c04e0720fe118e4056 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Thu, 13 Sep 2018 22:46:29 -0400 Subject: [PATCH 1443/2269] media: em28xx: make v4l2-compliance happier by starting sequence on zero commit afeaade90db4c5dab93f326d9582be1d5954a198 upstream. The v4l2-compliance tool complains if a video doesn't start with a zero sequence number. While this shouldn't cause any real problem for apps, let's make it happier, in order to better check the v4l2-compliance differences before and after patchsets. This is actually an old issue. It is there since at least its videobuf2 conversion, e. g. changeset 3829fadc461 ("[media] em28xx: convert to videobuf2"), if VB1 wouldn't suffer from the same issue. Cc: stable@vger.kernel.org Fixes: d3829fadc461 ("[media] em28xx: convert to videobuf2") Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/usb/em28xx/em28xx-video.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/media/usb/em28xx/em28xx-video.c b/drivers/media/usb/em28xx/em28xx-video.c index 73d8ae3d9bcf8..92a74bc34527b 100644 --- a/drivers/media/usb/em28xx/em28xx-video.c +++ b/drivers/media/usb/em28xx/em28xx-video.c @@ -900,6 +900,8 @@ static int em28xx_enable_analog_tuner(struct em28xx *dev) if (!mdev || !v4l2->decoder) return 0; + dev->v4l2->field_count = 0; + /* * This will find the tuner that is connected into the decoder. * Technically, this is not 100% correct, as the device may be -- GitLab From baf1746da59e310f8e7d016090e2d447e867828a Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Thu, 13 Sep 2018 07:47:28 -0400 Subject: [PATCH 1444/2269] media: media colorspaces*.rst: rename AdobeRGB to opRGB commit a58c37978cf02f6d35d05ee4e9288cb8455f1401 upstream. Drop all Adobe references and use the official opRGB standard instead. Signed-off-by: Hans Verkuil Cc: stable@vger.kernel.org Acked-by: Daniel Vetter Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- Documentation/media/uapi/v4l/biblio.rst | 10 ---------- Documentation/media/uapi/v4l/colorspaces-defs.rst | 8 ++++---- .../media/uapi/v4l/colorspaces-details.rst | 13 ++++++------- 3 files changed, 10 insertions(+), 21 deletions(-) diff --git a/Documentation/media/uapi/v4l/biblio.rst b/Documentation/media/uapi/v4l/biblio.rst index 1cedcfc043273..386d6cf83e9ca 100644 --- a/Documentation/media/uapi/v4l/biblio.rst +++ b/Documentation/media/uapi/v4l/biblio.rst @@ -226,16 +226,6 @@ xvYCC :author: International Electrotechnical Commission (http://www.iec.ch) -.. _adobergb: - -AdobeRGB -======== - - -:title: Adobe© RGB (1998) Color Image Encoding Version 2005-05 - -:author: Adobe Systems Incorporated (http://www.adobe.com) - .. _oprgb: opRGB diff --git a/Documentation/media/uapi/v4l/colorspaces-defs.rst b/Documentation/media/uapi/v4l/colorspaces-defs.rst index 410907fe9415e..f24615544792b 100644 --- a/Documentation/media/uapi/v4l/colorspaces-defs.rst +++ b/Documentation/media/uapi/v4l/colorspaces-defs.rst @@ -51,8 +51,8 @@ whole range, 0-255, dividing the angular value by 1.41. The enum - See :ref:`col-rec709`. * - ``V4L2_COLORSPACE_SRGB`` - See :ref:`col-srgb`. - * - ``V4L2_COLORSPACE_ADOBERGB`` - - See :ref:`col-adobergb`. + * - ``V4L2_COLORSPACE_OPRGB`` + - See :ref:`col-oprgb`. * - ``V4L2_COLORSPACE_BT2020`` - See :ref:`col-bt2020`. * - ``V4L2_COLORSPACE_DCI_P3`` @@ -90,8 +90,8 @@ whole range, 0-255, dividing the angular value by 1.41. The enum - Use the Rec. 709 transfer function. * - ``V4L2_XFER_FUNC_SRGB`` - Use the sRGB transfer function. - * - ``V4L2_XFER_FUNC_ADOBERGB`` - - Use the AdobeRGB transfer function. + * - ``V4L2_XFER_FUNC_OPRGB`` + - Use the opRGB transfer function. * - ``V4L2_XFER_FUNC_SMPTE240M`` - Use the SMPTE 240M transfer function. * - ``V4L2_XFER_FUNC_NONE`` diff --git a/Documentation/media/uapi/v4l/colorspaces-details.rst b/Documentation/media/uapi/v4l/colorspaces-details.rst index b5d551b9cc8f8..09fabf4cd4126 100644 --- a/Documentation/media/uapi/v4l/colorspaces-details.rst +++ b/Documentation/media/uapi/v4l/colorspaces-details.rst @@ -290,15 +290,14 @@ Y' is clamped to the range [0…1] and Cb and Cr are clamped to the range 170M/BT.601. The Y'CbCr quantization is limited range. -.. _col-adobergb: +.. _col-oprgb: -Colorspace Adobe RGB (V4L2_COLORSPACE_ADOBERGB) +Colorspace opRGB (V4L2_COLORSPACE_OPRGB) =============================================== -The :ref:`adobergb` standard defines the colorspace used by computer -graphics that use the AdobeRGB colorspace. This is also known as the -:ref:`oprgb` standard. The default transfer function is -``V4L2_XFER_FUNC_ADOBERGB``. The default Y'CbCr encoding is +The :ref:`oprgb` standard defines the colorspace used by computer +graphics that use the opRGB colorspace. The default transfer function is +``V4L2_XFER_FUNC_OPRGB``. The default Y'CbCr encoding is ``V4L2_YCBCR_ENC_601``. The default Y'CbCr quantization is limited range. @@ -312,7 +311,7 @@ The chromaticities of the primary colors and the white reference are: .. tabularcolumns:: |p{4.4cm}|p{4.4cm}|p{8.7cm}| -.. flat-table:: Adobe RGB Chromaticities +.. flat-table:: opRGB Chromaticities :header-rows: 1 :stub-columns: 0 :widths: 1 1 2 -- GitLab From ad2c712b50597ab70cdf9c75101518a6837d0e69 Mon Sep 17 00:00:00 2001 From: Tri Vo Date: Wed, 19 Sep 2018 12:27:50 -0700 Subject: [PATCH 1445/2269] arm64: lse: remove -fcall-used-x0 flag commit 2a6c7c367de82951c98a290a21156770f6f82c84 upstream. x0 is not callee-saved in the PCS. So there is no need to specify -fcall-used-x0. Clang doesn't currently support -fcall-used flags. This patch will help building the kernel with clang. Tested-by: Nick Desaulniers Acked-by: Will Deacon Signed-off-by: Tri Vo Signed-off-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman --- arch/arm64/lib/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/lib/Makefile b/arch/arm64/lib/Makefile index 9a8cb96555d6b..9a947afaf74c0 100644 --- a/arch/arm64/lib/Makefile +++ b/arch/arm64/lib/Makefile @@ -12,7 +12,7 @@ lib-y := bitops.o clear_user.o delay.o copy_from_user.o \ # when supported by the CPU. Result and argument registers are handled # correctly, based on the function prototype. lib-$(CONFIG_ARM64_LSE_ATOMICS) += atomic_ll_sc.o -CFLAGS_atomic_ll_sc.o := -fcall-used-x0 -ffixed-x1 -ffixed-x2 \ +CFLAGS_atomic_ll_sc.o := -ffixed-x1 -ffixed-x2 \ -ffixed-x3 -ffixed-x4 -ffixed-x5 -ffixed-x6 \ -ffixed-x7 -fcall-saved-x8 -fcall-saved-x9 \ -fcall-saved-x10 -fcall-saved-x11 -fcall-saved-x12 \ -- GitLab From 7f46d951fd3389089500623a9f026e4d25be823a Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 27 Sep 2018 22:36:27 +0100 Subject: [PATCH 1446/2269] rpmsg: smd: fix memory leak on channel create commit 940c620d6af8fca7d115de40f19870fba415efac upstream. Currently a failed allocation of channel->name leads to an immediate return without freeing channel. Fix this by setting ret to -ENOMEM and jumping to an exit path that kfree's channel. Detected by CoverityScan, CID#1473692 ("Resource Leak") Fixes: 53e2822e56c7 ("rpmsg: Introduce Qualcomm SMD backend") Cc: stable@vger.kernel.org Signed-off-by: Colin Ian King Signed-off-by: Bjorn Andersson Signed-off-by: Greg Kroah-Hartman --- drivers/rpmsg/qcom_smd.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/rpmsg/qcom_smd.c b/drivers/rpmsg/qcom_smd.c index f1a2147a6d842..72d02bfeda9e9 100644 --- a/drivers/rpmsg/qcom_smd.c +++ b/drivers/rpmsg/qcom_smd.c @@ -1049,8 +1049,10 @@ static struct qcom_smd_channel *qcom_smd_create_channel(struct qcom_smd_edge *ed channel->edge = edge; channel->name = kstrdup(name, GFP_KERNEL); - if (!channel->name) - return ERR_PTR(-ENOMEM); + if (!channel->name) { + ret = -ENOMEM; + goto free_channel; + } mutex_init(&channel->tx_lock); spin_lock_init(&channel->recv_lock); @@ -1099,6 +1101,7 @@ static struct qcom_smd_channel *qcom_smd_create_channel(struct qcom_smd_edge *ed free_name_and_channel: kfree(channel->name); +free_channel: kfree(channel); return ERR_PTR(ret); -- GitLab From fdd780d0a26070e59fe2c3690a776aad25227756 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Tue, 30 Oct 2018 13:26:15 -0400 Subject: [PATCH 1447/2269] Cramfs: fix abad comparison when wrap-arounds occur commit 672ca9dd13f1aca0c17516f76fc5b0e8344b3e46 upstream. It is possible for corrupted filesystem images to produce very large block offsets that may wrap when a length is added, and wrongly pass the buffer size test. Reported-by: Anatoly Trosinenko Signed-off-by: Nicolas Pitre Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/cramfs/inode.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c index 7919967488cbd..011c6f53dcda3 100644 --- a/fs/cramfs/inode.c +++ b/fs/cramfs/inode.c @@ -186,7 +186,8 @@ static void *cramfs_read(struct super_block *sb, unsigned int offset, unsigned i continue; blk_offset = (blocknr - buffer_blocknr[i]) << PAGE_SHIFT; blk_offset += offset; - if (blk_offset + len > BUFFER_SIZE) + if (blk_offset > BUFFER_SIZE || + blk_offset + len > BUFFER_SIZE) continue; return read_buffers[i] + blk_offset; } -- GitLab From 540631d9e690431fdc214b98e2f623838d84b03c Mon Sep 17 00:00:00 2001 From: Thor Thayer Date: Tue, 25 Sep 2018 10:21:10 -0500 Subject: [PATCH 1448/2269] ARM: dts: socfpga: Fix SDRAM node address for Arria10 commit ce3bf934f919a7d675c5b7fa4cc233ded9c6256e upstream. The address in the SDRAM node was incorrect. Fix this to agree with the correct address and to match the reg definition block. Cc: stable@vger.kernel.org Fixes: 54b4a8f57848b("arm: socfpga: dts: Add Arria10 SDRAM EDAC DTS support") Signed-off-by: Thor Thayer Signed-off-by: Dinh Nguyen Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/socfpga_arria10.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/socfpga_arria10.dtsi b/arch/arm/boot/dts/socfpga_arria10.dtsi index 791ca15c799eb..bd1985694bcae 100644 --- a/arch/arm/boot/dts/socfpga_arria10.dtsi +++ b/arch/arm/boot/dts/socfpga_arria10.dtsi @@ -601,7 +601,7 @@ status = "disabled"; }; - sdr: sdr@ffc25000 { + sdr: sdr@ffcfb100 { compatible = "altr,sdr-ctl", "syscon"; reg = <0xffcfb100 0x80>; }; -- GitLab From 1258e45dc460aa29dd70fd715fd1624b7ab93ef1 Mon Sep 17 00:00:00 2001 From: Thor Thayer Date: Tue, 25 Sep 2018 10:31:52 -0500 Subject: [PATCH 1449/2269] arm64: dts: stratix10: Correct System Manager register size commit 74121b9aa3cd571ddfff014a9f47db36cae3cda9 upstream. Correct the register size of the System Manager node. Cc: stable@vger.kernel.org Fixes: 78cd6a9d8e154 ("arm64: dts: Add base stratix 10 dtsi") Signed-off-by: Thor Thayer Signed-off-by: Dinh Nguyen Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi b/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi index c2b9bcb0ef61a..e79f3defe0021 100644 --- a/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi +++ b/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi @@ -249,7 +249,7 @@ sysmgr: sysmgr@ffd12000 { compatible = "altr,sys-mgr", "syscon"; - reg = <0xffd12000 0x1000>; + reg = <0xffd12000 0x228>; }; /* Local timer */ -- GitLab From 59bc2d3a68ad99c72972caadfa350e758af65e8b Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 15 Nov 2017 10:44:58 +0100 Subject: [PATCH 1450/2269] soc/tegra: pmc: Fix child-node lookup commit 1dc6bd5e39a29453bdcc17348dd2a89f1aa4004e upstream. Fix child-node lookup during probe, which ended up searching the whole device tree depth-first starting at the parent rather than just matching on its children. To make things worse, the parent pmc node could end up being prematurely freed as of_find_node_by_name() drops a reference to its first argument. Fixes: 3568df3d31d6 ("soc: tegra: Add thermal reset (thermtrip) support to PMC") Cc: stable # 4.0 Cc: Mikko Perttunen Signed-off-by: Johan Hovold Reviewed-by: Mikko Perttunen Signed-off-by: Thierry Reding Signed-off-by: Greg Kroah-Hartman --- drivers/soc/tegra/pmc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/soc/tegra/pmc.c b/drivers/soc/tegra/pmc.c index 0453ff6839a7e..7e9ef3431bea4 100644 --- a/drivers/soc/tegra/pmc.c +++ b/drivers/soc/tegra/pmc.c @@ -1321,7 +1321,7 @@ static void tegra_pmc_init_tsense_reset(struct tegra_pmc *pmc) if (!pmc->soc->has_tsense_reset) return; - np = of_find_node_by_name(pmc->dev->of_node, "i2c-thermtrip"); + np = of_get_child_by_name(pmc->dev->of_node, "i2c-thermtrip"); if (!np) { dev_warn(dev, "i2c-thermtrip node not found, %s.\n", disabled); return; -- GitLab From e09399e8047e4cf7d6df0b3029ce4b13ab5970eb Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Mon, 22 Oct 2018 18:16:26 -0300 Subject: [PATCH 1451/2269] selftests/powerpc: Fix ptrace tm failure commit 48dc0ef19044bfb69193302fbe3a834e3331b7ae upstream. Test ptrace-tm-spd-gpr fails on current kernel (4.19) due to a segmentation fault that happens on the child process prior to setting cptr[2] = 1. This causes the parent process to wait forever at 'while (!pptr[2])' and the test to be killed by the test harness framework by timeout, thus, failing. The segmentation fault happens because of a inline assembly being generated as: 0x10000355c lfs f0, 0(0) This is reading memory position 0x0 and causing the segmentation fault. This code is being generated by ASM_LOAD_FPR_SINGLE_PRECISION(flt_4), where flt_4 is passed to the inline assembly block as: [flt_4] "r" (&d) Since the inline assembly 'r' constraint means any GPR, gpr0 is being chosen, thus causing this issue when issuing a Load Floating-Point Single instruction. This patch simply changes the constraint to 'b', which specify that this register will be used as base, and r0 is not allowed to be used, avoiding this issue. Other than that, removing flt_2 register from the input operands, since it is not used by the inline assembly code at all. Cc: stable@vger.kernel.org Signed-off-by: Breno Leitao Acked-by: Segher Boessenkool Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-gpr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-gpr.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-gpr.c index 327fa943c7f36..dbdffa2e2c824 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-gpr.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-gpr.c @@ -67,8 +67,8 @@ trans: "3: ;" : [res] "=r" (result), [texasr] "=r" (texasr) : [gpr_1]"i"(GPR_1), [gpr_2]"i"(GPR_2), [gpr_4]"i"(GPR_4), - [sprn_texasr] "i" (SPRN_TEXASR), [flt_1] "r" (&a), - [flt_2] "r" (&b), [flt_4] "r" (&d) + [sprn_texasr] "i" (SPRN_TEXASR), [flt_1] "b" (&a), + [flt_4] "b" (&d) : "memory", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23", -- GitLab From 2f7e525a25e10e66f87b2329b1b8ab4d311e5f4e Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Tue, 9 Oct 2018 14:36:45 +0800 Subject: [PATCH 1452/2269] btrfs: qgroup: Avoid calling qgroup functions if qgroup is not enabled commit 3628b4ca64f24a4ec55055597d0cb1c814729f8b upstream. Some qgroup trace events like btrfs_qgroup_release_data() and btrfs_qgroup_free_delayed_ref() can still be triggered even if qgroup is not enabled. This is caused by the lack of qgroup status check before calling some qgroup functions. Thankfully the functions can handle quota disabled case well and just do nothing for qgroup disabled case. This patch will do earlier check before triggering related trace events. And for enabled <-> disabled race case: 1) For enabled->disabled case Disable will wipe out all qgroups data including reservation and excl/rfer. Even if we leak some reservation or numbers, it will still be cleared, so nothing will go wrong. 2) For disabled -> enabled case Current btrfs_qgroup_release_data() will use extent_io tree to ensure we won't underflow reservation. And for delayed_ref we use head->qgroup_reserved to record the reserved space, so in that case head->qgroup_reserved should be 0 and we won't underflow. CC: stable@vger.kernel.org # 4.14+ Reported-by: Chris Murphy Link: https://lore.kernel.org/linux-btrfs/CAJCQCtQau7DtuUUeycCkZ36qjbKuxNzsgqJ7+sJ6W0dK_NLE3w@mail.gmail.com/ Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/qgroup.c | 4 ++++ fs/btrfs/qgroup.h | 2 ++ 2 files changed, 6 insertions(+) diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 47dec283628dd..1b647fa932091 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -2972,6 +2972,10 @@ static int __btrfs_qgroup_release_data(struct inode *inode, int trace_op = QGROUP_RELEASE; int ret; + if (!test_bit(BTRFS_FS_QUOTA_ENABLED, + &BTRFS_I(inode)->root->fs_info->flags)) + return 0; + /* In release case, we shouldn't have @reserved */ WARN_ON(!free && reserved); if (free && reserved) diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h index d9984e87cddfc..83483ade3b19d 100644 --- a/fs/btrfs/qgroup.h +++ b/fs/btrfs/qgroup.h @@ -232,6 +232,8 @@ void btrfs_qgroup_free_refroot(struct btrfs_fs_info *fs_info, static inline void btrfs_qgroup_free_delayed_ref(struct btrfs_fs_info *fs_info, u64 ref_root, u64 num_bytes) { + if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) + return; trace_btrfs_qgroup_free_delayed_ref(fs_info, ref_root, num_bytes); btrfs_qgroup_free_refroot(fs_info, ref_root, num_bytes); } -- GitLab From 61e3643dc51e8368a8042252fa1b3cca00784923 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Tue, 21 Aug 2018 09:42:03 +0800 Subject: [PATCH 1453/2269] btrfs: Handle owner mismatch gracefully when walking up tree commit 65c6e82becec33731f48786e5a30f98662c86b16 upstream. [BUG] When mounting certain crafted image, btrfs will trigger kernel BUG_ON() when trying to recover balance: kernel BUG at fs/btrfs/extent-tree.c:8956! invalid opcode: 0000 [#1] PREEMPT SMP NOPTI CPU: 1 PID: 662 Comm: mount Not tainted 4.18.0-rc1-custom+ #10 RIP: 0010:walk_up_proc+0x336/0x480 [btrfs] RSP: 0018:ffffb53540c9b890 EFLAGS: 00010202 Call Trace: walk_up_tree+0x172/0x1f0 [btrfs] btrfs_drop_snapshot+0x3a4/0x830 [btrfs] merge_reloc_roots+0xe1/0x1d0 [btrfs] btrfs_recover_relocation+0x3ea/0x420 [btrfs] open_ctree+0x1af3/0x1dd0 [btrfs] btrfs_mount_root+0x66b/0x740 [btrfs] mount_fs+0x3b/0x16a vfs_kern_mount.part.9+0x54/0x140 btrfs_mount+0x16d/0x890 [btrfs] mount_fs+0x3b/0x16a vfs_kern_mount.part.9+0x54/0x140 do_mount+0x1fd/0xda0 ksys_mount+0xba/0xd0 __x64_sys_mount+0x21/0x30 do_syscall_64+0x60/0x210 entry_SYSCALL_64_after_hwframe+0x49/0xbe [CAUSE] Extent tree corruption. In this particular case, reloc tree root's owner is DATA_RELOC_TREE (should be TREE_RELOC), thus its backref is corrupted and we failed the owner check in walk_up_tree(). [FIX] It's pretty hard to take care of every extent tree corruption, but at least we can remove such BUG_ON() and exit more gracefully. And since in this particular image, DATA_RELOC_TREE and TREE_RELOC share the same root (which is obviously invalid), we needs to make __del_reloc_root() more robust to detect such invalid sharing to avoid possible NULL dereference as root->node can be NULL in this case. Link: https://bugzilla.kernel.org/show_bug.cgi?id=200411 Reported-by: Xu Wen CC: stable@vger.kernel.org # 4.4+ Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/extent-tree.c | 18 ++++++++++++------ fs/btrfs/relocation.c | 2 +- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index f96f72659693a..1fe62225bc31f 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -9028,15 +9028,14 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans, if (eb == root->node) { if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) parent = eb->start; - else - BUG_ON(root->root_key.objectid != - btrfs_header_owner(eb)); + else if (root->root_key.objectid != btrfs_header_owner(eb)) + goto owner_mismatch; } else { if (wc->flags[level + 1] & BTRFS_BLOCK_FLAG_FULL_BACKREF) parent = path->nodes[level + 1]->start; - else - BUG_ON(root->root_key.objectid != - btrfs_header_owner(path->nodes[level + 1])); + else if (root->root_key.objectid != + btrfs_header_owner(path->nodes[level + 1])) + goto owner_mismatch; } btrfs_free_tree_block(trans, root, eb, parent, wc->refs[level] == 1); @@ -9044,6 +9043,11 @@ out: wc->refs[level] = 0; wc->flags[level] = 0; return 0; + +owner_mismatch: + btrfs_err_rl(fs_info, "unexpected tree owner, have %llu expect %llu", + btrfs_header_owner(eb), root->root_key.objectid); + return -EUCLEAN; } static noinline int walk_down_tree(struct btrfs_trans_handle *trans, @@ -9097,6 +9101,8 @@ static noinline int walk_up_tree(struct btrfs_trans_handle *trans, ret = walk_up_proc(trans, root, path, wc); if (ret > 0) return 0; + if (ret < 0) + return ret; if (path->locks[level]) { btrfs_tree_unlock_rw(path->nodes[level], diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index b80b03e0c5d3f..eeae2c3ab17e5 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -1334,7 +1334,7 @@ static void __del_reloc_root(struct btrfs_root *root) struct mapping_node *node = NULL; struct reloc_control *rc = fs_info->reloc_ctl; - if (rc) { + if (rc && root->node) { spin_lock(&rc->reloc_root_tree.lock); rb_node = tree_search(&rc->reloc_root_tree.rb_root, root->node->start); -- GitLab From d36ac4a32275f6c551c9e2d0284b3e0ef79623ec Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Tue, 21 Aug 2018 09:53:47 +0800 Subject: [PATCH 1454/2269] btrfs: locking: Add extra check in btrfs_init_new_buffer() to avoid deadlock commit b72c3aba09a53fc7c1824250d71180ca154517a7 upstream. [BUG] For certain crafted image, whose csum root leaf has missing backref, if we try to trigger write with data csum, it could cause deadlock with the following kernel WARN_ON(): WARNING: CPU: 1 PID: 41 at fs/btrfs/locking.c:230 btrfs_tree_lock+0x3e2/0x400 CPU: 1 PID: 41 Comm: kworker/u4:1 Not tainted 4.18.0-rc1+ #8 Workqueue: btrfs-endio-write btrfs_endio_write_helper RIP: 0010:btrfs_tree_lock+0x3e2/0x400 Call Trace: btrfs_alloc_tree_block+0x39f/0x770 __btrfs_cow_block+0x285/0x9e0 btrfs_cow_block+0x191/0x2e0 btrfs_search_slot+0x492/0x1160 btrfs_lookup_csum+0xec/0x280 btrfs_csum_file_blocks+0x2be/0xa60 add_pending_csums+0xaf/0xf0 btrfs_finish_ordered_io+0x74b/0xc90 finish_ordered_fn+0x15/0x20 normal_work_helper+0xf6/0x500 btrfs_endio_write_helper+0x12/0x20 process_one_work+0x302/0x770 worker_thread+0x81/0x6d0 kthread+0x180/0x1d0 ret_from_fork+0x35/0x40 [CAUSE] That crafted image has missing backref for csum tree root leaf. And when we try to allocate new tree block, since there is no EXTENT/METADATA_ITEM for csum tree root, btrfs consider it's free slot and use it. The extent tree of the image looks like: Normal image | This fuzzed image ----------------------------------+-------------------------------- BG 29360128 | BG 29360128 One empty slot | One empty slot 29364224: backref to UUID tree | 29364224: backref to UUID tree Two empty slots | Two empty slots 29376512: backref to CSUM tree | One empty slot (bad type) <<< 29380608: backref to D_RELOC tree | 29380608: backref to D_RELOC tree ... | ... Since bytenr 29376512 has no METADATA/EXTENT_ITEM, when btrfs try to alloc tree block, it's an valid slot for btrfs. And for finish_ordered_write, when we need to insert csum, we try to CoW csum tree root. By accident, empty slots at bytenr BG_OFFSET, BG_OFFSET + 8K, BG_OFFSET + 12K is already used by tree block COW for other trees, the next empty slot is BG_OFFSET + 16K, which should be the backref for CSUM tree. But due to the bad type, btrfs can recognize it and still consider it as an empty slot, and will try to use it for csum tree CoW. Then in the following call trace, we will try to lock the new tree block, which turns out to be the old csum tree root which is already locked: btrfs_search_slot() called on csum tree root, which is at 29376512 |- btrfs_cow_block() |- btrfs_set_lock_block() | |- Now locks tree block 29376512 (old csum tree root) |- __btrfs_cow_block() |- btrfs_alloc_tree_block() |- btrfs_reserve_extent() | Now it returns tree block 29376512, which extent tree | shows its empty slot, but it's already hold by csum tree |- btrfs_init_new_buffer() |- btrfs_tree_lock() | Triggers WARN_ON(eb->lock_owner == current->pid) |- wait_event() Wait lock owner to release the lock, but it's locked by ourself, so it will deadlock [FIX] This patch will do the lock_owner and current->pid check at btrfs_init_new_buffer(). So above deadlock can be avoided. Since such problem can only happen in crafted image, we will still trigger kernel warning for later aborted transaction, but with a little more meaningful warning message. Link: https://bugzilla.kernel.org/show_bug.cgi?id=200405 Reported-by: Xu Wen CC: stable@vger.kernel.org # 4.4+ Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/extent-tree.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 1fe62225bc31f..e6c5cfd051a8e 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -8398,6 +8398,19 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root, if (IS_ERR(buf)) return buf; + /* + * Extra safety check in case the extent tree is corrupted and extent + * allocator chooses to use a tree block which is already used and + * locked. + */ + if (buf->lock_owner == current->pid) { + btrfs_err_rl(fs_info, +"tree block %llu owner %llu already locked by pid=%d, extent tree corruption detected", + buf->start, btrfs_header_owner(buf), current->pid); + free_extent_buffer(buf); + return ERR_PTR(-EUCLEAN); + } + btrfs_set_header_generation(buf, trans->transid); btrfs_set_buffer_lockdep_class(root->root_key.objectid, buf, level); btrfs_tree_lock(buf); -- GitLab From e2fc220f5cfd10ca7cd74ef622e8906287db1c7e Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Thu, 6 Sep 2018 16:59:33 -0400 Subject: [PATCH 1455/2269] btrfs: fix error handling in free_log_tree commit 374b0e2d6ba5da7fd1cadb3247731ff27d011f6f upstream. When we hit an I/O error in free_log_tree->walk_log_tree during file system shutdown we can crash due to there not being a valid transaction handle. Use btrfs_handle_fs_error when there's no transaction handle to use. BUG: unable to handle kernel NULL pointer dereference at 0000000000000060 IP: free_log_tree+0xd2/0x140 [btrfs] PGD 0 P4D 0 Oops: 0000 [#1] SMP DEBUG_PAGEALLOC PTI Modules linked in: CPU: 2 PID: 23544 Comm: umount Tainted: G W 4.12.14-kvmsmall #9 SLE15 (unreleased) Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.0.0-prebuilt.qemu-project.org 04/01/2014 task: ffff96bfd3478880 task.stack: ffffa7cf40d78000 RIP: 0010:free_log_tree+0xd2/0x140 [btrfs] RSP: 0018:ffffa7cf40d7bd10 EFLAGS: 00010282 RAX: 00000000fffffffb RBX: 00000000fffffffb RCX: 0000000000000002 RDX: 0000000000000000 RSI: ffff96c02f07d4c8 RDI: 0000000000000282 RBP: ffff96c013cf1000 R08: ffff96c02f07d4c8 R09: ffff96c02f07d4d0 R10: 0000000000000000 R11: 0000000000000002 R12: 0000000000000000 R13: ffff96c005e800c0 R14: ffffa7cf40d7bdb8 R15: 0000000000000000 FS: 00007f17856bcfc0(0000) GS:ffff96c03f600000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000060 CR3: 0000000045ed6002 CR4: 00000000003606e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: ? wait_for_writer+0xb0/0xb0 [btrfs] btrfs_free_log+0x17/0x30 [btrfs] btrfs_drop_and_free_fs_root+0x9a/0xe0 [btrfs] btrfs_free_fs_roots+0xc0/0x130 [btrfs] ? wait_for_completion+0xf2/0x100 close_ctree+0xea/0x2e0 [btrfs] ? kthread_stop+0x161/0x260 generic_shutdown_super+0x6c/0x120 kill_anon_super+0xe/0x20 btrfs_kill_super+0x13/0x100 [btrfs] deactivate_locked_super+0x3f/0x70 cleanup_mnt+0x3b/0x70 task_work_run+0x78/0x90 exit_to_usermode_loop+0x77/0xa6 do_syscall_64+0x1c5/0x1e0 entry_SYSCALL_64_after_hwframe+0x42/0xb7 RIP: 0033:0x7f1784f90827 RSP: 002b:00007ffdeeb03118 EFLAGS: 00000246 ORIG_RAX: 00000000000000a6 RAX: 0000000000000000 RBX: 0000556a60c62970 RCX: 00007f1784f90827 RDX: 0000000000000001 RSI: 0000000000000000 RDI: 0000556a60c62b50 RBP: 0000000000000000 R08: 0000000000000005 R09: 00000000ffffffff R10: 0000556a60c63900 R11: 0000000000000246 R12: 0000556a60c62b50 R13: 00007f17854a81c4 R14: 0000000000000000 R15: 0000000000000000 RIP: free_log_tree+0xd2/0x140 [btrfs] RSP: ffffa7cf40d7bd10 CR2: 0000000000000060 Fixes: 681ae50917df9 ("Btrfs: cleanup reserved space when freeing tree log on error") CC: # v3.13 Signed-off-by: Jeff Mahoney Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/tree-log.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index e1b4a59485dff..efc0a93abe748 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -3078,9 +3078,12 @@ static void free_log_tree(struct btrfs_trans_handle *trans, }; ret = walk_log_tree(trans, log, &wc); - /* I don't think this can happen but just in case */ - if (ret) - btrfs_abort_transaction(trans, ret); + if (ret) { + if (trans) + btrfs_abort_transaction(trans, ret); + else + btrfs_handle_fs_error(log->fs_info, ret, NULL); + } while (1) { ret = find_first_extent_bit(&log->dirty_log_pages, -- GitLab From 7b23644c64fd2d4033cf265bb3f131cccffdc453 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Fri, 7 Sep 2018 14:16:23 +0800 Subject: [PATCH 1456/2269] btrfs: Enhance btrfs_trim_fs function to handle error better commit 93bba24d4b5ad1e5cd8b43f64e66ff9d6355dd20 upstream. Function btrfs_trim_fs() doesn't handle errors in a consistent way. If error happens when trimming existing block groups, it will skip the remaining blocks and continue to trim unallocated space for each device. The return value will only reflect the final error from device trimming. This patch will fix such behavior by: 1) Recording the last error from block group or device trimming The return value will also reflect the last error during trimming. Make developer more aware of the problem. 2) Continuing trimming if possible If we failed to trim one block group or device, we could still try the next block group or device. 3) Report number of failures during block group and device trimming It would be less noisy, but still gives user a brief summary of what's going wrong. Such behavior can avoid confusion for cases like failure to trim the first block group and then only unallocated space is trimmed. Reported-by: Chris Murphy CC: stable@vger.kernel.org # 4.4+ Signed-off-by: Qu Wenruo Reviewed-by: David Sterba [ add bg_ret and dev_ret to the messages ] Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/extent-tree.c | 49 ++++++++++++++++++++++++++++++++---------- 1 file changed, 38 insertions(+), 11 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index e6c5cfd051a8e..21b62e63a916d 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -11037,6 +11037,15 @@ static int btrfs_trim_free_extents(struct btrfs_device *device, return ret; } +/* + * Trim the whole filesystem by: + * 1) trimming the free space in each block group + * 2) trimming the unallocated space on each device + * + * This will also continue trimming even if a block group or device encounters + * an error. The return value will be the last error, or 0 if nothing bad + * happens. + */ int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range) { struct btrfs_block_group_cache *cache = NULL; @@ -11047,6 +11056,10 @@ int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range) u64 end; u64 trimmed = 0; u64 total_bytes = btrfs_super_total_bytes(fs_info->super_copy); + u64 bg_failed = 0; + u64 dev_failed = 0; + int bg_ret = 0; + int dev_ret = 0; int ret = 0; /* @@ -11057,7 +11070,7 @@ int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range) else cache = btrfs_lookup_block_group(fs_info, range->start); - while (cache) { + for (; cache; cache = next_block_group(fs_info, cache)) { if (cache->key.objectid >= (range->start + range->len)) { btrfs_put_block_group(cache); break; @@ -11071,13 +11084,15 @@ int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range) if (!block_group_cache_done(cache)) { ret = cache_block_group(cache, 0); if (ret) { - btrfs_put_block_group(cache); - break; + bg_failed++; + bg_ret = ret; + continue; } ret = wait_block_group_cache_done(cache); if (ret) { - btrfs_put_block_group(cache); - break; + bg_failed++; + bg_ret = ret; + continue; } } ret = btrfs_trim_block_group(cache, @@ -11088,28 +11103,40 @@ int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range) trimmed += group_trimmed; if (ret) { - btrfs_put_block_group(cache); - break; + bg_failed++; + bg_ret = ret; + continue; } } - - cache = next_block_group(fs_info, cache); } + if (bg_failed) + btrfs_warn(fs_info, + "failed to trim %llu block group(s), last error %d", + bg_failed, bg_ret); mutex_lock(&fs_info->fs_devices->device_list_mutex); devices = &fs_info->fs_devices->alloc_list; list_for_each_entry(device, devices, dev_alloc_list) { ret = btrfs_trim_free_extents(device, range->minlen, &group_trimmed); - if (ret) + if (ret) { + dev_failed++; + dev_ret = ret; break; + } trimmed += group_trimmed; } mutex_unlock(&fs_info->fs_devices->device_list_mutex); + if (dev_failed) + btrfs_warn(fs_info, + "failed to trim %llu device(s), last error %d", + dev_failed, dev_ret); range->len = trimmed; - return ret; + if (bg_ret) + return bg_ret; + return dev_ret; } /* -- GitLab From 2fdb337b67bcc407c0eaf1108786cb1300fe1ff1 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Fri, 7 Sep 2018 14:16:24 +0800 Subject: [PATCH 1457/2269] btrfs: Ensure btrfs_trim_fs can trim the whole filesystem commit 6ba9fc8e628becf0e3ec94083450d089b0dec5f5 upstream. [BUG] fstrim on some btrfs only trims the unallocated space, not trimming any space in existing block groups. [CAUSE] Before fstrim_range passed to btrfs_trim_fs(), it gets truncated to range [0, super->total_bytes). So later btrfs_trim_fs() will only be able to trim block groups in range [0, super->total_bytes). While for btrfs, any bytenr aligned to sectorsize is valid, since btrfs uses its logical address space, there is nothing limiting the location where we put block groups. For filesystem with frequent balance, it's quite easy to relocate all block groups and bytenr of block groups will start beyond super->total_bytes. In that case, btrfs will not trim existing block groups. [FIX] Just remove the truncation in btrfs_ioctl_fitrim(), so btrfs_trim_fs() can get the unmodified range, which is normally set to [0, U64_MAX]. Reported-by: Chris Murphy Fixes: f4c697e6406d ("btrfs: return EINVAL if start > total_bytes in fitrim ioctl") CC: # v4.4+ Signed-off-by: Qu Wenruo Reviewed-by: Nikolay Borisov Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/extent-tree.c | 10 +--------- fs/btrfs/ioctl.c | 11 +++++++---- 2 files changed, 8 insertions(+), 13 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 21b62e63a916d..a32c84932eed1 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -11055,21 +11055,13 @@ int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range) u64 start; u64 end; u64 trimmed = 0; - u64 total_bytes = btrfs_super_total_bytes(fs_info->super_copy); u64 bg_failed = 0; u64 dev_failed = 0; int bg_ret = 0; int dev_ret = 0; int ret = 0; - /* - * try to trim all FS space, our block group may start from non-zero. - */ - if (range->len == total_bytes) - cache = btrfs_lookup_first_block_group(fs_info, range->start); - else - cache = btrfs_lookup_block_group(fs_info, range->start); - + cache = btrfs_lookup_first_block_group(fs_info, range->start); for (; cache; cache = next_block_group(fs_info, cache)) { if (cache->key.objectid >= (range->start + range->len)) { btrfs_put_block_group(cache); diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index a507c0d253545..9333e4cda68d2 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -352,7 +352,6 @@ static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg) struct fstrim_range range; u64 minlen = ULLONG_MAX; u64 num_devices = 0; - u64 total_bytes = btrfs_super_total_bytes(fs_info->super_copy); int ret; if (!capable(CAP_SYS_ADMIN)) @@ -376,11 +375,15 @@ static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg) return -EOPNOTSUPP; if (copy_from_user(&range, arg, sizeof(range))) return -EFAULT; - if (range.start > total_bytes || - range.len < fs_info->sb->s_blocksize) + + /* + * NOTE: Don't truncate the range using super->total_bytes. Bytenr of + * block group is in the logical address space, which can be any + * sectorsize aligned bytenr in the range [0, U64_MAX]. + */ + if (range.len < fs_info->sb->s_blocksize) return -EINVAL; - range.len = min(range.len, total_bytes - range.start); range.minlen = max(range.minlen, minlen); ret = btrfs_trim_fs(fs_info, &range); if (ret < 0) -- GitLab From a7d24c89db0b0b664046aebd1e0f9aef7306b944 Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Thu, 6 Sep 2018 17:18:14 -0400 Subject: [PATCH 1458/2269] btrfs: iterate all devices during trim, instead of fs_devices::alloc_list commit d4e329de5e5e21594df2e0dd59da9acee71f133b upstream. btrfs_trim_fs iterates over the fs_devices->alloc_list while holding the device_list_mutex. The problem is that ->alloc_list is protected by the chunk mutex. We don't want to hold the chunk mutex over the trim of the entire file system. Fortunately, the ->dev_list list is protected by the dev_list mutex and while it will give us all devices, including read-only devices, we already just skip the read-only devices. Then we can continue to take and release the chunk mutex while scanning each device. Fixes: 499f377f49f ("btrfs: iterate over unused chunk space in FITRIM") CC: stable@vger.kernel.org # 4.4+ Signed-off-by: Jeff Mahoney Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/extent-tree.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index a32c84932eed1..3ebef3ab9dde0 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -11107,8 +11107,8 @@ int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range) "failed to trim %llu block group(s), last error %d", bg_failed, bg_ret); mutex_lock(&fs_info->fs_devices->device_list_mutex); - devices = &fs_info->fs_devices->alloc_list; - list_for_each_entry(device, devices, dev_alloc_list) { + devices = &fs_info->fs_devices->devices; + list_for_each_entry(device, devices, dev_list) { ret = btrfs_trim_free_extents(device, range->minlen, &group_trimmed); if (ret) { -- GitLab From da05e93720843101d7b70c84b63805a37d4485d1 Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Thu, 6 Sep 2018 17:18:15 -0400 Subject: [PATCH 1459/2269] btrfs: don't attempt to trim devices that don't support it commit 0be88e367fd8fbdb45257615d691f4675dda062f upstream. We check whether any device the file system is using supports discard in the ioctl call, but then we attempt to trim free extents on every device regardless of whether discard is supported. Due to the way we mask off EOPNOTSUPP, we can end up issuing the trim operations on each free range on devices that don't support it, just wasting time. Fixes: 499f377f49f08 ("btrfs: iterate over unused chunk space in FITRIM") CC: stable@vger.kernel.org # 4.4+ Signed-off-by: Jeff Mahoney Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/extent-tree.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 3ebef3ab9dde0..43a2d77039df5 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -10976,6 +10976,10 @@ static int btrfs_trim_free_extents(struct btrfs_device *device, *trimmed = 0; + /* Discard not supported = nothing to do. */ + if (!blk_queue_discard(bdev_get_queue(device->bdev))) + return 0; + /* Not writeable = nothing to do. */ if (!device->writeable) return 0; -- GitLab From 9ce0f8b6fb06afb39a4dba8a8aa9ed4ccd5134b5 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 12 Sep 2018 10:45:45 -0400 Subject: [PATCH 1460/2269] btrfs: wait on caching when putting the bg cache commit 3aa7c7a31c26321696b92841d5103461c6f3f517 upstream. While testing my backport I noticed there was a panic if I ran generic/416 generic/417 generic/418 all in a row. This just happened to uncover a race where we had outstanding IO after we destroy all of our workqueues, and then we'd go to queue the endio work on those free'd workqueues. This is because we aren't waiting for the caching threads to be done before freeing everything up, so to fix this make sure we wait on any outstanding caching that's being done before we free up the block group, so we're sure to be done with all IO by the time we get to btrfs_stop_all_workers(). This fixes the panic I was seeing consistently in testing. ------------[ cut here ]------------ kernel BUG at fs/btrfs/volumes.c:6112! SMP PTI Modules linked in: CPU: 1 PID: 27165 Comm: kworker/u4:7 Not tainted 4.16.0-02155-g3553e54a578d-dirty #875 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.11.0-2.el7 04/01/2014 Workqueue: btrfs-cache btrfs_cache_helper RIP: 0010:btrfs_map_bio+0x346/0x370 RSP: 0000:ffffc900061e79d0 EFLAGS: 00010202 RAX: 0000000000000000 RBX: ffff880071542e00 RCX: 0000000000533000 RDX: ffff88006bb74380 RSI: 0000000000000008 RDI: ffff880078160000 RBP: 0000000000000001 R08: ffff8800781cd200 R09: 0000000000503000 R10: ffff88006cd21200 R11: 0000000000000000 R12: 0000000000000000 R13: 0000000000000000 R14: ffff8800781cd200 R15: ffff880071542e00 FS: 0000000000000000(0000) GS:ffff88007fd00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000000000817ffc4 CR3: 0000000078314000 CR4: 00000000000006e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: btree_submit_bio_hook+0x8a/0xd0 submit_one_bio+0x5d/0x80 read_extent_buffer_pages+0x18a/0x320 btree_read_extent_buffer_pages+0xbc/0x200 ? alloc_extent_buffer+0x359/0x3e0 read_tree_block+0x3d/0x60 read_block_for_search.isra.30+0x1a5/0x360 btrfs_search_slot+0x41b/0xa10 btrfs_next_old_leaf+0x212/0x470 caching_thread+0x323/0x490 normal_work_helper+0xc5/0x310 process_one_work+0x141/0x340 worker_thread+0x44/0x3c0 kthread+0xf8/0x130 ? process_one_work+0x340/0x340 ? kthread_bind+0x10/0x10 ret_from_fork+0x35/0x40 RIP: btrfs_map_bio+0x346/0x370 RSP: ffffc900061e79d0 ---[ end trace 827eb13e50846033 ]--- Kernel panic - not syncing: Fatal exception Kernel Offset: disabled ---[ end Kernel panic - not syncing: Fatal exception CC: stable@vger.kernel.org # 4.4+ Signed-off-by: Josef Bacik Reviewed-by: Omar Sandoval Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/extent-tree.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 43a2d77039df5..7a863338dba13 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -9881,6 +9881,7 @@ void btrfs_put_block_group_cache(struct btrfs_fs_info *info) block_group = btrfs_lookup_first_block_group(info, last); while (block_group) { + wait_block_group_cache_done(block_group); spin_lock(&block_group->lock); if (block_group->iref) break; -- GitLab From b5d59a48c3724d2fdfa145a9dfa64261299aee45 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 28 Sep 2018 07:17:49 -0400 Subject: [PATCH 1461/2269] btrfs: protect space cache inode alloc with GFP_NOFS commit 84de76a2fb217dc1b6bc2965cc397d1648aa1404 upstream. If we're allocating a new space cache inode it's likely going to be under a transaction handle, so we need to use memalloc_nofs_save() in order to avoid deadlocks, and more importantly lockdep messages that make xfstests fail. CC: stable@vger.kernel.org # 4.4+ Reviewed-by: Omar Sandoval Signed-off-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/free-space-cache.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 4426d1c73e50f..e75489e3b67cd 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -22,6 +22,7 @@ #include #include #include +#include #include "ctree.h" #include "free-space-cache.h" #include "transaction.h" @@ -59,6 +60,7 @@ static struct inode *__lookup_free_space_inode(struct btrfs_root *root, struct btrfs_free_space_header *header; struct extent_buffer *leaf; struct inode *inode = NULL; + unsigned nofs_flag; int ret; key.objectid = BTRFS_FREE_SPACE_OBJECTID; @@ -80,7 +82,13 @@ static struct inode *__lookup_free_space_inode(struct btrfs_root *root, btrfs_disk_key_to_cpu(&location, &disk_key); btrfs_release_path(path); + /* + * We are often under a trans handle at this point, so we need to make + * sure NOFS is set to keep us from deadlocking. + */ + nofs_flag = memalloc_nofs_save(); inode = btrfs_iget(fs_info->sb, &location, root, NULL); + memalloc_nofs_restore(nofs_flag); if (IS_ERR(inode)) return inode; if (is_bad_inode(inode)) { -- GitLab From a7e3da4918b18ad8ff3848c9054425e8143a991c Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 28 Sep 2018 07:18:00 -0400 Subject: [PATCH 1462/2269] btrfs: reset max_extent_size on clear in a bitmap commit 553cceb49681d60975d00892877d4c871bf220f9 upstream. We need to clear the max_extent_size when we clear bits from a bitmap since it could have been from the range that contains the max_extent_size. CC: stable@vger.kernel.org # 4.4+ Reviewed-by: Liu Bo Signed-off-by: Josef Bacik Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/free-space-cache.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index e75489e3b67cd..9b3fbe7c16a91 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -1710,6 +1710,8 @@ static inline void __bitmap_clear_bits(struct btrfs_free_space_ctl *ctl, bitmap_clear(info->bitmap, start, count); info->bytes -= bytes; + if (info->max_extent_size > ctl->unit) + info->max_extent_size = 0; } static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl, -- GitLab From 0ae66cf18fc52d3f3285b5090b3e4516e4fcc1ed Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 28 Sep 2018 07:18:02 -0400 Subject: [PATCH 1463/2269] btrfs: make sure we create all new block groups commit 545e3366db823dc3342ca9d7fea803f829c9062f upstream. Allocating new chunks modifies both the extent and chunk tree, which can trigger new chunk allocations. So instead of doing list_for_each_safe, just do while (!list_empty()) so we make sure we don't exit with other pending bg's still on our list. CC: stable@vger.kernel.org # 4.4+ Reviewed-by: Omar Sandoval Reviewed-by: Liu Bo Signed-off-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/extent-tree.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 7a863338dba13..e201415edb2dc 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -10270,7 +10270,7 @@ error: void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info) { - struct btrfs_block_group_cache *block_group, *tmp; + struct btrfs_block_group_cache *block_group; struct btrfs_root *extent_root = fs_info->extent_root; struct btrfs_block_group_item item; struct btrfs_key key; @@ -10278,7 +10278,10 @@ void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans, bool can_flush_pending_bgs = trans->can_flush_pending_bgs; trans->can_flush_pending_bgs = false; - list_for_each_entry_safe(block_group, tmp, &trans->new_bgs, bg_list) { + while (!list_empty(&trans->new_bgs)) { + block_group = list_first_entry(&trans->new_bgs, + struct btrfs_block_group_cache, + bg_list); if (ret) goto next; -- GitLab From 985579fa688e8fed8ed3570210af7201dc67d179 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 8 Oct 2018 11:12:55 +0100 Subject: [PATCH 1464/2269] Btrfs: fix warning when replaying log after fsync of a tmpfile commit f2d72f42d5fa3bf33761d9e47201745f624fcff5 upstream. When replaying a log which contains a tmpfile (which necessarily has a link count of 0) we end up calling inc_nlink(), at fs/btrfs/tree-log.c:replay_one_buffer(), which produces a warning like the following: [195191.943673] WARNING: CPU: 0 PID: 6924 at fs/inode.c:342 inc_nlink+0x33/0x40 [195191.943723] CPU: 0 PID: 6924 Comm: mount Not tainted 4.19.0-rc6-btrfs-next-38 #1 [195191.943724] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.11.2-0-gf9626ccb91-prebuilt.qemu-project.org 04/01/2014 [195191.943726] RIP: 0010:inc_nlink+0x33/0x40 [195191.943728] RSP: 0018:ffffb96e425e3870 EFLAGS: 00010246 [195191.943730] RAX: 0000000000000000 RBX: ffff8c0d1e6af4f0 RCX: 0000000000000006 [195191.943731] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff8c0d1e6af4f0 [195191.943731] RBP: 0000000000000097 R08: 0000000000000001 R09: 0000000000000000 [195191.943732] R10: 0000000000000000 R11: 0000000000000000 R12: ffffb96e425e3a60 [195191.943733] R13: ffff8c0d10cff0c8 R14: ffff8c0d0d515348 R15: ffff8c0d78a1b3f8 [195191.943735] FS: 00007f570ee24480(0000) GS:ffff8c0dfb200000(0000) knlGS:0000000000000000 [195191.943736] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [195191.943737] CR2: 00005593286277c8 CR3: 00000000bb8f2006 CR4: 00000000003606f0 [195191.943739] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [195191.943740] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [195191.943741] Call Trace: [195191.943778] replay_one_buffer+0x797/0x7d0 [btrfs] [195191.943802] walk_up_log_tree+0x1c1/0x250 [btrfs] [195191.943809] ? rcu_read_lock_sched_held+0x3f/0x70 [195191.943825] walk_log_tree+0xae/0x1d0 [btrfs] [195191.943840] btrfs_recover_log_trees+0x1d7/0x4d0 [btrfs] [195191.943856] ? replay_dir_deletes+0x280/0x280 [btrfs] [195191.943870] open_ctree+0x1c3b/0x22a0 [btrfs] [195191.943887] btrfs_mount_root+0x6b4/0x800 [btrfs] [195191.943894] ? rcu_read_lock_sched_held+0x3f/0x70 [195191.943899] ? pcpu_alloc+0x55b/0x7c0 [195191.943906] ? mount_fs+0x3b/0x140 [195191.943908] mount_fs+0x3b/0x140 [195191.943912] ? __init_waitqueue_head+0x36/0x50 [195191.943916] vfs_kern_mount+0x62/0x160 [195191.943927] btrfs_mount+0x134/0x890 [btrfs] [195191.943936] ? rcu_read_lock_sched_held+0x3f/0x70 [195191.943938] ? pcpu_alloc+0x55b/0x7c0 [195191.943943] ? mount_fs+0x3b/0x140 [195191.943952] ? btrfs_remount+0x570/0x570 [btrfs] [195191.943954] mount_fs+0x3b/0x140 [195191.943956] ? __init_waitqueue_head+0x36/0x50 [195191.943960] vfs_kern_mount+0x62/0x160 [195191.943963] do_mount+0x1f9/0xd40 [195191.943967] ? memdup_user+0x4b/0x70 [195191.943971] ksys_mount+0x7e/0xd0 [195191.943974] __x64_sys_mount+0x21/0x30 [195191.943977] do_syscall_64+0x60/0x1b0 [195191.943980] entry_SYSCALL_64_after_hwframe+0x49/0xbe [195191.943983] RIP: 0033:0x7f570e4e524a [195191.943986] RSP: 002b:00007ffd83589478 EFLAGS: 00000206 ORIG_RAX: 00000000000000a5 [195191.943989] RAX: ffffffffffffffda RBX: 0000563f335b2060 RCX: 00007f570e4e524a [195191.943990] RDX: 0000563f335b2240 RSI: 0000563f335b2280 RDI: 0000563f335b2260 [195191.943992] RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000020 [195191.943993] R10: 00000000c0ed0000 R11: 0000000000000206 R12: 0000563f335b2260 [195191.943994] R13: 0000563f335b2240 R14: 0000000000000000 R15: 00000000ffffffff [195191.944002] irq event stamp: 8688 [195191.944010] hardirqs last enabled at (8687): [] console_unlock+0x503/0x640 [195191.944012] hardirqs last disabled at (8688): [] trace_hardirqs_off_thunk+0x1a/0x1c [195191.944018] softirqs last enabled at (8638): [] __set_page_dirty_nobuffers+0x101/0x150 [195191.944020] softirqs last disabled at (8634): [] wb_wakeup_delayed+0x2e/0x60 [195191.944022] ---[ end trace 5d6e873a9a0b811a ]--- This happens because the inode does not have the flag I_LINKABLE set, which is a runtime only flag, not meant to be persisted, set when the inode is created through open(2) if the flag O_EXCL is not passed to it. Except for the warning, there are no other consequences (like corruptions or metadata inconsistencies). Since it's pointless to replay a tmpfile as it would be deleted in a later phase of the log replay procedure (it has a link count of 0), fix this by not logging tmpfiles and if a tmpfile is found in a log (created by a kernel without this change), skip the replay of the inode. A test case for fstests follows soon. Fixes: 471d557afed1 ("Btrfs: fix loss of prealloc extents past i_size after fsync log replay") CC: stable@vger.kernel.org # 4.18+ Reported-by: Martin Steigerwald Link: https://lore.kernel.org/linux-btrfs/3666619.NTnn27ZJZE@merkaba/ Signed-off-by: Filipe Manana Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/tree-log.c | 42 ++++++++++++++++++++++++++++++++---------- 1 file changed, 32 insertions(+), 10 deletions(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index efc0a93abe748..0eb9efd4c346f 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -273,6 +273,13 @@ struct walk_control { /* what stage of the replay code we're currently in */ int stage; + /* + * Ignore any items from the inode currently being processed. Needs + * to be set every time we find a BTRFS_INODE_ITEM_KEY and we are in + * the LOG_WALK_REPLAY_INODES stage. + */ + bool ignore_cur_inode; + /* the root we are currently replaying */ struct btrfs_root *replay_dest; @@ -2363,6 +2370,20 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb, inode_item = btrfs_item_ptr(eb, i, struct btrfs_inode_item); + /* + * If we have a tmpfile (O_TMPFILE) that got fsync'ed + * and never got linked before the fsync, skip it, as + * replaying it is pointless since it would be deleted + * later. We skip logging tmpfiles, but it's always + * possible we are replaying a log created with a kernel + * that used to log tmpfiles. + */ + if (btrfs_inode_nlink(eb, inode_item) == 0) { + wc->ignore_cur_inode = true; + continue; + } else { + wc->ignore_cur_inode = false; + } ret = replay_xattr_deletes(wc->trans, root, log, path, key.objectid); if (ret) @@ -2400,16 +2421,8 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb, root->fs_info->sectorsize); ret = btrfs_drop_extents(wc->trans, root, inode, from, (u64)-1, 1); - /* - * If the nlink count is zero here, the iput - * will free the inode. We bump it to make - * sure it doesn't get freed until the link - * count fixup is done. - */ if (!ret) { - if (inode->i_nlink == 0) - inc_nlink(inode); - /* Update link count and nbytes. */ + /* Update the inode's nbytes. */ ret = btrfs_update_inode(wc->trans, root, inode); } @@ -2424,6 +2437,9 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb, break; } + if (wc->ignore_cur_inode) + continue; + if (key.type == BTRFS_DIR_INDEX_KEY && wc->stage == LOG_WALK_REPLAY_DIR_INDEX) { ret = replay_one_dir_item(wc->trans, root, path, @@ -5644,7 +5660,13 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, if (ret) goto end_no_trans; - if (btrfs_inode_in_log(inode, trans->transid)) { + /* + * Skip already logged inodes or inodes corresponding to tmpfiles + * (since logging them is pointless, a link count of 0 means they + * will never be accessible). + */ + if (btrfs_inode_in_log(inode, trans->transid) || + inode->vfs_inode.i_nlink == 0) { ret = BTRFS_NO_LOG_SYNC; goto end_no_trans; } -- GitLab From 45be291e9def76b8c42c10bfbf482a9dc81aa537 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 9 Oct 2018 15:05:29 +0100 Subject: [PATCH 1465/2269] Btrfs: fix wrong dentries after fsync of file that got its parent replaced commit 0f375eed92b5a407657532637ed9652611a682f5 upstream. In a scenario like the following: mkdir /mnt/A # inode 258 mkdir /mnt/B # inode 259 touch /mnt/B/bar # inode 260 sync mv /mnt/B/bar /mnt/A/bar mv -T /mnt/A /mnt/B fsync /mnt/B/bar After replaying the log we end up with file bar having 2 hard links, both with the name 'bar' and one in the directory with inode number 258 and the other in the directory with inode number 259. Also, we end up with the directory inode 259 still existing and with the directory inode 258 still named as 'A', instead of 'B'. In this scenario, file 'bar' should only have one hard link, located at directory inode 258, the directory inode 259 should not exist anymore and the name for directory inode 258 should be 'B'. This incorrect behaviour happens because when attempting to log the old parents of an inode, we skip any parents that no longer exist. Fix this by forcing a full commit if an old parent no longer exists. A test case for fstests follows soon. CC: stable@vger.kernel.org # 4.4+ Signed-off-by: Filipe Manana Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/tree-log.c | 30 +++++++++++++++++++++++++++--- 1 file changed, 27 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 0eb9efd4c346f..dd7a39797a327 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -5583,9 +5583,33 @@ static int btrfs_log_all_parents(struct btrfs_trans_handle *trans, dir_inode = btrfs_iget(fs_info->sb, &inode_key, root, NULL); - /* If parent inode was deleted, skip it. */ - if (IS_ERR(dir_inode)) - continue; + /* + * If the parent inode was deleted, return an error to + * fallback to a transaction commit. This is to prevent + * getting an inode that was moved from one parent A to + * a parent B, got its former parent A deleted and then + * it got fsync'ed, from existing at both parents after + * a log replay (and the old parent still existing). + * Example: + * + * mkdir /mnt/A + * mkdir /mnt/B + * touch /mnt/B/bar + * sync + * mv /mnt/B/bar /mnt/A/bar + * mv -T /mnt/A /mnt/B + * fsync /mnt/B/bar + * + * + * If we ignore the old parent B which got deleted, + * after a log replay we would have file bar linked + * at both parents and the old parent B would still + * exist. + */ + if (IS_ERR(dir_inode)) { + ret = PTR_ERR(dir_inode); + goto out; + } if (ctx) ctx->log_new_dentries = false; -- GitLab From 08374d8bc19ba9899257b6e7824588eb4e5ff0f5 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Fri, 10 Aug 2018 10:20:26 +0800 Subject: [PATCH 1466/2269] btrfs: qgroup: Dirty all qgroups before rescan commit 9c7b0c2e8dbfbcd80a71e2cbfe02704f26c185c6 upstream. [BUG] In the following case, rescan won't zero out the number of qgroup 1/0: $ mkfs.btrfs -fq $DEV $ mount $DEV /mnt $ btrfs quota enable /mnt $ btrfs qgroup create 1/0 /mnt $ btrfs sub create /mnt/sub $ btrfs qgroup assign 0/257 1/0 /mnt $ dd if=/dev/urandom of=/mnt/sub/file bs=1k count=1000 $ btrfs sub snap /mnt/sub /mnt/snap $ btrfs quota rescan -w /mnt $ btrfs qgroup show -pcre /mnt qgroupid rfer excl max_rfer max_excl parent child -------- ---- ---- -------- -------- ------ ----- 0/5 16.00KiB 16.00KiB none none --- --- 0/257 1016.00KiB 16.00KiB none none 1/0 --- 0/258 1016.00KiB 16.00KiB none none --- --- 1/0 1016.00KiB 16.00KiB none none --- 0/257 So far so good, but: $ btrfs qgroup remove 0/257 1/0 /mnt WARNING: quotas may be inconsistent, rescan needed $ btrfs quota rescan -w /mnt $ btrfs qgroup show -pcre /mnt qgoupid rfer excl max_rfer max_excl parent child -------- ---- ---- -------- -------- ------ ----- 0/5 16.00KiB 16.00KiB none none --- --- 0/257 1016.00KiB 16.00KiB none none --- --- 0/258 1016.00KiB 16.00KiB none none --- --- 1/0 1016.00KiB 16.00KiB none none --- --- ^^^^^^^^^^ ^^^^^^^^ not cleared [CAUSE] Before rescan we call qgroup_rescan_zero_tracking() to zero out all qgroups' accounting numbers. However we don't mark all qgroups dirty, but rely on rescan to do so. If we have any high level qgroup without children, it won't be marked dirty during rescan, since we cannot reach that qgroup. This will cause QGROUP_INFO items of childless qgroups never get updated in the quota tree, thus their numbers will stay the same in "btrfs qgroup show" output. [FIX] Just mark all qgroups dirty in qgroup_rescan_zero_tracking(), so even if we have childless qgroups, their QGROUP_INFO items will still get updated during rescan. Reported-by: Misono Tomohiro CC: stable@vger.kernel.org # 4.4+ Signed-off-by: Qu Wenruo Reviewed-by: Misono Tomohiro Tested-by: Misono Tomohiro Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/qgroup.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 1b647fa932091..d6d6e9593e391 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -2763,6 +2763,7 @@ qgroup_rescan_zero_tracking(struct btrfs_fs_info *fs_info) qgroup->rfer_cmpr = 0; qgroup->excl = 0; qgroup->excl_cmpr = 0; + qgroup_dirty(fs_info, qgroup); } spin_unlock(&fs_info->qgroup_lock); } -- GitLab From bdda31751a2074cbfcf64f52b59b165c06204ae5 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Sat, 13 Oct 2018 00:37:25 +0100 Subject: [PATCH 1467/2269] Btrfs: fix null pointer dereference on compressed write path error commit 3527a018c00e5dbada2f9d7ed5576437b6dd5cfb upstream. At inode.c:compress_file_range(), under the "free_pages_out" label, we can end up dereferencing the "pages" pointer when it has a NULL value. This case happens when "start" has a value of 0 and we fail to allocate memory for the "pages" pointer. When that happens we jump to the "cont" label and then enter the "if (start == 0)" branch where we immediately call the cow_file_range_inline() function. If that function returns 0 (success creating an inline extent) or an error (like -ENOMEM for example) we jump to the "free_pages_out" label and then access "pages[i]" leading to a NULL pointer dereference, since "nr_pages" has a value greater than zero at that point. Fix this by setting "nr_pages" to 0 when we fail to allocate memory for the "pages" pointer. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=201119 Fixes: 771ed689d2cd ("Btrfs: Optimize compressed writeback and reads") CC: stable@vger.kernel.org # 4.4+ Reviewed-by: Liu Bo Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/inode.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index e8bfafa25a716..c24badcafcc03 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -524,6 +524,7 @@ again: pages = kcalloc(nr_pages, sizeof(struct page *), GFP_NOFS); if (!pages) { /* just bail out to the uncompressed code */ + nr_pages = 0; goto cont; } -- GitLab From 947a9b021baf0521a12b1895c81507d7f5c2ddbe Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 15 Oct 2018 09:51:00 +0100 Subject: [PATCH 1468/2269] Btrfs: fix assertion on fsync of regular file when using no-holes feature commit 7ed586d0a8241e81d58c656c5b315f781fa6fc97 upstream. When using the NO_HOLES feature and logging a regular file, we were expecting that if we find an inline extent, that either its size in RAM (uncompressed and unenconded) matches the size of the file or if it does not, that it matches the sector size and it represents compressed data. This assertion does not cover a case where the length of the inline extent is smaller than the sector size and also smaller the file's size, such case is possible through fallocate. Example: $ mkfs.btrfs -f -O no-holes /dev/sdb $ mount /dev/sdb /mnt $ xfs_io -f -c "pwrite -S 0xb60 0 21" /mnt/foobar $ xfs_io -c "falloc 40 40" /mnt/foobar $ xfs_io -c "fsync" /mnt/foobar In the above example we trigger the assertion because the inline extent's length is 21 bytes while the file size is 80 bytes. The fallocate() call merely updated the file's size and did not touch the existing inline extent, as expected. So fix this by adjusting the assertion so that an inline extent length smaller than the file size is valid if the file size is smaller than the filesystem's sector size. A test case for fstests follows soon. Reported-by: Anatoly Trosinenko Fixes: a89ca6f24ffe ("Btrfs: fix fsync after truncate when no_holes feature is enabled") CC: stable@vger.kernel.org # 4.14+ Link: https://lore.kernel.org/linux-btrfs/CAE5jQCfRSBC7n4pUTFJcmHh109=gwyT9mFkCOL+NKfzswmR=_Q@mail.gmail.com/ Signed-off-by: Filipe Manana Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/tree-log.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index dd7a39797a327..accc54efb66e0 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -4641,7 +4641,8 @@ static int btrfs_log_trailing_hole(struct btrfs_trans_handle *trans, ASSERT(len == i_size || (len == fs_info->sectorsize && btrfs_file_extent_compression(leaf, extent) != - BTRFS_COMPRESS_NONE)); + BTRFS_COMPRESS_NONE) || + (len < i_size && i_size < fs_info->sectorsize)); return 0; } -- GitLab From 65d41e98fd715f93b9d79e4583fdb5db7a176c44 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 12 Oct 2018 15:32:33 -0400 Subject: [PATCH 1469/2269] btrfs: set max_extent_size properly commit ad22cf6ea47fa20fbe11ac324a0a15c0a9a4a2a9 upstream. We can't use entry->bytes if our entry is a bitmap entry, we need to use entry->max_extent_size in that case. Fix up all the logic to make this consistent. CC: stable@vger.kernel.org # 4.4+ Signed-off-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/free-space-cache.c | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 9b3fbe7c16a91..a8b4e4d7b82de 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -1795,6 +1795,13 @@ static int search_bitmap(struct btrfs_free_space_ctl *ctl, return -1; } +static inline u64 get_max_extent_size(struct btrfs_free_space *entry) +{ + if (entry->bitmap) + return entry->max_extent_size; + return entry->bytes; +} + /* Cache the size of the max extent in bytes */ static struct btrfs_free_space * find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes, @@ -1816,8 +1823,8 @@ find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes, for (node = &entry->offset_index; node; node = rb_next(node)) { entry = rb_entry(node, struct btrfs_free_space, offset_index); if (entry->bytes < *bytes) { - if (entry->bytes > *max_extent_size) - *max_extent_size = entry->bytes; + *max_extent_size = max(get_max_extent_size(entry), + *max_extent_size); continue; } @@ -1835,8 +1842,8 @@ find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes, } if (entry->bytes < *bytes + align_off) { - if (entry->bytes > *max_extent_size) - *max_extent_size = entry->bytes; + *max_extent_size = max(get_max_extent_size(entry), + *max_extent_size); continue; } @@ -1848,8 +1855,10 @@ find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes, *offset = tmp; *bytes = size; return entry; - } else if (size > *max_extent_size) { - *max_extent_size = size; + } else { + *max_extent_size = + max(get_max_extent_size(entry), + *max_extent_size); } continue; } @@ -2709,8 +2718,8 @@ static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group, err = search_bitmap(ctl, entry, &search_start, &search_bytes, true); if (err) { - if (search_bytes > *max_extent_size) - *max_extent_size = search_bytes; + *max_extent_size = max(get_max_extent_size(entry), + *max_extent_size); return 0; } @@ -2747,8 +2756,9 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group, entry = rb_entry(node, struct btrfs_free_space, offset_index); while (1) { - if (entry->bytes < bytes && entry->bytes > *max_extent_size) - *max_extent_size = entry->bytes; + if (entry->bytes < bytes) + *max_extent_size = max(get_max_extent_size(entry), + *max_extent_size); if (entry->bytes < bytes || (!entry->bitmap && entry->offset < min_start)) { -- GitLab From 7509d4f9ad5ee2b6061f2073dfb5e3f48216c09f Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 11 Oct 2018 15:54:09 -0400 Subject: [PATCH 1470/2269] btrfs: don't use ctl->free_space for max_extent_size commit fb5c39d7a887108087de6ff93d3f326b01b4ef41 upstream. max_extent_size is supposed to be the largest contiguous range for the space info, and ctl->free_space is the total free space in the block group. We need to keep track of these separately and _only_ use the max_free_space if we don't have a max_extent_size, as that means our original request was too large to search any of the block groups for and therefore wouldn't have a max_extent_size set. CC: stable@vger.kernel.org # 4.14+ Reviewed-by: Filipe Manana Signed-off-by: Josef Bacik Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/extent-tree.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index e201415edb2dc..2cb3569ac5481 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -7573,6 +7573,7 @@ static noinline int find_free_extent(struct btrfs_fs_info *fs_info, struct btrfs_block_group_cache *block_group = NULL; u64 search_start = 0; u64 max_extent_size = 0; + u64 max_free_space = 0; u64 empty_cluster = 0; struct btrfs_space_info *space_info; int loop = 0; @@ -7867,8 +7868,8 @@ unclustered_alloc: spin_lock(&ctl->tree_lock); if (ctl->free_space < num_bytes + empty_cluster + empty_size) { - if (ctl->free_space > max_extent_size) - max_extent_size = ctl->free_space; + max_free_space = max(max_free_space, + ctl->free_space); spin_unlock(&ctl->tree_lock); goto loop; } @@ -8037,6 +8038,8 @@ loop: } out: if (ret == -ENOSPC) { + if (!max_extent_size) + max_extent_size = max_free_space; spin_lock(&space_info->lock); space_info->max_extent_size = max_extent_size; spin_unlock(&space_info->lock); -- GitLab From 42551ab618977b1e8d0b296f937b1aa80e93cd39 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 11 Oct 2018 15:54:21 -0400 Subject: [PATCH 1471/2269] btrfs: only free reserved extent if we didn't insert it commit 49940bdd57779c78462da7aa5a8650b2fea8c2ff upstream. When we insert the file extent once the ordered extent completes we free the reserved extent reservation as it'll have been migrated to the bytes_used counter. However if we error out after this step we'll still clear the reserved extent reservation, resulting in a negative accounting of the reserved bytes for the block group and space info. Fix this by only doing the free if we didn't successfully insert a file extent for this extent. CC: stable@vger.kernel.org # 4.14+ Reviewed-by: Omar Sandoval Reviewed-by: Filipe Manana Signed-off-by: Josef Bacik Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/inode.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index c24badcafcc03..39edb4b71fa97 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2966,6 +2966,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent) bool truncated = false; bool range_locked = false; bool clear_new_delalloc_bytes = false; + bool clear_reserved_extent = true; if (!test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags) && !test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags) && @@ -3069,10 +3070,12 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent) logical_len, logical_len, compress_type, 0, 0, BTRFS_FILE_EXTENT_REG); - if (!ret) + if (!ret) { + clear_reserved_extent = false; btrfs_release_delalloc_bytes(fs_info, ordered_extent->start, ordered_extent->disk_len); + } } unpin_extent_cache(&BTRFS_I(inode)->extent_tree, ordered_extent->file_offset, ordered_extent->len, @@ -3132,8 +3135,13 @@ out: * wrong we need to return the space for this ordered extent * back to the allocator. We only free the extent in the * truncated case if we didn't write out the extent at all. + * + * If we made it past insert_reserved_file_extent before we + * errored out then we don't need to do this as the accounting + * has already been done. */ if ((ret || !logical_len) && + clear_reserved_extent && !test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags) && !test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) btrfs_free_reserved_extent(fs_info, -- GitLab From 87d7ea688393835feeeb5afc2fd4691e8a733da7 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 11 Oct 2018 15:54:31 -0400 Subject: [PATCH 1472/2269] btrfs: don't run delayed_iputs in commit commit 30928e9baac238a7330085a1c5747f0b5df444b4 upstream. This could result in a really bad case where we do something like evict evict_refill_and_join btrfs_commit_transaction btrfs_run_delayed_iputs evict evict_refill_and_join btrfs_commit_transaction ... forever We have plenty of other places where we run delayed iputs that are much safer, let those do the work. CC: stable@vger.kernel.org # 4.4+ Reviewed-by: Filipe Manana Signed-off-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/transaction.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 27638b96079da..f74005ca8f087 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -2307,15 +2307,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) kmem_cache_free(btrfs_trans_handle_cachep, trans); - /* - * If fs has been frozen, we can not handle delayed iputs, otherwise - * it'll result in deadlock about SB_FREEZE_FS. - */ - if (current != fs_info->transaction_kthread && - current != fs_info->cleaner_kthread && - !test_bit(BTRFS_FS_FROZEN, &fs_info->flags)) - btrfs_run_delayed_iputs(fs_info); - return ret; scrub_continue: -- GitLab From c8bac97db4639b958af13cc41ebcaea3297d4e72 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 12 Oct 2018 15:32:32 -0400 Subject: [PATCH 1473/2269] btrfs: move the dio_sem higher up the callchain commit c495144bc6962186feae31d687596d2472000e45 upstream. We're getting a lockdep splat because we take the dio_sem under the log_mutex. What we really need is to protect fsync() from logging an extent map for an extent we never waited on higher up, so just guard the whole thing with dio_sem. ====================================================== WARNING: possible circular locking dependency detected 4.18.0-rc4-xfstests-00025-g5de5edbaf1d4 #411 Not tainted ------------------------------------------------------ aio-dio-invalid/30928 is trying to acquire lock: 0000000092621cfd (&mm->mmap_sem){++++}, at: get_user_pages_unlocked+0x5a/0x1e0 but task is already holding lock: 00000000cefe6b35 (&ei->dio_sem){++++}, at: btrfs_direct_IO+0x3be/0x400 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #5 (&ei->dio_sem){++++}: lock_acquire+0xbd/0x220 down_write+0x51/0xb0 btrfs_log_changed_extents+0x80/0xa40 btrfs_log_inode+0xbaf/0x1000 btrfs_log_inode_parent+0x26f/0xa80 btrfs_log_dentry_safe+0x50/0x70 btrfs_sync_file+0x357/0x540 do_fsync+0x38/0x60 __ia32_sys_fdatasync+0x12/0x20 do_fast_syscall_32+0x9a/0x2f0 entry_SYSENTER_compat+0x84/0x96 -> #4 (&ei->log_mutex){+.+.}: lock_acquire+0xbd/0x220 __mutex_lock+0x86/0xa10 btrfs_record_unlink_dir+0x2a/0xa0 btrfs_unlink+0x5a/0xc0 vfs_unlink+0xb1/0x1a0 do_unlinkat+0x264/0x2b0 do_fast_syscall_32+0x9a/0x2f0 entry_SYSENTER_compat+0x84/0x96 -> #3 (sb_internal#2){.+.+}: lock_acquire+0xbd/0x220 __sb_start_write+0x14d/0x230 start_transaction+0x3e6/0x590 btrfs_evict_inode+0x475/0x640 evict+0xbf/0x1b0 btrfs_run_delayed_iputs+0x6c/0x90 cleaner_kthread+0x124/0x1a0 kthread+0x106/0x140 ret_from_fork+0x3a/0x50 -> #2 (&fs_info->cleaner_delayed_iput_mutex){+.+.}: lock_acquire+0xbd/0x220 __mutex_lock+0x86/0xa10 btrfs_alloc_data_chunk_ondemand+0x197/0x530 btrfs_check_data_free_space+0x4c/0x90 btrfs_delalloc_reserve_space+0x20/0x60 btrfs_page_mkwrite+0x87/0x520 do_page_mkwrite+0x31/0xa0 __handle_mm_fault+0x799/0xb00 handle_mm_fault+0x7c/0xe0 __do_page_fault+0x1d3/0x4a0 async_page_fault+0x1e/0x30 -> #1 (sb_pagefaults){.+.+}: lock_acquire+0xbd/0x220 __sb_start_write+0x14d/0x230 btrfs_page_mkwrite+0x6a/0x520 do_page_mkwrite+0x31/0xa0 __handle_mm_fault+0x799/0xb00 handle_mm_fault+0x7c/0xe0 __do_page_fault+0x1d3/0x4a0 async_page_fault+0x1e/0x30 -> #0 (&mm->mmap_sem){++++}: __lock_acquire+0x42e/0x7a0 lock_acquire+0xbd/0x220 down_read+0x48/0xb0 get_user_pages_unlocked+0x5a/0x1e0 get_user_pages_fast+0xa4/0x150 iov_iter_get_pages+0xc3/0x340 do_direct_IO+0xf93/0x1d70 __blockdev_direct_IO+0x32d/0x1c20 btrfs_direct_IO+0x227/0x400 generic_file_direct_write+0xcf/0x180 btrfs_file_write_iter+0x308/0x58c aio_write+0xf8/0x1d0 io_submit_one+0x3a9/0x620 __ia32_compat_sys_io_submit+0xb2/0x270 do_int80_syscall_32+0x5b/0x1a0 entry_INT80_compat+0x88/0xa0 other info that might help us debug this: Chain exists of: &mm->mmap_sem --> &ei->log_mutex --> &ei->dio_sem Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&ei->dio_sem); lock(&ei->log_mutex); lock(&ei->dio_sem); lock(&mm->mmap_sem); *** DEADLOCK *** 1 lock held by aio-dio-invalid/30928: #0: 00000000cefe6b35 (&ei->dio_sem){++++}, at: btrfs_direct_IO+0x3be/0x400 stack backtrace: CPU: 0 PID: 30928 Comm: aio-dio-invalid Not tainted 4.18.0-rc4-xfstests-00025-g5de5edbaf1d4 #411 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.11.0-2.el7 04/01/2014 Call Trace: dump_stack+0x7c/0xbb print_circular_bug.isra.37+0x297/0x2a4 check_prev_add.constprop.45+0x781/0x7a0 ? __lock_acquire+0x42e/0x7a0 validate_chain.isra.41+0x7f0/0xb00 __lock_acquire+0x42e/0x7a0 lock_acquire+0xbd/0x220 ? get_user_pages_unlocked+0x5a/0x1e0 down_read+0x48/0xb0 ? get_user_pages_unlocked+0x5a/0x1e0 get_user_pages_unlocked+0x5a/0x1e0 get_user_pages_fast+0xa4/0x150 iov_iter_get_pages+0xc3/0x340 do_direct_IO+0xf93/0x1d70 ? __alloc_workqueue_key+0x358/0x490 ? __blockdev_direct_IO+0x14b/0x1c20 __blockdev_direct_IO+0x32d/0x1c20 ? btrfs_run_delalloc_work+0x40/0x40 ? can_nocow_extent+0x490/0x490 ? kvm_clock_read+0x1f/0x30 ? can_nocow_extent+0x490/0x490 ? btrfs_run_delalloc_work+0x40/0x40 btrfs_direct_IO+0x227/0x400 ? btrfs_run_delalloc_work+0x40/0x40 generic_file_direct_write+0xcf/0x180 btrfs_file_write_iter+0x308/0x58c aio_write+0xf8/0x1d0 ? kvm_clock_read+0x1f/0x30 ? __might_fault+0x3e/0x90 io_submit_one+0x3a9/0x620 ? io_submit_one+0xe5/0x620 __ia32_compat_sys_io_submit+0xb2/0x270 do_int80_syscall_32+0x5b/0x1a0 entry_INT80_compat+0x88/0xa0 CC: stable@vger.kernel.org # 4.14+ Reviewed-by: Filipe Manana Signed-off-by: Josef Bacik Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/file.c | 12 ++++++++++++ fs/btrfs/tree-log.c | 2 -- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 5690feded0de5..57e25e83b81a8 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -2078,6 +2078,14 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) goto out; inode_lock(inode); + + /* + * We take the dio_sem here because the tree log stuff can race with + * lockless dio writes and get an extent map logged for an extent we + * never waited on. We need it this high up for lockdep reasons. + */ + down_write(&BTRFS_I(inode)->dio_sem); + atomic_inc(&root->log_batch); full_sync = test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags); @@ -2129,6 +2137,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) ret = start_ordered_ops(inode, start, end); } if (ret) { + up_write(&BTRFS_I(inode)->dio_sem); inode_unlock(inode); goto out; } @@ -2184,6 +2193,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) * checked called fsync. */ ret = filemap_check_wb_err(inode->i_mapping, file->f_wb_err); + up_write(&BTRFS_I(inode)->dio_sem); inode_unlock(inode); goto out; } @@ -2208,6 +2218,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) trans = btrfs_start_transaction(root, 0); if (IS_ERR(trans)) { ret = PTR_ERR(trans); + up_write(&BTRFS_I(inode)->dio_sem); inode_unlock(inode); goto out; } @@ -2229,6 +2240,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) * file again, but that will end up using the synchronization * inside btrfs_sync_log to keep things safe. */ + up_write(&BTRFS_I(inode)->dio_sem); inode_unlock(inode); /* diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index accc54efb66e0..6c4a2ea2ede9c 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -4362,7 +4362,6 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans, INIT_LIST_HEAD(&extents); - down_write(&inode->dio_sem); write_lock(&tree->lock); test_gen = root->fs_info->last_trans_committed; logged_start = start; @@ -4443,7 +4442,6 @@ process: } WARN_ON(!list_empty(&extents)); write_unlock(&tree->lock); - up_write(&inode->dio_sem); btrfs_release_path(path); if (!ret) -- GitLab From bf005ed0fbddd6a5e3a2c1c3b222c4104a9819b5 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Fri, 12 Oct 2018 13:02:48 +0100 Subject: [PATCH 1474/2269] Btrfs: fix use-after-free during inode eviction commit 421f0922a2cfb0c75acd9746454aaa576c711a65 upstream. At inode.c:evict_inode_truncate_pages(), when we iterate over the inode's extent states, we access an extent state record's "state" field after we unlocked the inode's io tree lock. This can lead to a use-after-free issue because after we unlock the io tree that extent state record might have been freed due to being merged into another adjacent extent state record (a previous inflight bio for a read operation finished in the meanwhile which unlocked a range in the io tree and cause a merge of extent state records, as explained in the comment before the while loop added in commit 6ca0709756710 ("Btrfs: fix hang during inode eviction due to concurrent readahead")). Fix this by keeping a copy of the extent state's flags in a local variable and using it after unlocking the io tree. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=201189 Fixes: b9d0b38928e2 ("btrfs: Add handler for invalidate page") CC: stable@vger.kernel.org # 4.4+ Reviewed-by: Qu Wenruo Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/inode.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 39edb4b71fa97..90568a21fa779 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -5335,11 +5335,13 @@ static void evict_inode_truncate_pages(struct inode *inode) struct extent_state *cached_state = NULL; u64 start; u64 end; + unsigned state_flags; node = rb_first(&io_tree->state); state = rb_entry(node, struct extent_state, rb_node); start = state->start; end = state->end; + state_flags = state->state; spin_unlock(&io_tree->lock); lock_extent_bits(io_tree, start, end, &cached_state); @@ -5352,7 +5354,7 @@ static void evict_inode_truncate_pages(struct inode *inode) * * Note, end is the bytenr of last byte, so we need + 1 here. */ - if (state->state & EXTENT_DELALLOC) + if (state_flags & EXTENT_DELALLOC) btrfs_qgroup_free_data(inode, NULL, start, end - start + 1); clear_extent_bit(io_tree, start, end, -- GitLab From 3c2d1ef3dfe0d798266f3ae4d3b7e13aa4956f4d Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 22 Oct 2018 10:43:06 +0100 Subject: [PATCH 1475/2269] Btrfs: fix use-after-free when dumping free space commit 9084cb6a24bf5838a665af92ded1af8363f9e563 upstream. We were iterating a block group's free space cache rbtree without locking first the lock that protects it (the free_space_ctl->free_space_offset rbtree is protected by the free_space_ctl->tree_lock spinlock). KASAN reported an use-after-free problem when iterating such a rbtree due to a concurrent rbtree delete: [ 9520.359168] ================================================================== [ 9520.359656] BUG: KASAN: use-after-free in rb_next+0x13/0x90 [ 9520.359949] Read of size 8 at addr ffff8800b7ada500 by task btrfs-transacti/1721 [ 9520.360357] [ 9520.360530] CPU: 4 PID: 1721 Comm: btrfs-transacti Tainted: G L 4.19.0-rc8-nbor #555 [ 9520.360990] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014 [ 9520.362682] Call Trace: [ 9520.362887] dump_stack+0xa4/0xf5 [ 9520.363146] print_address_description+0x78/0x280 [ 9520.363412] kasan_report+0x263/0x390 [ 9520.363650] ? rb_next+0x13/0x90 [ 9520.363873] __asan_load8+0x54/0x90 [ 9520.364102] rb_next+0x13/0x90 [ 9520.364380] btrfs_dump_free_space+0x146/0x160 [btrfs] [ 9520.364697] dump_space_info+0x2cd/0x310 [btrfs] [ 9520.364997] btrfs_reserve_extent+0x1ee/0x1f0 [btrfs] [ 9520.365310] __btrfs_prealloc_file_range+0x1cc/0x620 [btrfs] [ 9520.365646] ? btrfs_update_time+0x180/0x180 [btrfs] [ 9520.365923] ? _raw_spin_unlock+0x27/0x40 [ 9520.366204] ? btrfs_alloc_data_chunk_ondemand+0x2c0/0x5c0 [btrfs] [ 9520.366549] btrfs_prealloc_file_range_trans+0x23/0x30 [btrfs] [ 9520.366880] cache_save_setup+0x42e/0x580 [btrfs] [ 9520.367220] ? btrfs_check_data_free_space+0xd0/0xd0 [btrfs] [ 9520.367518] ? lock_downgrade+0x2f0/0x2f0 [ 9520.367799] ? btrfs_write_dirty_block_groups+0x11f/0x6e0 [btrfs] [ 9520.368104] ? kasan_check_read+0x11/0x20 [ 9520.368349] ? do_raw_spin_unlock+0xa8/0x140 [ 9520.368638] btrfs_write_dirty_block_groups+0x2af/0x6e0 [btrfs] [ 9520.368978] ? btrfs_start_dirty_block_groups+0x870/0x870 [btrfs] [ 9520.369282] ? do_raw_spin_unlock+0xa8/0x140 [ 9520.369534] ? _raw_spin_unlock+0x27/0x40 [ 9520.369811] ? btrfs_run_delayed_refs+0x1b8/0x230 [btrfs] [ 9520.370137] commit_cowonly_roots+0x4b9/0x610 [btrfs] [ 9520.370560] ? commit_fs_roots+0x350/0x350 [btrfs] [ 9520.370926] ? btrfs_run_delayed_refs+0x1b8/0x230 [btrfs] [ 9520.371285] btrfs_commit_transaction+0x5e5/0x10e0 [btrfs] [ 9520.371612] ? btrfs_apply_pending_changes+0x90/0x90 [btrfs] [ 9520.371943] ? start_transaction+0x168/0x6c0 [btrfs] [ 9520.372257] transaction_kthread+0x21c/0x240 [btrfs] [ 9520.372537] kthread+0x1d2/0x1f0 [ 9520.372793] ? btrfs_cleanup_transaction+0xb50/0xb50 [btrfs] [ 9520.373090] ? kthread_park+0xb0/0xb0 [ 9520.373329] ret_from_fork+0x3a/0x50 [ 9520.373567] [ 9520.373738] Allocated by task 1804: [ 9520.373974] kasan_kmalloc+0xff/0x180 [ 9520.374208] kasan_slab_alloc+0x11/0x20 [ 9520.374447] kmem_cache_alloc+0xfc/0x2d0 [ 9520.374731] __btrfs_add_free_space+0x40/0x580 [btrfs] [ 9520.375044] unpin_extent_range+0x4f7/0x7a0 [btrfs] [ 9520.375383] btrfs_finish_extent_commit+0x15f/0x4d0 [btrfs] [ 9520.375707] btrfs_commit_transaction+0xb06/0x10e0 [btrfs] [ 9520.376027] btrfs_alloc_data_chunk_ondemand+0x237/0x5c0 [btrfs] [ 9520.376365] btrfs_check_data_free_space+0x81/0xd0 [btrfs] [ 9520.376689] btrfs_delalloc_reserve_space+0x25/0x80 [btrfs] [ 9520.377018] btrfs_direct_IO+0x42e/0x6d0 [btrfs] [ 9520.377284] generic_file_direct_write+0x11e/0x220 [ 9520.377587] btrfs_file_write_iter+0x472/0xac0 [btrfs] [ 9520.377875] aio_write+0x25c/0x360 [ 9520.378106] io_submit_one+0xaa0/0xdc0 [ 9520.378343] __se_sys_io_submit+0xfa/0x2f0 [ 9520.378589] __x64_sys_io_submit+0x43/0x50 [ 9520.378840] do_syscall_64+0x7d/0x240 [ 9520.379081] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 9520.379387] [ 9520.379557] Freed by task 1802: [ 9520.379782] __kasan_slab_free+0x173/0x260 [ 9520.380028] kasan_slab_free+0xe/0x10 [ 9520.380262] kmem_cache_free+0xc1/0x2c0 [ 9520.380544] btrfs_find_space_for_alloc+0x4cd/0x4e0 [btrfs] [ 9520.380866] find_free_extent+0xa99/0x17e0 [btrfs] [ 9520.381166] btrfs_reserve_extent+0xd5/0x1f0 [btrfs] [ 9520.381474] btrfs_get_blocks_direct+0x60b/0xbd0 [btrfs] [ 9520.381761] __blockdev_direct_IO+0x10ee/0x58a1 [ 9520.382059] btrfs_direct_IO+0x25a/0x6d0 [btrfs] [ 9520.382321] generic_file_direct_write+0x11e/0x220 [ 9520.382623] btrfs_file_write_iter+0x472/0xac0 [btrfs] [ 9520.382904] aio_write+0x25c/0x360 [ 9520.383172] io_submit_one+0xaa0/0xdc0 [ 9520.383416] __se_sys_io_submit+0xfa/0x2f0 [ 9520.383678] __x64_sys_io_submit+0x43/0x50 [ 9520.383927] do_syscall_64+0x7d/0x240 [ 9520.384165] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 9520.384439] [ 9520.384610] The buggy address belongs to the object at ffff8800b7ada500 which belongs to the cache btrfs_free_space of size 72 [ 9520.385175] The buggy address is located 0 bytes inside of 72-byte region [ffff8800b7ada500, ffff8800b7ada548) [ 9520.385691] The buggy address belongs to the page: [ 9520.385957] page:ffffea0002deb680 count:1 mapcount:0 mapping:ffff880108a1d700 index:0x0 compound_mapcount: 0 [ 9520.388030] flags: 0x8100(slab|head) [ 9520.388281] raw: 0000000000008100 ffffea0002deb608 ffffea0002728808 ffff880108a1d700 [ 9520.388722] raw: 0000000000000000 0000000000130013 00000001ffffffff 0000000000000000 [ 9520.389169] page dumped because: kasan: bad access detected [ 9520.389473] [ 9520.389658] Memory state around the buggy address: [ 9520.389943] ffff8800b7ada400: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 9520.390368] ffff8800b7ada480: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 9520.390796] >ffff8800b7ada500: fb fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc [ 9520.391223] ^ [ 9520.391461] ffff8800b7ada580: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 9520.391885] ffff8800b7ada600: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 9520.392313] ================================================================== [ 9520.392772] BTRFS critical (device vdc): entry offset 2258497536, bytes 131072, bitmap no [ 9520.393247] BUG: unable to handle kernel NULL pointer dereference at 0000000000000011 [ 9520.393705] PGD 800000010dbab067 P4D 800000010dbab067 PUD 107551067 PMD 0 [ 9520.394059] Oops: 0000 [#1] SMP DEBUG_PAGEALLOC KASAN PTI [ 9520.394378] CPU: 4 PID: 1721 Comm: btrfs-transacti Tainted: G B L 4.19.0-rc8-nbor #555 [ 9520.394858] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014 [ 9520.395350] RIP: 0010:rb_next+0x3c/0x90 [ 9520.396461] RSP: 0018:ffff8801074ff780 EFLAGS: 00010292 [ 9520.396762] RAX: 0000000000000000 RBX: 0000000000000001 RCX: ffffffff81b5ac4c [ 9520.397115] RDX: 0000000000000000 RSI: 0000000000000008 RDI: 0000000000000011 [ 9520.397468] RBP: ffff8801074ff7a0 R08: ffffed0021d64ccc R09: ffffed0021d64ccc [ 9520.397821] R10: 0000000000000001 R11: ffffed0021d64ccb R12: ffff8800b91e0000 [ 9520.398188] R13: ffff8800a3ceba48 R14: ffff8800b627bf80 R15: 0000000000020000 [ 9520.398555] FS: 0000000000000000(0000) GS:ffff88010eb00000(0000) knlGS:0000000000000000 [ 9520.399007] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 9520.399335] CR2: 0000000000000011 CR3: 0000000106b52000 CR4: 00000000000006a0 [ 9520.399679] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 9520.400023] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 9520.400400] Call Trace: [ 9520.400648] btrfs_dump_free_space+0x146/0x160 [btrfs] [ 9520.400974] dump_space_info+0x2cd/0x310 [btrfs] [ 9520.401287] btrfs_reserve_extent+0x1ee/0x1f0 [btrfs] [ 9520.401609] __btrfs_prealloc_file_range+0x1cc/0x620 [btrfs] [ 9520.401952] ? btrfs_update_time+0x180/0x180 [btrfs] [ 9520.402232] ? _raw_spin_unlock+0x27/0x40 [ 9520.402522] ? btrfs_alloc_data_chunk_ondemand+0x2c0/0x5c0 [btrfs] [ 9520.402882] btrfs_prealloc_file_range_trans+0x23/0x30 [btrfs] [ 9520.403261] cache_save_setup+0x42e/0x580 [btrfs] [ 9520.403570] ? btrfs_check_data_free_space+0xd0/0xd0 [btrfs] [ 9520.403871] ? lock_downgrade+0x2f0/0x2f0 [ 9520.404161] ? btrfs_write_dirty_block_groups+0x11f/0x6e0 [btrfs] [ 9520.404481] ? kasan_check_read+0x11/0x20 [ 9520.404732] ? do_raw_spin_unlock+0xa8/0x140 [ 9520.405026] btrfs_write_dirty_block_groups+0x2af/0x6e0 [btrfs] [ 9520.405375] ? btrfs_start_dirty_block_groups+0x870/0x870 [btrfs] [ 9520.405694] ? do_raw_spin_unlock+0xa8/0x140 [ 9520.405958] ? _raw_spin_unlock+0x27/0x40 [ 9520.406243] ? btrfs_run_delayed_refs+0x1b8/0x230 [btrfs] [ 9520.406574] commit_cowonly_roots+0x4b9/0x610 [btrfs] [ 9520.406899] ? commit_fs_roots+0x350/0x350 [btrfs] [ 9520.407253] ? btrfs_run_delayed_refs+0x1b8/0x230 [btrfs] [ 9520.407589] btrfs_commit_transaction+0x5e5/0x10e0 [btrfs] [ 9520.407925] ? btrfs_apply_pending_changes+0x90/0x90 [btrfs] [ 9520.408262] ? start_transaction+0x168/0x6c0 [btrfs] [ 9520.408582] transaction_kthread+0x21c/0x240 [btrfs] [ 9520.408870] kthread+0x1d2/0x1f0 [ 9520.409138] ? btrfs_cleanup_transaction+0xb50/0xb50 [btrfs] [ 9520.409440] ? kthread_park+0xb0/0xb0 [ 9520.409682] ret_from_fork+0x3a/0x50 [ 9520.410508] Dumping ftrace buffer: [ 9520.410764] (ftrace buffer empty) [ 9520.411007] CR2: 0000000000000011 [ 9520.411297] ---[ end trace 01a0863445cf360a ]--- [ 9520.411568] RIP: 0010:rb_next+0x3c/0x90 [ 9520.412644] RSP: 0018:ffff8801074ff780 EFLAGS: 00010292 [ 9520.412932] RAX: 0000000000000000 RBX: 0000000000000001 RCX: ffffffff81b5ac4c [ 9520.413274] RDX: 0000000000000000 RSI: 0000000000000008 RDI: 0000000000000011 [ 9520.413616] RBP: ffff8801074ff7a0 R08: ffffed0021d64ccc R09: ffffed0021d64ccc [ 9520.414007] R10: 0000000000000001 R11: ffffed0021d64ccb R12: ffff8800b91e0000 [ 9520.414349] R13: ffff8800a3ceba48 R14: ffff8800b627bf80 R15: 0000000000020000 [ 9520.416074] FS: 0000000000000000(0000) GS:ffff88010eb00000(0000) knlGS:0000000000000000 [ 9520.416536] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 9520.416848] CR2: 0000000000000011 CR3: 0000000106b52000 CR4: 00000000000006a0 [ 9520.418477] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 9520.418846] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 9520.419204] Kernel panic - not syncing: Fatal exception [ 9520.419666] Dumping ftrace buffer: [ 9520.419930] (ftrace buffer empty) [ 9520.420168] Kernel Offset: disabled [ 9520.420406] ---[ end Kernel panic - not syncing: Fatal exception ]--- Fix this by acquiring the respective lock before iterating the rbtree. Reported-by: Nikolay Borisov CC: stable@vger.kernel.org # 4.4+ Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/free-space-cache.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index a8b4e4d7b82de..9f31b81a5e27d 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -2482,6 +2482,7 @@ void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group, struct rb_node *n; int count = 0; + spin_lock(&ctl->tree_lock); for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) { info = rb_entry(n, struct btrfs_free_space, offset_index); if (info->bytes >= bytes && !block_group->ro) @@ -2490,6 +2491,7 @@ void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group, info->offset, info->bytes, (info->bitmap) ? "yes" : "no"); } + spin_unlock(&ctl->tree_lock); btrfs_info(fs_info, "block group has cluster?: %s", list_empty(&block_group->cluster_list) ? "no" : "yes"); btrfs_info(fs_info, -- GitLab From fc8a236ee0580c920c48696f0df806affa892cbc Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 26 Mar 2018 23:59:00 +0100 Subject: [PATCH 1476/2269] Btrfs: fix fsync after hole punching when using no-holes feature commit 4ee3fad34a9cc2cf33303dfbd0cf554248651c86 upstream. When we have the no-holes mode enabled and fsync a file after punching a hole in it, we can end up not logging the whole hole range in the log tree. This happens if the file has extent items that span more than one leaf and we punch a hole that covers a range that starts in a leaf but does not go beyond the offset of the first extent in the next leaf. Example: $ mkfs.btrfs -f -O no-holes -n 65536 /dev/sdb $ mount /dev/sdb /mnt $ for ((i = 0; i <= 831; i++)); do offset=$((i * 2 * 256 * 1024)) xfs_io -f -c "pwrite -S 0xab -b 256K $offset 256K" \ /mnt/foobar >/dev/null done $ sync # We now have 2 leafs in our filesystem fs tree, the first leaf has an # item corresponding the extent at file offset 216530944 and the second # leaf has a first item corresponding to the extent at offset 217055232. # Now we punch a hole that partially covers the range of the extent at # offset 216530944 but does go beyond the offset 217055232. $ xfs_io -c "fpunch $((216530944 + 128 * 1024 - 4000)) 256K" /mnt/foobar $ xfs_io -c "fsync" /mnt/foobar # mount to replay the log $ mount /dev/sdb /mnt # Before this patch, only the subrange [216658016, 216662016[ (length of # 4000 bytes) was logged, leaving an incorrect file layout after log # replay. Fix this by checking if there is a hole between the last extent item that we processed and the first extent item in the next leaf, and if there is one, log an explicit hole extent item. Fixes: 16e7549f045d ("Btrfs: incompatible format change to remove hole extents") Signed-off-by: Filipe Manana Signed-off-by: David Sterba Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/tree-log.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 6c4a2ea2ede9c..2109db1964497 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -3978,6 +3978,36 @@ fill_holes: break; *last_extent = extent_end; } + + /* + * Check if there is a hole between the last extent found in our leaf + * and the first extent in the next leaf. If there is one, we need to + * log an explicit hole so that at replay time we can punch the hole. + */ + if (ret == 0 && + key.objectid == btrfs_ino(inode) && + key.type == BTRFS_EXTENT_DATA_KEY && + i == btrfs_header_nritems(src_path->nodes[0])) { + ret = btrfs_next_leaf(inode->root, src_path); + need_find_last_extent = true; + if (ret > 0) { + ret = 0; + } else if (ret == 0) { + btrfs_item_key_to_cpu(src_path->nodes[0], &key, + src_path->slots[0]); + if (key.objectid == btrfs_ino(inode) && + key.type == BTRFS_EXTENT_DATA_KEY && + *last_extent < key.offset) { + const u64 len = key.offset - *last_extent; + + ret = btrfs_insert_file_extent(trans, log, + btrfs_ino(inode), + *last_extent, 0, + 0, len, 0, len, + 0, 0, 0); + } + } + } /* * Need to let the callers know we dropped the path so they should * re-search. -- GitLab From dfba4092167df3bb9b6abb0510ea4c680bb8ece5 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 24 Oct 2018 08:32:49 -0700 Subject: [PATCH 1477/2269] net: sched: Remove TCA_OPTIONS from policy commit e72bde6b66299602087c8c2350d36a525e75d06e upstream. Marco reported an error with hfsc: root@Calimero:~# tc qdisc add dev eth0 root handle 1:0 hfsc default 1 Error: Attribute failed policy validation. Apparently a few implementations pass TCA_OPTIONS as a binary instead of nested attribute, so drop TCA_OPTIONS from the policy. Fixes: 8b4c3cdd9dd8 ("net: sched: Add policy validation for tc attributes") Reported-by: Marco Berizzi Signed-off-by: David Ahern Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/sch_api.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 691ca96f7460b..7b4270987ac16 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1218,7 +1218,6 @@ check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w) const struct nla_policy rtm_tca_policy[TCA_MAX + 1] = { [TCA_KIND] = { .type = NLA_STRING }, - [TCA_OPTIONS] = { .type = NLA_NESTED }, [TCA_RATE] = { .type = NLA_BINARY, .len = sizeof(struct tc_estimator) }, [TCA_STAB] = { .type = NLA_NESTED }, -- GitLab From 4bea15f7933dc87a091a7481590dd2ea1088b2bf Mon Sep 17 00:00:00 2001 From: Daniel Colascione Date: Fri, 12 Oct 2018 03:54:27 -0700 Subject: [PATCH 1478/2269] bpf: wait for running BPF programs when updating map-in-map commit 1ae80cf31938c8f77c37a29bbe29e7f1cd492be8 upstream. The map-in-map frequently serves as a mechanism for atomic snapshotting of state that a BPF program might record. The current implementation is dangerous to use in this way, however, since userspace has no way of knowing when all programs that might have retrieved the "old" value of the map may have completed. This change ensures that map update operations on map-in-map map types always wait for all references to the old map to drop before returning to userspace. Signed-off-by: Daniel Colascione Reviewed-by: Joel Fernandes (Google) Signed-off-by: Alexei Starovoitov [fengc@google.com: 4.14 backport: adjust context] Signed-off-by: Chenbo Feng Signed-off-by: Greg Kroah-Hartman --- kernel/bpf/syscall.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index ea22d0b6a9f0a..5c9deed4524e9 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -519,6 +519,17 @@ err_put: return err; } +static void maybe_wait_bpf_programs(struct bpf_map *map) +{ + /* Wait for any running BPF programs to complete so that + * userspace, when we return to it, knows that all programs + * that could be running use the new map value. + */ + if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS || + map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS) + synchronize_rcu(); +} + #define BPF_MAP_UPDATE_ELEM_LAST_FIELD flags static int map_update_elem(union bpf_attr *attr) @@ -592,6 +603,7 @@ static int map_update_elem(union bpf_attr *attr) } __this_cpu_dec(bpf_prog_active); preempt_enable(); + maybe_wait_bpf_programs(map); if (!err) trace_bpf_map_update_elem(map, ufd, key, value); @@ -636,6 +648,7 @@ static int map_delete_elem(union bpf_attr *attr) rcu_read_unlock(); __this_cpu_dec(bpf_prog_active); preempt_enable(); + maybe_wait_bpf_programs(map); if (!err) trace_bpf_map_delete_elem(map, ufd, key); -- GitLab From c3cd0a4fe429089701a3480cbbf355495e0642c7 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Sun, 14 Oct 2018 17:05:07 -0700 Subject: [PATCH 1479/2269] MD: fix invalid stored role for a disk - try2 commit 9e753ba9b9b405e3902d9f08aec5f2ea58a0c317 upstream. Commit d595567dc4f0 (MD: fix invalid stored role for a disk) broke linear hotadd. Let's only fix the role for disks in raid1/10. Based on Guoqing's original patch. Reported-by: kernel test robot Cc: Gioh Kim Cc: Guoqing Jiang Signed-off-by: Shaohua Li Signed-off-by: Greg Kroah-Hartman --- drivers/md/md.c | 4 ---- drivers/md/raid1.c | 1 + drivers/md/raid10.c | 1 + 3 files changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 4c1a6abed6069..5599712d478e6 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -1766,10 +1766,6 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev) } else set_bit(In_sync, &rdev->flags); rdev->raid_disk = role; - if (role >= mddev->raid_disks) { - rdev->saved_raid_disk = -1; - rdev->raid_disk = -1; - } break; } if (sb->devflags & WriteMostly1) diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 78d8307637045..205f86f1a6cbe 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1725,6 +1725,7 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev) */ if (rdev->saved_raid_disk >= 0 && rdev->saved_raid_disk >= first && + rdev->saved_raid_disk < conf->raid_disks && conf->mirrors[rdev->saved_raid_disk].rdev == NULL) first = last = rdev->saved_raid_disk; diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 927b60e9d3ca2..e786546bf3b84 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -1775,6 +1775,7 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev) first = last = rdev->raid_disk; if (rdev->saved_raid_disk >= first && + rdev->saved_raid_disk < conf->geo.raid_disks && conf->mirrors[rdev->saved_raid_disk].rdev == NULL) mirror = rdev->saved_raid_disk; else -- GitLab From 2e390c487815669fb9bb35d7ea11883cc10a9b50 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 13 Nov 2018 11:15:18 -0800 Subject: [PATCH 1480/2269] Linux 4.14.81 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index f4cad5e035617..2fe1424d61d22 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 80 +SUBLEVEL = 81 EXTRAVERSION = NAME = Petit Gorille -- GitLab From 90c68f716d57886f4e6f3ff7c7c07014b4254745 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Sat, 13 Oct 2018 09:16:22 +0000 Subject: [PATCH 1481/2269] powerpc/traps: restore recoverability of machine_check interrupts [ Upstream commit daf00ae71dad8aa05965713c62558aeebf2df48e ] commit b96672dd840f ("powerpc: Machine check interrupt is a non- maskable interrupt") added a call to nmi_enter() at the beginning of machine check restart exception handler. Due to that, in_interrupt() always returns true regardless of the state before entering the exception, and die() panics even when the system was not already in interrupt. This patch calls nmi_exit() before calling die() in order to restore the interrupt state we had before calling nmi_enter() Fixes: b96672dd840f ("powerpc: Machine check interrupt is a non-maskable interrupt") Signed-off-by: Christophe Leroy Reviewed-by: Nicholas Piggin Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/traps.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index ac2e5e56a9f0c..a5f2b7593976d 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -694,12 +694,17 @@ void machine_check_exception(struct pt_regs *regs) if (check_io_access(regs)) goto bail; - die("Machine check", regs, SIGBUS); - /* Must die if the interrupt is not recoverable */ if (!(regs->msr & MSR_RI)) nmi_panic(regs, "Unrecoverable Machine check"); + if (!nested) + nmi_exit(); + + die("Machine check", regs, SIGBUS); + + return; + bail: if (!nested) nmi_exit(); -- GitLab From 2b9aed7cb2e2dd48092ce1950a0c4bdd792a19d3 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 29 Aug 2018 21:56:56 +1000 Subject: [PATCH 1482/2269] powerpc/64/module: REL32 relocation range check [ Upstream commit b851ba02a6f3075f0f99c60c4bc30a4af80cf428 ] The recent module relocation overflow crash demonstrated that we have no range checking on REL32 relative relocations. This patch implements a basic check, the same kernel that previously oopsed and rebooted now continues with some of these errors when loading the module: module_64: x_tables: REL32 527703503449812 out of range! Possibly other relocations (ADDR32, REL16, TOC16, etc.) should also have overflow checks. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/module_64.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index 2a1b1273a3120..4d8f6291b7661 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -656,7 +656,14 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, case R_PPC64_REL32: /* 32 bits relative (used by relative exception tables) */ - *(u32 *)location = value - (unsigned long)location; + /* Convert value to relative */ + value -= (unsigned long)location; + if (value + 0x80000000 > 0xffffffff) { + pr_err("%s: REL32 %li out of range!\n", + me->name, (long int)value); + return -ENOEXEC; + } + *(u32 *)location = value; break; case R_PPC64_TOCSAVE: -- GitLab From 0b1f1204554e4ade5af9ea8d59ce6b59ef95e894 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 15 Aug 2018 21:29:45 +1000 Subject: [PATCH 1483/2269] powerpc/mm: Fix page table dump to work on Radix [ Upstream commit 0d923962ab69c27cca664a2d535e90ef655110ca ] When we're running on Book3S with the Radix MMU enabled the page table dump currently prints the wrong addresses because it uses the wrong start address. Fix it to use PAGE_OFFSET rather than KERN_VIRT_START. Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/mm/dump_linuxpagetables.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/mm/dump_linuxpagetables.c b/arch/powerpc/mm/dump_linuxpagetables.c index c9282d27b2037..31c1c61afaa42 100644 --- a/arch/powerpc/mm/dump_linuxpagetables.c +++ b/arch/powerpc/mm/dump_linuxpagetables.c @@ -422,12 +422,13 @@ static void walk_pagetables(struct pg_state *st) unsigned int i; unsigned long addr; + addr = st->start_address; + /* * Traverse the linux pagetable structure and dump pages that are in * the hash pagetable. */ - for (i = 0; i < PTRS_PER_PGD; i++, pgd++) { - addr = KERN_VIRT_START + i * PGDIR_SIZE; + for (i = 0; i < PTRS_PER_PGD; i++, pgd++, addr += PGDIR_SIZE) { if (!pgd_none(*pgd) && !pgd_huge(*pgd)) /* pgd exists */ walk_pud(st, pgd, addr); @@ -476,9 +477,14 @@ static int ptdump_show(struct seq_file *m, void *v) { struct pg_state st = { .seq = m, - .start_address = KERN_VIRT_START, .marker = address_markers, }; + + if (radix_enabled()) + st.start_address = PAGE_OFFSET; + else + st.start_address = KERN_VIRT_START; + /* Traverse kernel page tables */ walk_pagetables(&st); note_page(&st, 0, 0, 0); -- GitLab From 211981e7e186dd3d9119a4f9f90633a677a2d58b Mon Sep 17 00:00:00 2001 From: Sam Bobroff Date: Wed, 12 Sep 2018 11:23:20 +1000 Subject: [PATCH 1484/2269] powerpc/eeh: Fix possible null deref in eeh_dump_dev_log() [ Upstream commit f9bc28aedfb5bbd572d2d365f3095c1becd7209b ] If an error occurs during an unplug operation, it's possible for eeh_dump_dev_log() to be called when edev->pdn is null, which currently leads to dereferencing a null pointer. Handle this by skipping the error log for those devices. Signed-off-by: Sam Bobroff Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/eeh.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 116000b455319..45322b37669a9 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -169,6 +169,11 @@ static size_t eeh_dump_dev_log(struct eeh_dev *edev, char *buf, size_t len) int n = 0, l = 0; char buffer[128]; + if (!pdn) { + pr_warn("EEH: Note: No error log for absent device.\n"); + return 0; + } + n += scnprintf(buf+n, len-n, "%04x:%02x:%02x.%01x\n", pdn->phb->global_number, pdn->busno, PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn)); -- GitLab From 8c5800cdb75c354ec0224f0fbab9e4c604b065c0 Mon Sep 17 00:00:00 2001 From: Miles Chen Date: Mon, 8 Oct 2018 10:39:17 +0800 Subject: [PATCH 1485/2269] tty: check name length in tty_find_polling_driver() [ Upstream commit 33a1a7be198657c8ca26ad406c4d2a89b7162bcc ] The issue is found by a fuzzing test. If tty_find_polling_driver() recevies an incorrect input such as ',,' or '0b', the len becomes 0 and strncmp() always return 0. In this case, a null p->ops->poll_init() is called and it causes a kernel panic. Fix this by checking name length against zero in tty_find_polling_driver(). $echo ,, > /sys/module/kgdboc/parameters/kgdboc [ 20.804451] WARNING: CPU: 1 PID: 104 at drivers/tty/serial/serial_core.c:457 uart_get_baud_rate+0xe8/0x190 [ 20.804917] Modules linked in: [ 20.805317] CPU: 1 PID: 104 Comm: sh Not tainted 4.19.0-rc7ajb #8 [ 20.805469] Hardware name: linux,dummy-virt (DT) [ 20.805732] pstate: 20000005 (nzCv daif -PAN -UAO) [ 20.805895] pc : uart_get_baud_rate+0xe8/0x190 [ 20.806042] lr : uart_get_baud_rate+0xc0/0x190 [ 20.806476] sp : ffffffc06acff940 [ 20.806676] x29: ffffffc06acff940 x28: 0000000000002580 [ 20.806977] x27: 0000000000009600 x26: 0000000000009600 [ 20.807231] x25: ffffffc06acffad0 x24: 00000000ffffeff0 [ 20.807576] x23: 0000000000000001 x22: 0000000000000000 [ 20.807807] x21: 0000000000000001 x20: 0000000000000000 [ 20.808049] x19: ffffffc06acffac8 x18: 0000000000000000 [ 20.808277] x17: 0000000000000000 x16: 0000000000000000 [ 20.808520] x15: ffffffffffffffff x14: ffffffff00000000 [ 20.808757] x13: ffffffffffffffff x12: 0000000000000001 [ 20.809011] x11: 0101010101010101 x10: ffffff880d59ff5f [ 20.809292] x9 : ffffff880d59ff5e x8 : ffffffc06acffaf3 [ 20.809549] x7 : 0000000000000000 x6 : ffffff880d59ff5f [ 20.809803] x5 : 0000000080008001 x4 : 0000000000000003 [ 20.810056] x3 : ffffff900853e6b4 x2 : dfffff9000000000 [ 20.810693] x1 : ffffffc06acffad0 x0 : 0000000000000cb0 [ 20.811005] Call trace: [ 20.811214] uart_get_baud_rate+0xe8/0x190 [ 20.811479] serial8250_do_set_termios+0xe0/0x6f4 [ 20.811719] serial8250_set_termios+0x48/0x54 [ 20.811928] uart_set_options+0x138/0x1bc [ 20.812129] uart_poll_init+0x114/0x16c [ 20.812330] tty_find_polling_driver+0x158/0x200 [ 20.812545] configure_kgdboc+0xbc/0x1bc [ 20.812745] param_set_kgdboc_var+0xb8/0x150 [ 20.812960] param_attr_store+0xbc/0x150 [ 20.813160] module_attr_store+0x40/0x58 [ 20.813364] sysfs_kf_write+0x8c/0xa8 [ 20.813563] kernfs_fop_write+0x154/0x290 [ 20.813764] vfs_write+0xf0/0x278 [ 20.813951] __arm64_sys_write+0x84/0xf4 [ 20.814400] el0_svc_common+0xf4/0x1dc [ 20.814616] el0_svc_handler+0x98/0xbc [ 20.814804] el0_svc+0x8/0xc [ 20.822005] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000 [ 20.826913] Mem abort info: [ 20.827103] ESR = 0x84000006 [ 20.827352] Exception class = IABT (current EL), IL = 16 bits [ 20.827655] SET = 0, FnV = 0 [ 20.827855] EA = 0, S1PTW = 0 [ 20.828135] user pgtable: 4k pages, 39-bit VAs, pgdp = (____ptrval____) [ 20.828484] [0000000000000000] pgd=00000000aadee003, pud=00000000aadee003, pmd=0000000000000000 [ 20.829195] Internal error: Oops: 84000006 [#1] SMP [ 20.829564] Modules linked in: [ 20.829890] CPU: 1 PID: 104 Comm: sh Tainted: G W 4.19.0-rc7ajb #8 [ 20.830545] Hardware name: linux,dummy-virt (DT) [ 20.830829] pstate: 60000085 (nZCv daIf -PAN -UAO) [ 20.831174] pc : (null) [ 20.831457] lr : serial8250_do_set_termios+0x358/0x6f4 [ 20.831727] sp : ffffffc06acff9b0 [ 20.831936] x29: ffffffc06acff9b0 x28: ffffff9008d7c000 [ 20.832267] x27: ffffff900969e16f x26: 0000000000000000 [ 20.832589] x25: ffffff900969dfb0 x24: 0000000000000000 [ 20.832906] x23: ffffffc06acffad0 x22: ffffff900969e160 [ 20.833232] x21: 0000000000000000 x20: ffffffc06acffac8 [ 20.833559] x19: ffffff900969df90 x18: 0000000000000000 [ 20.833878] x17: 0000000000000000 x16: 0000000000000000 [ 20.834491] x15: ffffffffffffffff x14: ffffffff00000000 [ 20.834821] x13: ffffffffffffffff x12: 0000000000000001 [ 20.835143] x11: 0101010101010101 x10: ffffff880d59ff5f [ 20.835467] x9 : ffffff880d59ff5e x8 : ffffffc06acffaf3 [ 20.835790] x7 : 0000000000000000 x6 : ffffff880d59ff5f [ 20.836111] x5 : c06419717c314100 x4 : 0000000000000007 [ 20.836419] x3 : 0000000000000000 x2 : 0000000000000000 [ 20.836732] x1 : 0000000000000001 x0 : ffffff900969df90 [ 20.837100] Process sh (pid: 104, stack limit = 0x(____ptrval____)) [ 20.837396] Call trace: [ 20.837566] (null) [ 20.837816] serial8250_set_termios+0x48/0x54 [ 20.838089] uart_set_options+0x138/0x1bc [ 20.838570] uart_poll_init+0x114/0x16c [ 20.838834] tty_find_polling_driver+0x158/0x200 [ 20.839119] configure_kgdboc+0xbc/0x1bc [ 20.839380] param_set_kgdboc_var+0xb8/0x150 [ 20.839658] param_attr_store+0xbc/0x150 [ 20.839920] module_attr_store+0x40/0x58 [ 20.840183] sysfs_kf_write+0x8c/0xa8 [ 20.840183] sysfs_kf_write+0x8c/0xa8 [ 20.840440] kernfs_fop_write+0x154/0x290 [ 20.840702] vfs_write+0xf0/0x278 [ 20.840942] __arm64_sys_write+0x84/0xf4 [ 20.841209] el0_svc_common+0xf4/0x1dc [ 20.841471] el0_svc_handler+0x98/0xbc [ 20.841713] el0_svc+0x8/0xc [ 20.842057] Code: bad PC value [ 20.842764] ---[ end trace a8835d7de79aaadf ]--- [ 20.843134] Kernel panic - not syncing: Fatal exception [ 20.843515] SMP: stopping secondary CPUs [ 20.844289] Kernel Offset: disabled [ 20.844634] CPU features: 0x0,21806002 [ 20.844857] Memory Limit: none [ 20.845172] ---[ end Kernel panic - not syncing: Fatal exception ]--- Signed-off-by: Miles Chen Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/tty/tty_io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index 8d65b2f9ee806..83376caa571b7 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -407,7 +407,7 @@ struct tty_driver *tty_find_polling_driver(char *name, int *line) mutex_lock(&tty_mutex); /* Search through the tty devices to look for a match */ list_for_each_entry(p, &tty_drivers, tty_drivers) { - if (strncmp(name, p->name, len) != 0) + if (!len || strncmp(name, p->name, len) != 0) continue; stp = str; if (*stp == ',') -- GitLab From 6c2449a04c43faf8b6ca52cb11617c77fd3ee4dc Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Mon, 10 Sep 2018 14:45:23 -0300 Subject: [PATCH 1486/2269] ARM: imx_v6_v7_defconfig: Select CONFIG_TMPFS_POSIX_ACL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 35d3cbe84544da74e39e1cec01374092467e3119 ] Andreas Müller reports: "Fixes: | Sep 04 09:05:10 imx6qdl-variscite-som systemd-udevd[220]: Failed to apply ACL on /dev/v4l-subdev0: Operation not supported | Sep 04 09:05:10 imx6qdl-variscite-som systemd-udevd[224]: Failed to apply ACL on /dev/v4l-subdev1: Operation not supported | Sep 04 09:05:10 imx6qdl-variscite-som systemd-udevd[215]: Failed to apply ACL on /dev/v4l-subdev10: Operation not supported | Sep 04 09:05:10 imx6qdl-variscite-som systemd-udevd[228]: Failed to apply ACL on /dev/v4l-subdev2: Operation not supported | Sep 04 09:05:10 imx6qdl-variscite-som systemd-udevd[232]: Failed to apply ACL on /dev/v4l-subdev5: Operation not supported | Sep 04 09:05:10 imx6qdl-variscite-som systemd-udevd[217]: Failed to apply ACL on /dev/v4l-subdev11: Operation not supported | Sep 04 09:05:10 imx6qdl-variscite-som systemd-udevd[214]: Failed to apply ACL on /dev/dri/card1: Operation not supported | Sep 04 09:05:10 imx6qdl-variscite-som systemd-udevd[216]: Failed to apply ACL on /dev/v4l-subdev8: Operation not supported | Sep 04 09:05:10 imx6qdl-variscite-som systemd-udevd[226]: Failed to apply ACL on /dev/v4l-subdev9: Operation not supported and nasty follow-ups: Starting weston from sddm as unpriviledged user fails with some hints on missing access rights." Select the CONFIG_TMPFS_POSIX_ACL option to fix these issues. Reported-by: Andreas Müller Signed-off-by: Fabio Estevam Acked-by: Otavio Salvador Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/arm/configs/imx_v6_v7_defconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/configs/imx_v6_v7_defconfig b/arch/arm/configs/imx_v6_v7_defconfig index 32acac9ab81aa..9c795ceedd5bb 100644 --- a/arch/arm/configs/imx_v6_v7_defconfig +++ b/arch/arm/configs/imx_v6_v7_defconfig @@ -383,6 +383,7 @@ CONFIG_ZISOFS=y CONFIG_UDF_FS=m CONFIG_MSDOS_FS=m CONFIG_VFAT_FS=y +CONFIG_TMPFS_POSIX_ACL=y CONFIG_JFFS2_FS=y CONFIG_UBIFS_FS=y CONFIG_NFS_FS=y -- GitLab From 3374b0b15ec7c25aa98274aa95dbdea6d6d2af88 Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Mon, 1 Oct 2018 16:21:51 +1000 Subject: [PATCH 1487/2269] powerpc/nohash: fix undefined behaviour when testing page size support [ Upstream commit f5e284803a7206d43e26f9ffcae5de9626d95e37 ] When enumerating page size definitions to check hardware support, we construct a constant which is (1U << (def->shift - 10)). However, the array of page size definitions is only initalised for various MMU_PAGE_* constants, so it contains a number of 0-initialised elements with def->shift == 0. This means we end up shifting by a very large number, which gives the following UBSan splat: ================================================================================ UBSAN: Undefined behaviour in /home/dja/dev/linux/linux/arch/powerpc/mm/tlb_nohash.c:506:21 shift exponent 4294967286 is too large for 32-bit type 'unsigned int' CPU: 0 PID: 0 Comm: swapper Not tainted 4.19.0-rc3-00045-ga604f927b012-dirty #6 Call Trace: [c00000000101bc20] [c000000000a13d54] .dump_stack+0xa8/0xec (unreliable) [c00000000101bcb0] [c0000000004f20a8] .ubsan_epilogue+0x18/0x64 [c00000000101bd30] [c0000000004f2b10] .__ubsan_handle_shift_out_of_bounds+0x110/0x1a4 [c00000000101be20] [c000000000d21760] .early_init_mmu+0x1b4/0x5a0 [c00000000101bf10] [c000000000d1ba28] .early_setup+0x100/0x130 [c00000000101bf90] [c000000000000528] start_here_multiplatform+0x68/0x80 ================================================================================ Fix this by first checking if the element exists (shift != 0) before constructing the constant. Signed-off-by: Daniel Axtens Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/mm/tlb_nohash.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c index bfc4a08696092..2ae18e90e0bad 100644 --- a/arch/powerpc/mm/tlb_nohash.c +++ b/arch/powerpc/mm/tlb_nohash.c @@ -500,6 +500,9 @@ static void setup_page_sizes(void) for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) { struct mmu_psize_def *def = &mmu_psize_defs[psize]; + if (!def->shift) + continue; + if (tlb1ps & (1U << (def->shift - 10))) { def->flags |= MMU_PAGE_SIZE_DIRECT; -- GitLab From 4cb592436db9ff3ff394e6d8d951545a15fc66e8 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 13 Aug 2018 13:19:52 +0000 Subject: [PATCH 1488/2269] powerpc/mm: Don't report hugepage tables as memory leaks when using kmemleak [ Upstream commit 803d690e68f0c5230183f1a42c7d50a41d16e380 ] When a process allocates a hugepage, the following leak is reported by kmemleak. This is a false positive which is due to the pointer to the table being stored in the PGD as physical memory address and not virtual memory pointer. unreferenced object 0xc30f8200 (size 512): comm "mmap", pid 374, jiffies 4872494 (age 627.630s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [] huge_pte_alloc+0xdc/0x1f8 [<9e0df1e1>] hugetlb_fault+0x560/0x8f8 [<7938ec6c>] follow_hugetlb_page+0x14c/0x44c [] __get_user_pages+0x1c4/0x3dc [] __mm_populate+0xac/0x140 [<3215421e>] vm_mmap_pgoff+0xb4/0xb8 [] ksys_mmap_pgoff+0xcc/0x1fc [<4fcd760f>] ret_from_syscall+0x0/0x38 See commit a984506c542e2 ("powerpc/mm: Don't report PUDs as memory leaks when using kmemleak") for detailed explanation. To fix that, this patch tells kmemleak to ignore the allocated hugepage table. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/mm/hugetlbpage.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 4c9e5f9c7a44d..e2d929ddad7f7 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -110,6 +111,8 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, for (i = i - 1 ; i >= 0; i--, hpdp--) *hpdp = __hugepd(0); kmem_cache_free(cachep, new); + } else { + kmemleak_ignore(new); } spin_unlock(&mm->page_table_lock); return 0; -- GitLab From 547d528ea395d104dcb31a18b2d51a285e0973aa Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Wed, 26 Sep 2018 12:11:27 +0300 Subject: [PATCH 1489/2269] drm/omap: fix memory barrier bug in DMM driver [ Upstream commit 538f66ba204944470a653a4cccc5f8befdf97c22 ] A DMM timeout "timed out waiting for done" has been observed on DRA7 devices. The timeout happens rarely, and only when the system is under heavy load. Debugging showed that the timeout can be made to happen much more frequently by optimizing the DMM driver, so that there's almost no code between writing the last DMM descriptors to RAM, and writing to DMM register which starts the DMM transaction. The current theory is that a wmb() does not properly ensure that the data written to RAM is observable by all the components in the system. This DMM timeout has caused interesting (and rare) bugs as the error handling was not functioning properly (the error handling has been fixed in previous commits): * If a DMM timeout happened when a GEM buffer was being pinned for display on the screen, a timeout error would be shown, but the driver would continue programming DSS HW with broken buffer, leading to SYNCLOST floods and possible crashes. * If a DMM timeout happened when other user (say, video decoder) was pinning a GEM buffer, a timeout would be shown but if the user handled the error properly, no other issues followed. * If a DMM timeout happened when a GEM buffer was being released, the driver does not even notice the error, leading to crashes or hang later. This patch adds wmb() and readl() calls after the last bit is written to RAM, which should ensure that the execution proceeds only after the data is actually in RAM, and thus observable by DMM. The read-back should not be needed. Further study is required to understand if DMM is somehow special case and read-back is ok, or if DRA7's memory barriers do not work correctly. Signed-off-by: Tomi Valkeinen Signed-off-by: Peter Ujfalusi Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/omapdrm/omap_dmm_tiler.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c b/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c index df05fe53c399b..32901c6fe3dfc 100644 --- a/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c +++ b/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c @@ -273,6 +273,17 @@ static int dmm_txn_commit(struct dmm_txn *txn, bool wait) } txn->last_pat->next_pa = 0; + /* ensure that the written descriptors are visible to DMM */ + wmb(); + + /* + * NOTE: the wmb() above should be enough, but there seems to be a bug + * in OMAP's memory barrier implementation, which in some rare cases may + * cause the writes not to be observable after wmb(). + */ + + /* read back to ensure the data is in RAM */ + readl(&txn->last_pat->next_pa); /* write to PAT_DESCR to clear out any pending transaction */ dmm_write(dmm, 0x0, reg[PAT_DESCR][engine->id]); -- GitLab From 18490ea7607c1c136b385075219e65760a656f05 Mon Sep 17 00:00:00 2001 From: John Garry Date: Sat, 22 Sep 2018 01:25:25 +0800 Subject: [PATCH 1490/2269] drm/hisilicon: hibmc: Do not carry error code in HiBMC framebuffer pointer [ Upstream commit 331d880b35a76b5de0eec8cbcecbf615d758a5f9 ] In hibmc_drm_fb_create(), when the call to hibmc_framebuffer_init() fails with error, do not store the error code in the HiBMC device frame-buffer pointer, as this will be later checked for non-zero value in hibmc_fbdev_destroy() when our intention is to check for a valid function pointer. This fixes the following crash: [ 9.699791] Unable to handle kernel NULL pointer dereference at virtual address 000000000000001a [ 9.708672] Mem abort info: [ 9.711489] ESR = 0x96000004 [ 9.714570] Exception class = DABT (current EL), IL = 32 bits [ 9.720551] SET = 0, FnV = 0 [ 9.723631] EA = 0, S1PTW = 0 [ 9.726799] Data abort info: [ 9.729702] ISV = 0, ISS = 0x00000004 [ 9.733573] CM = 0, WnR = 0 [ 9.736566] [000000000000001a] user address but active_mm is swapper [ 9.742987] Internal error: Oops: 96000004 [#1] PREEMPT SMP [ 9.748614] Modules linked in: [ 9.751694] CPU: 16 PID: 293 Comm: kworker/16:1 Tainted: G W 4.19.0-rc4-next-20180920-00001-g9b0012c #322 [ 9.762681] Hardware name: Huawei Taishan 2280 /D05, BIOS Hisilicon D05 IT21 Nemo 2.0 RC0 04/18/2018 [ 9.771915] Workqueue: events work_for_cpu_fn [ 9.776312] pstate: 60000005 (nZCv daif -PAN -UAO) [ 9.781150] pc : drm_mode_object_put+0x0/0x20 [ 9.785547] lr : hibmc_fbdev_fini+0x40/0x58 [ 9.789767] sp : ffff00000af1bcf0 [ 9.793108] x29: ffff00000af1bcf0 x28: 0000000000000000 [ 9.798473] x27: 0000000000000000 x26: ffff000008f66630 [ 9.803838] x25: 0000000000000000 x24: ffff0000095abb98 [ 9.809203] x23: ffff8017db92fe00 x22: ffff8017d2b13000 [ 9.814568] x21: ffffffffffffffea x20: ffff8017d2f80018 [ 9.819933] x19: ffff8017d28a0018 x18: ffffffffffffffff [ 9.825297] x17: 0000000000000000 x16: 0000000000000000 [ 9.830662] x15: ffff0000092296c8 x14: ffff00008939970f [ 9.836026] x13: ffff00000939971d x12: ffff000009229940 [ 9.841391] x11: ffff0000085f8fc0 x10: ffff00000af1b9a0 [ 9.846756] x9 : 000000000000000d x8 : 6620657a696c6169 [ 9.852121] x7 : ffff8017d3340580 x6 : ffff8017d4168000 [ 9.857486] x5 : 0000000000000000 x4 : ffff8017db92fb20 [ 9.862850] x3 : 0000000000002690 x2 : ffff8017d3340480 [ 9.868214] x1 : 0000000000000028 x0 : 0000000000000002 [ 9.873580] Process kworker/16:1 (pid: 293, stack limit = 0x(____ptrval____)) [ 9.880788] Call trace: [ 9.883252] drm_mode_object_put+0x0/0x20 [ 9.887297] hibmc_unload+0x1c/0x80 [ 9.890815] hibmc_pci_probe+0x170/0x3c8 [ 9.894773] local_pci_probe+0x3c/0xb0 [ 9.898555] work_for_cpu_fn+0x18/0x28 [ 9.902337] process_one_work+0x1e0/0x318 [ 9.906382] worker_thread+0x228/0x450 [ 9.910164] kthread+0x128/0x130 [ 9.913418] ret_from_fork+0x10/0x18 [ 9.917024] Code: a94153f3 a8c27bfd d65f03c0 d503201f (f9400c01) [ 9.923180] ---[ end trace 2695ffa0af5be375 ]--- Fixes: d1667b86795a ("drm/hisilicon/hibmc: Add support for frame buffer") Signed-off-by: John Garry Reviewed-by: Xinliang Liu Signed-off-by: Xinliang Liu Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_fbdev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_fbdev.c b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_fbdev.c index b92595c477ef6..8bd29075ae4eb 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_fbdev.c +++ b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_fbdev.c @@ -122,6 +122,7 @@ static int hibmc_drm_fb_create(struct drm_fb_helper *helper, hi_fbdev->fb = hibmc_framebuffer_init(priv->dev, &mode_cmd, gobj); if (IS_ERR(hi_fbdev->fb)) { ret = PTR_ERR(hi_fbdev->fb); + hi_fbdev->fb = NULL; DRM_ERROR("failed to initialize framebuffer: %d\n", ret); goto out_release_fbi; } -- GitLab From 27f612940d5cf7b5d4bae316c4dd6dfead6a6ec6 Mon Sep 17 00:00:00 2001 From: Nicholas Mc Guire Date: Sun, 9 Sep 2018 12:02:32 -0400 Subject: [PATCH 1491/2269] media: pci: cx23885: handle adding to list failure [ Upstream commit c5d59528e24ad22500347b199d52b9368e686a42 ] altera_hw_filt_init() which calls append_internal() assumes that the node was successfully linked in while in fact it can silently fail. So the call-site needs to set return to -ENOMEM on append_internal() returning NULL and exit through the err path. Fixes: 349bcf02e361 ("[media] Altera FPGA based CI driver module") Signed-off-by: Nicholas Mc Guire Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/media/pci/cx23885/altera-ci.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/media/pci/cx23885/altera-ci.c b/drivers/media/pci/cx23885/altera-ci.c index 5c94e312cba3d..f77254cc16bf9 100644 --- a/drivers/media/pci/cx23885/altera-ci.c +++ b/drivers/media/pci/cx23885/altera-ci.c @@ -665,6 +665,10 @@ static int altera_hw_filt_init(struct altera_ci_config *config, int hw_filt_nr) } temp_int = append_internal(inter); + if (!temp_int) { + ret = -ENOMEM; + goto err; + } inter->filts_used = 1; inter->dev = config->dev; inter->fpga_rw = config->fpga_rw; @@ -699,6 +703,7 @@ err: __func__, ret); kfree(pid_filt); + kfree(inter); return ret; } @@ -733,6 +738,10 @@ int altera_ci_init(struct altera_ci_config *config, int ci_nr) } temp_int = append_internal(inter); + if (!temp_int) { + ret = -ENOMEM; + goto err; + } inter->cis_used = 1; inter->dev = config->dev; inter->fpga_rw = config->fpga_rw; @@ -801,6 +810,7 @@ err: ci_dbg_print("%s: Cannot initialize CI: Error %d.\n", __func__, ret); kfree(state); + kfree(inter); return ret; } -- GitLab From e56c482e4e7f4d39a851ebceb7652be59d248208 Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Wed, 1 Aug 2018 10:18:04 -0400 Subject: [PATCH 1492/2269] media: coda: don't overwrite h.264 profile_idc on decoder instance [ Upstream commit 1f32061e843205f6fe8404d5100d5adcec334e75 ] On a decoder instance, after the profile has been parsed from the stream __v4l2_ctrl_s_ctrl() is called to notify userspace about changes in the read-only profile control. This ends up calling back into the CODA driver where a missing check on the s_ctrl caused the profile information that has just been parsed from the stream to be overwritten with the default baseline profile. Later on the driver fails to enable frame reordering, based on the wrong profile information. Fixes: 347de126d1da (media: coda: add read-only h.264 decoder profile/level controls) Signed-off-by: Lucas Stach Reviewed-by: Philipp Zabel Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/media/platform/coda/coda-common.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/media/platform/coda/coda-common.c b/drivers/media/platform/coda/coda-common.c index 15eb5dc4dff9a..99d138d3f87f9 100644 --- a/drivers/media/platform/coda/coda-common.c +++ b/drivers/media/platform/coda/coda-common.c @@ -1686,7 +1686,8 @@ static int coda_s_ctrl(struct v4l2_ctrl *ctrl) break; case V4L2_CID_MPEG_VIDEO_H264_PROFILE: /* TODO: switch between baseline and constrained baseline */ - ctx->params.h264_profile_idc = 66; + if (ctx->inst_type == CODA_INST_ENCODER) + ctx->params.h264_profile_idc = 66; break; case V4L2_CID_MPEG_VIDEO_H264_LEVEL: /* nothing to do, this is set by the encoder */ -- GitLab From 806be82cd255646c8fa872529447be1289c12c6c Mon Sep 17 00:00:00 2001 From: Dengcheng Zhu Date: Tue, 11 Sep 2018 14:49:20 -0700 Subject: [PATCH 1493/2269] MIPS: kexec: Mark CPU offline before disabling local IRQ [ Upstream commit dc57aaf95a516f70e2d527d8287a0332c481a226 ] After changing CPU online status, it will not be sent any IPIs such as in __flush_cache_all() on software coherency systems. Do this before disabling local IRQ. Signed-off-by: Dengcheng Zhu Signed-off-by: Paul Burton Patchwork: https://patchwork.linux-mips.org/patch/20571/ Cc: pburton@wavecomp.com Cc: ralf@linux-mips.org Cc: linux-mips@linux-mips.org Cc: rachel.mozes@intel.com Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/mips/kernel/crash.c | 3 +++ arch/mips/kernel/machine_kexec.c | 3 +++ 2 files changed, 6 insertions(+) diff --git a/arch/mips/kernel/crash.c b/arch/mips/kernel/crash.c index d455363d51c3d..4c07a43a3242a 100644 --- a/arch/mips/kernel/crash.c +++ b/arch/mips/kernel/crash.c @@ -36,6 +36,9 @@ static void crash_shutdown_secondary(void *passed_regs) if (!cpu_online(cpu)) return; + /* We won't be sent IPIs any more. */ + set_cpu_online(cpu, false); + local_irq_disable(); if (!cpumask_test_cpu(cpu, &cpus_in_crash)) crash_save_cpu(regs, cpu); diff --git a/arch/mips/kernel/machine_kexec.c b/arch/mips/kernel/machine_kexec.c index 8b574bcd39ba8..4b3726e4fe3ac 100644 --- a/arch/mips/kernel/machine_kexec.c +++ b/arch/mips/kernel/machine_kexec.c @@ -118,6 +118,9 @@ machine_kexec(struct kimage *image) *ptr = (unsigned long) phys_to_virt(*ptr); } + /* Mark offline BEFORE disabling local irq. */ + set_cpu_online(smp_processor_id(), false); + /* * we do not want to be bothered. */ -- GitLab From f386e1e50e35ea51c6bd977eb4808c2f429a54df Mon Sep 17 00:00:00 2001 From: Joel Stanley Date: Fri, 14 Sep 2018 13:36:47 +0930 Subject: [PATCH 1494/2269] powerpc/boot: Ensure _zimage_start is a weak symbol [ Upstream commit ee9d21b3b3583712029a0db65a4b7c081d08d3b3 ] When building with clang crt0's _zimage_start is not marked weak, which breaks the build when linking the kernel image: $ objdump -t arch/powerpc/boot/crt0.o |grep _zimage_start$ 0000000000000058 g .text 0000000000000000 _zimage_start ld: arch/powerpc/boot/wrapper.a(crt0.o): in function '_zimage_start': (.text+0x58): multiple definition of '_zimage_start'; arch/powerpc/boot/pseries-head.o:(.text+0x0): first defined here Clang requires the .weak directive to appear after the symbol is declared. The binutils manual says: This directive sets the weak attribute on the comma separated list of symbol names. If the symbols do not already exist, they will be created. So it appears this is different with clang. The only reference I could see for this was an OpenBSD mailing list post[1]. Changing it to be after the declaration fixes building with Clang, and still works with GCC. $ objdump -t arch/powerpc/boot/crt0.o |grep _zimage_start$ 0000000000000058 w .text 0000000000000000 _zimage_start Reported to clang as https://bugs.llvm.org/show_bug.cgi?id=38921 [1] https://groups.google.com/forum/#!topic/fa.openbsd.tech/PAgKKen2YCY Signed-off-by: Joel Stanley Reviewed-by: Nick Desaulniers Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/boot/crt0.S | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/boot/crt0.S b/arch/powerpc/boot/crt0.S index dcf2f15e67971..32dfe6d083f32 100644 --- a/arch/powerpc/boot/crt0.S +++ b/arch/powerpc/boot/crt0.S @@ -47,8 +47,10 @@ p_end: .long _end p_pstack: .long _platform_stack_top #endif - .weak _zimage_start .globl _zimage_start + /* Clang appears to require the .weak directive to be after the symbol + * is defined. See https://bugs.llvm.org/show_bug.cgi?id=38921 */ + .weak _zimage_start _zimage_start: .globl _zimage_start_lib _zimage_start_lib: -- GitLab From 983721397fe0a6b2a5937da74b43d4bc0bbb13fe Mon Sep 17 00:00:00 2001 From: Rashmica Gupta Date: Fri, 17 Aug 2018 14:25:01 +1000 Subject: [PATCH 1495/2269] powerpc/memtrace: Remove memory in chunks [ Upstream commit 3f7daf3d7582dc6628ac40a9045dd1bbd80c5f35 ] When hot-removing memory release_mem_region_adjustable() splits iomem resources if they are not the exact size of the memory being hot-deleted. Adding this memory back to the kernel adds a new resource. Eg a node has memory 0x0 - 0xfffffffff. Hot-removing 1GB from 0xf40000000 results in the single resource 0x0-0xfffffffff being split into two resources: 0x0-0xf3fffffff and 0xf80000000-0xfffffffff. When we hot-add the memory back we now have three resources: 0x0-0xf3fffffff, 0xf40000000-0xf7fffffff, and 0xf80000000-0xfffffffff. This is an issue if we try to remove some memory that overlaps resources. Eg when trying to remove 2GB at address 0xf40000000, release_mem_region_adjustable() fails as it expects the chunk of memory to be within the boundaries of a single resource. We then get the warning: "Unable to release resource" and attempting to use memtrace again gives us this error: "bash: echo: write error: Resource temporarily unavailable" This patch makes memtrace remove memory in chunks that are always the same size from an address that is always equal to end_of_memory - n*size, for some n. So hotremoving and hotadding memory of different sizes will now not attempt to remove memory that spans multiple resources. Signed-off-by: Rashmica Gupta Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/platforms/powernv/memtrace.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/platforms/powernv/memtrace.c b/arch/powerpc/platforms/powernv/memtrace.c index fc222a0c2ac46..c9a6d4f3403ce 100644 --- a/arch/powerpc/platforms/powernv/memtrace.c +++ b/arch/powerpc/platforms/powernv/memtrace.c @@ -119,17 +119,15 @@ static bool memtrace_offline_pages(u32 nid, u64 start_pfn, u64 nr_pages) walk_memory_range(start_pfn, end_pfn, (void *)MEM_OFFLINE, change_memblock_state); - lock_device_hotplug(); - remove_memory(nid, start_pfn << PAGE_SHIFT, nr_pages << PAGE_SHIFT); - unlock_device_hotplug(); return true; } static u64 memtrace_alloc_node(u32 nid, u64 size) { - u64 start_pfn, end_pfn, nr_pages; + u64 start_pfn, end_pfn, nr_pages, pfn; u64 base_pfn; + u64 bytes = memory_block_size_bytes(); if (!NODE_DATA(nid) || !node_spanned_pages(nid)) return 0; @@ -142,8 +140,21 @@ static u64 memtrace_alloc_node(u32 nid, u64 size) end_pfn = round_down(end_pfn - nr_pages, nr_pages); for (base_pfn = end_pfn; base_pfn > start_pfn; base_pfn -= nr_pages) { - if (memtrace_offline_pages(nid, base_pfn, nr_pages) == true) + if (memtrace_offline_pages(nid, base_pfn, nr_pages) == true) { + /* + * Remove memory in memory block size chunks so that + * iomem resources are always split to the same size and + * we never try to remove memory that spans two iomem + * resources. + */ + lock_device_hotplug(); + end_pfn = base_pfn + nr_pages; + for (pfn = base_pfn; pfn < end_pfn; pfn += bytes>> PAGE_SHIFT) { + remove_memory(nid, pfn << PAGE_SHIFT, bytes); + } + unlock_device_hotplug(); return base_pfn << PAGE_SHIFT; + } } return 0; -- GitLab From f9cb913a7959321bf491b824e5f207a2520cd412 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Sat, 15 Sep 2018 14:01:12 +0800 Subject: [PATCH 1496/2269] MIPS/PCI: Call pcie_bus_configure_settings() to set MPS/MRRS [ Upstream commit 2794f688b2c336e0da85e9f91fed33febbd9f54a ] Call pcie_bus_configure_settings() on MIPS, like for other platforms. The function pcie_bus_configure_settings() makes sure the MPS (Max Payload Size) across the bus is uniform and provides the ability to tune the MRSS (Max Read Request Size) and MPS (Max Payload Size) to higher performance values. Some devices will not operate properly if these aren't set correctly because the firmware doesn't always do it. Signed-off-by: Huacai Chen Signed-off-by: Paul Burton Patchwork: https://patchwork.linux-mips.org/patch/20649/ Cc: Ralf Baechle Cc: James Hogan Cc: linux-mips@linux-mips.org Cc: Fuxin Zhang Cc: Zhangjin Wu Cc: Huacai Chen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/mips/pci/pci-legacy.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/mips/pci/pci-legacy.c b/arch/mips/pci/pci-legacy.c index 0c65c38e05d6c..1ae6bc414e2b0 100644 --- a/arch/mips/pci/pci-legacy.c +++ b/arch/mips/pci/pci-legacy.c @@ -127,8 +127,12 @@ static void pcibios_scanbus(struct pci_controller *hose) if (pci_has_flag(PCI_PROBE_ONLY)) { pci_bus_claim_resources(bus); } else { + struct pci_bus *child; + pci_bus_size_bridges(bus); pci_bus_assign_resources(bus); + list_for_each_entry(child, &bus->children, node) + pcie_bus_configure_settings(child); } pci_bus_add_devices(bus); } -- GitLab From 4d0df50d740ecfd8ca7831c739e5c98c7466a94a Mon Sep 17 00:00:00 2001 From: Phil Elwell Date: Wed, 12 Sep 2018 15:31:55 +0100 Subject: [PATCH 1497/2269] sc16is7xx: Fix for multi-channel stall [ Upstream commit 8344498721059754e09d30fe255a12dab8fb03ef ] The SC16IS752 is a dual-channel device. The two channels are largely independent, but the IRQ signals are wired together as an open-drain, active low signal which will be driven low while either of the channels requires attention, which can be for significant periods of time until operations complete and the interrupt can be acknowledged. In that respect it is should be treated as a true level-sensitive IRQ. The kernel, however, needs to be able to exit interrupt context in order to use I2C or SPI to access the device registers (which may involve sleeping). Therefore the interrupt needs to be masked out or paused in some way. The usual way to manage sleeping from within an interrupt handler is to use a threaded interrupt handler - a regular interrupt routine does the minimum amount of work needed to triage the interrupt before waking the interrupt service thread. If the threaded IRQ is marked as IRQF_ONESHOT the kernel will automatically mask out the interrupt until the thread runs to completion. The sc16is7xx driver used to use a threaded IRQ, but a patch switched to using a kthread_worker in order to set realtime priorities on the handler thread and for other optimisations. The end result is non-threaded IRQ that schedules some work then returns IRQ_HANDLED, making the kernel think that all IRQ processing has completed. The work-around to prevent a constant stream of interrupts is to mark the interrupt as edge-sensitive rather than level-sensitive, but interpreting an active-low source as a falling-edge source requires care to prevent a total cessation of interrupts. Whereas an edge-triggering source will generate a new edge for every interrupt condition a level-triggering source will keep the signal at the interrupting level until it no longer requires attention; in other words, the host won't see another edge until all interrupt conditions are cleared. It is therefore vital that the interrupt handler does not exit with an outstanding interrupt condition, otherwise the kernel will not receive another interrupt unless some other operation causes the interrupt state on the device to be cleared. The existing sc16is7xx driver has a very simple interrupt "thread" (kthread_work job) that processes interrupts on each channel in turn until there are no more. If both channels are active and the first channel starts interrupting while the handler for the second channel is running then it will not be detected and an IRQ stall ensues. This could be handled easily if there was a shared IRQ status register, or a convenient way to determine if the IRQ had been deasserted for any length of time, but both appear to be lacking. Avoid this problem (or at least make it much less likely to happen) by reducing the granularity of per-channel interrupt processing to one condition per iteration, only exiting the overall loop when both channels are no longer interrupting. Signed-off-by: Phil Elwell Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/sc16is7xx.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/drivers/tty/serial/sc16is7xx.c b/drivers/tty/serial/sc16is7xx.c index ca54ce074a5f8..a79f18edf2bd4 100644 --- a/drivers/tty/serial/sc16is7xx.c +++ b/drivers/tty/serial/sc16is7xx.c @@ -662,7 +662,7 @@ static void sc16is7xx_handle_tx(struct uart_port *port) uart_write_wakeup(port); } -static void sc16is7xx_port_irq(struct sc16is7xx_port *s, int portno) +static bool sc16is7xx_port_irq(struct sc16is7xx_port *s, int portno) { struct uart_port *port = &s->p[portno].port; @@ -671,7 +671,7 @@ static void sc16is7xx_port_irq(struct sc16is7xx_port *s, int portno) iir = sc16is7xx_port_read(port, SC16IS7XX_IIR_REG); if (iir & SC16IS7XX_IIR_NO_INT_BIT) - break; + return false; iir &= SC16IS7XX_IIR_ID_MASK; @@ -693,16 +693,23 @@ static void sc16is7xx_port_irq(struct sc16is7xx_port *s, int portno) port->line, iir); break; } - } while (1); + } while (0); + return true; } static void sc16is7xx_ist(struct kthread_work *ws) { struct sc16is7xx_port *s = to_sc16is7xx_port(ws, irq_work); - int i; - for (i = 0; i < s->devtype->nr_uart; ++i) - sc16is7xx_port_irq(s, i); + while (1) { + bool keep_polling = false; + int i; + + for (i = 0; i < s->devtype->nr_uart; ++i) + keep_polling |= sc16is7xx_port_irq(s, i); + if (!keep_polling) + break; + } } static irqreturn_t sc16is7xx_irq(int irq, void *dev_id) -- GitLab From 3e59ed2e314373c6c91599b027e95b607f5cb584 Mon Sep 17 00:00:00 2001 From: Marco Felsch Date: Thu, 28 Jun 2018 12:20:33 -0400 Subject: [PATCH 1498/2269] media: tvp5150: fix width alignment during set_selection() [ Upstream commit bd24db04101f45a9c1d874fe21b0c7eab7bcadec ] The driver ignored the width alignment which exists due to the UYVY colorspace format. Fix the width alignment and make use of the the provided v4l2 helper function to set the width, height and all alignments in one. Fixes: 963ddc63e20d ("[media] media: tvp5150: Add cropping support") Signed-off-by: Marco Felsch Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/media/i2c/tvp5150.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/media/i2c/tvp5150.c b/drivers/media/i2c/tvp5150.c index 4d3e97f97c76b..b41f7fafb7314 100644 --- a/drivers/media/i2c/tvp5150.c +++ b/drivers/media/i2c/tvp5150.c @@ -900,9 +900,6 @@ static int tvp5150_set_selection(struct v4l2_subdev *sd, /* tvp5150 has some special limits */ rect.left = clamp(rect.left, 0, TVP5150_MAX_CROP_LEFT); - rect.width = clamp_t(unsigned int, rect.width, - TVP5150_H_MAX - TVP5150_MAX_CROP_LEFT - rect.left, - TVP5150_H_MAX - rect.left); rect.top = clamp(rect.top, 0, TVP5150_MAX_CROP_TOP); /* Calculate height based on current standard */ @@ -916,9 +913,16 @@ static int tvp5150_set_selection(struct v4l2_subdev *sd, else hmax = TVP5150_V_MAX_OTHERS; - rect.height = clamp_t(unsigned int, rect.height, + /* + * alignments: + * - width = 2 due to UYVY colorspace + * - height, image = no special alignment + */ + v4l_bound_align_image(&rect.width, + TVP5150_H_MAX - TVP5150_MAX_CROP_LEFT - rect.left, + TVP5150_H_MAX - rect.left, 1, &rect.height, hmax - TVP5150_MAX_CROP_TOP - rect.top, - hmax - rect.top); + hmax - rect.top, 0, 0); tvp5150_write(sd, TVP5150_VERT_BLANKING_START, rect.top); tvp5150_write(sd, TVP5150_VERT_BLANKING_STOP, -- GitLab From d3835bb8fa6758b4f38da3741e206192a6340f90 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Tue, 31 Jul 2018 17:55:57 -0300 Subject: [PATCH 1499/2269] powerpc/selftests: Wait all threads to join [ Upstream commit 693b31b2fc1636f0aa7af53136d3b49f6ad9ff39 ] Test tm-tmspr might exit before all threads stop executing, because it just waits for the very last thread to join before proceeding/exiting. This patch makes sure that all threads that were created will join before proceeding/exiting. This patch also guarantees that the amount of threads being created is equal to thread_num. Signed-off-by: Breno Leitao Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/powerpc/tm/tm-tmspr.c | 27 ++++++++++++------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/tools/testing/selftests/powerpc/tm/tm-tmspr.c b/tools/testing/selftests/powerpc/tm/tm-tmspr.c index 2bda81c7bf237..df1d7d4b1c89f 100644 --- a/tools/testing/selftests/powerpc/tm/tm-tmspr.c +++ b/tools/testing/selftests/powerpc/tm/tm-tmspr.c @@ -98,7 +98,7 @@ void texasr(void *in) int test_tmspr() { - pthread_t thread; + pthread_t *thread; int thread_num; unsigned long i; @@ -107,21 +107,28 @@ int test_tmspr() /* To cause some context switching */ thread_num = 10 * sysconf(_SC_NPROCESSORS_ONLN); + thread = malloc(thread_num * sizeof(pthread_t)); + if (thread == NULL) + return EXIT_FAILURE; + /* Test TFIAR and TFHAR */ - for (i = 0 ; i < thread_num ; i += 2){ - if (pthread_create(&thread, NULL, (void*)tfiar_tfhar, (void *)i)) + for (i = 0; i < thread_num; i += 2) { + if (pthread_create(&thread[i], NULL, (void *)tfiar_tfhar, + (void *)i)) return EXIT_FAILURE; } - if (pthread_join(thread, NULL) != 0) - return EXIT_FAILURE; - /* Test TEXASR */ - for (i = 0 ; i < thread_num ; i++){ - if (pthread_create(&thread, NULL, (void*)texasr, (void *)i)) + for (i = 1; i < thread_num; i += 2) { + if (pthread_create(&thread[i], NULL, (void *)texasr, (void *)i)) return EXIT_FAILURE; } - if (pthread_join(thread, NULL) != 0) - return EXIT_FAILURE; + + for (i = 0; i < thread_num; i++) { + if (pthread_join(thread[i], NULL) != 0) + return EXIT_FAILURE; + } + + free(thread); if (passed) return 0; -- GitLab From 442b54290c782f22a848941064625cad3e263f39 Mon Sep 17 00:00:00 2001 From: Alexandru Ardelean Date: Thu, 13 Sep 2018 11:44:09 +0300 Subject: [PATCH 1500/2269] staging:iio:ad7606: fix voltage scales [ Upstream commit 4ee033301c898dd0835d035d0e0eb768a3d35da1 ] Fixes commit 17be2a2905a6ec9aa27cd59521495e2f490d2af0 ("staging: iio: ad7606: replace range/range_available with corresponding scale"). The AD7606 devices don't have a 2.5V voltage range, they have 5V & 10V voltage range, which is selectable via the `gpio_range` descriptor. The scales also seem to have been miscomputed, because when they were applied to the raw values, the results differ from the expected values. After checking the ADC transfer function in the datasheet, these were re-computed. Signed-off-by: Alexandru Ardelean Signed-off-by: Jonathan Cameron Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/staging/iio/adc/ad7606.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/staging/iio/adc/ad7606.c b/drivers/staging/iio/adc/ad7606.c index 18f5f139117ee..0ff458fbee5ec 100644 --- a/drivers/staging/iio/adc/ad7606.c +++ b/drivers/staging/iio/adc/ad7606.c @@ -26,9 +26,12 @@ #include "ad7606.h" -/* Scales are computed as 2.5/2**16 and 5/2**16 respectively */ +/* + * Scales are computed as 5000/32768 and 10000/32768 respectively, + * so that when applied to the raw values they provide mV values + */ static const unsigned int scale_avail[2][2] = { - {0, 38147}, {0, 76294} + {0, 152588}, {0, 305176} }; static int ad7606_reset(struct ad7606_state *st) -- GitLab From 491fc097e3d49f777d2e45ec3ae6341cff986f44 Mon Sep 17 00:00:00 2001 From: Dominique Martinet Date: Sat, 8 Sep 2018 01:18:43 +0900 Subject: [PATCH 1501/2269] 9p locks: fix glock.client_id leak in do_lock [ Upstream commit b4dc44b3cac9e8327e0655f530ed0c46f2e6214c ] the 9p client code overwrites our glock.client_id pointing to a static buffer by an allocated string holding the network provided value which we do not care about; free and reset the value as appropriate. This is almost identical to the leak in v9fs_file_getlock() fixed by Al Viro in commit ce85dd58ad5a6 ("9p: we are leaking glock.client_id in v9fs_file_getlock()"), which was returned as an error by a coverity false positive -- while we are here attempt to make the code slightly more robust to future change of the net/9p/client code and hopefully more clear to coverity that there is no problem. Link: http://lkml.kernel.org/r/1536339057-21974-5-git-send-email-asmadeus@codewreck.org Signed-off-by: Dominique Martinet Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/9p/vfs_file.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index 03c9e325bfbc1..3a2f37ad1f89c 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -204,6 +204,14 @@ static int v9fs_file_do_lock(struct file *filp, int cmd, struct file_lock *fl) break; if (schedule_timeout_interruptible(P9_LOCK_TIMEOUT) != 0) break; + /* + * p9_client_lock_dotl overwrites flock.client_id with the + * server message, free and reuse the client name + */ + if (flock.client_id != fid->clnt->name) { + kfree(flock.client_id); + flock.client_id = fid->clnt->name; + } } /* map 9p status to VFS status */ @@ -235,6 +243,8 @@ out_unlock: locks_lock_file_wait(filp, fl); fl->fl_type = fl_type; } + if (flock.client_id != fid->clnt->name) + kfree(flock.client_id); out: return res; } @@ -269,7 +279,7 @@ static int v9fs_file_getlock(struct file *filp, struct file_lock *fl) res = p9_client_getlock_dotl(fid, &glock); if (res < 0) - return res; + goto out; /* map 9p lock type to os lock type */ switch (glock.type) { case P9_LOCK_TYPE_RDLCK: @@ -290,7 +300,9 @@ static int v9fs_file_getlock(struct file *filp, struct file_lock *fl) fl->fl_end = glock.start + glock.length - 1; fl->fl_pid = -glock.proc_id; } - kfree(glock.client_id); +out: + if (glock.client_id != fid->clnt->name) + kfree(glock.client_id); return res; } -- GitLab From 18280c1260154343e01accb1d52a0dd321cfc828 Mon Sep 17 00:00:00 2001 From: Dominique Martinet Date: Tue, 28 Aug 2018 07:32:35 +0900 Subject: [PATCH 1502/2269] 9p: clear dangling pointers in p9stat_free [ Upstream commit 62e3941776fea8678bb8120607039410b1b61a65 ] p9stat_free is more of a cleanup function than a 'free' function as it only frees the content of the struct; there are chances of use-after-free if it is improperly used (e.g. p9stat_free called twice as it used to be possible to) Clearing dangling pointers makes the function idempotent and safer to use. Link: http://lkml.kernel.org/r/1535410108-20650-2-git-send-email-asmadeus@codewreck.org Signed-off-by: Dominique Martinet Reported-by: syzbot+d4252148d198410b864f@syzkaller.appspotmail.com Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/9p/protocol.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/9p/protocol.c b/net/9p/protocol.c index 16e10680518c4..9743837aebc60 100644 --- a/net/9p/protocol.c +++ b/net/9p/protocol.c @@ -46,10 +46,15 @@ p9pdu_writef(struct p9_fcall *pdu, int proto_version, const char *fmt, ...); void p9stat_free(struct p9_wstat *stbuf) { kfree(stbuf->name); + stbuf->name = NULL; kfree(stbuf->uid); + stbuf->uid = NULL; kfree(stbuf->gid); + stbuf->gid = NULL; kfree(stbuf->muid); + stbuf->muid = NULL; kfree(stbuf->extension); + stbuf->extension = NULL; } EXPORT_SYMBOL(p9stat_free); -- GitLab From a8c254d8e96032d5bb235cb2e777203d9acda09d Mon Sep 17 00:00:00 2001 From: Young_X Date: Wed, 3 Oct 2018 12:54:29 +0000 Subject: [PATCH 1503/2269] cdrom: fix improper type cast, which can leat to information leak. commit e4f3aa2e1e67bb48dfbaaf1cad59013d5a5bc276 upstream. There is another cast from unsigned long to int which causes a bounds check to fail with specially crafted input. The value is then used as an index in the slot array in cdrom_slot_status(). This issue is similar to CVE-2018-16658 and CVE-2018-10940. Signed-off-by: Young_X Signed-off-by: Jens Axboe Cc: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- drivers/cdrom/cdrom.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c index 8cfa10ab7abcb..930b49606a8cd 100644 --- a/drivers/cdrom/cdrom.c +++ b/drivers/cdrom/cdrom.c @@ -2441,7 +2441,7 @@ static int cdrom_ioctl_select_disc(struct cdrom_device_info *cdi, return -ENOSYS; if (arg != CDSL_CURRENT && arg != CDSL_NONE) { - if ((int)arg >= cdi->capacity) + if (arg >= cdi->capacity) return -EINVAL; } -- GitLab From 0afa17be1a78bf35085bffb7d0e04b96b766f3ba Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Wed, 10 Oct 2018 19:10:06 +0300 Subject: [PATCH 1504/2269] ovl: fix error handling in ovl_verify_set_fh() commit babf4770be0adc69e6d2de150f4040f175e24beb upstream. We hit a BUG on kfree of an ERR_PTR()... Reported-by: syzbot+ff03fe05c717b82502d0@syzkaller.appspotmail.com Fixes: 8b88a2e64036 ("ovl: verify upper root dir matches lower root dir") Cc: # v4.13 Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi Signed-off-by: Greg Kroah-Hartman --- fs/overlayfs/namei.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c index d9468de3c9510..8442f9839c902 100644 --- a/fs/overlayfs/namei.c +++ b/fs/overlayfs/namei.c @@ -368,8 +368,10 @@ int ovl_verify_origin(struct dentry *dentry, struct vfsmount *mnt, fh = ovl_encode_fh(origin, is_upper); err = PTR_ERR(fh); - if (IS_ERR(fh)) + if (IS_ERR(fh)) { + fh = NULL; goto fail; + } err = ovl_verify_origin_fh(dentry, fh); if (set && err == -ENODATA) -- GitLab From c60d5af75baaa459aa3619a0487637315de4f83c Mon Sep 17 00:00:00 2001 From: Himanshu Madhani Date: Fri, 31 Aug 2018 11:24:27 -0700 Subject: [PATCH 1505/2269] scsi: qla2xxx: Fix incorrect port speed being set for FC adapters commit 4c1458df9635c7e3ced155f594d2e7dfd7254e21 upstream. Fixes: 6246b8a1d26c7c ("[SCSI] qla2xxx: Enhancements to support ISP83xx.") Fixes: 1bb395485160d2 ("qla2xxx: Correct iiDMA-update calling conventions.") Cc: Signed-off-by: Himanshu Madhani Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/qla2xxx/qla_mbx.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c index 2d909e12e23a3..929ec087b8eb3 100644 --- a/drivers/scsi/qla2xxx/qla_mbx.c +++ b/drivers/scsi/qla2xxx/qla_mbx.c @@ -3682,10 +3682,7 @@ qla2x00_set_idma_speed(scsi_qla_host_t *vha, uint16_t loop_id, mcp->mb[0] = MBC_PORT_PARAMS; mcp->mb[1] = loop_id; mcp->mb[2] = BIT_0; - if (IS_CNA_CAPABLE(vha->hw)) - mcp->mb[3] = port_speed & (BIT_5|BIT_4|BIT_3|BIT_2|BIT_1|BIT_0); - else - mcp->mb[3] = port_speed & (BIT_2|BIT_1|BIT_0); + mcp->mb[3] = port_speed & (BIT_5|BIT_4|BIT_3|BIT_2|BIT_1|BIT_0); mcp->mb[9] = vha->vp_idx; mcp->out_mb = MBX_9|MBX_3|MBX_2|MBX_1|MBX_0; mcp->in_mb = MBX_3|MBX_1|MBX_0; -- GitLab From 89fdc7f8646eba683709c3ecd1b7fa2d60e3f06a Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Fri, 31 Aug 2018 11:24:26 -0700 Subject: [PATCH 1506/2269] scsi: qla2xxx: Fix process response queue for ISP26XX and above commit b86ac8fd4b2f6ec2f9ca9194c56eac12d620096f upstream. This patch improves performance for 16G and above adapter by removing additional call to process_response_queue(). [mkp: typo] Cc: Signed-off-by: Quinn Tran Signed-off-by: Himanshu Madhani Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/qla2xxx/qla_init.c | 2 -- drivers/scsi/qla2xxx/qla_iocb.c | 17 ----------------- 2 files changed, 19 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index 0e19f6bc24ffb..de40202b9ac7e 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -7591,7 +7591,6 @@ qla81xx_nvram_config(scsi_qla_host_t *vha) } icb->firmware_options_2 &= cpu_to_le32( ~(BIT_3 | BIT_2 | BIT_1 | BIT_0)); - vha->flags.process_response_queue = 0; if (ha->zio_mode != QLA_ZIO_DISABLED) { ha->zio_mode = QLA_ZIO_MODE_6; @@ -7603,7 +7602,6 @@ qla81xx_nvram_config(scsi_qla_host_t *vha) icb->firmware_options_2 |= cpu_to_le32( (uint32_t)ha->zio_mode); icb->interrupt_delay_timer = cpu_to_le16(ha->zio_timer); - vha->flags.process_response_queue = 1; } /* enable RIDA Format2 */ diff --git a/drivers/scsi/qla2xxx/qla_iocb.c b/drivers/scsi/qla2xxx/qla_iocb.c index 8d579bf0fc81b..85cb4e30f7427 100644 --- a/drivers/scsi/qla2xxx/qla_iocb.c +++ b/drivers/scsi/qla2xxx/qla_iocb.c @@ -1524,12 +1524,6 @@ qla24xx_start_scsi(srb_t *sp) /* Set chip new ring index. */ WRT_REG_DWORD(req->req_q_in, req->ring_index); - RD_REG_DWORD_RELAXED(&ha->iobase->isp24.hccr); - - /* Manage unprocessed RIO/ZIO commands in response queue. */ - if (vha->flags.process_response_queue && - rsp->ring_ptr->signature != RESPONSE_PROCESSED) - qla24xx_process_response_queue(vha, rsp); spin_unlock_irqrestore(&ha->hardware_lock, flags); return QLA_SUCCESS; @@ -1723,12 +1717,6 @@ qla24xx_dif_start_scsi(srb_t *sp) /* Set chip new ring index. */ WRT_REG_DWORD(req->req_q_in, req->ring_index); - RD_REG_DWORD_RELAXED(&ha->iobase->isp24.hccr); - - /* Manage unprocessed RIO/ZIO commands in response queue. */ - if (vha->flags.process_response_queue && - rsp->ring_ptr->signature != RESPONSE_PROCESSED) - qla24xx_process_response_queue(vha, rsp); spin_unlock_irqrestore(&ha->hardware_lock, flags); @@ -1878,11 +1866,6 @@ qla2xxx_start_scsi_mq(srb_t *sp) /* Set chip new ring index. */ WRT_REG_DWORD(req->req_q_in, req->ring_index); - /* Manage unprocessed RIO/ZIO commands in response queue. */ - if (vha->flags.process_response_queue && - rsp->ring_ptr->signature != RESPONSE_PROCESSED) - qla24xx_process_response_queue(vha, rsp); - spin_unlock_irqrestore(&qpair->qp_lock, flags); return QLA_SUCCESS; -- GitLab From 4b6f0ec528d6c56e7394dde2bb72ba2af35305ac Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Tue, 11 Sep 2018 10:18:24 -0700 Subject: [PATCH 1507/2269] scsi: qla2xxx: Remove stale debug trace message from tcm_qla2xxx commit 7c388f91ec1a59b0ed815b07b90536e2d57e1e1f upstream. Remove stale debug trace. Fixes: 1eb42f965ced ("qla2xxx: Make trace flags more readable") Cc: stable@vger.kernel.org #4.10 Signed-off-by: Quinn Tran Signed-off-by: Himanshu Madhani Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/qla2xxx/tcm_qla2xxx.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c index 3f82ea1b72dc8..9465acd18df03 100644 --- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c +++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c @@ -693,10 +693,6 @@ static int tcm_qla2xxx_queue_status(struct se_cmd *se_cmd) cmd->sg_cnt = 0; cmd->offset = 0; cmd->dma_data_direction = target_reverse_dma_direction(se_cmd); - if (cmd->trc_flags & TRC_XMIT_STATUS) { - pr_crit("Multiple calls for status = %p.\n", cmd); - dump_stack(); - } cmd->trc_flags |= TRC_XMIT_STATUS; if (se_cmd->data_direction == DMA_FROM_DEVICE) { -- GitLab From 21339e8cf0024bf6068a118d985f28417afc1d66 Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Tue, 11 Sep 2018 10:18:21 -0700 Subject: [PATCH 1508/2269] scsi: qla2xxx: shutdown chip if reset fail commit 1e4ac5d6fe0a4af17e4b6251b884485832bf75a3 upstream. If chip unable to fully initialize, use full shutdown sequence to clear out any stale FW state. Fixes: e315cd28b9ef ("[SCSI] qla2xxx: Code changes for qla data structure refactoring") Cc: stable@vger.kernel.org #4.10 Signed-off-by: Quinn Tran Signed-off-by: Himanshu Madhani Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/qla2xxx/qla_init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index de40202b9ac7e..5c7d2628232d7 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -6077,7 +6077,7 @@ qla2x00_abort_isp(scsi_qla_host_t *vha) * The next call disables the board * completely. */ - ha->isp_ops->reset_adapter(vha); + qla2x00_abort_isp_cleanup(vha); vha->flags.online = 0; clear_bit(ISP_ABORT_RETRY, &vha->dpc_flags); -- GitLab From 0a7a9ed0ce1d1e669b50d5763168d4cdddba58b2 Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Wed, 26 Sep 2018 22:05:14 -0700 Subject: [PATCH 1509/2269] scsi: qla2xxx: Fix re-using LoopID when handle is in use commit 5c6400536481d9ef44ef94e7bf2c7b8e81534db7 upstream. This patch fixes issue where driver clears NPort ID map instead of marking handle in use. Once driver clears NPort ID from the database, it can reuse the same NPort ID resulting in a PLOGI failure. [mkp: fixed Himanshu's SoB] Fixes: a084fd68e1d2 ("scsi: qla2xxx: Fix re-login for Nport Handle in use") Cc: Signed-of-by: Quinn Tran Reviewed-by: Ewan D. Milne Signed-off-by: Himanshu Madhani Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/qla2xxx/qla_init.c | 18 ++++-------------- drivers/scsi/qla2xxx/qla_target.c | 3 ++- 2 files changed, 6 insertions(+), 15 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index 5c7d2628232d7..041497c8ecd8f 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -1511,25 +1511,15 @@ qla24xx_handle_plogi_done_event(struct scsi_qla_host *vha, struct event_arg *ea) cid.b.rsvd_1 = 0; ql_dbg(ql_dbg_disc, vha, 0x20ec, - "%s %d %8phC LoopID 0x%x in use post gnl\n", + "%s %d %8phC lid %#x in use with pid %06x post gnl\n", __func__, __LINE__, ea->fcport->port_name, - ea->fcport->loop_id); + ea->fcport->loop_id, cid.b24); - if (IS_SW_RESV_ADDR(cid)) { - set_bit(ea->fcport->loop_id, vha->hw->loop_id_map); - ea->fcport->loop_id = FC_NO_LOOP_ID; - } else { - qla2x00_clear_loop_id(ea->fcport); - } + set_bit(ea->fcport->loop_id, vha->hw->loop_id_map); + ea->fcport->loop_id = FC_NO_LOOP_ID; qla24xx_post_gnl_work(vha, ea->fcport); break; case MBS_PORT_ID_USED: - ql_dbg(ql_dbg_disc, vha, 0x20ed, - "%s %d %8phC NPortId %02x%02x%02x inuse post gidpn\n", - __func__, __LINE__, ea->fcport->port_name, - ea->fcport->d_id.b.domain, ea->fcport->d_id.b.area, - ea->fcport->d_id.b.al_pa); - lid = ea->iop[1] & 0xffff; qlt_find_sess_invalidate_other(vha, wwn_to_u64(ea->fcport->port_name), diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index d6fe08de59a05..87e04c4a49821 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -1203,7 +1203,8 @@ void qlt_schedule_sess_for_deletion(struct fc_port *sess, qla24xx_chk_fcp_state(sess); ql_dbg(ql_dbg_tgt, sess->vha, 0xe001, - "Scheduling sess %p for deletion\n", sess); + "Scheduling sess %p for deletion %8phC\n", + sess, sess->port_name); INIT_WORK(&sess->del_work, qla24xx_delete_sess_fn); queue_work(sess->vha->hw->wq, &sess->del_work); -- GitLab From d94b3a2375cbbd55e6961c7b0dd1f8a75ebc8e10 Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Tue, 25 Sep 2018 12:28:55 +0300 Subject: [PATCH 1510/2269] fuse: Fix use-after-free in fuse_dev_do_read() commit bc78abbd55dd28e2287ec6d6502b842321a17c87 upstream. We may pick freed req in this way: [cpu0] [cpu1] fuse_dev_do_read() fuse_dev_do_write() list_move_tail(&req->list, ...); ... spin_unlock(&fpq->lock); ... ... request_end(fc, req); ... fuse_put_request(fc, req); if (test_bit(FR_INTERRUPTED, ...)) queue_interrupt(fiq, req); Fix that by keeping req alive until we finish all manipulations. Reported-by: syzbot+4e975615ca01f2277bdd@syzkaller.appspotmail.com Signed-off-by: Kirill Tkhai Signed-off-by: Miklos Szeredi Fixes: 46c34a348b0a ("fuse: no fc->lock for pqueue parts") Cc: # v4.2 Signed-off-by: Greg Kroah-Hartman --- fs/fuse/dev.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index ee8105af40010..9a1ca92f97fb1 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -1309,12 +1309,14 @@ static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file, goto out_end; } list_move_tail(&req->list, &fpq->processing); + __fuse_get_request(req); spin_unlock(&fpq->lock); set_bit(FR_SENT, &req->flags); /* matches barrier in request_wait_answer() */ smp_mb__after_atomic(); if (test_bit(FR_INTERRUPTED, &req->flags)) queue_interrupt(fiq, req); + fuse_put_request(fc, req); return reqsize; -- GitLab From ab962e91008a93af5da403c652745883d19a6adf Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Tue, 25 Sep 2018 12:52:42 +0300 Subject: [PATCH 1511/2269] fuse: Fix use-after-free in fuse_dev_do_write() commit d2d2d4fb1f54eff0f3faa9762d84f6446a4bc5d0 upstream. After we found req in request_find() and released the lock, everything may happen with the req in parallel: cpu0 cpu1 fuse_dev_do_write() fuse_dev_do_write() req = request_find(fpq, ...) ... spin_unlock(&fpq->lock) ... ... req = request_find(fpq, oh.unique) ... spin_unlock(&fpq->lock) queue_interrupt(&fc->iq, req); ... ... ... ... ... request_end(fc, req); fuse_put_request(fc, req); ... queue_interrupt(&fc->iq, req); Signed-off-by: Kirill Tkhai Signed-off-by: Miklos Szeredi Fixes: 46c34a348b0a ("fuse: no fc->lock for pqueue parts") Cc: # v4.2 Signed-off-by: Greg Kroah-Hartman --- fs/fuse/dev.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 9a1ca92f97fb1..48a88f94d5d85 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -1874,16 +1874,20 @@ static ssize_t fuse_dev_do_write(struct fuse_dev *fud, /* Is it an interrupt reply? */ if (req->intr_unique == oh.unique) { + __fuse_get_request(req); spin_unlock(&fpq->lock); err = -EINVAL; - if (nbytes != sizeof(struct fuse_out_header)) + if (nbytes != sizeof(struct fuse_out_header)) { + fuse_put_request(fc, req); goto err_finish; + } if (oh.error == -ENOSYS) fc->no_interrupt = 1; else if (oh.error == -EAGAIN) queue_interrupt(&fc->iq, req); + fuse_put_request(fc, req); fuse_copy_finish(cs); return nbytes; -- GitLab From f6f21a2b70c6023c0b6ba103da62e214e02179bd Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 28 Sep 2018 16:43:22 +0200 Subject: [PATCH 1512/2269] fuse: fix blocked_waitq wakeup commit 908a572b80f6e9577b45e81b3dfe2e22111286b8 upstream. Using waitqueue_active() is racy. Make sure we issue a wake_up() unconditionally after storing into fc->blocked. After that it's okay to optimize with waitqueue_active() since the first wake up provides the necessary barrier for all waiters, not the just the woken one. Signed-off-by: Miklos Szeredi Fixes: 3c18ef8117f0 ("fuse: optimize wake_up") Cc: # v3.10 Signed-off-by: Greg Kroah-Hartman --- fs/fuse/dev.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 48a88f94d5d85..32537cfd29bb8 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -384,12 +384,19 @@ static void request_end(struct fuse_conn *fc, struct fuse_req *req) if (test_bit(FR_BACKGROUND, &req->flags)) { spin_lock(&fc->lock); clear_bit(FR_BACKGROUND, &req->flags); - if (fc->num_background == fc->max_background) + if (fc->num_background == fc->max_background) { fc->blocked = 0; - - /* Wake up next waiter, if any */ - if (!fc->blocked && waitqueue_active(&fc->blocked_waitq)) wake_up(&fc->blocked_waitq); + } else if (!fc->blocked) { + /* + * Wake up next waiter, if any. It's okay to use + * waitqueue_active(), as we've already synced up + * fc->blocked with waiters with the wake_up() call + * above. + */ + if (waitqueue_active(&fc->blocked_waitq)) + wake_up(&fc->blocked_waitq); + } if (fc->num_background == fc->congestion_threshold && fc->sb) { clear_bdi_congested(fc->sb->s_bdi, BLK_RW_SYNC); -- GitLab From 78da72ee42d82cfd001e863510eeb270bdd08143 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 28 Sep 2018 16:43:22 +0200 Subject: [PATCH 1513/2269] fuse: set FR_SENT while locked commit 4c316f2f3ff315cb48efb7435621e5bfb81df96d upstream. Otherwise fuse_dev_do_write() could come in and finish off the request, and the set_bit(FR_SENT, ...) could trigger the WARN_ON(test_bit(FR_SENT, ...)) in request_end(). Signed-off-by: Miklos Szeredi Reported-by: syzbot+ef054c4d3f64cd7f7cec@syzkaller.appspotmai Fixes: 46c34a348b0a ("fuse: no fc->lock for pqueue parts") Cc: # v4.2 Signed-off-by: Greg Kroah-Hartman --- fs/fuse/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 32537cfd29bb8..a8b114ae931f9 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -1317,8 +1317,8 @@ static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file, } list_move_tail(&req->list, &fpq->processing); __fuse_get_request(req); - spin_unlock(&fpq->lock); set_bit(FR_SENT, &req->flags); + spin_unlock(&fpq->lock); /* matches barrier in request_wait_answer() */ smp_mb__after_atomic(); if (test_bit(FR_INTERRUPTED, &req->flags)) -- GitLab From 3c5cf7980bed91cc0f776eb9902b8b6adbe72edc Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 18 Oct 2018 09:45:49 +0300 Subject: [PATCH 1514/2269] ovl: fix recursive oi->lock in ovl_link() commit 6cd078702f2f33cb6b19a682de3e9184112f1a46 upstream. linking a non-copied-up file into a non-copied-up parent results in a nested call to mutex_lock_interruptible(&oi->lock). Fix this by copying up target parent before ovl_nlink_start(), same as done in ovl_rename(). ~/unionmount-testsuite$ ./run --ov -s ~/unionmount-testsuite$ ln /mnt/a/foo100 /mnt/a/dir100/ WARNING: possible recursive locking detected -------------------------------------------- ln/1545 is trying to acquire lock: 00000000bcce7c4c (&ovl_i_lock_key[depth]){+.+.}, at: ovl_copy_up_start+0x28/0x7d but task is already holding lock: 0000000026d73d5b (&ovl_i_lock_key[depth]){+.+.}, at: ovl_nlink_start+0x3c/0xc1 [SzM: this seems to be a false positive, but doing the copy-up first is harmless and removes the lockdep splat] Reported-by: syzbot+3ef5c0d1a5cb0b21e6be@syzkaller.appspotmail.com Fixes: 5f8415d6b87e ("ovl: persistent overlay inode nlink for...") Cc: # v4.13 Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi [amir: backport to v4.18] Signed-off-by: Amir Goldstein Signed-off-by: Greg Kroah-Hartman --- fs/overlayfs/dir.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index cc961a3bd3bde..0568898393f23 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -595,6 +595,11 @@ static int ovl_link(struct dentry *old, struct inode *newdir, if (err) goto out_drop_write; + err = ovl_copy_up(new->d_parent); + if (err) + goto out_drop_write; + + err = ovl_nlink_start(old, &locked); if (err) goto out_drop_write; -- GitLab From 639fa868ceeb70c59bec3bc54c828cc5566cca23 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Wed, 5 Sep 2018 17:33:08 +0800 Subject: [PATCH 1515/2269] MIPS: Loongson-3: Fix CPU UART irq delivery problem [ Upstream commit d06f8a2f1befb5a3d0aa660ab1c05e9b744456ea ] Masking/unmasking the CPU UART irq in CP0_Status (and redirecting it to other CPUs) may cause interrupts be lost, especially in multi-package machines (Package-0's UART irq cannot be delivered to others). So make mask_loongson_irq() and unmask_loongson_irq() be no-ops. The original problem (UART IRQ may deliver to any core) is also because of masking/unmasking the CPU UART irq in CP0_Status. So it is safe to remove all of the stuff. Signed-off-by: Huacai Chen Signed-off-by: Paul Burton Patchwork: https://patchwork.linux-mips.org/patch/20433/ Cc: Ralf Baechle Cc: James Hogan Cc: linux-mips@linux-mips.org Cc: Fuxin Zhang Cc: Zhangjin Wu Cc: Huacai Chen Signed-off-by: Sasha Levin --- arch/mips/loongson64/loongson-3/irq.c | 43 ++------------------------- 1 file changed, 3 insertions(+), 40 deletions(-) diff --git a/arch/mips/loongson64/loongson-3/irq.c b/arch/mips/loongson64/loongson-3/irq.c index cbeb20f9fc95c..2e115ab66a00f 100644 --- a/arch/mips/loongson64/loongson-3/irq.c +++ b/arch/mips/loongson64/loongson-3/irq.c @@ -102,45 +102,8 @@ static struct irqaction cascade_irqaction = { .name = "cascade", }; -static inline void mask_loongson_irq(struct irq_data *d) -{ - clear_c0_status(0x100 << (d->irq - MIPS_CPU_IRQ_BASE)); - irq_disable_hazard(); - - /* Workaround: UART IRQ may deliver to any core */ - if (d->irq == LOONGSON_UART_IRQ) { - int cpu = smp_processor_id(); - int node_id = cpu_logical_map(cpu) / loongson_sysconf.cores_per_node; - int core_id = cpu_logical_map(cpu) % loongson_sysconf.cores_per_node; - u64 intenclr_addr = smp_group[node_id] | - (u64)(&LOONGSON_INT_ROUTER_INTENCLR); - u64 introuter_lpc_addr = smp_group[node_id] | - (u64)(&LOONGSON_INT_ROUTER_LPC); - - *(volatile u32 *)intenclr_addr = 1 << 10; - *(volatile u8 *)introuter_lpc_addr = 0x10 + (1<irq == LOONGSON_UART_IRQ) { - int cpu = smp_processor_id(); - int node_id = cpu_logical_map(cpu) / loongson_sysconf.cores_per_node; - int core_id = cpu_logical_map(cpu) % loongson_sysconf.cores_per_node; - u64 intenset_addr = smp_group[node_id] | - (u64)(&LOONGSON_INT_ROUTER_INTENSET); - u64 introuter_lpc_addr = smp_group[node_id] | - (u64)(&LOONGSON_INT_ROUTER_LPC); - - *(volatile u32 *)intenset_addr = 1 << 10; - *(volatile u8 *)introuter_lpc_addr = 0x10 + (1<irq - MIPS_CPU_IRQ_BASE)); - irq_enable_hazard(); -} +static inline void mask_loongson_irq(struct irq_data *d) { } +static inline void unmask_loongson_irq(struct irq_data *d) { } /* For MIPS IRQs which shared by all cores */ static struct irq_chip loongson_irq_chip = { @@ -183,7 +146,7 @@ void __init mach_init_irq(void) chip->irq_set_affinity = plat_set_irq_affinity; irq_set_chip_and_handler(LOONGSON_UART_IRQ, - &loongson_irq_chip, handle_level_irq); + &loongson_irq_chip, handle_percpu_irq); /* setup HT1 irq */ setup_irq(LOONGSON_HT1_IRQ, &cascade_irqaction); -- GitLab From 14563f42672632609f637c88cfe7411d6a06ee64 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Wed, 5 Sep 2018 17:33:09 +0800 Subject: [PATCH 1516/2269] MIPS: Loongson-3: Fix BRIDGE irq delivery problem [ Upstream commit 360fe725f8849aaddc53475fef5d4a0c439b05ae ] After commit e509bd7da149dc349160 ("genirq: Allow migration of chained interrupts by installing default action") Loongson-3 fails at here: setup_irq(LOONGSON_HT1_IRQ, &cascade_irqaction); This is because both chained_action and cascade_irqaction don't have IRQF_SHARED flag. This will cause Loongson-3 resume fails because HPET timer interrupt can't be delivered during S3. So we set the irqchip of the chained irq to loongson_irq_chip which doesn't disable the chained irq in CP0.Status. Cc: stable@vger.kernel.org Signed-off-by: Huacai Chen Signed-off-by: Paul Burton Patchwork: https://patchwork.linux-mips.org/patch/20434/ Cc: Ralf Baechle Cc: James Hogan Cc: linux-mips@linux-mips.org Cc: Fuxin Zhang Cc: Zhangjin Wu Cc: Huacai Chen Signed-off-by: Sasha Levin --- arch/mips/include/asm/mach-loongson64/irq.h | 2 +- arch/mips/loongson64/loongson-3/irq.c | 13 +++---------- 2 files changed, 4 insertions(+), 11 deletions(-) diff --git a/arch/mips/include/asm/mach-loongson64/irq.h b/arch/mips/include/asm/mach-loongson64/irq.h index 3644b68c0cccd..be9f727a93280 100644 --- a/arch/mips/include/asm/mach-loongson64/irq.h +++ b/arch/mips/include/asm/mach-loongson64/irq.h @@ -10,7 +10,7 @@ #define MIPS_CPU_IRQ_BASE 56 #define LOONGSON_UART_IRQ (MIPS_CPU_IRQ_BASE + 2) /* UART */ -#define LOONGSON_HT1_IRQ (MIPS_CPU_IRQ_BASE + 3) /* HT1 */ +#define LOONGSON_BRIDGE_IRQ (MIPS_CPU_IRQ_BASE + 3) /* CASCADE */ #define LOONGSON_TIMER_IRQ (MIPS_CPU_IRQ_BASE + 7) /* CPU Timer */ #define LOONGSON_HT1_CFG_BASE loongson_sysconf.ht_control_base diff --git a/arch/mips/loongson64/loongson-3/irq.c b/arch/mips/loongson64/loongson-3/irq.c index 2e115ab66a00f..5605061f5f981 100644 --- a/arch/mips/loongson64/loongson-3/irq.c +++ b/arch/mips/loongson64/loongson-3/irq.c @@ -96,12 +96,6 @@ void mach_irq_dispatch(unsigned int pending) } } -static struct irqaction cascade_irqaction = { - .handler = no_action, - .flags = IRQF_NO_SUSPEND, - .name = "cascade", -}; - static inline void mask_loongson_irq(struct irq_data *d) { } static inline void unmask_loongson_irq(struct irq_data *d) { } @@ -147,11 +141,10 @@ void __init mach_init_irq(void) irq_set_chip_and_handler(LOONGSON_UART_IRQ, &loongson_irq_chip, handle_percpu_irq); + irq_set_chip_and_handler(LOONGSON_BRIDGE_IRQ, + &loongson_irq_chip, handle_percpu_irq); - /* setup HT1 irq */ - setup_irq(LOONGSON_HT1_IRQ, &cascade_irqaction); - - set_c0_status(STATUSF_IP2 | STATUSF_IP6); + set_c0_status(STATUSF_IP2 | STATUSF_IP3 | STATUSF_IP6); } #ifdef CONFIG_HOTPLUG_CPU -- GitLab From 1592c520d42724546c376d9e1e7aacd2f5de9c64 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Mon, 29 Oct 2018 18:30:13 -0700 Subject: [PATCH 1517/2269] xtensa: add NOTES section to the linker script commit 4119ba211bc4f1bf638f41e50b7a0f329f58aa16 upstream. This section collects all source .note.* sections together in the vmlinux image. Without it .note.Linux section may be placed at address 0, while the rest of the kernel is at its normal address, resulting in a huge vmlinux.bin image that may not be linked into the xtensa Image.elf. Cc: stable@vger.kernel.org Signed-off-by: Max Filippov Signed-off-by: Greg Kroah-Hartman --- arch/xtensa/boot/Makefile | 2 +- arch/xtensa/kernel/vmlinux.lds.S | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/xtensa/boot/Makefile b/arch/xtensa/boot/Makefile index 53e4178711e69..8c20a7965bda0 100644 --- a/arch/xtensa/boot/Makefile +++ b/arch/xtensa/boot/Makefile @@ -34,7 +34,7 @@ boot-elf boot-redboot: $(addprefix $(obj)/,$(subdir-y)) \ $(addprefix $(obj)/,$(host-progs)) $(Q)$(MAKE) $(build)=$(obj)/$@ $(MAKECMDGOALS) -OBJCOPYFLAGS = --strip-all -R .comment -R .note.gnu.build-id -O binary +OBJCOPYFLAGS = --strip-all -R .comment -R .notes -O binary vmlinux.bin: vmlinux FORCE $(call if_changed,objcopy) diff --git a/arch/xtensa/kernel/vmlinux.lds.S b/arch/xtensa/kernel/vmlinux.lds.S index 162c77e53ca84..3e04845a15e61 100644 --- a/arch/xtensa/kernel/vmlinux.lds.S +++ b/arch/xtensa/kernel/vmlinux.lds.S @@ -146,6 +146,7 @@ SECTIONS .fixup : { *(.fixup) } EXCEPTION_TABLE(16) + NOTES /* Data section */ _sdata = .; -- GitLab From f221f7b447f34778647bef9db70eed71648d4f28 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Sun, 4 Nov 2018 01:46:00 -0700 Subject: [PATCH 1518/2269] xtensa: make sure bFLT stack is 16 byte aligned commit 0773495b1f5f1c5e23551843f87b5ff37e7af8f7 upstream. Xtensa ABI requires stack alignment to be at least 16. In noMMU configuration ARCH_SLAB_MINALIGN is used to align stack. Make it at least 16. This fixes the following runtime error in noMMU configuration, caused by interaction between insufficiently aligned stack and alloca function, that results in corruption of on-stack variable in the libc function glob: Caught unhandled exception in 'sh' (pid = 47, pc = 0x02d05d65) - should not happen EXCCAUSE is 15 Cc: stable@vger.kernel.org Signed-off-by: Max Filippov Signed-off-by: Greg Kroah-Hartman --- arch/xtensa/include/asm/processor.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/xtensa/include/asm/processor.h b/arch/xtensa/include/asm/processor.h index 5b0027d4ecc05..a39cd81b741ad 100644 --- a/arch/xtensa/include/asm/processor.h +++ b/arch/xtensa/include/asm/processor.h @@ -24,7 +24,11 @@ # error Linux requires the Xtensa Windowed Registers Option. #endif -#define ARCH_SLAB_MINALIGN XCHAL_DATA_WIDTH +/* Xtensa ABI requires stack alignment to be at least 16 */ + +#define STACK_ALIGN (XCHAL_DATA_WIDTH > 16 ? XCHAL_DATA_WIDTH : 16) + +#define ARCH_SLAB_MINALIGN STACK_ALIGN /* * User space process size: 1 GB. -- GitLab From fa68fdf0a20137a416f5a5cce72aed098faa6858 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Tue, 13 Nov 2018 23:46:42 -0800 Subject: [PATCH 1519/2269] xtensa: fix boot parameters address translation commit 40dc948f234b73497c3278875eb08a01d5854d3f upstream. The bootloader may pass physical address of the boot parameters structure to the MMUv3 kernel in the register a2. Code in the _SetupMMU block in the arch/xtensa/kernel/head.S is supposed to map that physical address to the virtual address in the configured virtual memory layout. This code haven't been updated when additional 256+256 and 512+512 memory layouts were introduced and it may produce wrong addresses when used with these layouts. Cc: stable@vger.kernel.org Signed-off-by: Max Filippov Signed-off-by: Greg Kroah-Hartman --- arch/xtensa/kernel/head.S | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/xtensa/kernel/head.S b/arch/xtensa/kernel/head.S index 23ce62e604359..27c8e07ace43f 100644 --- a/arch/xtensa/kernel/head.S +++ b/arch/xtensa/kernel/head.S @@ -88,9 +88,12 @@ _SetupMMU: initialize_mmu #if defined(CONFIG_MMU) && XCHAL_HAVE_PTP_MMU && XCHAL_HAVE_SPANNING_WAY rsr a2, excsave1 - movi a3, 0x08000000 + movi a3, XCHAL_KSEG_PADDR + bltu a2, a3, 1f + sub a2, a2, a3 + movi a3, XCHAL_KSEG_SIZE bgeu a2, a3, 1f - movi a3, 0xd0000000 + movi a3, XCHAL_KSEG_CACHED_VADDR add a2, a2, a3 wsr a2, excsave1 1: -- GitLab From 5d33c77ba0fa216009728da96d33e0199b58e9ad Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Fri, 15 Jun 2018 16:42:54 +0200 Subject: [PATCH 1520/2269] um: Drop own definition of PTRACE_SYSEMU/_SINGLESTEP commit 0676b957c24bfb6e495449ba7b7e72c5b5d79233 upstream. 32bit UML used to define PTRACE_SYSEMU and PTRACE_SYSEMU_SINGLESTEP own its own because many years ago not all libcs had these request codes in their UAPI. These days PTRACE_SYSEMU/_SINGLESTEP is well known and part of glibc and our own define becomes problematic. With change c48831d0eebf ("linux/x86: sync sys/ptrace.h with Linux 4.14 [BZ #22433]") glibc turned PTRACE_SYSEMU/_SINGLESTEP into a enum and UML failed to build. Let's drop our define and rely on the fact that every libc has PTRACE_SYSEMU/_SINGLESTEP. Cc: Cc: Ritesh Raj Sarraf Reported-and-tested-by: Ritesh Raj Sarraf Signed-off-by: Richard Weinberger Signed-off-by: Greg Kroah-Hartman --- arch/x86/um/shared/sysdep/ptrace_32.h | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/arch/x86/um/shared/sysdep/ptrace_32.h b/arch/x86/um/shared/sysdep/ptrace_32.h index b94a108de1dc8..ae00d22bce02c 100644 --- a/arch/x86/um/shared/sysdep/ptrace_32.h +++ b/arch/x86/um/shared/sysdep/ptrace_32.h @@ -10,20 +10,10 @@ static inline void update_debugregs(int seq) {} -/* syscall emulation path in ptrace */ - -#ifndef PTRACE_SYSEMU -#define PTRACE_SYSEMU 31 -#endif - void set_using_sysemu(int value); int get_using_sysemu(void); extern int sysemu_supported; -#ifndef PTRACE_SYSEMU_SINGLESTEP -#define PTRACE_SYSEMU_SINGLESTEP 32 -#endif - #define UPT_SYSCALL_ARG1(r) UPT_BX(r) #define UPT_SYSCALL_ARG2(r) UPT_CX(r) #define UPT_SYSCALL_ARG3(r) UPT_DX(r) -- GitLab From f074414aff064f7167756713ebdd8622592fe350 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 29 Aug 2018 21:20:10 +0200 Subject: [PATCH 1521/2269] clk: s2mps11: Fix matching when built as module and DT node contains compatible commit 8985167ecf57f97061599a155bb9652c84ea4913 upstream. When driver is built as module and DT node contains clocks compatible (e.g. "samsung,s2mps11-clk"), the module will not be autoloaded because module aliases won't match. The modalias from uevent: of:NclocksTCsamsung,s2mps11-clk The modalias from driver: platform:s2mps11-clk The devices are instantiated by parent's MFD. However both Device Tree bindings and parent define the compatible for clocks devices. In case of module matching this DT compatible will be used. The issue will not happen if this is a built-in (no need for module matching) or when clocks DT node does not contain compatible (not correct from bindings perspective but working for driver). Note when backporting to stable kernels: adjust the list of device ID entries. Cc: Fixes: 53c31b3437a6 ("mfd: sec-core: Add of_compatible strings for clock MFD cells") Signed-off-by: Krzysztof Kozlowski Acked-by: Stephen Boyd Signed-off-by: Stephen Boyd Signed-off-by: Greg Kroah-Hartman --- drivers/clk/clk-s2mps11.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/drivers/clk/clk-s2mps11.c b/drivers/clk/clk-s2mps11.c index fbaa84a33c46c..14071a57c9262 100644 --- a/drivers/clk/clk-s2mps11.c +++ b/drivers/clk/clk-s2mps11.c @@ -245,6 +245,36 @@ static const struct platform_device_id s2mps11_clk_id[] = { }; MODULE_DEVICE_TABLE(platform, s2mps11_clk_id); +#ifdef CONFIG_OF +/* + * Device is instantiated through parent MFD device and device matching is done + * through platform_device_id. + * + * However if device's DT node contains proper clock compatible and driver is + * built as a module, then the *module* matching will be done trough DT aliases. + * This requires of_device_id table. In the same time this will not change the + * actual *device* matching so do not add .of_match_table. + */ +static const struct of_device_id s2mps11_dt_match[] = { + { + .compatible = "samsung,s2mps11-clk", + .data = (void *)S2MPS11X, + }, { + .compatible = "samsung,s2mps13-clk", + .data = (void *)S2MPS13X, + }, { + .compatible = "samsung,s2mps14-clk", + .data = (void *)S2MPS14X, + }, { + .compatible = "samsung,s5m8767-clk", + .data = (void *)S5M8767X, + }, { + /* Sentinel */ + }, +}; +MODULE_DEVICE_TABLE(of, s2mps11_dt_match); +#endif + static struct platform_driver s2mps11_clk_driver = { .driver = { .name = "s2mps11-clk", -- GitLab From abe960b761eb6b3b32b099c80535b3d3943db040 Mon Sep 17 00:00:00 2001 From: Ronald Wahl Date: Wed, 10 Oct 2018 15:54:54 +0200 Subject: [PATCH 1522/2269] clk: at91: Fix division by zero in PLL recalc_rate() commit 0f5cb0e6225cae2f029944cb8c74617aab6ddd49 upstream. Commit a982e45dc150 ("clk: at91: PLL recalc_rate() now using cached MUL and DIV values") removed a check that prevents a division by zero. This now causes a stacktrace when booting the kernel on a at91 platform if the PLL DIV register contains zero. This commit reintroduces this check. Fixes: a982e45dc150 ("clk: at91: PLL recalc_rate() now using cached...") Cc: Signed-off-by: Ronald Wahl Acked-by: Ludovic Desroches Signed-off-by: Stephen Boyd Signed-off-by: Greg Kroah-Hartman --- drivers/clk/at91/clk-pll.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/clk/at91/clk-pll.c b/drivers/clk/at91/clk-pll.c index 72b6091eb7b94..dc7fbc796cb65 100644 --- a/drivers/clk/at91/clk-pll.c +++ b/drivers/clk/at91/clk-pll.c @@ -133,6 +133,9 @@ static unsigned long clk_pll_recalc_rate(struct clk_hw *hw, { struct clk_pll *pll = to_clk_pll(hw); + if (!pll->div || !pll->mul) + return 0; + return (parent_rate / pll->div) * (pll->mul + 1); } -- GitLab From 864aede99f55cfad8be37ae24a02bd92bc5606ad Mon Sep 17 00:00:00 2001 From: Enric Balletbo i Serra Date: Tue, 16 Oct 2018 15:41:44 +0200 Subject: [PATCH 1523/2269] clk: rockchip: Fix static checker warning in rockchip_ddrclk_get_parent call commit 665636b2940d0897c4130253467f5e8c42eea392 upstream. Fixes the signedness bug returning '(-22)' on the return type by removing the sanity checker in rockchip_ddrclk_get_parent(). The function should return and unsigned value only and it's safe to remove the sanity checker as the core functions that call get_parent like clk_core_get_parent_by_index already ensures the validity of the clk index returned (index >= core->num_parents). Fixes: a4f182bf81f18 ("clk: rockchip: add new clock-type for the ddrclk") Cc: stable@vger.kernel.org Signed-off-by: Enric Balletbo i Serra Reviewed-by: Stephen Boyd Signed-off-by: Heiko Stuebner Signed-off-by: Greg Kroah-Hartman --- drivers/clk/rockchip/clk-ddr.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/clk/rockchip/clk-ddr.c b/drivers/clk/rockchip/clk-ddr.c index e8075359366b0..ebce5260068b7 100644 --- a/drivers/clk/rockchip/clk-ddr.c +++ b/drivers/clk/rockchip/clk-ddr.c @@ -80,16 +80,12 @@ static long rockchip_ddrclk_sip_round_rate(struct clk_hw *hw, static u8 rockchip_ddrclk_get_parent(struct clk_hw *hw) { struct rockchip_ddrclk *ddrclk = to_rockchip_ddrclk_hw(hw); - int num_parents = clk_hw_get_num_parents(hw); u32 val; val = clk_readl(ddrclk->reg_base + ddrclk->mux_offset) >> ddrclk->mux_shift; val &= GENMASK(ddrclk->mux_width - 1, 0); - if (val >= num_parents) - return -EINVAL; - return val; } -- GitLab From d2aaeb9a8e05442129694d8f206685f01644ba75 Mon Sep 17 00:00:00 2001 From: Chris Packham Date: Thu, 24 May 2018 17:23:41 +1200 Subject: [PATCH 1524/2269] clk: mvebu: use correct bit for 98DX3236 NAND commit 00c5a926af12a9f0236928dab3dc9faf621406a1 upstream. The correct fieldbit value for the NAND PLL reload trigger is 27. Fixes: commit e120c17a70e5 ("clk: mvebu: support for 98DX3236 SoC") Signed-off-by: Chris Packham Signed-off-by: Stephen Boyd Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- drivers/clk/mvebu/clk-corediv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/mvebu/clk-corediv.c b/drivers/clk/mvebu/clk-corediv.c index 8491979f40965..68f05c53d40e1 100644 --- a/drivers/clk/mvebu/clk-corediv.c +++ b/drivers/clk/mvebu/clk-corediv.c @@ -72,7 +72,7 @@ static const struct clk_corediv_desc mvebu_corediv_desc[] = { }; static const struct clk_corediv_desc mv98dx3236_corediv_desc[] = { - { .mask = 0x0f, .offset = 6, .fieldbit = 26 }, /* NAND clock */ + { .mask = 0x0f, .offset = 6, .fieldbit = 27 }, /* NAND clock */ }; #define to_corediv_clk(p) container_of(p, struct clk_corediv, hw) -- GitLab From 65dd3e59e96f0e196526f0634c1b4a39b42e0911 Mon Sep 17 00:00:00 2001 From: Lubomir Rintel Date: Thu, 4 Oct 2018 17:29:03 -0400 Subject: [PATCH 1525/2269] media: ov7670: make "xclk" clock optional commit 786fa584eda86d6598db3b87c61dc81f68808d11 upstream. When the "xclk" clock was added, it was made mandatory. This broke the driver on an OLPC plaform which doesn't know such clock. Make it optional. Tested on a OLPC XO-1 laptop. Fixes: 0a024d634cee ("[media] ov7670: get xclk") Cc: stable@vger.kernel.org # 4.11+ Signed-off-by: Lubomir Rintel Signed-off-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/i2c/ov7670.c | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/drivers/media/i2c/ov7670.c b/drivers/media/i2c/ov7670.c index e88549f0e7046..39ff73a6a8079 100644 --- a/drivers/media/i2c/ov7670.c +++ b/drivers/media/i2c/ov7670.c @@ -1612,23 +1612,29 @@ static int ov7670_probe(struct i2c_client *client, info->pclk_hb_disable = true; } - info->clk = devm_clk_get(&client->dev, "xclk"); - if (IS_ERR(info->clk)) - return PTR_ERR(info->clk); - ret = clk_prepare_enable(info->clk); - if (ret) - return ret; + info->clk = devm_clk_get(&client->dev, "xclk"); /* optional */ + if (IS_ERR(info->clk)) { + ret = PTR_ERR(info->clk); + if (ret == -ENOENT) + info->clk = NULL; + else + return ret; + } + if (info->clk) { + ret = clk_prepare_enable(info->clk); + if (ret) + return ret; + info->clock_speed = clk_get_rate(info->clk) / 1000000; + if (info->clock_speed < 10 || info->clock_speed > 48) { + ret = -EINVAL; + goto clk_disable; + } + } ret = ov7670_init_gpio(client, info); if (ret) goto clk_disable; - info->clock_speed = clk_get_rate(info->clk) / 1000000; - if (info->clock_speed < 10 || info->clock_speed > 48) { - ret = -EINVAL; - goto clk_disable; - } - /* Make sure it's an ov7670 */ ret = ov7670_detect(sd); if (ret) { -- GitLab From fc07f543a6d39399ba425c5bc588920fbdf65f39 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Wed, 26 Sep 2018 18:03:16 +0200 Subject: [PATCH 1526/2269] libceph: bump CEPH_MSG_MAX_DATA_LEN commit 94e6992bb560be8bffb47f287194adf070b57695 upstream. If the read is large enough, we end up spinning in the messenger: libceph: osd0 192.168.122.1:6801 io error libceph: osd0 192.168.122.1:6801 io error libceph: osd0 192.168.122.1:6801 io error This is a receive side limit, so only reads were affected. Cc: stable@vger.kernel.org Signed-off-by: Ilya Dryomov Signed-off-by: Greg Kroah-Hartman --- include/linux/ceph/libceph.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index c2ec44cf5098a..d3b04f9589a9a 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -81,7 +81,13 @@ struct ceph_options { #define CEPH_MSG_MAX_FRONT_LEN (16*1024*1024) #define CEPH_MSG_MAX_MIDDLE_LEN (16*1024*1024) -#define CEPH_MSG_MAX_DATA_LEN (16*1024*1024) + +/* + * Handle the largest possible rbd object in one message. + * There is no limit on the size of cephfs objects, but it has to obey + * rsize and wsize mount options anyway. + */ +#define CEPH_MSG_MAX_DATA_LEN (32*1024*1024) #define CEPH_AUTH_NAME_DEFAULT "guest" -- GitLab From 32ffde79e88f5a943d89b04ca3821ac8fb1ce7e5 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Thu, 27 Sep 2018 21:16:05 +0800 Subject: [PATCH 1527/2269] Revert "ceph: fix dentry leak in splice_dentry()" commit efe328230dc01aa0b1269aad0b5fae73eea4677a upstream. This reverts commit 8b8f53af1ed9df88a4c0fbfdf3db58f62060edf3. splice_dentry() is used by three places. For two places, req->r_dentry is passed to splice_dentry(). In the case of error, req->r_dentry does not get updated. So splice_dentry() should not drop reference. Cc: stable@vger.kernel.org # 4.18+ Signed-off-by: "Yan, Zheng" Signed-off-by: Ilya Dryomov Signed-off-by: Greg Kroah-Hartman --- fs/ceph/inode.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index d5124ed35154e..a1492bdc6d030 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -1087,8 +1087,12 @@ static struct dentry *splice_dentry(struct dentry *dn, struct inode *in) if (IS_ERR(realdn)) { pr_err("splice_dentry error %ld %p inode %p ino %llx.%llx\n", PTR_ERR(realdn), dn, in, ceph_vinop(in)); - dput(dn); - dn = realdn; /* note realdn contains the error */ + dn = realdn; + /* + * Caller should release 'dn' in the case of error. + * If 'req->r_dentry' is passed to this function, + * caller should leave 'req->r_dentry' untouched. + */ goto out; } else if (realdn) { dout("dn %p (%d) spliced with %p (%d) " -- GitLab From 3b29cbb592bd712428607fee1ab214d42348719f Mon Sep 17 00:00:00 2001 From: Allen Wild Date: Tue, 26 Sep 2017 19:37:44 +0200 Subject: [PATCH 1528/2269] thermal: enable broadcom menu for arm64 bcm2835 commit fec3624f0bcdb6b20ef9ccf9d9d55d0d75d776f8 upstream. Moving the bcm2835 thermal driver to the broadcom directory prevented it from getting enabled for arm64 builds, since the broadcom directory is only available when 32-bit specific ARCH_BCM is set. Fix this by enabling the Broadcom menu for ARCH_BCM or ARCH_BCM2835. Fixes: 6892cf07e733 ("thermal: bcm2835: move to the broadcom subdirectory") Reviewed-by: Eric Anholt Signed-off-by: Allen Wild Signed-off-by: Stefan Wahren Signed-off-by: Eduardo Valentin Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- drivers/thermal/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig index 07002df4f83ac..e3f0d1fd17203 100644 --- a/drivers/thermal/Kconfig +++ b/drivers/thermal/Kconfig @@ -408,7 +408,7 @@ config MTK_THERMAL controller present in Mediatek SoCs menu "Broadcom thermal drivers" -depends on ARCH_BCM || COMPILE_TEST +depends on ARCH_BCM || ARCH_BCM2835 || COMPILE_TEST source "drivers/thermal/broadcom/Kconfig" endmenu -- GitLab From 07ea136219e0bbb653bb15247f1c1838d635f3bb Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Mon, 8 Oct 2018 12:57:34 +0200 Subject: [PATCH 1529/2269] mach64: fix display corruption on big endian machines MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 3c6c6a7878d00a3ac997a779c5b9861ff25dfcc8 upstream. The code for manual bit triple is not endian-clean. It builds the variable "hostdword" using byte accesses, therefore we must read the variable with "le32_to_cpu". The patch also enables (hardware or software) bit triple only if the image is monochrome (image->depth). If we want to blit full-color image, we shouldn't use the triple code. Signed-off-by: Mikulas Patocka Reviewed-by: Ville Syrjälä Cc: stable@vger.kernel.org Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Greg Kroah-Hartman --- drivers/video/fbdev/aty/mach64_accel.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/video/fbdev/aty/mach64_accel.c b/drivers/video/fbdev/aty/mach64_accel.c index 2541a0e0de763..7c53a2380cc81 100644 --- a/drivers/video/fbdev/aty/mach64_accel.c +++ b/drivers/video/fbdev/aty/mach64_accel.c @@ -345,7 +345,7 @@ void atyfb_imageblit(struct fb_info *info, const struct fb_image *image) * since Rage 3D IIc we have DP_HOST_TRIPLE_EN bit * this hwaccelerated triple has an issue with not aligned data */ - if (M64_HAS(HW_TRIPLE) && image->width % 8 == 0) + if (image->depth == 1 && M64_HAS(HW_TRIPLE) && image->width % 8 == 0) pix_width |= DP_HOST_TRIPLE_EN; } @@ -382,7 +382,7 @@ void atyfb_imageblit(struct fb_info *info, const struct fb_image *image) src_bytes = (((image->width * image->depth) + 7) / 8) * image->height; /* manual triple each pixel */ - if (info->var.bits_per_pixel == 24 && !(pix_width & DP_HOST_TRIPLE_EN)) { + if (image->depth == 1 && info->var.bits_per_pixel == 24 && !(pix_width & DP_HOST_TRIPLE_EN)) { int inbit, outbit, mult24, byte_id_in_dword, width; u8 *pbitmapin = (u8*)image->data, *pbitmapout; u32 hostdword; @@ -415,7 +415,7 @@ void atyfb_imageblit(struct fb_info *info, const struct fb_image *image) } } wait_for_fifo(1, par); - aty_st_le32(HOST_DATA0, hostdword, par); + aty_st_le32(HOST_DATA0, le32_to_cpu(hostdword), par); } } else { u32 *pbitmap, dwords = (src_bytes + 3) / 4; -- GitLab From 859eac9470c8c9493b8254f05e378f8a7ccaf642 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Mon, 8 Oct 2018 12:57:35 +0200 Subject: [PATCH 1530/2269] mach64: fix image corruption due to reading accelerator registers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit c09bcc91bb94ed91f1391bffcbe294963d605732 upstream. Reading the registers without waiting for engine idle returns unpredictable values. These unpredictable values result in display corruption - if atyfb_imageblit reads the content of DP_PIX_WIDTH with the bit DP_HOST_TRIPLE_EN set (from previous invocation), the driver would never ever clear the bit, resulting in display corruption. We don't want to wait for idle because it would degrade performance, so this patch modifies the driver so that it never reads accelerator registers. HOST_CNTL doesn't have to be read, we can just write it with HOST_BYTE_ALIGN because no other part of the driver cares if HOST_BYTE_ALIGN is set. DP_PIX_WIDTH is written in the functions atyfb_copyarea and atyfb_fillrect with the default value and in atyfb_imageblit with the value set according to the source image data. Signed-off-by: Mikulas Patocka Reviewed-by: Ville Syrjälä Cc: stable@vger.kernel.org Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Greg Kroah-Hartman --- drivers/video/fbdev/aty/mach64_accel.c | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/drivers/video/fbdev/aty/mach64_accel.c b/drivers/video/fbdev/aty/mach64_accel.c index 7c53a2380cc81..3ad46255f9904 100644 --- a/drivers/video/fbdev/aty/mach64_accel.c +++ b/drivers/video/fbdev/aty/mach64_accel.c @@ -127,7 +127,7 @@ void aty_init_engine(struct atyfb_par *par, struct fb_info *info) /* set host attributes */ wait_for_fifo(13, par); - aty_st_le32(HOST_CNTL, 0, par); + aty_st_le32(HOST_CNTL, HOST_BYTE_ALIGN, par); /* set pattern attributes */ aty_st_le32(PAT_REG0, 0, par); @@ -233,7 +233,8 @@ void atyfb_copyarea(struct fb_info *info, const struct fb_copyarea *area) rotation = rotation24bpp(dx, direction); } - wait_for_fifo(4, par); + wait_for_fifo(5, par); + aty_st_le32(DP_PIX_WIDTH, par->crtc.dp_pix_width, par); aty_st_le32(DP_SRC, FRGD_SRC_BLIT, par); aty_st_le32(SRC_Y_X, (sx << 16) | sy, par); aty_st_le32(SRC_HEIGHT1_WIDTH1, (width << 16) | area->height, par); @@ -269,7 +270,8 @@ void atyfb_fillrect(struct fb_info *info, const struct fb_fillrect *rect) rotation = rotation24bpp(dx, DST_X_LEFT_TO_RIGHT); } - wait_for_fifo(3, par); + wait_for_fifo(4, par); + aty_st_le32(DP_PIX_WIDTH, par->crtc.dp_pix_width, par); aty_st_le32(DP_FRGD_CLR, color, par); aty_st_le32(DP_SRC, BKGD_SRC_BKGD_CLR | FRGD_SRC_FRGD_CLR | MONO_SRC_ONE, @@ -284,7 +286,7 @@ void atyfb_imageblit(struct fb_info *info, const struct fb_image *image) { struct atyfb_par *par = (struct atyfb_par *) info->par; u32 src_bytes, dx = image->dx, dy = image->dy, width = image->width; - u32 pix_width_save, pix_width, host_cntl, rotation = 0, src, mix; + u32 pix_width, rotation = 0, src, mix; if (par->asleep) return; @@ -296,8 +298,7 @@ void atyfb_imageblit(struct fb_info *info, const struct fb_image *image) return; } - pix_width = pix_width_save = aty_ld_le32(DP_PIX_WIDTH, par); - host_cntl = aty_ld_le32(HOST_CNTL, par) | HOST_BYTE_ALIGN; + pix_width = par->crtc.dp_pix_width; switch (image->depth) { case 1: @@ -370,12 +371,11 @@ void atyfb_imageblit(struct fb_info *info, const struct fb_image *image) mix = FRGD_MIX_D_XOR_S | BKGD_MIX_D; } - wait_for_fifo(6, par); - aty_st_le32(DP_WRITE_MASK, 0xFFFFFFFF, par); + wait_for_fifo(5, par); aty_st_le32(DP_PIX_WIDTH, pix_width, par); aty_st_le32(DP_MIX, mix, par); aty_st_le32(DP_SRC, src, par); - aty_st_le32(HOST_CNTL, host_cntl, par); + aty_st_le32(HOST_CNTL, HOST_BYTE_ALIGN, par); aty_st_le32(DST_CNTL, DST_Y_TOP_TO_BOTTOM | DST_X_LEFT_TO_RIGHT | rotation, par); draw_rect(dx, dy, width, image->height, par); @@ -424,8 +424,4 @@ void atyfb_imageblit(struct fb_info *info, const struct fb_image *image) aty_st_le32(HOST_DATA0, get_unaligned_le32(pbitmap), par); } } - - /* restore pix_width */ - wait_for_fifo(1, par); - aty_st_le32(DP_PIX_WIDTH, pix_width_save, par); } -- GitLab From b3d3180689f1c433354ed01aa405ffee810df5f9 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Wed, 25 Jul 2018 19:47:19 -0500 Subject: [PATCH 1531/2269] reset: hisilicon: fix potential NULL pointer dereference commit e9a2310fb689151166df7fd9971093362d34bd79 upstream. There is a potential execution path in which function platform_get_resource() returns NULL. If this happens, we will end up having a NULL pointer dereference. Fix this by replacing devm_ioremap with devm_ioremap_resource, which has the NULL check and the memory region request. This code was detected with the help of Coccinelle. Cc: stable@vger.kernel.org Fixes: 97b7129cd2af ("reset: hisilicon: change the definition of hisi_reset_init") Signed-off-by: Gustavo A. R. Silva Signed-off-by: Stephen Boyd Signed-off-by: Greg Kroah-Hartman --- drivers/clk/hisilicon/reset.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/clk/hisilicon/reset.c b/drivers/clk/hisilicon/reset.c index 2a5015c736ce6..43e82fa644226 100644 --- a/drivers/clk/hisilicon/reset.c +++ b/drivers/clk/hisilicon/reset.c @@ -109,9 +109,8 @@ struct hisi_reset_controller *hisi_reset_init(struct platform_device *pdev) return NULL; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - rstc->membase = devm_ioremap(&pdev->dev, - res->start, resource_size(res)); - if (!rstc->membase) + rstc->membase = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(rstc->membase)) return NULL; spin_lock_init(&rstc->lock); -- GitLab From 56bce0e5d5cca4377879fd983d454418f6a87263 Mon Sep 17 00:00:00 2001 From: Greg Edwards Date: Wed, 22 Aug 2018 13:21:53 -0600 Subject: [PATCH 1532/2269] vhost/scsi: truncate T10 PI iov_iter to prot_bytes commit 4542d623c7134bc1738f8a68ccb6dd546f1c264f upstream. Commands with protection information included were not truncating the protection iov_iter to the number of protection bytes in the command. This resulted in vhost_scsi mis-calculating the size of the protection SGL in vhost_scsi_calc_sgls(), and including both the protection and data SG entries in the protection SGL. Fixes: 09b13fa8c1a1 ("vhost/scsi: Add ANY_LAYOUT support in vhost_scsi_handle_vq") Signed-off-by: Greg Edwards Signed-off-by: Michael S. Tsirkin Fixes: 09b13fa8c1a1093e9458549ac8bb203a7c65c62a Cc: stable@vger.kernel.org Reviewed-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- drivers/vhost/scsi.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index e47c5bc3ddcad..35ebf06d9ecb5 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -993,7 +993,8 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq) prot_bytes = vhost32_to_cpu(vq, v_req_pi.pi_bytesin); } /* - * Set prot_iter to data_iter, and advance past any + * Set prot_iter to data_iter and truncate it to + * prot_bytes, and advance data_iter past any * preceeding prot_bytes that may be present. * * Also fix up the exp_data_len to reflect only the @@ -1002,6 +1003,7 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq) if (prot_bytes) { exp_data_len -= prot_bytes; prot_iter = data_iter; + iov_iter_truncate(&prot_iter, prot_bytes); iov_iter_advance(&data_iter, prot_bytes); } tag = vhost64_to_cpu(vq, v_req_pi.tag); -- GitLab From 2cbff556f2e153af48564b125711417b7e6dc6ad Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Tue, 6 Nov 2018 00:51:21 -0800 Subject: [PATCH 1533/2269] scsi: qla2xxx: Initialize port speed to avoid setting lower speed commit f635e48e866ee1a47d2d42ce012fdcc07bf55853 upstream. This patch initializes port speed so that firmware does not set lower operating speed. Setting lower speed in firmware impacts WRITE perfomance. Fixes: 726b85487067 ("qla2xxx: Add framework for async fabric discovery") Cc: Signed-off-by: Quinn Tran Signed-off-by: Himanshu Madhani Tested-by: Laurence Oberman Reviewed-by: Ewan D. Milne Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/qla2xxx/qla_init.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index 041497c8ecd8f..aef1e1a555350 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -4236,6 +4236,7 @@ qla2x00_alloc_fcport(scsi_qla_host_t *vha, gfp_t flags) fcport->loop_id = FC_NO_LOOP_ID; qla2x00_set_fcport_state(fcport, FCS_UNCONFIGURED); fcport->supported_classes = FC_COS_UNSPECIFIED; + fcport->fp_speed = PORT_SPEED_UNKNOWN; fcport->ct_desc.ct_sns = dma_alloc_coherent(&vha->hw->pdev->dev, sizeof(struct ct_sns_pkt), &fcport->ct_desc.ct_sns_dma, -- GitLab From a061395786ef57f96830ae4791778d1916146cd0 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 14 Nov 2018 16:25:51 +0800 Subject: [PATCH 1534/2269] SCSI: fix queue cleanup race before queue initialization is done commit 8dc765d438f1e42b3e8227b3b09fad7d73f4ec9a upstream. c2856ae2f315d ("blk-mq: quiesce queue before freeing queue") has already fixed this race, however the implied synchronize_rcu() in blk_mq_quiesce_queue() can slow down LUN probe a lot, so caused performance regression. Then 1311326cf4755c7 ("blk-mq: avoid to synchronize rcu inside blk_cleanup_queue()") tried to quiesce queue for avoiding unnecessary synchronize_rcu() only when queue initialization is done, because it is usual to see lots of inexistent LUNs which need to be probed. However, turns out it isn't safe to quiesce queue only when queue initialization is done. Because when one SCSI command is completed, the user of sending command can be waken up immediately, then the scsi device may be removed, meantime the run queue in scsi_end_request() is still in-progress, so kernel panic can be caused. In Red Hat QE lab, there are several reports about this kind of kernel panic triggered during kernel booting. This patch tries to address the issue by grabing one queue usage counter during freeing one request and the following run queue. Fixes: 1311326cf4755c7 ("blk-mq: avoid to synchronize rcu inside blk_cleanup_queue()") Cc: Andrew Jones Cc: Bart Van Assche Cc: linux-scsi@vger.kernel.org Cc: Martin K. Petersen Cc: Christoph Hellwig Cc: James E.J. Bottomley Cc: stable Cc: jianchao.wang Signed-off-by: Ming Lei Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- block/blk-core.c | 5 ++--- drivers/scsi/scsi_lib.c | 8 ++++++++ 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index 6aa2bc4e96529..0b14aebfd1a85 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -671,9 +671,8 @@ void blk_cleanup_queue(struct request_queue *q) * dispatch may still be in-progress since we dispatch requests * from more than one contexts. * - * No need to quiesce queue if it isn't initialized yet since - * blk_freeze_queue() should be enough for cases of passthrough - * request. + * We rely on driver to deal with the race in case that queue + * initialization isn't done. */ if (q->mq_ops && blk_queue_init_done(q)) blk_mq_quiesce_queue(q); diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index bfd8f12d4e9ad..7f505c027ce78 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -683,6 +683,12 @@ static bool scsi_end_request(struct request *req, blk_status_t error, */ scsi_mq_uninit_cmd(cmd); + /* + * queue is still alive, so grab the ref for preventing it + * from being cleaned up during running queue. + */ + percpu_ref_get(&q->q_usage_counter); + __blk_mq_end_request(req, error); if (scsi_target(sdev)->single_lun || @@ -690,6 +696,8 @@ static bool scsi_end_request(struct request *req, blk_status_t error, kblockd_schedule_work(&sdev->requeue_work); else blk_mq_run_hw_queues(q, true); + + percpu_ref_put(&q->q_usage_counter); } else { unsigned long flags; -- GitLab From d172f2527081d0bb28a32f16343fd2ad84008c9f Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 31 Oct 2018 08:41:34 +0000 Subject: [PATCH 1535/2269] soc: ti: QMSS: Fix usage of irq_set_affinity_hint commit 832ad0e3da4510fd17f98804abe512ea9a747035 upstream. The Keystone QMSS driver is pretty damaged, in the sense that it does things like this: irq_set_affinity_hint(irq, to_cpumask(&cpu_map)); where cpu_map is a local variable. As we leave the function, this will point to nowhere-land, and things will end-up badly. Instead, let's use a proper cpumask that gets allocated, giving the driver a chance to actually work with things like irqbalance as well as have a hypothetical 64bit future. Cc: stable@vger.kernel.org Acked-by: Santosh Shilimkar Signed-off-by: Marc Zyngier Signed-off-by: Olof Johansson Signed-off-by: Greg Kroah-Hartman --- drivers/soc/ti/knav_qmss.h | 4 ++-- drivers/soc/ti/knav_qmss_acc.c | 10 +++++----- drivers/soc/ti/knav_qmss_queue.c | 22 +++++++++++++++------- 3 files changed, 22 insertions(+), 14 deletions(-) diff --git a/drivers/soc/ti/knav_qmss.h b/drivers/soc/ti/knav_qmss.h index 905b974d1bdc5..4686192e719ea 100644 --- a/drivers/soc/ti/knav_qmss.h +++ b/drivers/soc/ti/knav_qmss.h @@ -321,8 +321,8 @@ struct knav_range_ops { }; struct knav_irq_info { - int irq; - u32 cpu_map; + int irq; + struct cpumask *cpu_mask; }; struct knav_range_info { diff --git a/drivers/soc/ti/knav_qmss_acc.c b/drivers/soc/ti/knav_qmss_acc.c index 3d7225f4e77fe..672aebe1e3783 100644 --- a/drivers/soc/ti/knav_qmss_acc.c +++ b/drivers/soc/ti/knav_qmss_acc.c @@ -205,18 +205,18 @@ static int knav_range_setup_acc_irq(struct knav_range_info *range, { struct knav_device *kdev = range->kdev; struct knav_acc_channel *acc; - unsigned long cpu_map; + struct cpumask *cpu_mask; int ret = 0, irq; u32 old, new; if (range->flags & RANGE_MULTI_QUEUE) { acc = range->acc; irq = range->irqs[0].irq; - cpu_map = range->irqs[0].cpu_map; + cpu_mask = range->irqs[0].cpu_mask; } else { acc = range->acc + queue; irq = range->irqs[queue].irq; - cpu_map = range->irqs[queue].cpu_map; + cpu_mask = range->irqs[queue].cpu_mask; } old = acc->open_mask; @@ -239,8 +239,8 @@ static int knav_range_setup_acc_irq(struct knav_range_info *range, acc->name, acc->name); ret = request_irq(irq, knav_acc_int_handler, 0, acc->name, range); - if (!ret && cpu_map) { - ret = irq_set_affinity_hint(irq, to_cpumask(&cpu_map)); + if (!ret && cpu_mask) { + ret = irq_set_affinity_hint(irq, cpu_mask); if (ret) { dev_warn(range->kdev->dev, "Failed to set IRQ affinity\n"); diff --git a/drivers/soc/ti/knav_qmss_queue.c b/drivers/soc/ti/knav_qmss_queue.c index 39225de9d7f14..9879ca5f8c5f5 100644 --- a/drivers/soc/ti/knav_qmss_queue.c +++ b/drivers/soc/ti/knav_qmss_queue.c @@ -102,19 +102,17 @@ static int knav_queue_setup_irq(struct knav_range_info *range, struct knav_queue_inst *inst) { unsigned queue = inst->id - range->queue_base; - unsigned long cpu_map; int ret = 0, irq; if (range->flags & RANGE_HAS_IRQ) { irq = range->irqs[queue].irq; - cpu_map = range->irqs[queue].cpu_map; ret = request_irq(irq, knav_queue_int_handler, 0, inst->irq_name, inst); if (ret) return ret; disable_irq(irq); - if (cpu_map) { - ret = irq_set_affinity_hint(irq, to_cpumask(&cpu_map)); + if (range->irqs[queue].cpu_mask) { + ret = irq_set_affinity_hint(irq, range->irqs[queue].cpu_mask); if (ret) { dev_warn(range->kdev->dev, "Failed to set IRQ affinity\n"); @@ -1222,9 +1220,19 @@ static int knav_setup_queue_range(struct knav_device *kdev, range->num_irqs++; - if (IS_ENABLED(CONFIG_SMP) && oirq.args_count == 3) - range->irqs[i].cpu_map = - (oirq.args[2] & 0x0000ff00) >> 8; + if (IS_ENABLED(CONFIG_SMP) && oirq.args_count == 3) { + unsigned long mask; + int bit; + + range->irqs[i].cpu_mask = devm_kzalloc(dev, + cpumask_size(), GFP_KERNEL); + if (!range->irqs[i].cpu_mask) + return -ENOMEM; + + mask = (oirq.args[2] & 0x0000ff00) >> 8; + for_each_set_bit(bit, &mask, BITS_PER_LONG) + cpumask_set_cpu(bit, range->irqs[i].cpu_mask); + } } range->num_irqs = min(range->num_irqs, range->num_queues); -- GitLab From ce8bf4cfe4afbd95d7d2db4ffcff9fa6f530f993 Mon Sep 17 00:00:00 2001 From: Changwei Ge Date: Fri, 2 Nov 2018 15:48:15 -0700 Subject: [PATCH 1536/2269] ocfs2: fix a misuse a of brelse after failing ocfs2_check_dir_entry commit 29aa30167a0a2e6045a0d6d2e89d8168132333d5 upstream. Somehow, file system metadata was corrupted, which causes ocfs2_check_dir_entry() to fail in function ocfs2_dir_foreach_blk_el(). According to the original design intention, if above happens we should skip the problematic block and continue to retrieve dir entry. But there is obviouse misuse of brelse around related code. After failure of ocfs2_check_dir_entry(), current code just moves to next position and uses the problematic buffer head again and again during which the problematic buffer head is released for multiple times. I suppose, this a serious issue which is long-lived in ocfs2. This may cause other file systems which is also used in a the same host insane. So we should also consider about bakcporting this patch into linux -stable. Link: http://lkml.kernel.org/r/HK2PR06MB045211675B43EED794E597B6D56E0@HK2PR06MB0452.apcprd06.prod.outlook.com Signed-off-by: Changwei Ge Suggested-by: Changkuo Shi Reviewed-by: Andrew Morton Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Joseph Qi Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/ocfs2/dir.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index febe6312ceff8..3c26f2dfedf1a 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c @@ -1896,8 +1896,7 @@ static int ocfs2_dir_foreach_blk_el(struct inode *inode, /* On error, skip the f_pos to the next block. */ ctx->pos = (ctx->pos | (sb->s_blocksize - 1)) + 1; - brelse(bh); - continue; + break; } if (le64_to_cpu(de->inode)) { unsigned char d_type = DT_UNKNOWN; -- GitLab From 1d0ad8ac6d1c4d8d513700096e1786e23afcfc26 Mon Sep 17 00:00:00 2001 From: Wengang Wang Date: Fri, 16 Nov 2018 15:08:25 -0800 Subject: [PATCH 1537/2269] ocfs2: free up write context when direct IO failed commit 5040f8df56fb90c7919f1c9b0b6e54c843437456 upstream. The write context should also be freed even when direct IO failed. Otherwise a memory leak is introduced and entries remain in oi->ip_unwritten_list causing the following BUG later in unlink path: ERROR: bug expression: !list_empty(&oi->ip_unwritten_list) ERROR: Clear inode of 215043, inode has unwritten extents ... Call Trace: ? __set_current_blocked+0x42/0x68 ocfs2_evict_inode+0x91/0x6a0 [ocfs2] ? bit_waitqueue+0x40/0x33 evict+0xdb/0x1af iput+0x1a2/0x1f7 do_unlinkat+0x194/0x28f SyS_unlinkat+0x1b/0x2f do_syscall_64+0x79/0x1ae entry_SYSCALL_64_after_hwframe+0x151/0x0 This patch also logs, with frequency limit, direct IO failures. Link: http://lkml.kernel.org/r/20181102170632.25921-1-wen.gang.wang@oracle.com Signed-off-by: Wengang Wang Reviewed-by: Junxiao Bi Reviewed-by: Changwei Ge Reviewed-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/ocfs2/aops.c | 12 ++++++++++-- fs/ocfs2/cluster/masklog.h | 9 +++++++++ 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index d1516327b7875..99550f4bd159a 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -2404,8 +2404,16 @@ static int ocfs2_dio_end_io(struct kiocb *iocb, /* this io's submitter should not have unlocked this before we could */ BUG_ON(!ocfs2_iocb_is_rw_locked(iocb)); - if (bytes > 0 && private) - ret = ocfs2_dio_end_io_write(inode, private, offset, bytes); + if (bytes <= 0) + mlog_ratelimited(ML_ERROR, "Direct IO failed, bytes = %lld", + (long long)bytes); + if (private) { + if (bytes > 0) + ret = ocfs2_dio_end_io_write(inode, private, offset, + bytes); + else + ocfs2_dio_free_write_ctx(inode, private); + } ocfs2_iocb_clear_rw_locked(iocb); diff --git a/fs/ocfs2/cluster/masklog.h b/fs/ocfs2/cluster/masklog.h index 308ea0eb35fd1..a396096a5099f 100644 --- a/fs/ocfs2/cluster/masklog.h +++ b/fs/ocfs2/cluster/masklog.h @@ -178,6 +178,15 @@ do { \ ##__VA_ARGS__); \ } while (0) +#define mlog_ratelimited(mask, fmt, ...) \ +do { \ + static DEFINE_RATELIMIT_STATE(_rs, \ + DEFAULT_RATELIMIT_INTERVAL, \ + DEFAULT_RATELIMIT_BURST); \ + if (__ratelimit(&_rs)) \ + mlog(mask, fmt, ##__VA_ARGS__); \ +} while (0) + #define mlog_errno(st) ({ \ int _st = (st); \ if (_st != -ERESTARTSYS && _st != -EINTR && \ -- GitLab From e6e4f052a20670e324d7cebd68460228f00086e5 Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Fri, 2 Nov 2018 15:47:59 -0700 Subject: [PATCH 1538/2269] mm: thp: relax __GFP_THISNODE for MADV_HUGEPAGE mappings commit ac5b2c18911ffe95c08d69273917f90212cf5659 upstream. THP allocation might be really disruptive when allocated on NUMA system with the local node full or hard to reclaim. Stefan has posted an allocation stall report on 4.12 based SLES kernel which suggests the same issue: kvm: page allocation stalls for 194572ms, order:9, mode:0x4740ca(__GFP_HIGHMEM|__GFP_IO|__GFP_FS|__GFP_COMP|__GFP_NOMEMALLOC|__GFP_HARDWALL|__GFP_THISNODE|__GFP_MOVABLE|__GFP_DIRECT_RECLAIM), nodemask=(null) kvm cpuset=/ mems_allowed=0-1 CPU: 10 PID: 84752 Comm: kvm Tainted: G W 4.12.0+98-ph 0000001 SLE15 (unreleased) Hardware name: Supermicro SYS-1029P-WTRT/X11DDW-NT, BIOS 2.0 12/05/2017 Call Trace: dump_stack+0x5c/0x84 warn_alloc+0xe0/0x180 __alloc_pages_slowpath+0x820/0xc90 __alloc_pages_nodemask+0x1cc/0x210 alloc_pages_vma+0x1e5/0x280 do_huge_pmd_wp_page+0x83f/0xf00 __handle_mm_fault+0x93d/0x1060 handle_mm_fault+0xc6/0x1b0 __do_page_fault+0x230/0x430 do_page_fault+0x2a/0x70 page_fault+0x7b/0x80 [...] Mem-Info: active_anon:126315487 inactive_anon:1612476 isolated_anon:5 active_file:60183 inactive_file:245285 isolated_file:0 unevictable:15657 dirty:286 writeback:1 unstable:0 slab_reclaimable:75543 slab_unreclaimable:2509111 mapped:81814 shmem:31764 pagetables:370616 bounce:0 free:32294031 free_pcp:6233 free_cma:0 Node 0 active_anon:254680388kB inactive_anon:1112760kB active_file:240648kB inactive_file:981168kB unevictable:13368kB isolated(anon):0kB isolated(file):0kB mapped:280240kB dirty:1144kB writeback:0kB shmem:95832kB shmem_thp: 0kB shmem_pmdmapped: 0kB anon_thp: 81225728kB writeback_tmp:0kB unstable:0kB all_unreclaimable? no Node 1 active_anon:250583072kB inactive_anon:5337144kB active_file:84kB inactive_file:0kB unevictable:49260kB isolated(anon):20kB isolated(file):0kB mapped:47016kB dirty:0kB writeback:4kB shmem:31224kB shmem_thp: 0kB shmem_pmdmapped: 0kB anon_thp: 31897600kB writeback_tmp:0kB unstable:0kB all_unreclaimable? no The defrag mode is "madvise" and from the above report it is clear that the THP has been allocated for MADV_HUGEPAGA vma. Andrea has identified that the main source of the problem is __GFP_THISNODE usage: : The problem is that direct compaction combined with the NUMA : __GFP_THISNODE logic in mempolicy.c is telling reclaim to swap very : hard the local node, instead of failing the allocation if there's no : THP available in the local node. : : Such logic was ok until __GFP_THISNODE was added to the THP allocation : path even with MPOL_DEFAULT. : : The idea behind the __GFP_THISNODE addition, is that it is better to : provide local memory in PAGE_SIZE units than to use remote NUMA THP : backed memory. That largely depends on the remote latency though, on : threadrippers for example the overhead is relatively low in my : experience. : : The combination of __GFP_THISNODE and __GFP_DIRECT_RECLAIM results in : extremely slow qemu startup with vfio, if the VM is larger than the : size of one host NUMA node. This is because it will try very hard to : unsuccessfully swapout get_user_pages pinned pages as result of the : __GFP_THISNODE being set, instead of falling back to PAGE_SIZE : allocations and instead of trying to allocate THP on other nodes (it : would be even worse without vfio type1 GUP pins of course, except it'd : be swapping heavily instead). Fix this by removing __GFP_THISNODE for THP requests which are requesting the direct reclaim. This effectivelly reverts 5265047ac301 on the grounds that the zone/node reclaim was known to be disruptive due to premature reclaim when there was memory free. While it made sense at the time for HPC workloads without NUMA awareness on rare machines, it was ultimately harmful in the majority of cases. The existing behaviour is similar, if not as widespare as it applies to a corner case but crucially, it cannot be tuned around like zone_reclaim_mode can. The default behaviour should always be to cause the least harm for the common case. If there are specialised use cases out there that want zone_reclaim_mode in specific cases, then it can be built on top. Longterm we should consider a memory policy which allows for the node reclaim like behavior for the specific memory ranges which would allow a [1] http://lkml.kernel.org/r/20180820032204.9591-1-aarcange@redhat.com Mel said: : Both patches look correct to me but I'm responding to this one because : it's the fix. The change makes sense and moves further away from the : severe stalling behaviour we used to see with both THP and zone reclaim : mode. : : I put together a basic experiment with usemem configured to reference a : buffer multiple times that is 80% the size of main memory on a 2-socket : box with symmetric node sizes and defrag set to "always". The defrag : setting is not the default but it would be functionally similar to : accessing a buffer with madvise(MADV_HUGEPAGE). Usemem is configured to : reference the buffer multiple times and while it's not an interesting : workload, it would be expected to complete reasonably quickly as it fits : within memory. The results were; : : usemem : vanilla noreclaim-v1 : Amean Elapsd-1 42.78 ( 0.00%) 26.87 ( 37.18%) : Amean Elapsd-3 27.55 ( 0.00%) 7.44 ( 73.00%) : Amean Elapsd-4 5.72 ( 0.00%) 5.69 ( 0.45%) : : This shows the elapsed time in seconds for 1 thread, 3 threads and 4 : threads referencing buffers 80% the size of memory. With the patches : applied, it's 37.18% faster for the single thread and 73% faster with two : threads. Note that 4 threads showing little difference does not indicate : the problem is related to thread counts. It's simply the case that 4 : threads gets spread so their workload mostly fits in one node. : : The overall view from /proc/vmstats is more startling : : 4.19.0-rc1 4.19.0-rc1 : vanillanoreclaim-v1r1 : Minor Faults 35593425 708164 : Major Faults 484088 36 : Swap Ins 3772837 0 : Swap Outs 3932295 0 : : Massive amounts of swap in/out without the patch : : Direct pages scanned 6013214 0 : Kswapd pages scanned 0 0 : Kswapd pages reclaimed 0 0 : Direct pages reclaimed 4033009 0 : : Lots of reclaim activity without the patch : : Kswapd efficiency 100% 100% : Kswapd velocity 0.000 0.000 : Direct efficiency 67% 100% : Direct velocity 11191.956 0.000 : : Mostly from direct reclaim context as you'd expect without the patch. : : Page writes by reclaim 3932314.000 0.000 : Page writes file 19 0 : Page writes anon 3932295 0 : Page reclaim immediate 42336 0 : : Writes from reclaim context is never good but the patch eliminates it. : : We should never have default behaviour to thrash the system for such a : basic workload. If zone reclaim mode behaviour is ever desired but on a : single task instead of a global basis then the sensible option is to build : a mempolicy that enforces that behaviour. This was a severe regression compared to previous kernels that made important workloads unusable and it starts when __GFP_THISNODE was added to THP allocations under MADV_HUGEPAGE. It is not a significant risk to go to the previous behavior before __GFP_THISNODE was added, it worked like that for years. This was simply an optimization to some lucky workloads that can fit in a single node, but it ended up breaking the VM for others that can't possibly fit in a single node, so going back is safe. [mhocko@suse.com: rewrote the changelog based on the one from Andrea] Link: http://lkml.kernel.org/r/20180925120326.24392-2-mhocko@kernel.org Fixes: 5265047ac301 ("mm, thp: really limit transparent hugepage allocation to local node") Signed-off-by: Andrea Arcangeli Signed-off-by: Michal Hocko Reported-by: Stefan Priebe Debugged-by: Andrea Arcangeli Reported-by: Alex Williamson Reviewed-by: Mel Gorman Tested-by: Mel Gorman Cc: Zi Yan Cc: Vlastimil Babka Cc: David Rientjes Cc: "Kirill A. Shutemov" Cc: [4.1+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/mempolicy.c | 32 ++++++++++++++++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) diff --git a/mm/mempolicy.c b/mm/mempolicy.c index ecbda7f5d4949..1b93535d875f3 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -2012,8 +2012,36 @@ alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma, nmask = policy_nodemask(gfp, pol); if (!nmask || node_isset(hpage_node, *nmask)) { mpol_cond_put(pol); - page = __alloc_pages_node(hpage_node, - gfp | __GFP_THISNODE, order); + /* + * We cannot invoke reclaim if __GFP_THISNODE + * is set. Invoking reclaim with + * __GFP_THISNODE set, would cause THP + * allocations to trigger heavy swapping + * despite there may be tons of free memory + * (including potentially plenty of THP + * already available in the buddy) on all the + * other NUMA nodes. + * + * At most we could invoke compaction when + * __GFP_THISNODE is set (but we would need to + * refrain from invoking reclaim even if + * compaction returned COMPACT_SKIPPED because + * there wasn't not enough memory to succeed + * compaction). For now just avoid + * __GFP_THISNODE instead of limiting the + * allocation path to a strict and single + * compaction invocation. + * + * Supposedly if direct reclaim was enabled by + * the caller, the app prefers THP regardless + * of the node it comes from so this would be + * more desiderable behavior than only + * providing THP originated from the local + * node in such case. + */ + if (!(gfp & __GFP_DIRECT_RECLAIM)) + gfp |= __GFP_THISNODE; + page = __alloc_pages_node(hpage_node, gfp, order); goto out; } } -- GitLab From 54ab59528026cc6140be55fd918eedbdf2ee9677 Mon Sep 17 00:00:00 2001 From: Vasily Khoruzhick Date: Thu, 25 Oct 2018 12:15:43 -0700 Subject: [PATCH 1539/2269] netfilter: conntrack: fix calculation of next bucket number in early_drop commit f393808dc64149ccd0e5a8427505ba2974a59854 upstream. If there's no entry to drop in bucket that corresponds to the hash, early_drop() should look for it in other buckets. But since it increments hash instead of bucket number, it actually looks in the same bucket 8 times: hsize is 16k by default (14 bits) and hash is 32-bit value, so reciprocal_scale(hash, hsize) returns the same value for hash..hash+7 in most cases. Fix it by increasing bucket number instead of hash and rename _hash to bucket to avoid future confusion. Fixes: 3e86638e9a0b ("netfilter: conntrack: consider ct netns in early_drop logic") Cc: # v4.7+ Signed-off-by: Vasily Khoruzhick Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nf_conntrack_core.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index a268acc48af05..b793b55d14885 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -932,19 +932,22 @@ static unsigned int early_drop_list(struct net *net, return drops; } -static noinline int early_drop(struct net *net, unsigned int _hash) +static noinline int early_drop(struct net *net, unsigned int hash) { - unsigned int i; + unsigned int i, bucket; for (i = 0; i < NF_CT_EVICTION_RANGE; i++) { struct hlist_nulls_head *ct_hash; - unsigned int hash, hsize, drops; + unsigned int hsize, drops; rcu_read_lock(); nf_conntrack_get_ht(&ct_hash, &hsize); - hash = reciprocal_scale(_hash++, hsize); + if (!i) + bucket = reciprocal_scale(hash, hsize); + else + bucket = (bucket + 1) % hsize; - drops = early_drop_list(net, &ct_hash[hash]); + drops = early_drop_list(net, &ct_hash[bucket]); rcu_read_unlock(); if (drops) { -- GitLab From 2d155427ffb2057fd7ece3e735b204b393fd62cf Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 5 Nov 2018 14:54:56 +0100 Subject: [PATCH 1540/2269] ARM: 8809/1: proc-v7: fix Thumb annotation of cpu_v7_hvc_switch_mm commit 6282e916f774e37845c65d1eae9f8c649004f033 upstream. Due to what appears to be a copy/paste error, the opening ENTRY() of cpu_v7_hvc_switch_mm() lacks a matching ENDPROC(), and instead, the one for cpu_v7_smc_switch_mm() is duplicated. Given that it is ENDPROC() that emits the Thumb annotation, the cpu_v7_hvc_switch_mm() routine will be called in ARM mode on a Thumb2 kernel, resulting in the following splat: Internal error: Oops - undefined instruction: 0 [#1] SMP THUMB2 Modules linked in: CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.18.0-rc1-00030-g4d28ad89189d-dirty #488 Hardware name: QEMU KVM Virtual Machine, BIOS 0.0.0 02/06/2015 PC is at cpu_v7_hvc_switch_mm+0x12/0x18 LR is at flush_old_exec+0x31b/0x570 pc : [] lr : [] psr: 00000013 sp : ee899e50 ip : 00000000 fp : 00000001 r10: eda28f34 r9 : eda31800 r8 : c12470e0 r7 : eda1fc00 r6 : eda53000 r5 : 00000000 r4 : ee88c000 r3 : c0316eec r2 : 00000001 r1 : eda53000 r0 : 6da6c000 Flags: nzcv IRQs on FIQs on Mode SVC_32 ISA ARM Segment none Note the 'ISA ARM' in the last line. Fix this by using the correct name in ENDPROC(). Cc: Fixes: 10115105cb3a ("ARM: spectre-v2: add firmware based hardening") Reviewed-by: Dave Martin Acked-by: Marc Zyngier Signed-off-by: Ard Biesheuvel Signed-off-by: Russell King Signed-off-by: Greg Kroah-Hartman --- arch/arm/mm/proc-v7.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S index 12468d9378d88..d8d90cf65b39f 100644 --- a/arch/arm/mm/proc-v7.S +++ b/arch/arm/mm/proc-v7.S @@ -112,7 +112,7 @@ ENTRY(cpu_v7_hvc_switch_mm) hvc #0 ldmfd sp!, {r0 - r3} b cpu_v7_switch_mm -ENDPROC(cpu_v7_smc_switch_mm) +ENDPROC(cpu_v7_hvc_switch_mm) #endif ENTRY(cpu_v7_iciallu_switch_mm) mov r3, #0 -- GitLab From fbd703540e22c5f8820a0bffbee4801fb4067e6b Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 11 Oct 2018 13:06:16 +0200 Subject: [PATCH 1541/2269] mtd: docg3: don't set conflicting BCH_CONST_PARAMS option commit be2e1c9dcf76886a83fb1c433a316e26d4ca2550 upstream. I noticed during the creation of another bugfix that the BCH_CONST_PARAMS option that is set by DOCG3 breaks setting variable parameters for any other users of the BCH library code. The only other user we have today is the MTD_NAND software BCH implementation (most flash controllers use hardware BCH these days and are not affected). I considered removing BCH_CONST_PARAMS entirely because of the inherent conflict, but according to the description in lib/bch.c there is a significant performance benefit in keeping it. To avoid the immediate problem of the conflict between MTD_NAND_BCH and DOCG3, this only sets the constant parameters if MTD_NAND_BCH is disabled, which should fix the problem for all cases that are affected. This should also work for all stable kernels. Note that there is only one machine that actually seems to use the DOCG3 driver (arch/arm/mach-pxa/mioa701.c), so most users should have the driver disabled, but it almost certainly shows up if we wanted to test random kernels on machines that use software BCH in MTD. Fixes: d13d19ece39f ("mtd: docg3: add ECC correction code") Cc: stable@vger.kernel.org Cc: Robert Jarzmik Signed-off-by: Arnd Bergmann Signed-off-by: Boris Brezillon Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/devices/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/devices/Kconfig b/drivers/mtd/devices/Kconfig index 6def5445e03e1..64731e3221ef7 100644 --- a/drivers/mtd/devices/Kconfig +++ b/drivers/mtd/devices/Kconfig @@ -206,7 +206,7 @@ comment "Disk-On-Chip Device Drivers" config MTD_DOCG3 tristate "M-Systems Disk-On-Chip G3" select BCH - select BCH_CONST_PARAMS + select BCH_CONST_PARAMS if !MTD_NAND_BCH select BITREVERSE ---help--- This provides an MTD device driver for the M-Systems DiskOnChip -- GitLab From 9c6f231e8ad5947b757c66bf0a8f5c10ec83ccc2 Mon Sep 17 00:00:00 2001 From: John Garry Date: Thu, 8 Nov 2018 18:17:03 +0800 Subject: [PATCH 1542/2269] of, numa: Validate some distance map rules commit 89c38422e072bb453e3045b8f1b962a344c3edea upstream. Currently the NUMA distance map parsing does not validate the distance table for the distance-matrix rules 1-2 in [1]. However the arch NUMA code may enforce some of these rules, but not all. Such is the case for the arm64 port, which does not enforce the rule that the distance between separates nodes cannot equal LOCAL_DISTANCE. The patch adds the following rules validation: - distance of node to self equals LOCAL_DISTANCE - distance of separate nodes > LOCAL_DISTANCE This change avoids a yet-unresolved crash reported in [2]. A note on dealing with symmetrical distances between nodes: Validating symmetrical distances between nodes is difficult. If it were mandated in the bindings that every distance must be recorded in the table, then it would be easy. However, it isn't. In addition to this, it is also possible to record [b, a] distance only (and not [a, b]). So, when processing the table for [b, a], we cannot assert that current distance of [a, b] != [b, a] as invalid, as [a, b] distance may not be present in the table and current distance would be default at REMOTE_DISTANCE. As such, we maintain the policy that we overwrite distance [a, b] = [b, a] for b > a. This policy is different to kernel ACPI SLIT validation, which allows non-symmetrical distances (ACPI spec SLIT rules allow it). However, the distance debug message is dropped as it may be misleading (for a distance which is later overwritten). Some final notes on semantics: - It is implied that it is the responsibility of the arch NUMA code to reset the NUMA distance map for an error in distance map parsing. - It is the responsibility of the FW NUMA topology parsing (whether OF or ACPI) to enforce NUMA distance rules, and not arch NUMA code. [1] Documents/devicetree/bindings/numa.txt [2] https://www.spinics.net/lists/arm-kernel/msg683304.html Cc: stable@vger.kernel.org # 4.7 Signed-off-by: John Garry Acked-by: Will Deacon Signed-off-by: Rob Herring Signed-off-by: Greg Kroah-Hartman --- drivers/of/of_numa.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/of/of_numa.c b/drivers/of/of_numa.c index 2db1f7a04baf0..0b29ee9ee8c34 100644 --- a/drivers/of/of_numa.c +++ b/drivers/of/of_numa.c @@ -126,9 +126,14 @@ static int __init of_numa_parse_distance_map_v1(struct device_node *map) distance = of_read_number(matrix, 1); matrix++; + if ((nodea == nodeb && distance != LOCAL_DISTANCE) || + (nodea != nodeb && distance <= LOCAL_DISTANCE)) { + pr_err("Invalid distance[node%d -> node%d] = %d\n", + nodea, nodeb, distance); + return -EINVAL; + } + numa_set_distance(nodea, nodeb, distance); - pr_debug("distance[node%d -> node%d] = %d\n", - nodea, nodeb, distance); /* Set default distance of node B->A same as A->B */ if (nodeb > nodea) -- GitLab From c46d34365a1983225abee5fbe283c2852c17ac8e Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Fri, 9 Nov 2018 15:22:07 -0500 Subject: [PATCH 1543/2269] x86/cpu/vmware: Do not trace vmware_sched_clock() commit 15035388439f892017d38b05214d3cda6578af64 upstream. When running function tracing on a Linux guest running on VMware Workstation, the guest would crash. This is due to tracing of the sched_clock internal call of the VMware vmware_sched_clock(), which causes an infinite recursion within the tracing code (clock calls must not be traced). Make vmware_sched_clock() not traced by ftrace. Fixes: 80e9a4f21fd7c ("x86/vmware: Add paravirt sched clock") Reported-by: GwanYeong Kim Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Borislav Petkov CC: Alok Kataria CC: GwanYeong Kim CC: "H. Peter Anvin" CC: Ingo Molnar Cc: stable@vger.kernel.org CC: Thomas Gleixner CC: virtualization@lists.linux-foundation.org CC: x86-ml Link: http://lkml.kernel.org/r/20181109152207.4d3e7d70@gandalf.local.home Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/vmware.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c index 8e005329648b6..d805202c63cdc 100644 --- a/arch/x86/kernel/cpu/vmware.c +++ b/arch/x86/kernel/cpu/vmware.c @@ -77,7 +77,7 @@ static __init int setup_vmw_sched_clock(char *s) } early_param("no-vmw-sched-clock", setup_vmw_sched_clock); -static unsigned long long vmware_sched_clock(void) +static unsigned long long notrace vmware_sched_clock(void) { unsigned long long ns; -- GitLab From de51aafd80bedac4ce10072510203d67eb6ea3b4 Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Sun, 4 Nov 2018 03:48:57 +0000 Subject: [PATCH 1544/2269] x86/hyper-v: Enable PIT shutdown quirk commit 1de72c706488b7be664a601cf3843bd01e327e58 upstream. Hyper-V emulation of the PIT has a quirk such that the normal PIT shutdown path doesn't work, because clearing the counter register restarts the timer. Disable the counter clearing on PIT shutdown. Signed-off-by: Michael Kelley Signed-off-by: Thomas Gleixner Cc: "gregkh@linuxfoundation.org" Cc: "devel@linuxdriverproject.org" Cc: "daniel.lezcano@linaro.org" Cc: "virtualization@lists.linux-foundation.org" Cc: "jgross@suse.com" Cc: "akataria@vmware.com" Cc: "olaf@aepfle.de" Cc: "apw@canonical.com" Cc: vkuznets Cc: "jasowang@redhat.com" Cc: "marcelo.cerri@canonical.com" Cc: KY Srinivasan Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/1541303219-11142-3-git-send-email-mikelley@microsoft.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/mshyperv.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 85eb5fc180c81..c0201b11e9e2a 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -243,6 +244,16 @@ static void __init ms_hyperv_init_platform(void) if (efi_enabled(EFI_BOOT)) x86_platform.get_nmi_reason = hv_get_nmi_reason; + /* + * Hyper-V VMs have a PIT emulation quirk such that zeroing the + * counter register during PIT shutdown restarts the PIT. So it + * continues to interrupt @18.2 HZ. Setting i8253_clear_counter + * to false tells pit_shutdown() not to zero the counter so that + * the PIT really is shutdown. Generation 2 VMs don't have a PIT, + * and setting this value has no effect. + */ + i8253_clear_counter_on_shutdown = false; + #if IS_ENABLED(CONFIG_HYPERV) /* * Setup the hook to get control post apic initialization. -- GitLab From d85114a0a131e839070728f2d7bc253a9bfa8eb8 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 22 Oct 2018 09:19:04 -0700 Subject: [PATCH 1545/2269] termios, tty/tty_baudrate.c: fix buffer overrun commit 991a25194097006ec1e0d2e0814ff920e59e3465 upstream. On architectures with CBAUDEX == 0 (Alpha and PowerPC), the code in tty_baudrate.c does not do any limit checking on the tty_baudrate[] array, and in fact a buffer overrun is possible on both architectures. Add a limit check to prevent that situation. This will be followed by a much bigger cleanup/simplification patch. Signed-off-by: H. Peter Anvin (Intel) Requested-by: Cc: Johan Hovold Cc: Jiri Slaby Cc: Al Viro Cc: Richard Henderson Cc: Ivan Kokshaysky Cc: Matt Turner Cc: Thomas Gleixner Cc: Kate Stewart Cc: Philippe Ombredanne Cc: Eugene Syromiatnikov Cc: Alan Cox Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/tty/tty_baudrate.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/tty/tty_baudrate.c b/drivers/tty/tty_baudrate.c index ebc797fc1afd3..42e5683147d5f 100644 --- a/drivers/tty/tty_baudrate.c +++ b/drivers/tty/tty_baudrate.c @@ -76,7 +76,7 @@ speed_t tty_termios_baud_rate(struct ktermios *termios) else cbaud += 15; } - return baud_table[cbaud]; + return cbaud >= n_baud_table ? 0 : baud_table[cbaud]; } EXPORT_SYMBOL(tty_termios_baud_rate); @@ -112,7 +112,7 @@ speed_t tty_termios_input_baud_rate(struct ktermios *termios) else cbaud += 15; } - return baud_table[cbaud]; + return cbaud >= n_baud_table ? 0 : baud_table[cbaud]; #else return tty_termios_baud_rate(termios); #endif -- GitLab From 4fa1a679ed0140e84e8d4a25d0c3edd8b02b9976 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin (Intel)" Date: Mon, 22 Oct 2018 09:19:05 -0700 Subject: [PATCH 1546/2269] arch/alpha, termios: implement BOTHER, IBSHIFT and termios2 commit d0ffb805b729322626639336986bc83fc2e60871 upstream. Alpha has had c_ispeed and c_ospeed, but still set speeds in c_cflags using arbitrary flags. Because BOTHER is not defined, the general Linux code doesn't allow setting arbitrary baud rates, and because CBAUDEX == 0, we can have an array overrun of the baud_rate[] table in drivers/tty/tty_baudrate.c if (c_cflags & CBAUD) == 037. Resolve both problems by #defining BOTHER to 037 on Alpha. However, userspace still needs to know if setting BOTHER is actually safe given legacy kernels (does anyone actually care about that on Alpha anymore?), so enable the TCGETS2/TCSETS*2 ioctls on Alpha, even though they use the same structure. Define struct termios2 just for compatibility; it is the exact same structure as struct termios. In a future patchset, this will be cleaned up so the uapi headers are usable from libc. Signed-off-by: H. Peter Anvin (Intel) Cc: Jiri Slaby Cc: Al Viro Cc: Richard Henderson Cc: Ivan Kokshaysky Cc: Matt Turner Cc: Thomas Gleixner Cc: Kate Stewart Cc: Philippe Ombredanne Cc: Eugene Syromiatnikov Cc: Cc: Cc: Johan Hovold Cc: Alan Cox Cc: Signed-off-by: Greg Kroah-Hartman --- arch/alpha/include/asm/termios.h | 8 +++++++- arch/alpha/include/uapi/asm/ioctls.h | 5 +++++ arch/alpha/include/uapi/asm/termbits.h | 17 +++++++++++++++++ 3 files changed, 29 insertions(+), 1 deletion(-) diff --git a/arch/alpha/include/asm/termios.h b/arch/alpha/include/asm/termios.h index 6a8c53dec57e6..b7c77bb1bfd20 100644 --- a/arch/alpha/include/asm/termios.h +++ b/arch/alpha/include/asm/termios.h @@ -73,9 +73,15 @@ }) #define user_termios_to_kernel_termios(k, u) \ - copy_from_user(k, u, sizeof(struct termios)) + copy_from_user(k, u, sizeof(struct termios2)) #define kernel_termios_to_user_termios(u, k) \ + copy_to_user(u, k, sizeof(struct termios2)) + +#define user_termios_to_kernel_termios_1(k, u) \ + copy_from_user(k, u, sizeof(struct termios)) + +#define kernel_termios_to_user_termios_1(u, k) \ copy_to_user(u, k, sizeof(struct termios)) #endif /* _ALPHA_TERMIOS_H */ diff --git a/arch/alpha/include/uapi/asm/ioctls.h b/arch/alpha/include/uapi/asm/ioctls.h index 3729d92d3fa85..dc8c20ac7191f 100644 --- a/arch/alpha/include/uapi/asm/ioctls.h +++ b/arch/alpha/include/uapi/asm/ioctls.h @@ -32,6 +32,11 @@ #define TCXONC _IO('t', 30) #define TCFLSH _IO('t', 31) +#define TCGETS2 _IOR('T', 42, struct termios2) +#define TCSETS2 _IOW('T', 43, struct termios2) +#define TCSETSW2 _IOW('T', 44, struct termios2) +#define TCSETSF2 _IOW('T', 45, struct termios2) + #define TIOCSWINSZ _IOW('t', 103, struct winsize) #define TIOCGWINSZ _IOR('t', 104, struct winsize) #define TIOCSTART _IO('t', 110) /* start output, like ^Q */ diff --git a/arch/alpha/include/uapi/asm/termbits.h b/arch/alpha/include/uapi/asm/termbits.h index 05e0398a83a6e..53b3ecff2f502 100644 --- a/arch/alpha/include/uapi/asm/termbits.h +++ b/arch/alpha/include/uapi/asm/termbits.h @@ -26,6 +26,19 @@ struct termios { speed_t c_ospeed; /* output speed */ }; +/* Alpha has identical termios and termios2 */ + +struct termios2 { + tcflag_t c_iflag; /* input mode flags */ + tcflag_t c_oflag; /* output mode flags */ + tcflag_t c_cflag; /* control mode flags */ + tcflag_t c_lflag; /* local mode flags */ + cc_t c_cc[NCCS]; /* control characters */ + cc_t c_line; /* line discipline (== c_cc[19]) */ + speed_t c_ispeed; /* input speed */ + speed_t c_ospeed; /* output speed */ +}; + /* Alpha has matching termios and ktermios */ struct ktermios { @@ -148,6 +161,7 @@ struct ktermios { #define B3000000 00034 #define B3500000 00035 #define B4000000 00036 +#define BOTHER 00037 #define CSIZE 00001400 #define CS5 00000000 @@ -165,6 +179,9 @@ struct ktermios { #define CMSPAR 010000000000 /* mark or space (stick) parity */ #define CRTSCTS 020000000000 /* flow control */ +#define CIBAUD 07600000 +#define IBSHIFT 16 + /* c_lflag bits */ #define ISIG 0x00000080 #define ICANON 0x00000100 -- GitLab From ca35750b5fb7bd3c4233a77b0b2de6f6a9c6cbdc Mon Sep 17 00:00:00 2001 From: Mathieu Malaterre Date: Wed, 6 Jun 2018 21:42:32 +0200 Subject: [PATCH 1547/2269] watchdog/core: Add missing prototypes for weak functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 81bd415c91eb966118d773dddf254aebf3022411 upstream. The split out of the hard lockup detector exposed two new weak functions, but no prototypes for them, which triggers the build warning: kernel/watchdog.c:109:12: warning: no previous prototype for ‘watchdog_nmi_enable’ [-Wmissing-prototypes] kernel/watchdog.c:115:13: warning: no previous prototype for ‘watchdog_nmi_disable’ [-Wmissing-prototypes] Add the prototypes. Fixes: 73ce0511c436 ("kernel/watchdog.c: move hardlockup detector to separate file") Signed-off-by: Mathieu Malaterre Signed-off-by: Thomas Gleixner Cc: Babu Moger Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20180606194232.17653-1-malat@debian.org Signed-off-by: Greg Kroah-Hartman --- include/linux/nmi.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/linux/nmi.h b/include/linux/nmi.h index b8d868d23e797..50d1439953385 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -113,6 +113,8 @@ static inline int hardlockup_detector_perf_init(void) { return 0; } void watchdog_nmi_stop(void); void watchdog_nmi_start(void); int watchdog_nmi_probe(void); +int watchdog_nmi_enable(unsigned int cpu); +void watchdog_nmi_disable(unsigned int cpu); /** * touch_nmi_watchdog - restart NMI watchdog timeout. -- GitLab From 84e7fe7bd079dcc81622e56912f002287cda8b32 Mon Sep 17 00:00:00 2001 From: Lu Fengqi Date: Wed, 24 Oct 2018 20:24:03 +0800 Subject: [PATCH 1548/2269] btrfs: fix pinned underflow after transaction aborted commit fcd5e74288f7d36991b1f0fb96b8c57079645e38 upstream. When running generic/475, we may get the following warning in dmesg: [ 6902.102154] WARNING: CPU: 3 PID: 18013 at fs/btrfs/extent-tree.c:9776 btrfs_free_block_groups+0x2af/0x3b0 [btrfs] [ 6902.109160] CPU: 3 PID: 18013 Comm: umount Tainted: G W O 4.19.0-rc8+ #8 [ 6902.110971] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015 [ 6902.112857] RIP: 0010:btrfs_free_block_groups+0x2af/0x3b0 [btrfs] [ 6902.118921] RSP: 0018:ffffc9000459bdb0 EFLAGS: 00010286 [ 6902.120315] RAX: ffff880175050bb0 RBX: ffff8801124a8000 RCX: 0000000000170007 [ 6902.121969] RDX: 0000000000000002 RSI: 0000000000170007 RDI: ffffffff8125fb74 [ 6902.123716] RBP: ffff880175055d10 R08: 0000000000000000 R09: 0000000000000000 [ 6902.125417] R10: 0000000000000000 R11: 0000000000000000 R12: ffff880175055d88 [ 6902.127129] R13: ffff880175050bb0 R14: 0000000000000000 R15: dead000000000100 [ 6902.129060] FS: 00007f4507223780(0000) GS:ffff88017ba00000(0000) knlGS:0000000000000000 [ 6902.130996] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 6902.132558] CR2: 00005623599cac78 CR3: 000000014b700001 CR4: 00000000003606e0 [ 6902.134270] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 6902.135981] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 6902.137836] Call Trace: [ 6902.138939] close_ctree+0x171/0x330 [btrfs] [ 6902.140181] ? kthread_stop+0x146/0x1f0 [ 6902.141277] generic_shutdown_super+0x6c/0x100 [ 6902.142517] kill_anon_super+0x14/0x30 [ 6902.143554] btrfs_kill_super+0x13/0x100 [btrfs] [ 6902.144790] deactivate_locked_super+0x2f/0x70 [ 6902.146014] cleanup_mnt+0x3b/0x70 [ 6902.147020] task_work_run+0x9e/0xd0 [ 6902.148036] do_syscall_64+0x470/0x600 [ 6902.149142] ? trace_hardirqs_off_thunk+0x1a/0x1c [ 6902.150375] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 6902.151640] RIP: 0033:0x7f45077a6a7b [ 6902.157324] RSP: 002b:00007ffd589f3e68 EFLAGS: 00000246 ORIG_RAX: 00000000000000a6 [ 6902.159187] RAX: 0000000000000000 RBX: 000055e8eec732b0 RCX: 00007f45077a6a7b [ 6902.160834] RDX: 0000000000000001 RSI: 0000000000000000 RDI: 000055e8eec73490 [ 6902.162526] RBP: 0000000000000000 R08: 000055e8eec734b0 R09: 00007ffd589f26c0 [ 6902.164141] R10: 0000000000000000 R11: 0000000000000246 R12: 000055e8eec73490 [ 6902.165815] R13: 00007f4507ac61a4 R14: 0000000000000000 R15: 00007ffd589f40d8 [ 6902.167553] irq event stamp: 0 [ 6902.168998] hardirqs last enabled at (0): [<0000000000000000>] (null) [ 6902.170731] hardirqs last disabled at (0): [] copy_process.part.55+0x3b0/0x1f00 [ 6902.172773] softirqs last enabled at (0): [] copy_process.part.55+0x3b0/0x1f00 [ 6902.174671] softirqs last disabled at (0): [<0000000000000000>] (null) [ 6902.176407] ---[ end trace 463138c2986b275c ]--- [ 6902.177636] BTRFS info (device dm-3): space_info 4 has 273465344 free, is not full [ 6902.179453] BTRFS info (device dm-3): space_info total=276824064, used=4685824, pinned=18446744073708158976, reserved=0, may_use=0, readonly=65536 In the above line there's "pinned=18446744073708158976" which is an unsigned u64 value of -1392640, an obvious underflow. When transaction_kthread is running cleanup_transaction(), another fsstress is running btrfs_commit_transaction(). The btrfs_finish_extent_commit() may get the same range as btrfs_destroy_pinned_extent() got, which causes the pinned underflow. Fixes: d4b450cd4b33 ("Btrfs: fix race between transaction commit and empty block group removal") CC: stable@vger.kernel.org # 4.4+ Reviewed-by: Josef Bacik Signed-off-by: Lu Fengqi Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/disk-io.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 5cf1bbe9754cf..0e67cee73c532 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -4428,13 +4428,23 @@ static int btrfs_destroy_pinned_extent(struct btrfs_fs_info *fs_info, unpin = pinned_extents; again: while (1) { + /* + * The btrfs_finish_extent_commit() may get the same range as + * ours between find_first_extent_bit and clear_extent_dirty. + * Hence, hold the unused_bg_unpin_mutex to avoid double unpin + * the same extent range. + */ + mutex_lock(&fs_info->unused_bg_unpin_mutex); ret = find_first_extent_bit(unpin, 0, &start, &end, EXTENT_DIRTY, NULL); - if (ret) + if (ret) { + mutex_unlock(&fs_info->unused_bg_unpin_mutex); break; + } clear_extent_dirty(unpin, start, end); btrfs_error_unpin_extent_range(fs_info, start, end); + mutex_unlock(&fs_info->unused_bg_unpin_mutex); cond_resched(); } -- GitLab From 6dcd34f106ec113db47dddf9a70f0af7d81af468 Mon Sep 17 00:00:00 2001 From: Robbie Ko Date: Tue, 30 Oct 2018 18:04:04 +0800 Subject: [PATCH 1549/2269] Btrfs: fix cur_offset in the error case for nocow commit 506481b20e818db40b6198815904ecd2d6daee64 upstream. When the cow_file_range fails, the related resources are unlocked according to the range [start..end), so the unlock cannot be repeated in run_delalloc_nocow. In some cases (e.g. cur_offset <= end && cow_start != -1), cur_offset is not updated correctly, so move the cur_offset update before cow_file_range. kernel BUG at mm/page-writeback.c:2663! Internal error: Oops - BUG: 0 [#1] SMP CPU: 3 PID: 31525 Comm: kworker/u8:7 Tainted: P O Hardware name: Realtek_RTD1296 (DT) Workqueue: writeback wb_workfn (flush-btrfs-1) task: ffffffc076db3380 ti: ffffffc02e9ac000 task.ti: ffffffc02e9ac000 PC is at clear_page_dirty_for_io+0x1bc/0x1e8 LR is at clear_page_dirty_for_io+0x14/0x1e8 pc : [] lr : [] pstate: 40000145 sp : ffffffc02e9af4f0 Process kworker/u8:7 (pid: 31525, stack limit = 0xffffffc02e9ac020) Call trace: [] clear_page_dirty_for_io+0x1bc/0x1e8 [] extent_clear_unlock_delalloc+0x1e4/0x210 [btrfs] [] run_delalloc_nocow+0x3b8/0x948 [btrfs] [] run_delalloc_range+0x250/0x3a8 [btrfs] [] writepage_delalloc.isra.21+0xbc/0x1d8 [btrfs] [] __extent_writepage+0xe8/0x248 [btrfs] [] extent_write_cache_pages.isra.17+0x164/0x378 [btrfs] [] extent_writepages+0x48/0x68 [btrfs] [] btrfs_writepages+0x20/0x30 [btrfs] [] do_writepages+0x30/0x88 [] __writeback_single_inode+0x34/0x198 [] writeback_sb_inodes+0x184/0x3c0 [] __writeback_inodes_wb+0x6c/0xc0 [] wb_writeback+0x1b8/0x1c0 [] wb_workfn+0x150/0x250 [] process_one_work+0x1dc/0x388 [] worker_thread+0x130/0x500 [] kthread+0x10c/0x110 [] ret_from_fork+0x10/0x40 Code: d503201f a9025bb5 a90363b7 f90023b9 (d4210000) CC: stable@vger.kernel.org # 4.4+ Reviewed-by: Filipe Manana Signed-off-by: Robbie Ko Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/inode.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 90568a21fa779..1c340d6c85680 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1565,12 +1565,11 @@ out_check: } btrfs_release_path(path); - if (cur_offset <= end && cow_start == (u64)-1) { + if (cur_offset <= end && cow_start == (u64)-1) cow_start = cur_offset; - cur_offset = end; - } if (cow_start != (u64)-1) { + cur_offset = end; ret = cow_file_range(inode, locked_page, cow_start, end, end, page_started, nr_written, 1, NULL); if (ret) -- GitLab From c7a082fb631a3282667b60ad36bfbeea45322cdd Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 5 Nov 2018 11:14:05 +0000 Subject: [PATCH 1550/2269] Btrfs: fix infinite loop on inode eviction after deduplication of eof block commit 11023d3f5fdf89bba5e1142127701ca6e6014587 upstream. If we attempt to deduplicate the last block of a file A into the middle of a file B, and file A's size is not a multiple of the block size, we end rounding the deduplication length to 0 bytes, to avoid the data corruption issue fixed by commit de02b9f6bb65 ("Btrfs: fix data corruption when deduplicating between different files"). However a length of zero will cause the insertion of an extent state with a start value greater (by 1) then the end value, leading to a corrupt extent state that will trigger a warning and cause chaos such as an infinite loop during inode eviction. Example trace: [96049.833585] ------------[ cut here ]------------ [96049.833714] WARNING: CPU: 0 PID: 24448 at fs/btrfs/extent_io.c:436 insert_state+0x101/0x120 [btrfs] [96049.833767] CPU: 0 PID: 24448 Comm: xfs_io Not tainted 4.19.0-rc7-btrfs-next-39 #1 [96049.833768] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.11.2-0-gf9626ccb91-prebuilt.qemu-project.org 04/01/2014 [96049.833780] RIP: 0010:insert_state+0x101/0x120 [btrfs] [96049.833783] RSP: 0018:ffffafd2c3707af0 EFLAGS: 00010282 [96049.833785] RAX: 0000000000000000 RBX: 000000000004dfff RCX: 0000000000000006 [96049.833786] RDX: 0000000000000007 RSI: ffff99045c143230 RDI: ffff99047b2168a0 [96049.833787] RBP: ffff990457851cd0 R08: 0000000000000001 R09: 0000000000000000 [96049.833787] R10: ffffafd2c3707ab8 R11: 0000000000000000 R12: ffff9903b93b12c8 [96049.833788] R13: 000000000004e000 R14: ffffafd2c3707b80 R15: ffffafd2c3707b78 [96049.833790] FS: 00007f5c14e7d700(0000) GS:ffff99047b200000(0000) knlGS:0000000000000000 [96049.833791] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [96049.833792] CR2: 00007f5c146abff8 CR3: 0000000115f4c004 CR4: 00000000003606f0 [96049.833795] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [96049.833796] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [96049.833796] Call Trace: [96049.833809] __set_extent_bit+0x46c/0x6a0 [btrfs] [96049.833823] lock_extent_bits+0x6b/0x210 [btrfs] [96049.833831] ? _raw_spin_unlock+0x24/0x30 [96049.833841] ? test_range_bit+0xdf/0x130 [btrfs] [96049.833853] lock_extent_range+0x8e/0x150 [btrfs] [96049.833864] btrfs_double_extent_lock+0x78/0xb0 [btrfs] [96049.833875] btrfs_extent_same_range+0x14e/0x550 [btrfs] [96049.833885] ? rcu_read_lock_sched_held+0x3f/0x70 [96049.833890] ? __kmalloc_node+0x2b0/0x2f0 [96049.833899] ? btrfs_dedupe_file_range+0x19a/0x280 [btrfs] [96049.833909] btrfs_dedupe_file_range+0x270/0x280 [btrfs] [96049.833916] vfs_dedupe_file_range_one+0xd9/0xe0 [96049.833919] vfs_dedupe_file_range+0x131/0x1b0 [96049.833924] do_vfs_ioctl+0x272/0x6e0 [96049.833927] ? __fget+0x113/0x200 [96049.833931] ksys_ioctl+0x70/0x80 [96049.833933] __x64_sys_ioctl+0x16/0x20 [96049.833937] do_syscall_64+0x60/0x1b0 [96049.833939] entry_SYSCALL_64_after_hwframe+0x49/0xbe [96049.833941] RIP: 0033:0x7f5c1478ddd7 [96049.833943] RSP: 002b:00007ffe15b196a8 EFLAGS: 00000202 ORIG_RAX: 0000000000000010 [96049.833945] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f5c1478ddd7 [96049.833946] RDX: 00005625ece322d0 RSI: 00000000c0189436 RDI: 0000000000000004 [96049.833947] RBP: 0000000000000000 R08: 00007f5c14a46f48 R09: 0000000000000040 [96049.833948] R10: 0000000000000541 R11: 0000000000000202 R12: 0000000000000000 [96049.833949] R13: 0000000000000000 R14: 0000000000000004 R15: 00005625ece322d0 [96049.833954] irq event stamp: 6196 [96049.833956] hardirqs last enabled at (6195): [] console_unlock+0x503/0x640 [96049.833958] hardirqs last disabled at (6196): [] trace_hardirqs_off_thunk+0x1a/0x1c [96049.833959] softirqs last enabled at (6114): [] __do_softirq+0x370/0x421 [96049.833964] softirqs last disabled at (6095): [] irq_exit+0xcd/0xe0 [96049.833965] ---[ end trace db7b05f01b7fa10c ]--- [96049.935816] R13: 0000000000000000 R14: 00005562e5259240 R15: 00007ffff092b910 [96049.935822] irq event stamp: 6584 [96049.935823] hardirqs last enabled at (6583): [] console_unlock+0x503/0x640 [96049.935825] hardirqs last disabled at (6584): [] trace_hardirqs_off_thunk+0x1a/0x1c [96049.935827] softirqs last enabled at (6328): [] __do_softirq+0x370/0x421 [96049.935828] softirqs last disabled at (6313): [] irq_exit+0xcd/0xe0 [96049.935829] ---[ end trace db7b05f01b7fa123 ]--- [96049.935840] ------------[ cut here ]------------ [96049.936065] WARNING: CPU: 1 PID: 24463 at fs/btrfs/extent_io.c:436 insert_state+0x101/0x120 [btrfs] [96049.936107] CPU: 1 PID: 24463 Comm: umount Tainted: G W 4.19.0-rc7-btrfs-next-39 #1 [96049.936108] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.11.2-0-gf9626ccb91-prebuilt.qemu-project.org 04/01/2014 [96049.936117] RIP: 0010:insert_state+0x101/0x120 [btrfs] [96049.936119] RSP: 0018:ffffafd2c3637bc0 EFLAGS: 00010282 [96049.936120] RAX: 0000000000000000 RBX: 000000000004dfff RCX: 0000000000000006 [96049.936121] RDX: 0000000000000007 RSI: ffff990445cf88e0 RDI: ffff99047b2968a0 [96049.936122] RBP: ffff990457851cd0 R08: 0000000000000001 R09: 0000000000000000 [96049.936123] R10: ffffafd2c3637b88 R11: 0000000000000000 R12: ffff9904574301e8 [96049.936124] R13: 000000000004e000 R14: ffffafd2c3637c50 R15: ffffafd2c3637c48 [96049.936125] FS: 00007fe4b87e72c0(0000) GS:ffff99047b280000(0000) knlGS:0000000000000000 [96049.936126] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [96049.936128] CR2: 00005562e52618d8 CR3: 00000001151c8005 CR4: 00000000003606e0 [96049.936129] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [96049.936131] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [96049.936131] Call Trace: [96049.936141] __set_extent_bit+0x46c/0x6a0 [btrfs] [96049.936154] lock_extent_bits+0x6b/0x210 [btrfs] [96049.936167] btrfs_evict_inode+0x1e1/0x5a0 [btrfs] [96049.936172] evict+0xbf/0x1c0 [96049.936174] dispose_list+0x51/0x80 [96049.936176] evict_inodes+0x193/0x1c0 [96049.936180] generic_shutdown_super+0x3f/0x110 [96049.936182] kill_anon_super+0xe/0x30 [96049.936189] btrfs_kill_super+0x13/0x100 [btrfs] [96049.936191] deactivate_locked_super+0x3a/0x70 [96049.936193] cleanup_mnt+0x3b/0x80 [96049.936195] task_work_run+0x93/0xc0 [96049.936198] exit_to_usermode_loop+0xfa/0x100 [96049.936201] do_syscall_64+0x17f/0x1b0 [96049.936202] entry_SYSCALL_64_after_hwframe+0x49/0xbe [96049.936204] RIP: 0033:0x7fe4b80cfb37 [96049.936206] RSP: 002b:00007ffff092b688 EFLAGS: 00000246 ORIG_RAX: 00000000000000a6 [96049.936207] RAX: 0000000000000000 RBX: 00005562e5259060 RCX: 00007fe4b80cfb37 [96049.936208] RDX: 0000000000000001 RSI: 0000000000000000 RDI: 00005562e525faa0 [96049.936209] RBP: 00005562e525faa0 R08: 00005562e525f770 R09: 0000000000000015 [96049.936210] R10: 00000000000006b4 R11: 0000000000000246 R12: 00007fe4b85d1e64 [96049.936211] R13: 0000000000000000 R14: 00005562e5259240 R15: 00007ffff092b910 [96049.936211] R13: 0000000000000000 R14: 00005562e5259240 R15: 00007ffff092b910 [96049.936216] irq event stamp: 6616 [96049.936219] hardirqs last enabled at (6615): [] console_unlock+0x503/0x640 [96049.936219] hardirqs last disabled at (6616): [] trace_hardirqs_off_thunk+0x1a/0x1c [96049.936222] softirqs last enabled at (6328): [] __do_softirq+0x370/0x421 [96049.936222] softirqs last disabled at (6313): [] irq_exit+0xcd/0xe0 [96049.936223] ---[ end trace db7b05f01b7fa124 ]--- The second stack trace, from inode eviction, is repeated forever due to the infinite loop during eviction. This is the same type of problem fixed way back in 2015 by commit 113e8283869b ("Btrfs: fix inode eviction infinite loop after extent_same ioctl") and commit ccccf3d67294 ("Btrfs: fix inode eviction infinite loop after cloning into it"). So fix this by returning immediately if the deduplication range length gets rounded down to 0 bytes, as there is nothing that needs to be done in such case. Example reproducer: $ mkfs.btrfs -f /dev/sdb $ mount /dev/sdb /mnt $ xfs_io -f -c "pwrite -S 0xe6 0 100" /mnt/foo $ xfs_io -f -c "pwrite -S 0xe6 0 1M" /mnt/bar # Unmount the filesystem and mount it again so that we start without any # extent state records when we ask for the deduplication. $ umount /mnt $ mount /dev/sdb /mnt $ xfs_io -c "dedupe /mnt/foo 0 500K 100" /mnt/bar # This unmount triggers the infinite loop. $ umount /mnt A test case for fstests will follow soon. Fixes: de02b9f6bb65 ("Btrfs: fix data corruption when deduplicating between different files") CC: # 4.19+ Reviewed-by: Nikolay Borisov Signed-off-by: Filipe Manana Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/ioctl.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 9333e4cda68d2..4cdfc6f8e6a9d 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3178,6 +3178,8 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen, const u64 sz = BTRFS_I(src)->root->fs_info->sectorsize; len = round_down(i_size_read(src), sz) - loff; + if (len == 0) + return 0; olen = len; } } -- GitLab From d62eead029806345c60becda77169d5e01009510 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 5 Nov 2018 11:14:17 +0000 Subject: [PATCH 1551/2269] Btrfs: fix data corruption due to cloning of eof block commit ac765f83f1397646c11092a032d4f62c3d478b81 upstream. We currently allow cloning a range from a file which includes the last block of the file even if the file's size is not aligned to the block size. This is fine and useful when the destination file has the same size, but when it does not and the range ends somewhere in the middle of the destination file, it leads to corruption because the bytes between the EOF and the end of the block have undefined data (when there is support for discard/trimming they have a value of 0x00). Example: $ mkfs.btrfs -f /dev/sdb $ mount /dev/sdb /mnt $ export foo_size=$((256 * 1024 + 100)) $ xfs_io -f -c "pwrite -S 0x3c 0 $foo_size" /mnt/foo $ xfs_io -f -c "pwrite -S 0xb5 0 1M" /mnt/bar $ xfs_io -c "reflink /mnt/foo 0 512K $foo_size" /mnt/bar $ od -A d -t x1 /mnt/bar 0000000 b5 b5 b5 b5 b5 b5 b5 b5 b5 b5 b5 b5 b5 b5 b5 b5 * 0524288 3c 3c 3c 3c 3c 3c 3c 3c 3c 3c 3c 3c 3c 3c 3c 3c * 0786528 3c 3c 3c 3c 00 00 00 00 00 00 00 00 00 00 00 00 0786544 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 * 0790528 b5 b5 b5 b5 b5 b5 b5 b5 b5 b5 b5 b5 b5 b5 b5 b5 * 1048576 The bytes in the range from 786532 (512Kb + 256Kb + 100 bytes) to 790527 (512Kb + 256Kb + 4Kb - 1) got corrupted, having now a value of 0x00 instead of 0xb5. This is similar to the problem we had for deduplication that got recently fixed by commit de02b9f6bb65 ("Btrfs: fix data corruption when deduplicating between different files"). Fix this by not allowing such operations to be performed and return the errno -EINVAL to user space. This is what XFS is doing as well at the VFS level. This change however now makes us return -EINVAL instead of -EOPNOTSUPP for cases where the source range maps to an inline extent and the destination range's end is smaller then the destination file's size, since the detection of inline extents is done during the actual process of dropping file extent items (at __btrfs_drop_extents()). Returning the -EINVAL error is done early on and solely based on the input parameters (offsets and length) and destination file's size. This makes us consistent with XFS and anyone else supporting cloning since this case is now checked at a higher level in the VFS and is where the -EINVAL will be returned from starting with kernel 4.20 (the VFS changed was introduced in 4.20-rc1 by commit 07d19dc9fbe9 ("vfs: avoid problematic remapping requests into partial EOF block"). So this change is more geared towards stable kernels, as it's unlikely the new VFS checks get removed intentionally. A test case for fstests follows soon, as well as an update to filter existing tests that expect -EOPNOTSUPP to accept -EINVAL as well. CC: # 4.4+ Signed-off-by: Filipe Manana Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/ioctl.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 4cdfc6f8e6a9d..cddd63b9103fb 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3909,9 +3909,17 @@ static noinline int btrfs_clone_files(struct file *file, struct file *file_src, goto out_unlock; if (len == 0) olen = len = src->i_size - off; - /* if we extend to eof, continue to block boundary */ - if (off + len == src->i_size) + /* + * If we extend to eof, continue to block boundary if and only if the + * destination end offset matches the destination file's size, otherwise + * we would be corrupting data by placing the eof block into the middle + * of a file. + */ + if (off + len == src->i_size) { + if (!IS_ALIGNED(len, bs) && destoff + len < inode->i_size) + goto out_unlock; len = ALIGN(src->i_size, bs) - off; + } if (len == 0) { ret = 0; -- GitLab From 79d80b876396693e5b6d7d345963b83a2f917816 Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Sun, 4 Nov 2018 03:48:54 +0000 Subject: [PATCH 1552/2269] clockevents/drivers/i8253: Add support for PIT shutdown quirk commit 35b69a420bfb56b7b74cb635ea903db05e357bec upstream. Add support for platforms where pit_shutdown() doesn't work because of a quirk in the PIT emulation. On these platforms setting the counter register to zero causes the PIT to start running again, negating the shutdown. Provide a global variable that controls whether the counter register is zero'ed, which platform specific code can override. Signed-off-by: Michael Kelley Signed-off-by: Thomas Gleixner Cc: "gregkh@linuxfoundation.org" Cc: "devel@linuxdriverproject.org" Cc: "daniel.lezcano@linaro.org" Cc: "virtualization@lists.linux-foundation.org" Cc: "jgross@suse.com" Cc: "akataria@vmware.com" Cc: "olaf@aepfle.de" Cc: "apw@canonical.com" Cc: vkuznets Cc: "jasowang@redhat.com" Cc: "marcelo.cerri@canonical.com" Cc: KY Srinivasan Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/1541303219-11142-2-git-send-email-mikelley@microsoft.com Signed-off-by: Greg Kroah-Hartman --- drivers/clocksource/i8253.c | 14 ++++++++++++-- include/linux/i8253.h | 1 + 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/clocksource/i8253.c b/drivers/clocksource/i8253.c index 9c38895542f4a..d4350bb10b83a 100644 --- a/drivers/clocksource/i8253.c +++ b/drivers/clocksource/i8253.c @@ -20,6 +20,13 @@ DEFINE_RAW_SPINLOCK(i8253_lock); EXPORT_SYMBOL(i8253_lock); +/* + * Handle PIT quirk in pit_shutdown() where zeroing the counter register + * restarts the PIT, negating the shutdown. On platforms with the quirk, + * platform specific code can set this to false. + */ +bool i8253_clear_counter_on_shutdown __ro_after_init = true; + #ifdef CONFIG_CLKSRC_I8253 /* * Since the PIT overflows every tick, its not very useful @@ -109,8 +116,11 @@ static int pit_shutdown(struct clock_event_device *evt) raw_spin_lock(&i8253_lock); outb_p(0x30, PIT_MODE); - outb_p(0, PIT_CH0); - outb_p(0, PIT_CH0); + + if (i8253_clear_counter_on_shutdown) { + outb_p(0, PIT_CH0); + outb_p(0, PIT_CH0); + } raw_spin_unlock(&i8253_lock); return 0; diff --git a/include/linux/i8253.h b/include/linux/i8253.h index e6bb36a97519b..8336b2f6f8346 100644 --- a/include/linux/i8253.h +++ b/include/linux/i8253.h @@ -21,6 +21,7 @@ #define PIT_LATCH ((PIT_TICK_RATE + HZ/2) / HZ) extern raw_spinlock_t i8253_lock; +extern bool i8253_clear_counter_on_shutdown; extern struct clock_event_device i8253_clockevent; extern void clockevent_i8253_init(bool oneshot); -- GitLab From 9910b2578039587832804469610f6742c2d06148 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Sat, 3 Nov 2018 17:11:19 -0400 Subject: [PATCH 1553/2269] ext4: add missing brelse() update_backups()'s error path commit ea0abbb648452cdb6e1734b702b6330a7448fcf8 upstream. Fixes: ac27a0ec112a ("ext4: initial copy of files from ext3") Signed-off-by: Vasily Averin Signed-off-by: Theodore Ts'o Cc: stable@kernel.org # 2.6.19 Signed-off-by: Greg Kroah-Hartman --- fs/ext4/resize.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index e344e606c054c..bf58402da7ac0 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -1097,8 +1097,10 @@ static void update_backups(struct super_block *sb, sector_t blk_off, char *data, backup_block, backup_block - ext4_group_first_block_no(sb, group)); BUFFER_TRACE(bh, "get_write_access"); - if ((err = ext4_journal_get_write_access(handle, bh))) + if ((err = ext4_journal_get_write_access(handle, bh))) { + brelse(bh); break; + } lock_buffer(bh); memcpy(bh->b_data, data, size); if (rest) -- GitLab From feddbc0114dfc5f602ac471e400f36d1e9c6a4b6 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Sat, 3 Nov 2018 16:22:10 -0400 Subject: [PATCH 1554/2269] ext4: add missing brelse() in set_flexbg_block_bitmap()'s error path commit cea5794122125bf67559906a0762186cf417099c upstream. Fixes: 33afdcc5402d ("ext4: add a function which sets up group blocks ...") Cc: stable@kernel.org # 3.3 Signed-off-by: Vasily Averin Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/resize.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index bf58402da7ac0..46098de7222e5 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -444,16 +444,18 @@ static int set_flexbg_block_bitmap(struct super_block *sb, handle_t *handle, BUFFER_TRACE(bh, "get_write_access"); err = ext4_journal_get_write_access(handle, bh); - if (err) + if (err) { + brelse(bh); return err; + } ext4_debug("mark block bitmap %#04llx (+%llu/%u)\n", block, block - start, count2); ext4_set_bits(bh->b_data, block - start, count2); err = ext4_handle_dirty_metadata(handle, NULL, bh); + brelse(bh); if (unlikely(err)) return err; - brelse(bh); } return 0; -- GitLab From 64176ffd7ffcd22dd748cf800fd3c1df8d18a137 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Sat, 3 Nov 2018 16:50:08 -0400 Subject: [PATCH 1555/2269] ext4: add missing brelse() add_new_gdb_meta_bg()'s error path commit 61a9c11e5e7a0dab5381afa5d9d4dd5ebf18f7a0 upstream. Fixes: 01f795f9e0d6 ("ext4: add online resizing support for meta_bg ...") Signed-off-by: Vasily Averin Signed-off-by: Theodore Ts'o Cc: stable@kernel.org # 3.7 Signed-off-by: Greg Kroah-Hartman --- fs/ext4/resize.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 46098de7222e5..e689438ffb576 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -901,6 +901,7 @@ static int add_new_gdb_meta_bg(struct super_block *sb, sizeof(struct buffer_head *), GFP_NOFS); if (!n_group_desc) { + brelse(gdb_bh); err = -ENOMEM; ext4_warning(sb, "not enough memory for %lu groups", gdb_num + 1); @@ -916,8 +917,6 @@ static int add_new_gdb_meta_bg(struct super_block *sb, kvfree(o_group_desc); BUFFER_TRACE(gdb_bh, "get_write_access"); err = ext4_journal_get_write_access(handle, gdb_bh); - if (unlikely(err)) - brelse(gdb_bh); return err; } -- GitLab From 4e7e558e8bfa8017a04a9735fc2ed65cf3b1ea3c Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Sat, 3 Nov 2018 16:13:17 -0400 Subject: [PATCH 1556/2269] ext4: avoid potential extra brelse in setup_new_flex_group_blocks() commit 9e4028935cca3f9ef9b6a90df9da6f1f94853536 upstream. Currently bh is set to NULL only during first iteration of for cycle, then this pointer is not cleared after end of using. Therefore rollback after errors can lead to extra brelse(bh) call, decrements bh counter and later trigger an unexpected warning in __brelse() Patch moves brelse() calls in body of cycle to exclude requirement of brelse() call in rollback. Fixes: 33afdcc5402d ("ext4: add a function which sets up group blocks ...") Signed-off-by: Vasily Averin Signed-off-by: Theodore Ts'o Cc: stable@kernel.org # 3.3+ Signed-off-by: Greg Kroah-Hartman --- fs/ext4/resize.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index e689438ffb576..46565a6f8137f 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -592,7 +592,6 @@ handle_bb: bh = bclean(handle, sb, block); if (IS_ERR(bh)) { err = PTR_ERR(bh); - bh = NULL; goto out; } overhead = ext4_group_overhead_blocks(sb, group); @@ -604,9 +603,9 @@ handle_bb: ext4_mark_bitmap_end(group_data[i].blocks_count, sb->s_blocksize * 8, bh->b_data); err = ext4_handle_dirty_metadata(handle, NULL, bh); + brelse(bh); if (err) goto out; - brelse(bh); handle_ib: if (bg_flags[i] & EXT4_BG_INODE_UNINIT) @@ -621,18 +620,16 @@ handle_ib: bh = bclean(handle, sb, block); if (IS_ERR(bh)) { err = PTR_ERR(bh); - bh = NULL; goto out; } ext4_mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8, bh->b_data); err = ext4_handle_dirty_metadata(handle, NULL, bh); + brelse(bh); if (err) goto out; - brelse(bh); } - bh = NULL; /* Mark group tables in block bitmap */ for (j = 0; j < GROUP_TABLE_COUNT; j++) { @@ -663,7 +660,6 @@ handle_ib: } out: - brelse(bh); err2 = ext4_journal_stop(handle); if (err2 && !err) err = err2; -- GitLab From dd2775337188cc97bcc8ef75f876fdf55a4a6501 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Fri, 9 Nov 2018 11:34:40 -0500 Subject: [PATCH 1557/2269] ext4: missing !bh check in ext4_xattr_inode_write() commit eb6984fa4ce2837dcb1f66720a600f31b0bb3739 upstream. According to Ted Ts'o ext4_getblk() called in ext4_xattr_inode_write() should not return bh = NULL The only time that bh could be NULL, then, would be in the case of something really going wrong; a programming error elsewhere (perhaps a wild pointer dereference) or I/O error causing on-disk file system corruption (although that would be highly unlikely given that we had *just* allocated the blocks and so the metadata blocks in question probably would still be in the cache). Fixes: e50e5129f384 ("ext4: xattr-in-inode support") Signed-off-by: Vasily Averin Signed-off-by: Theodore Ts'o Cc: stable@kernel.org # 4.13 Signed-off-by: Greg Kroah-Hartman --- fs/ext4/xattr.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 9bc50eef61279..34975e1aa7d21 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -1387,6 +1387,12 @@ retry: bh = ext4_getblk(handle, ea_inode, block, 0); if (IS_ERR(bh)) return PTR_ERR(bh); + if (!bh) { + WARN_ON_ONCE(1); + EXT4_ERROR_INODE(ea_inode, + "ext4_getblk() return bh = NULL"); + return -EFSCORRUPTED; + } ret = ext4_journal_get_write_access(handle, bh); if (ret) goto out; -- GitLab From 3caa7b620c0f9e2047468447ab26684daae1913f Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Tue, 6 Nov 2018 16:20:40 -0500 Subject: [PATCH 1558/2269] ext4: fix possible inode leak in the retry loop of ext4_resize_fs() commit db6aee62406d9fbb53315fcddd81f1dc271d49fa upstream. Fixes: 1c6bd7173d66 ("ext4: convert file system to meta_bg if needed ...") Signed-off-by: Vasily Averin Signed-off-by: Theodore Ts'o Cc: stable@kernel.org # 3.7 Signed-off-by: Greg Kroah-Hartman --- fs/ext4/resize.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 46565a6f8137f..075738830b879 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -2028,6 +2028,10 @@ retry: n_blocks_count_retry = 0; free_flex_gd(flex_gd); flex_gd = NULL; + if (resize_inode) { + iput(resize_inode); + resize_inode = NULL; + } goto retry; } -- GitLab From da497e7a38bb774a1ed6f6264b52704c9330e1fd Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Tue, 6 Nov 2018 16:49:50 -0500 Subject: [PATCH 1559/2269] ext4: avoid buffer leak on shutdown in ext4_mark_iloc_dirty() commit a6758309a005060b8297a538a457c88699cb2520 upstream. ext4_mark_iloc_dirty() callers expect that it releases iloc->bh even if it returns an error. Fixes: 0db1ff222d40 ("ext4: add shutdown bit and check for it") Signed-off-by: Vasily Averin Signed-off-by: Theodore Ts'o Cc: stable@kernel.org # 4.11 Signed-off-by: Greg Kroah-Hartman --- fs/ext4/inode.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 9808df52ceca6..bd2bf83b1a1f7 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -5671,9 +5671,10 @@ int ext4_mark_iloc_dirty(handle_t *handle, { int err = 0; - if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) + if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) { + put_bh(iloc->bh); return -EIO; - + } if (IS_I_VERSION(inode)) inode_inc_iversion(inode); -- GitLab From cffd3297173a7397da8171cd2ef20ae8eec8cee8 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Tue, 6 Nov 2018 17:01:36 -0500 Subject: [PATCH 1560/2269] ext4: avoid buffer leak in ext4_orphan_add() after prior errors commit feaf264ce7f8d54582e2f66eb82dd9dd124c94f3 upstream. Fixes: d745a8c20c1f ("ext4: reduce contention on s_orphan_lock") Fixes: 6e3617e579e0 ("ext4: Handle non empty on-disk orphan link") Cc: Dmitry Monakhov Signed-off-by: Vasily Averin Signed-off-by: Theodore Ts'o Cc: stable@kernel.org # 2.6.34 Signed-off-by: Greg Kroah-Hartman --- fs/ext4/namei.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index eb0d8ee39827a..cfa0dc2d97cd4 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -2820,7 +2820,9 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode) list_del_init(&EXT4_I(inode)->i_orphan); mutex_unlock(&sbi->s_orphan_lock); } - } + } else + brelse(iloc.bh); + jbd_debug(4, "superblock will point to %lu\n", inode->i_ino); jbd_debug(4, "orphan inode %lu will point to %d\n", inode->i_ino, NEXT_ORPHAN(inode)); -- GitLab From 65b1ce496c490e74b88f6fafc84a3eed756807a4 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Tue, 6 Nov 2018 16:16:01 -0500 Subject: [PATCH 1561/2269] ext4: fix missing cleanup if ext4_alloc_flex_bg_array() fails while resizing commit f348e2241fb73515d65b5d77dd9c174128a7fbf2 upstream. Fixes: 117fff10d7f1 ("ext4: grow the s_flex_groups array as needed ...") Signed-off-by: Vasily Averin Signed-off-by: Theodore Ts'o Cc: stable@kernel.org # 3.7 Signed-off-by: Greg Kroah-Hartman --- fs/ext4/resize.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 075738830b879..c2cbd21bf023a 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -1992,7 +1992,7 @@ retry: err = ext4_alloc_flex_bg_array(sb, n_group + 1); if (err) - return err; + goto out; err = ext4_mb_alloc_groupinfo(sb, n_group + 1); if (err) -- GitLab From bf04dace8318d6ae20a11c9e7692f36d9e387c23 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Tue, 6 Nov 2018 17:18:17 -0500 Subject: [PATCH 1562/2269] ext4: avoid possible double brelse() in add_new_gdb() on error path commit 4f32c38b4662312dd3c5f113d8bdd459887fb773 upstream. Fixes: b40971426a83 ("ext4: add error checking to calls to ...") Reported-by: Vasily Averin Signed-off-by: Theodore Ts'o Cc: stable@kernel.org # 2.6.38 Signed-off-by: Greg Kroah-Hartman --- fs/ext4/resize.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index c2cbd21bf023a..deebb8842c824 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -846,6 +846,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, err = ext4_handle_dirty_metadata(handle, NULL, gdb_bh); if (unlikely(err)) { ext4_std_error(sb, err); + iloc.bh = NULL; goto exit_inode; } brelse(dind); -- GitLab From 1124e5a89bd285468563df6723ceebab754735f6 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 7 Nov 2018 10:32:53 -0500 Subject: [PATCH 1563/2269] ext4: fix possible leak of sbi->s_group_desc_leak in error path commit 9e463084cdb22e0b56b2dfbc50461020409a5fd3 upstream. Fixes: bfe0a5f47ada ("ext4: add more mount time checks of the superblock") Reported-by: Vasily Averin Signed-off-by: Theodore Ts'o Cc: stable@kernel.org # 4.18 Signed-off-by: Greg Kroah-Hartman --- fs/ext4/super.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 46ad267ef6d6c..d788b963e7aca 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -4012,6 +4012,14 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) sbi->s_groups_count = blocks_count; sbi->s_blockfile_groups = min_t(ext4_group_t, sbi->s_groups_count, (EXT4_MAX_BLOCK_FILE_PHYS / EXT4_BLOCKS_PER_GROUP(sb))); + if (((u64)sbi->s_groups_count * sbi->s_inodes_per_group) != + le32_to_cpu(es->s_inodes_count)) { + ext4_msg(sb, KERN_ERR, "inodes count not valid: %u vs %llu", + le32_to_cpu(es->s_inodes_count), + ((u64)sbi->s_groups_count * sbi->s_inodes_per_group)); + ret = -EINVAL; + goto failed_mount; + } db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) / EXT4_DESC_PER_BLOCK(sb); if (ext4_has_feature_meta_bg(sb)) { @@ -4031,14 +4039,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) ret = -ENOMEM; goto failed_mount; } - if (((u64)sbi->s_groups_count * sbi->s_inodes_per_group) != - le32_to_cpu(es->s_inodes_count)) { - ext4_msg(sb, KERN_ERR, "inodes count not valid: %u vs %llu", - le32_to_cpu(es->s_inodes_count), - ((u64)sbi->s_groups_count * sbi->s_inodes_per_group)); - ret = -EINVAL; - goto failed_mount; - } bgl_lock_init(sbi->s_blockgroup_lock); -- GitLab From 181224f970c445bc3de2b3b42e30f2e2a294e30f Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Wed, 7 Nov 2018 10:56:28 -0500 Subject: [PATCH 1564/2269] ext4: fix possible leak of s_journal_flag_rwsem in error path commit af18e35bfd01e6d65a5e3ef84ffe8b252d1628c5 upstream. Fixes: c8585c6fcaf2 ("ext4: fix races between changing inode journal ...") Signed-off-by: Vasily Averin Signed-off-by: Theodore Ts'o Cc: stable@kernel.org # 4.7 Signed-off-by: Greg Kroah-Hartman --- fs/ext4/super.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index d788b963e7aca..7fd64f5f70f0f 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -4442,6 +4442,7 @@ failed_mount6: percpu_counter_destroy(&sbi->s_freeinodes_counter); percpu_counter_destroy(&sbi->s_dirs_counter); percpu_counter_destroy(&sbi->s_dirtyclusters_counter); + percpu_free_rwsem(&sbi->s_journal_flag_rwsem); failed_mount5: ext4_ext_release(sb); ext4_release_system_zone(sb); -- GitLab From 70ca35b44acea71dff052bb45e9c806a991d8bca Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Wed, 7 Nov 2018 11:01:33 -0500 Subject: [PATCH 1565/2269] ext4: fix buffer leak in ext4_xattr_get_block() on error path commit ecaaf408478b6fb4d9986f9b6652f3824e374f4c upstream. Fixes: dec214d00e0d ("ext4: xattr inode deduplication") Signed-off-by: Vasily Averin Signed-off-by: Theodore Ts'o Cc: stable@kernel.org # 4.13 Signed-off-by: Greg Kroah-Hartman --- fs/ext4/xattr.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 34975e1aa7d21..3e6f326c34803 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -2281,8 +2281,10 @@ static struct buffer_head *ext4_xattr_get_block(struct inode *inode) if (!bh) return ERR_PTR(-EIO); error = ext4_xattr_check_block(inode, bh); - if (error) + if (error) { + brelse(bh); return ERR_PTR(error); + } return bh; } -- GitLab From 8113972d21440db9d098d363ff00cca96d9ce1ab Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Wed, 7 Nov 2018 11:07:01 -0500 Subject: [PATCH 1566/2269] ext4: release bs.bh before re-using in ext4_xattr_block_find() commit 45ae932d246f721e6584430017176cbcadfde610 upstream. bs.bh was taken in previous ext4_xattr_block_find() call, it should be released before re-using Fixes: 7e01c8e5420b ("ext3/4: fix uninitialized bs in ...") Signed-off-by: Vasily Averin Signed-off-by: Theodore Ts'o Cc: stable@kernel.org # 2.6.26 Signed-off-by: Greg Kroah-Hartman --- fs/ext4/xattr.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 3e6f326c34803..49b3d9b038e02 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -2404,6 +2404,8 @@ retry_inode: error = ext4_xattr_block_set(handle, inode, &i, &bs); } else if (error == -ENOSPC) { if (EXT4_I(inode)->i_file_acl && !bs.s.base) { + brelse(bs.bh); + bs.bh = NULL; error = ext4_xattr_block_find(inode, &i, &bs); if (error) goto cleanup; -- GitLab From 4096fc0912778196f04d66fef5189a897dba0131 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Wed, 7 Nov 2018 11:10:21 -0500 Subject: [PATCH 1567/2269] ext4: fix buffer leak in ext4_xattr_move_to_block() on error path commit 6bdc9977fcdedf47118d2caf7270a19f4b6d8a8f upstream. Fixes: 3f2571c1f91f ("ext4: factor out xattr moving") Fixes: 6dd4ee7cab7e ("ext4: Expand extra_inodes space per ...") Reviewed-by: Jan Kara Signed-off-by: Vasily Averin Signed-off-by: Theodore Ts'o Cc: stable@kernel.org # 2.6.23 Signed-off-by: Greg Kroah-Hartman --- fs/ext4/xattr.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 49b3d9b038e02..da46b51629378 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -2626,6 +2626,8 @@ out: kfree(buffer); if (is) brelse(is->iloc.bh); + if (bs) + brelse(bs->bh); kfree(is); kfree(bs); -- GitLab From be7e29ce83f689887c198eeb45b2a81ba6497ea4 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Wed, 7 Nov 2018 11:14:35 -0500 Subject: [PATCH 1568/2269] ext4: fix buffer leak in ext4_expand_extra_isize_ea() on error path commit 53692ec074d00589c2cf1d6d17ca76ad0adce6ec upstream. Fixes: de05ca852679 ("ext4: move call to ext4_error() into ...") Signed-off-by: Vasily Averin Signed-off-by: Theodore Ts'o Cc: stable@kernel.org # 4.17 Signed-off-by: Greg Kroah-Hartman --- fs/ext4/xattr.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index da46b51629378..a5923a1d0ff4f 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -2707,7 +2707,6 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, struct ext4_inode *raw_inode, handle_t *handle) { struct ext4_xattr_ibody_header *header; - struct buffer_head *bh; struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); static unsigned int mnt_count; size_t min_offs; @@ -2748,13 +2747,17 @@ retry: * EA block can hold new_extra_isize bytes. */ if (EXT4_I(inode)->i_file_acl) { + struct buffer_head *bh; + bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl); error = -EIO; if (!bh) goto cleanup; error = ext4_xattr_check_block(inode, bh); - if (error) + if (error) { + brelse(bh); goto cleanup; + } base = BHDR(bh); end = bh->b_data + bh->b_size; min_offs = end - base; -- GitLab From 68b908750ea4cb285aef4f58ed89056e3b0039db Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Wed, 7 Nov 2018 22:36:23 -0500 Subject: [PATCH 1569/2269] ext4: fix buffer leak in __ext4_read_dirblock() on error path commit de59fae0043f07de5d25e02ca360f7d57bfa5866 upstream. Fixes: dc6982ff4db1 ("ext4: refactor code to read directory blocks ...") Signed-off-by: Vasily Averin Signed-off-by: Theodore Ts'o Cc: stable@kernel.org # 3.9 Signed-off-by: Greg Kroah-Hartman --- fs/ext4/namei.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index cfa0dc2d97cd4..4e301b0cdfb53 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -125,6 +125,7 @@ static struct buffer_head *__ext4_read_dirblock(struct inode *inode, if (!is_dx_block && type == INDEX) { ext4_error_inode(inode, func, line, block, "directory leaf block found instead of index block"); + brelse(bh); return ERR_PTR(-EFSCORRUPTED); } if (!ext4_has_metadata_csum(inode->i_sb) || -- GitLab From 5e64ee8788f585d3e7c144fc232efa45eb61a202 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 22 Oct 2018 10:21:38 -0500 Subject: [PATCH 1570/2269] mount: Retest MNT_LOCKED in do_umount commit 25d202ed820ee347edec0bf3bf553544556bf64b upstream. It was recently pointed out that the one instance of testing MNT_LOCKED outside of the namespace_sem is in ksys_umount. Fix that by adding a test inside of do_umount with namespace_sem and the mount_lock held. As it helps to fail fails the existing test is maintained with an additional comment pointing out that it may be racy because the locks are not held. Cc: stable@vger.kernel.org Reported-by: Al Viro Fixes: 5ff9d8a65ce8 ("vfs: Lock in place mounts from more privileged users") Signed-off-by: "Eric W. Biederman" Signed-off-by: Greg Kroah-Hartman --- fs/namespace.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/fs/namespace.c b/fs/namespace.c index 9dc146e7b5e0a..97d3907ca177c 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1625,8 +1625,13 @@ static int do_umount(struct mount *mnt, int flags) namespace_lock(); lock_mount_hash(); - event++; + /* Recheck MNT_LOCKED with the locks held */ + retval = -EINVAL; + if (mnt->mnt.mnt_flags & MNT_LOCKED) + goto out; + + event++; if (flags & MNT_DETACH) { if (!list_empty(&mnt->mnt_list)) umount_tree(mnt, UMOUNT_PROPAGATE); @@ -1640,6 +1645,7 @@ static int do_umount(struct mount *mnt, int flags) retval = 0; } } +out: unlock_mount_hash(); namespace_unlock(); return retval; @@ -1730,7 +1736,7 @@ SYSCALL_DEFINE2(umount, char __user *, name, int, flags) goto dput_and_out; if (!check_mnt(mnt)) goto dput_and_out; - if (mnt->mnt.mnt_flags & MNT_LOCKED) + if (mnt->mnt.mnt_flags & MNT_LOCKED) /* Check optimistically */ goto dput_and_out; retval = -EPERM; if (flags & MNT_FORCE && !capable(CAP_SYS_ADMIN)) -- GitLab From c4585861f8f8055e02fd42b81259a0cb81e32f71 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 25 Oct 2018 09:04:18 -0500 Subject: [PATCH 1571/2269] mount: Don't allow copying MNT_UNBINDABLE|MNT_LOCKED mounts commit df7342b240185d58d3d9665c0bbf0a0f5570ec29 upstream. Jonathan Calmels from NVIDIA reported that he's able to bypass the mount visibility security check in place in the Linux kernel by using a combination of the unbindable property along with the private mount propagation option to allow a unprivileged user to see a path which was purposefully hidden by the root user. Reproducer: # Hide a path to all users using a tmpfs root@castiana:~# mount -t tmpfs tmpfs /sys/devices/ root@castiana:~# # As an unprivileged user, unshare user namespace and mount namespace stgraber@castiana:~$ unshare -U -m -r # Confirm the path is still not accessible root@castiana:~# ls /sys/devices/ # Make /sys recursively unbindable and private root@castiana:~# mount --make-runbindable /sys root@castiana:~# mount --make-private /sys # Recursively bind-mount the rest of /sys over to /mnnt root@castiana:~# mount --rbind /sys/ /mnt # Access our hidden /sys/device as an unprivileged user root@castiana:~# ls /mnt/devices/ breakpoint cpu cstate_core cstate_pkg i915 intel_pt isa kprobe LNXSYSTM:00 msr pci0000:00 platform pnp0 power software system tracepoint uncore_arb uncore_cbox_0 uncore_cbox_1 uprobe virtual Solve this by teaching copy_tree to fail if a mount turns out to be both unbindable and locked. Cc: stable@vger.kernel.org Fixes: 5ff9d8a65ce8 ("vfs: Lock in place mounts from more privileged users") Reported-by: Jonathan Calmels Signed-off-by: "Eric W. Biederman" Signed-off-by: Greg Kroah-Hartman --- fs/namespace.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/fs/namespace.c b/fs/namespace.c index 97d3907ca177c..86273dab89daa 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1814,8 +1814,14 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry, for (s = r; s; s = next_mnt(s, r)) { if (!(flag & CL_COPY_UNBINDABLE) && IS_MNT_UNBINDABLE(s)) { - s = skip_mnt_tree(s); - continue; + if (s->mnt.mnt_flags & MNT_LOCKED) { + /* Both unbindable and locked. */ + q = ERR_PTR(-EPERM); + goto out; + } else { + s = skip_mnt_tree(s); + continue; + } } if (!(flag & CL_COPY_MNT_NS_FILE) && is_mnt_ns_file(s->mnt.mnt_root)) { -- GitLab From f3f529741e54be10d162797275fe432cebe51b96 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 25 Oct 2018 12:05:11 -0500 Subject: [PATCH 1572/2269] mount: Prevent MNT_DETACH from disconnecting locked mounts commit 9c8e0a1b683525464a2abe9fb4b54404a50ed2b4 upstream. Timothy Baldwin wrote: > As per mount_namespaces(7) unprivileged users should not be able to look under mount points: > > Mounts that come as a single unit from more privileged mount are locked > together and may not be separated in a less privileged mount namespace. > > However they can: > > 1. Create a mount namespace. > 2. In the mount namespace open a file descriptor to the parent of a mount point. > 3. Destroy the mount namespace. > 4. Use the file descriptor to look under the mount point. > > I have reproduced this with Linux 4.16.18 and Linux 4.18-rc8. > > The setup: > > $ sudo sysctl kernel.unprivileged_userns_clone=1 > kernel.unprivileged_userns_clone = 1 > $ mkdir -p A/B/Secret > $ sudo mount -t tmpfs hide A/B > > > "Secret" is indeed hidden as expected: > > $ ls -lR A > A: > total 0 > drwxrwxrwt 2 root root 40 Feb 12 21:08 B > > A/B: > total 0 > > > The attack revealing "Secret": > > $ unshare -Umr sh -c "exec unshare -m ls -lR /proc/self/fd/4/ 4 /proc/self/fd/4/: > total 0 > drwxr-xr-x 3 root root 60 Feb 12 21:08 B > > /proc/self/fd/4/B: > total 0 > drwxr-xr-x 2 root root 40 Feb 12 21:08 Secret > > /proc/self/fd/4/B/Secret: > total 0 I tracked this down to put_mnt_ns running passing UMOUNT_SYNC and disconnecting all of the mounts in a mount namespace. Fix this by factoring drop_mounts out of drop_collected_mounts and passing 0 instead of UMOUNT_SYNC. There are two possible behavior differences that result from this. - No longer setting UMOUNT_SYNC will no longer set MNT_SYNC_UMOUNT on the vfsmounts being unmounted. This effects the lazy rcu walk by kicking the walk out of rcu mode and forcing it to be a non-lazy walk. - No longer disconnecting locked mounts will keep some mounts around longer as they stay because the are locked to other mounts. There are only two users of drop_collected mounts: audit_tree.c and put_mnt_ns. In audit_tree.c the mounts are private and there are no rcu lazy walks only calls to iterate_mounts. So the changes should have no effect except for a small timing effect as the connected mounts are disconnected. In put_mnt_ns there may be references from process outside the mount namespace to the mounts. So the mounts remaining connected will be the bug fix that is needed. That rcu walks are allowed to continue appears not to be a problem especially as the rcu walk change was about an implementation detail not about semantics. Cc: stable@vger.kernel.org Fixes: 5ff9d8a65ce8 ("vfs: Lock in place mounts from more privileged users") Reported-by: Timothy Baldwin Tested-by: Timothy Baldwin Signed-off-by: "Eric W. Biederman" Signed-off-by: Greg Kroah-Hartman --- fs/namespace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/namespace.c b/fs/namespace.c index 86273dab89daa..e9c13eedd7393 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1874,7 +1874,7 @@ void drop_collected_mounts(struct vfsmount *mnt) { namespace_lock(); lock_mount_hash(); - umount_tree(real_mount(mnt), UMOUNT_SYNC); + umount_tree(real_mount(mnt), 0); unlock_mount_hash(); namespace_unlock(); } -- GitLab From ce583650a908b4b4fdfb3ac13edacf17d4c55c2f Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 27 Sep 2018 17:17:49 +0000 Subject: [PATCH 1573/2269] kdb: use correct pointer when 'btc' calls 'btt' commit dded2e159208a9edc21dd5c5f583afa28d378d39 upstream. On a powerpc 8xx, 'btc' fails as follows: Entering kdb (current=0x(ptrval), pid 282) due to Keyboard Entry kdb> btc btc: cpu status: Currently on cpu 0 Available cpus: 0 kdb_getarea: Bad address 0x0 when booting the kernel with 'debug_boot_weak_hash', it fails as well Entering kdb (current=0xba99ad80, pid 284) due to Keyboard Entry kdb> btc btc: cpu status: Currently on cpu 0 Available cpus: 0 kdb_getarea: Bad address 0xba99ad80 On other platforms, Oopses have been observed too, see https://github.com/linuxppc/linux/issues/139 This is due to btc calling 'btt' with %p pointer as an argument. This patch replaces %p by %px to get the real pointer value as expected by 'btt' Fixes: ad67b74d2469 ("printk: hash addresses printed with %p") Cc: Signed-off-by: Christophe Leroy Reviewed-by: Daniel Thompson Signed-off-by: Daniel Thompson Signed-off-by: Greg Kroah-Hartman --- kernel/debug/kdb/kdb_bt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/debug/kdb/kdb_bt.c b/kernel/debug/kdb/kdb_bt.c index 6ad4a9fcbd6f7..7921ae4fca8de 100644 --- a/kernel/debug/kdb/kdb_bt.c +++ b/kernel/debug/kdb/kdb_bt.c @@ -179,14 +179,14 @@ kdb_bt(int argc, const char **argv) kdb_printf("no process for cpu %ld\n", cpu); return 0; } - sprintf(buf, "btt 0x%p\n", KDB_TSK(cpu)); + sprintf(buf, "btt 0x%px\n", KDB_TSK(cpu)); kdb_parse(buf); return 0; } kdb_printf("btc: cpu status: "); kdb_parse("cpu\n"); for_each_online_cpu(cpu) { - sprintf(buf, "btt 0x%p\n", KDB_TSK(cpu)); + sprintf(buf, "btt 0x%px\n", KDB_TSK(cpu)); kdb_parse(buf); touch_nmi_watchdog(); } -- GitLab From dedde93bd683730e78da54db42e52cb87cdb8a0c Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 27 Sep 2018 17:17:57 +0000 Subject: [PATCH 1574/2269] kdb: print real address of pointers instead of hashed addresses commit 568fb6f42ac6851320adaea25f8f1b94de14e40a upstream. Since commit ad67b74d2469 ("printk: hash addresses printed with %p"), all pointers printed with %p are printed with hashed addresses instead of real addresses in order to avoid leaking addresses in dmesg and syslog. But this applies to kdb too, with is unfortunate: Entering kdb (current=0x(ptrval), pid 329) due to Keyboard Entry kdb> ps 15 sleeping system daemon (state M) processes suppressed, use 'ps A' to see all. Task Addr Pid Parent [*] cpu State Thread Command 0x(ptrval) 329 328 1 0 R 0x(ptrval) *sh 0x(ptrval) 1 0 0 0 S 0x(ptrval) init 0x(ptrval) 3 2 0 0 D 0x(ptrval) rcu_gp 0x(ptrval) 4 2 0 0 D 0x(ptrval) rcu_par_gp 0x(ptrval) 5 2 0 0 D 0x(ptrval) kworker/0:0 0x(ptrval) 6 2 0 0 D 0x(ptrval) kworker/0:0H 0x(ptrval) 7 2 0 0 D 0x(ptrval) kworker/u2:0 0x(ptrval) 8 2 0 0 D 0x(ptrval) mm_percpu_wq 0x(ptrval) 10 2 0 0 D 0x(ptrval) rcu_preempt The whole purpose of kdb is to debug, and for debugging real addresses need to be known. In addition, data displayed by kdb doesn't go into dmesg. This patch replaces all %p by %px in kdb in order to display real addresses. Fixes: ad67b74d2469 ("printk: hash addresses printed with %p") Cc: Signed-off-by: Christophe Leroy Signed-off-by: Daniel Thompson Signed-off-by: Greg Kroah-Hartman --- kernel/debug/kdb/kdb_main.c | 14 +++++++------- kernel/debug/kdb/kdb_support.c | 12 ++++++------ 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c index 07aefa8dbee85..993db6b2348e7 100644 --- a/kernel/debug/kdb/kdb_main.c +++ b/kernel/debug/kdb/kdb_main.c @@ -1182,7 +1182,7 @@ static int kdb_local(kdb_reason_t reason, int error, struct pt_regs *regs, if (reason == KDB_REASON_DEBUG) { /* special case below */ } else { - kdb_printf("\nEntering kdb (current=0x%p, pid %d) ", + kdb_printf("\nEntering kdb (current=0x%px, pid %d) ", kdb_current, kdb_current ? kdb_current->pid : 0); #if defined(CONFIG_SMP) kdb_printf("on processor %d ", raw_smp_processor_id()); @@ -1198,7 +1198,7 @@ static int kdb_local(kdb_reason_t reason, int error, struct pt_regs *regs, */ switch (db_result) { case KDB_DB_BPT: - kdb_printf("\nEntering kdb (0x%p, pid %d) ", + kdb_printf("\nEntering kdb (0x%px, pid %d) ", kdb_current, kdb_current->pid); #if defined(CONFIG_SMP) kdb_printf("on processor %d ", raw_smp_processor_id()); @@ -2037,7 +2037,7 @@ static int kdb_lsmod(int argc, const char **argv) if (mod->state == MODULE_STATE_UNFORMED) continue; - kdb_printf("%-20s%8u 0x%p ", mod->name, + kdb_printf("%-20s%8u 0x%px ", mod->name, mod->core_layout.size, (void *)mod); #ifdef CONFIG_MODULE_UNLOAD kdb_printf("%4d ", module_refcount(mod)); @@ -2048,7 +2048,7 @@ static int kdb_lsmod(int argc, const char **argv) kdb_printf(" (Loading)"); else kdb_printf(" (Live)"); - kdb_printf(" 0x%p", mod->core_layout.base); + kdb_printf(" 0x%px", mod->core_layout.base); #ifdef CONFIG_MODULE_UNLOAD { @@ -2330,7 +2330,7 @@ void kdb_ps1(const struct task_struct *p) return; cpu = kdb_process_cpu(p); - kdb_printf("0x%p %8d %8d %d %4d %c 0x%p %c%s\n", + kdb_printf("0x%px %8d %8d %d %4d %c 0x%px %c%s\n", (void *)p, p->pid, p->parent->pid, kdb_task_has_cpu(p), kdb_process_cpu(p), kdb_task_state_char(p), @@ -2343,7 +2343,7 @@ void kdb_ps1(const struct task_struct *p) } else { if (KDB_TSK(cpu) != p) kdb_printf(" Error: does not match running " - "process table (0x%p)\n", KDB_TSK(cpu)); + "process table (0x%px)\n", KDB_TSK(cpu)); } } } @@ -2722,7 +2722,7 @@ int kdb_register_flags(char *cmd, for_each_kdbcmd(kp, i) { if (kp->cmd_name && (strcmp(kp->cmd_name, cmd) == 0)) { kdb_printf("Duplicate kdb command registered: " - "%s, func %p help %s\n", cmd, func, help); + "%s, func %px help %s\n", cmd, func, help); return 1; } } diff --git a/kernel/debug/kdb/kdb_support.c b/kernel/debug/kdb/kdb_support.c index d35cc2d3a4cc0..84422d2b95c05 100644 --- a/kernel/debug/kdb/kdb_support.c +++ b/kernel/debug/kdb/kdb_support.c @@ -40,7 +40,7 @@ int kdbgetsymval(const char *symname, kdb_symtab_t *symtab) { if (KDB_DEBUG(AR)) - kdb_printf("kdbgetsymval: symname=%s, symtab=%p\n", symname, + kdb_printf("kdbgetsymval: symname=%s, symtab=%px\n", symname, symtab); memset(symtab, 0, sizeof(*symtab)); symtab->sym_start = kallsyms_lookup_name(symname); @@ -88,7 +88,7 @@ int kdbnearsym(unsigned long addr, kdb_symtab_t *symtab) char *knt1 = NULL; if (KDB_DEBUG(AR)) - kdb_printf("kdbnearsym: addr=0x%lx, symtab=%p\n", addr, symtab); + kdb_printf("kdbnearsym: addr=0x%lx, symtab=%px\n", addr, symtab); memset(symtab, 0, sizeof(*symtab)); if (addr < 4096) @@ -149,7 +149,7 @@ int kdbnearsym(unsigned long addr, kdb_symtab_t *symtab) symtab->mod_name = "kernel"; if (KDB_DEBUG(AR)) kdb_printf("kdbnearsym: returns %d symtab->sym_start=0x%lx, " - "symtab->mod_name=%p, symtab->sym_name=%p (%s)\n", ret, + "symtab->mod_name=%px, symtab->sym_name=%px (%s)\n", ret, symtab->sym_start, symtab->mod_name, symtab->sym_name, symtab->sym_name); @@ -887,13 +887,13 @@ void debug_kusage(void) __func__, dah_first); if (dah_first) { h_used = (struct debug_alloc_header *)debug_alloc_pool; - kdb_printf("%s: h_used %p size %d\n", __func__, h_used, + kdb_printf("%s: h_used %px size %d\n", __func__, h_used, h_used->size); } do { h_used = (struct debug_alloc_header *) ((char *)h_free + dah_overhead + h_free->size); - kdb_printf("%s: h_used %p size %d caller %p\n", + kdb_printf("%s: h_used %px size %d caller %px\n", __func__, h_used, h_used->size, h_used->caller); h_free = (struct debug_alloc_header *) (debug_alloc_pool + h_free->next); @@ -902,7 +902,7 @@ void debug_kusage(void) ((char *)h_free + dah_overhead + h_free->size); if ((char *)h_used - debug_alloc_pool != sizeof(debug_alloc_pool_aligned)) - kdb_printf("%s: h_used %p size %d caller %p\n", + kdb_printf("%s: h_used %px size %d caller %px\n", __func__, h_used, h_used->size, h_used->caller); out: spin_unlock(&dap_lock); -- GitLab From a56609f725c3cfff71e0878036263477d59755a4 Mon Sep 17 00:00:00 2001 From: Frank Sorenson Date: Tue, 30 Oct 2018 15:10:40 -0500 Subject: [PATCH 1575/2269] sunrpc: correct the computation for page_ptr when truncating commit 5d7a5bcb67c70cbc904057ef52d3fcfeb24420bb upstream. When truncating the encode buffer, the page_ptr is getting advanced, causing the next page to be skipped while encoding. The page is still included in the response, so the response contains a page of bogus data. We need to adjust the page_ptr backwards to ensure we encode the next page into the correct place. We saw this triggered when concurrent directory modifications caused nfsd4_encode_direct_fattr() to return nfserr_noent, and the resulting call to xdr_truncate_encode() corrupted the READDIR reply. Signed-off-by: Frank Sorenson Cc: stable@vger.kernel.org Signed-off-by: J. Bruce Fields Signed-off-by: Greg Kroah-Hartman --- net/sunrpc/xdr.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index e34f4ee7f2b6c..13695ba8fc540 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -639,11 +639,10 @@ void xdr_truncate_encode(struct xdr_stream *xdr, size_t len) WARN_ON_ONCE(xdr->iov); return; } - if (fraglen) { + if (fraglen) xdr->end = head->iov_base + head->iov_len; - xdr->page_ptr--; - } /* (otherwise assume xdr->end is already set) */ + xdr->page_ptr--; head->iov_len = len; buf->len = len; xdr->p = head->iov_base + head->iov_len; -- GitLab From 6d1c38aa47d432f425c5829261eaa8e624274a4f Mon Sep 17 00:00:00 2001 From: Scott Mayhew Date: Thu, 8 Nov 2018 11:11:36 -0500 Subject: [PATCH 1576/2269] nfsd: COPY and CLONE operations require the saved filehandle to be set commit 01310bb7c9c98752cc763b36532fab028e0f8f81 upstream. Make sure we have a saved filehandle, otherwise we'll oops with a null pointer dereference in nfs4_preprocess_stateid_op(). Signed-off-by: Scott Mayhew Cc: stable@vger.kernel.org Signed-off-by: J. Bruce Fields Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4proc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 6d16399a350e0..ee765abad2efb 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1049,6 +1049,9 @@ nfsd4_verify_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, { __be32 status; + if (!cstate->save_fh.fh_dentry) + return nfserr_nofilehandle; + status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->save_fh, src_stateid, RD_STATE, src, NULL); if (status) { -- GitLab From 677f423c995d27bfafd34a7b86773830db6ac09f Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Mon, 5 Nov 2018 03:48:25 +0000 Subject: [PATCH 1577/2269] rtc: hctosys: Add missing range error reporting commit 7ce9a992ffde8ce93d5ae5767362a5c7389ae895 upstream. Fix an issue with the 32-bit range error path in `rtc_hctosys' where no error code is set and consequently the successful preceding call result from `rtc_read_time' is propagated to `rtc_hctosys_ret'. This in turn makes any subsequent call to `hctosys_show' incorrectly report in sysfs that the system time has been set from this RTC while it has not. Set the error to ERANGE then if we can't express the result due to an overflow. Signed-off-by: Maciej W. Rozycki Fixes: b3a5ac42ab18 ("rtc: hctosys: Ensure system time doesn't overflow time_t") Cc: stable@vger.kernel.org # 4.17+ Signed-off-by: Alexandre Belloni Signed-off-by: Greg Kroah-Hartman --- drivers/rtc/hctosys.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/rtc/hctosys.c b/drivers/rtc/hctosys.c index e79f2a181ad24..b9ec4a16db1f6 100644 --- a/drivers/rtc/hctosys.c +++ b/drivers/rtc/hctosys.c @@ -50,8 +50,10 @@ static int __init rtc_hctosys(void) tv64.tv_sec = rtc_tm_to_time64(&tm); #if BITS_PER_LONG == 32 - if (tv64.tv_sec > INT_MAX) + if (tv64.tv_sec > INT_MAX) { + err = -ERANGE; goto err_read; + } #endif err = do_settimeofday64(&tv64); -- GitLab From a42d933dc28180d92c88131e4cc77fe1b747b5e3 Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Fri, 9 Nov 2018 14:51:46 +0100 Subject: [PATCH 1578/2269] fuse: fix use-after-free in fuse_direct_IO() commit ebacb81273599555a7a19f7754a1451206a5fc4f upstream. In async IO blocking case the additional reference to the io is taken for it to survive fuse_aio_complete(). In non blocking case this additional reference is not needed, however we still reference io to figure out whether to wait for completion or not. This is wrong and will lead to use-after-free. Fix it by storing blocking information in separate variable. This was spotted by KASAN when running generic/208 fstest. Signed-off-by: Lukas Czerner Reported-by: Zorro Lang Signed-off-by: Miklos Szeredi Fixes: 744742d692e3 ("fuse: Add reference counting for fuse_io_priv") Cc: # v4.6 Signed-off-by: Greg Kroah-Hartman --- fs/fuse/file.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index fb4738ef162f3..47d7a510be5be 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -2912,10 +2912,12 @@ fuse_direct_IO(struct kiocb *iocb, struct iov_iter *iter) } if (io->async) { + bool blocking = io->blocking; + fuse_aio_complete(io, ret < 0 ? ret : 0, -1); /* we have a non-extending, async request, so return */ - if (!io->blocking) + if (!blocking) return -EIOCBQUEUED; wait_for_completion(&wait); -- GitLab From 6ceec07cc84a26152e5678e121c824f3d1097de6 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 9 Nov 2018 15:52:16 +0100 Subject: [PATCH 1579/2269] fuse: fix leaked notify reply commit 7fabaf303458fcabb694999d6fa772cc13d4e217 upstream. fuse_request_send_notify_reply() may fail if the connection was reset for some reason (e.g. fs was unmounted). Don't leak request reference in this case. Besides leaking memory, this resulted in fc->num_waiting not being decremented and hence fuse_wait_aborted() left in a hanging and unkillable state. Fixes: 2d45ba381a74 ("fuse: add retrieve request") Fixes: b8f95e5d13f5 ("fuse: umount should wait for all requests") Reported-and-tested-by: syzbot+6339eda9cb4ebbc4c37b@syzkaller.appspotmail.com Signed-off-by: Miklos Szeredi Cc: #v2.6.36 Signed-off-by: Greg Kroah-Hartman --- fs/fuse/dev.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index a8b114ae931f9..f7280c44cd4bc 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -1721,8 +1721,10 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode, req->in.args[1].size = total_len; err = fuse_request_send_notify_reply(fc, req, outarg->notify_unique); - if (err) + if (err) { fuse_retrieve_end(fc, req); + fuse_put_request(fc, req); + } return err; } -- GitLab From 56d10194be8102077430b683b2bb156d7ffdd84e Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sun, 1 Jul 2018 13:56:54 -0700 Subject: [PATCH 1580/2269] configfs: replace strncpy with memcpy commit 1823342a1f2b47a4e6f5667f67cd28ab6bc4d6cd upstream. gcc 8.1.0 complains: fs/configfs/symlink.c:67:3: warning: 'strncpy' output truncated before terminating nul copying as many bytes from a string as its length fs/configfs/symlink.c: In function 'configfs_get_link': fs/configfs/symlink.c:63:13: note: length computed here Using strncpy() is indeed less than perfect since the length of data to be copied has already been determined with strlen(). Replace strncpy() with memcpy() to address the warning and optimize the code a little. Signed-off-by: Guenter Roeck Signed-off-by: Christoph Hellwig Signed-off-by: Nobuhiro Iwamatsu Signed-off-by: Greg Kroah-Hartman --- fs/configfs/symlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/configfs/symlink.c b/fs/configfs/symlink.c index c8aabba502f6d..9993cdb81e7d7 100644 --- a/fs/configfs/symlink.c +++ b/fs/configfs/symlink.c @@ -64,7 +64,7 @@ static void fill_item_path(struct config_item * item, char * buffer, int length) /* back up enough to print this bus id with '/' */ length -= cur; - strncpy(buffer + length,config_item_name(p),cur); + memcpy(buffer + length, config_item_name(p), cur); *(buffer + --length) = '/'; } } -- GitLab From 6c75d139608a603642cd731ce75e4222754fff0e Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 5 Nov 2018 22:57:24 +0000 Subject: [PATCH 1581/2269] gfs2: Put bitmap buffers in put_super commit 10283ea525d30f2e99828978fd04d8427876a7ad upstream. gfs2_put_super calls gfs2_clear_rgrpd to destroy the gfs2_rgrpd objects attached to the resource group glocks. That function should release the buffers attached to the gfs2_bitmap objects (bi_bh), but the call to gfs2_rgrp_brelse for doing that is missing. When gfs2_releasepage later runs across these buffers which are still referenced, it refuses to free them. This causes the pages the buffers are attached to to remain referenced as well. With enough mount/unmount cycles, the system will eventually run out of memory. Fix this by adding the missing call to gfs2_rgrp_brelse in gfs2_clear_rgrpd. (Also fix a gfs2_rgrp_relse -> gfs2_rgrp_brelse typo in a comment.) Fixes: 39b0f1e92908 ("GFS2: Don't brelse rgrp buffer_heads every allocation") Cc: stable@vger.kernel.org # v4.2+ Signed-off-by: Andreas Gruenbacher Signed-off-by: Greg Kroah-Hartman --- fs/gfs2/rgrp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 5fe033131f036..b0eee90738ff4 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -706,6 +706,7 @@ void gfs2_clear_rgrpd(struct gfs2_sbd *sdp) if (gl) { glock_clear_object(gl, rgd); + gfs2_rgrp_brelse(rgd); gfs2_glock_put(gl); } @@ -1115,7 +1116,7 @@ static u32 count_unlinked(struct gfs2_rgrpd *rgd) * @rgd: the struct gfs2_rgrpd describing the RG to read in * * Read in all of a Resource Group's header and bitmap blocks. - * Caller must eventually call gfs2_rgrp_relse() to free the bitmaps. + * Caller must eventually call gfs2_rgrp_brelse() to free the bitmaps. * * Returns: errno */ -- GitLab From fdc427442b374e84077d4214733764efb1a38a0e Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 3 Nov 2018 14:56:00 -0700 Subject: [PATCH 1582/2269] crypto: user - fix leaking uninitialized memory to userspace commit f43f39958beb206b53292801e216d9b8a660f087 upstream. All bytes of the NETLINK_CRYPTO report structures must be initialized, since they are copied to userspace. The change from strncpy() to strlcpy() broke this. As a minimal fix, change it back. Fixes: 4473710df1f8 ("crypto: user - Prepare for CRYPTO_MAX_ALG_NAME expansion") Cc: # v4.12+ Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- crypto/crypto_user.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/crypto/crypto_user.c b/crypto/crypto_user.c index 0dbe2be7f7831..b5758768920b5 100644 --- a/crypto/crypto_user.c +++ b/crypto/crypto_user.c @@ -83,7 +83,7 @@ static int crypto_report_cipher(struct sk_buff *skb, struct crypto_alg *alg) { struct crypto_report_cipher rcipher; - strlcpy(rcipher.type, "cipher", sizeof(rcipher.type)); + strncpy(rcipher.type, "cipher", sizeof(rcipher.type)); rcipher.blocksize = alg->cra_blocksize; rcipher.min_keysize = alg->cra_cipher.cia_min_keysize; @@ -102,7 +102,7 @@ static int crypto_report_comp(struct sk_buff *skb, struct crypto_alg *alg) { struct crypto_report_comp rcomp; - strlcpy(rcomp.type, "compression", sizeof(rcomp.type)); + strncpy(rcomp.type, "compression", sizeof(rcomp.type)); if (nla_put(skb, CRYPTOCFGA_REPORT_COMPRESS, sizeof(struct crypto_report_comp), &rcomp)) goto nla_put_failure; @@ -116,7 +116,7 @@ static int crypto_report_acomp(struct sk_buff *skb, struct crypto_alg *alg) { struct crypto_report_acomp racomp; - strlcpy(racomp.type, "acomp", sizeof(racomp.type)); + strncpy(racomp.type, "acomp", sizeof(racomp.type)); if (nla_put(skb, CRYPTOCFGA_REPORT_ACOMP, sizeof(struct crypto_report_acomp), &racomp)) @@ -131,7 +131,7 @@ static int crypto_report_akcipher(struct sk_buff *skb, struct crypto_alg *alg) { struct crypto_report_akcipher rakcipher; - strlcpy(rakcipher.type, "akcipher", sizeof(rakcipher.type)); + strncpy(rakcipher.type, "akcipher", sizeof(rakcipher.type)); if (nla_put(skb, CRYPTOCFGA_REPORT_AKCIPHER, sizeof(struct crypto_report_akcipher), &rakcipher)) @@ -146,7 +146,7 @@ static int crypto_report_kpp(struct sk_buff *skb, struct crypto_alg *alg) { struct crypto_report_kpp rkpp; - strlcpy(rkpp.type, "kpp", sizeof(rkpp.type)); + strncpy(rkpp.type, "kpp", sizeof(rkpp.type)); if (nla_put(skb, CRYPTOCFGA_REPORT_KPP, sizeof(struct crypto_report_kpp), &rkpp)) @@ -160,10 +160,10 @@ nla_put_failure: static int crypto_report_one(struct crypto_alg *alg, struct crypto_user_alg *ualg, struct sk_buff *skb) { - strlcpy(ualg->cru_name, alg->cra_name, sizeof(ualg->cru_name)); - strlcpy(ualg->cru_driver_name, alg->cra_driver_name, + strncpy(ualg->cru_name, alg->cra_name, sizeof(ualg->cru_name)); + strncpy(ualg->cru_driver_name, alg->cra_driver_name, sizeof(ualg->cru_driver_name)); - strlcpy(ualg->cru_module_name, module_name(alg->cra_module), + strncpy(ualg->cru_module_name, module_name(alg->cra_module), sizeof(ualg->cru_module_name)); ualg->cru_type = 0; @@ -176,7 +176,7 @@ static int crypto_report_one(struct crypto_alg *alg, if (alg->cra_flags & CRYPTO_ALG_LARVAL) { struct crypto_report_larval rl; - strlcpy(rl.type, "larval", sizeof(rl.type)); + strncpy(rl.type, "larval", sizeof(rl.type)); if (nla_put(skb, CRYPTOCFGA_REPORT_LARVAL, sizeof(struct crypto_report_larval), &rl)) goto nla_put_failure; -- GitLab From efcbe502ddcc9f28d88a7c35b90f121a4e176823 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 16 Nov 2018 15:08:35 -0800 Subject: [PATCH 1583/2269] lib/ubsan.c: don't mark __ubsan_handle_builtin_unreachable as noreturn commit 1c23b4108d716cc848b38532063a8aca4f86add8 upstream. gcc-8 complains about the prototype for this function: lib/ubsan.c:432:1: error: ignoring attribute 'noreturn' in declaration of a built-in function '__ubsan_handle_builtin_unreachable' because it conflicts with attribute 'const' [-Werror=attributes] This is actually a GCC's bug. In GCC internals __ubsan_handle_builtin_unreachable() declared with both 'noreturn' and 'const' attributes instead of only 'noreturn': https://gcc.gnu.org/bugzilla/show_bug.cgi?id=84210 Workaround this by removing the noreturn attribute. [aryabinin: add information about GCC bug in changelog] Link: http://lkml.kernel.org/r/20181107144516.4587-1-aryabinin@virtuozzo.com Signed-off-by: Arnd Bergmann Signed-off-by: Andrey Ryabinin Acked-by: Olof Johansson Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- lib/ubsan.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/lib/ubsan.c b/lib/ubsan.c index 50d1d5c25deb8..60e108c5c1734 100644 --- a/lib/ubsan.c +++ b/lib/ubsan.c @@ -451,8 +451,7 @@ void __ubsan_handle_shift_out_of_bounds(struct shift_out_of_bounds_data *data, EXPORT_SYMBOL(__ubsan_handle_shift_out_of_bounds); -void __noreturn -__ubsan_handle_builtin_unreachable(struct unreachable_data *data) +void __ubsan_handle_builtin_unreachable(struct unreachable_data *data) { unsigned long flags; -- GitLab From 52fcb8dd8cc224740259800eee9d2031c8ffa830 Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Fri, 16 Nov 2018 15:08:04 -0800 Subject: [PATCH 1584/2269] hugetlbfs: fix kernel BUG at fs/hugetlbfs/inode.c:444! commit 5e41540c8a0f0e98c337dda8b391e5dda0cde7cf upstream. This bug has been experienced several times by the Oracle DB team. The BUG is in remove_inode_hugepages() as follows: /* * If page is mapped, it was faulted in after being * unmapped in caller. Unmap (again) now after taking * the fault mutex. The mutex will prevent faults * until we finish removing the page. * * This race can only happen in the hole punch case. * Getting here in a truncate operation is a bug. */ if (unlikely(page_mapped(page))) { BUG_ON(truncate_op); In this case, the elevated map count is not the result of a race. Rather it was incorrectly incremented as the result of a bug in the huge pmd sharing code. Consider the following: - Process A maps a hugetlbfs file of sufficient size and alignment (PUD_SIZE) that a pmd page could be shared. - Process B maps the same hugetlbfs file with the same size and alignment such that a pmd page is shared. - Process B then calls mprotect() to change protections for the mapping with the shared pmd. As a result, the pmd is 'unshared'. - Process B then calls mprotect() again to chage protections for the mapping back to their original value. pmd remains unshared. - Process B then forks and process C is created. During the fork process, we do dup_mm -> dup_mmap -> copy_page_range to copy page tables. Copying page tables for hugetlb mappings is done in the routine copy_hugetlb_page_range. In copy_hugetlb_page_range(), the destination pte is obtained by: dst_pte = huge_pte_alloc(dst, addr, sz); If pmd sharing is possible, the returned pointer will be to a pte in an existing page table. In the situation above, process C could share with either process A or process B. Since process A is first in the list, the returned pte is a pointer to a pte in process A's page table. However, the check for pmd sharing in copy_hugetlb_page_range is: /* If the pagetables are shared don't copy or take references */ if (dst_pte == src_pte) continue; Since process C is sharing with process A instead of process B, the above test fails. The code in copy_hugetlb_page_range which follows assumes dst_pte points to a huge_pte_none pte. It copies the pte entry from src_pte to dst_pte and increments this map count of the associated page. This is how we end up with an elevated map count. To solve, check the dst_pte entry for huge_pte_none. If !none, this implies PMD sharing so do not copy. Link: http://lkml.kernel.org/r/20181105212315.14125-1-mike.kravetz@oracle.com Fixes: c5c99429fa57 ("fix hugepages leak due to pagetable page sharing") Signed-off-by: Mike Kravetz Reviewed-by: Naoya Horiguchi Cc: Michal Hocko Cc: Hugh Dickins Cc: Andrea Arcangeli Cc: "Kirill A . Shutemov" Cc: Davidlohr Bueso Cc: Prakash Sangappa Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/hugetlb.c | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index e073099083ca3..f46040aed2da1 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -3211,7 +3211,7 @@ static int is_hugetlb_entry_hwpoisoned(pte_t pte) int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, struct vm_area_struct *vma) { - pte_t *src_pte, *dst_pte, entry; + pte_t *src_pte, *dst_pte, entry, dst_entry; struct page *ptepage; unsigned long addr; int cow; @@ -3239,15 +3239,30 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, break; } - /* If the pagetables are shared don't copy or take references */ - if (dst_pte == src_pte) + /* + * If the pagetables are shared don't copy or take references. + * dst_pte == src_pte is the common case of src/dest sharing. + * + * However, src could have 'unshared' and dst shares with + * another vma. If dst_pte !none, this implies sharing. + * Check here before taking page table lock, and once again + * after taking the lock below. + */ + dst_entry = huge_ptep_get(dst_pte); + if ((dst_pte == src_pte) || !huge_pte_none(dst_entry)) continue; dst_ptl = huge_pte_lock(h, dst, dst_pte); src_ptl = huge_pte_lockptr(h, src, src_pte); spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING); entry = huge_ptep_get(src_pte); - if (huge_pte_none(entry)) { /* skip none entry */ + dst_entry = huge_ptep_get(dst_pte); + if (huge_pte_none(entry) || !huge_pte_none(dst_entry)) { + /* + * Skip if src entry none. Also, skip in the + * unlikely case dst entry !none as this implies + * sharing with another vma. + */ ; } else if (unlikely(is_hugetlb_entry_migration(entry) || is_hugetlb_entry_hwpoisoned(entry))) { -- GitLab From d85699259bcda421e28f22b72ef2bd9e9907d66b Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Fri, 16 Nov 2018 15:08:11 -0800 Subject: [PATCH 1585/2269] mm/swapfile.c: use kvzalloc for swap_info_struct allocation commit 873d7bcfd066663e3e50113dc4a0de19289b6354 upstream. Commit a2468cc9bfdf ("swap: choose swap device according to numa node") changed 'avail_lists' field of 'struct swap_info_struct' to an array. In popular linux distros it increased size of swap_info_struct up to 40 Kbytes and now swap_info_struct allocation requires order-4 page. Switch to kvzmalloc allows to avoid unexpected allocation failures. Link: http://lkml.kernel.org/r/fc23172d-3c75-21e2-d551-8b1808cbe593@virtuozzo.com Fixes: a2468cc9bfdf ("swap: choose swap device according to numa node") Signed-off-by: Vasily Averin Acked-by: Aaron Lu Acked-by: Michal Hocko Reviewed-by: Andrew Morton Cc: Huang Ying Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/swapfile.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mm/swapfile.c b/mm/swapfile.c index 8cbc7d6fd52e2..08e8cd21770c8 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -2830,7 +2830,7 @@ static struct swap_info_struct *alloc_swap_info(void) unsigned int type; int i; - p = kzalloc(sizeof(*p), GFP_KERNEL); + p = kvzalloc(sizeof(*p), GFP_KERNEL); if (!p) return ERR_PTR(-ENOMEM); @@ -2841,7 +2841,7 @@ static struct swap_info_struct *alloc_swap_info(void) } if (type >= MAX_SWAPFILES) { spin_unlock(&swap_lock); - kfree(p); + kvfree(p); return ERR_PTR(-EPERM); } if (type >= nr_swapfiles) { @@ -2855,7 +2855,7 @@ static struct swap_info_struct *alloc_swap_info(void) smp_wmb(); nr_swapfiles++; } else { - kfree(p); + kvfree(p); p = swap_info[type]; /* * Do not memset this entry: a racing procfs swap_next() -- GitLab From 011d0d8b5bba980c56ecbf8ad7482b721cf52f6f Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 14 Nov 2018 09:55:42 -0800 Subject: [PATCH 1586/2269] efi/arm/libstub: Pack FDT after populating it commit 72a58a63a164b4e9d2d914e65caeb551846883f1 upstream. Commit: 24d7c494ce46 ("efi/arm-stub: Round up FDT allocation to mapping size") increased the allocation size for the FDT image created by the stub to a fixed value of 2 MB, to simplify the former code that made several attempts with increasing values for the size. This is reasonable given that the allocation is of type EFI_LOADER_DATA, which is released to the kernel unless it is explicitly memblock_reserve()d by the early boot code. However, this allocation size leaked into the 'size' field of the FDT header metadata, and so the entire allocation remains occupied by the device tree binary, even if most of it is not used to store device tree information. So call fdt_pack() to shrink the FDT data structure to its minimum size after populating all the fields, so that the remaining memory is no longer wasted. Signed-off-by: Ard Biesheuvel Cc: # v4.12+ Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Fixes: 24d7c494ce46 ("efi/arm-stub: Round up FDT allocation to mapping size") Link: http://lkml.kernel.org/r/20181114175544.12860-4-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/efi/libstub/fdt.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/firmware/efi/libstub/fdt.c b/drivers/firmware/efi/libstub/fdt.c index 8830fa601e45d..0c0d2312f4a8a 100644 --- a/drivers/firmware/efi/libstub/fdt.c +++ b/drivers/firmware/efi/libstub/fdt.c @@ -158,6 +158,10 @@ static efi_status_t update_fdt(efi_system_table_t *sys_table, void *orig_fdt, return efi_status; } } + + /* shrink the FDT back to its minimum size */ + fdt_pack(fdt); + return EFI_SUCCESS; fdt_set_fail: -- GitLab From 7fb7a5ab36922bec92072199cb5085ff04882ed2 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 5 Aug 2018 13:48:07 +0100 Subject: [PATCH 1587/2269] drm/rockchip: Allow driver to be shutdown on reboot/kexec commit 7f3ef5dedb146e3d5063b6845781ad1bb59b92b5 upstream. Leaving the DRM driver enabled on reboot or kexec has the annoying effect of leaving the display generating transactions whilst the IOMMU has been shut down. In turn, the IOMMU driver (which shares its interrupt line with the VOP) starts warning either on shutdown or when entering the secondary kernel in the kexec case (nothing is expected on that front). A cheap way of ensuring that things are nicely shut down is to register a shutdown callback in the platform driver. Signed-off-by: Marc Zyngier Tested-by: Vicente Bergas Signed-off-by: Heiko Stuebner Link: https://patchwork.freedesktop.org/patch/msgid/20180805124807.18169-1-marc.zyngier@arm.com Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/rockchip/rockchip_drm_drv.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_drv.c b/drivers/gpu/drm/rockchip/rockchip_drm_drv.c index ff3d0f5efbb14..4cacb03f67332 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_drv.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_drv.c @@ -425,6 +425,11 @@ static int rockchip_drm_platform_remove(struct platform_device *pdev) return 0; } +static void rockchip_drm_platform_shutdown(struct platform_device *pdev) +{ + rockchip_drm_platform_remove(pdev); +} + static const struct of_device_id rockchip_drm_dt_ids[] = { { .compatible = "rockchip,display-subsystem", }, { /* sentinel */ }, @@ -434,6 +439,7 @@ MODULE_DEVICE_TABLE(of, rockchip_drm_dt_ids); static struct platform_driver rockchip_drm_platform_driver = { .probe = rockchip_drm_platform_probe, .remove = rockchip_drm_platform_remove, + .shutdown = rockchip_drm_platform_shutdown, .driver = { .name = "rockchip-drm", .of_match_table = rockchip_drm_dt_ids, -- GitLab From a02eadf0639bb5e007e7e270385b97e9c0a4a944 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 28 Aug 2018 14:16:23 -0500 Subject: [PATCH 1588/2269] drm/amdgpu: add missing CHIP_HAINAN in amdgpu_ucode_get_load_type MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit d9997b64c52b70bd98c48f443f068253621d1ffc upstream. This caused a confusing error message, but there is functionally no problem since the default method is DIRECT. Reviewed-by: Michel Dänzer Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c index 36c763310df5f..684769c9a48e2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c @@ -247,6 +247,7 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type) case CHIP_PITCAIRN: case CHIP_VERDE: case CHIP_OLAND: + case CHIP_HAINAN: return AMDGPU_FW_LOAD_DIRECT; #endif #ifdef CONFIG_DRM_AMDGPU_CIK -- GitLab From 438f22af0688cb512f800d61f98cb8bd9b54e323 Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Thu, 6 Sep 2018 17:43:21 -0400 Subject: [PATCH 1589/2269] drm/nouveau: Check backlight IDs are >= 0, not > 0 commit dc854914999d5d52ac1b31740cb0ea8d89d0372e upstream. Remember, ida IDs start at 0, not 1! Signed-off-by: Lyude Paul Reviewed-by: Karol Herbst Cc: stable@vger.kernel.org Signed-off-by: Ben Skeggs Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/nouveau/nouveau_backlight.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_backlight.c b/drivers/gpu/drm/nouveau/nouveau_backlight.c index 408b955e5c39a..6dd72bc32897a 100644 --- a/drivers/gpu/drm/nouveau/nouveau_backlight.c +++ b/drivers/gpu/drm/nouveau/nouveau_backlight.c @@ -116,7 +116,7 @@ nv40_backlight_init(struct drm_connector *connector) &nv40_bl_ops, &props); if (IS_ERR(bd)) { - if (bl_connector.id > 0) + if (bl_connector.id >= 0) ida_simple_remove(&bl_ida, bl_connector.id); return PTR_ERR(bd); } @@ -249,7 +249,7 @@ nv50_backlight_init(struct drm_connector *connector) nv_encoder, ops, &props); if (IS_ERR(bd)) { - if (bl_connector.id > 0) + if (bl_connector.id >= 0) ida_simple_remove(&bl_ida, bl_connector.id); return PTR_ERR(bd); } -- GitLab From b1fd8f59962239c7317933a0522dac8ef8c121a1 Mon Sep 17 00:00:00 2001 From: Stanislav Lisovskiy Date: Fri, 9 Nov 2018 11:00:12 +0200 Subject: [PATCH 1590/2269] drm/dp_mst: Check if primary mstb is null commit 23d8003907d094f77cf959228e2248d6db819fa7 upstream. Unfortunately drm_dp_get_mst_branch_device which is called from both drm_dp_mst_handle_down_rep and drm_dp_mst_handle_up_rep seem to rely on that mgr->mst_primary is not NULL, which seem to be wrong as it can be cleared with simultaneous mode set, if probing fails or in other case. mgr->lock mutex doesn't protect against that as it might just get assigned to NULL right before, not simultaneously. There are currently bugs 107738, 108616 bugs which crash in drm_dp_get_mst_branch_device, caused by this issue. v2: Refactored the code, as it was nicely noticed. Fixed Bugzilla bug numbers(second was 108616, but not 108816) and added links. [changed title and added stable cc] Signed-off-by: Lyude Paul Signed-off-by: Stanislav Lisovskiy Cc: stable@vger.kernel.org Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=108616 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=107738 Link: https://patchwork.freedesktop.org/patch/msgid/20181109090012.24438-1-stanislav.lisovskiy@intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_dp_mst_topology.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c index c022ab6e84bdd..2a4cf68373246 100644 --- a/drivers/gpu/drm/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/drm_dp_mst_topology.c @@ -1232,6 +1232,9 @@ static struct drm_dp_mst_branch *drm_dp_get_mst_branch_device(struct drm_dp_mst_ mutex_lock(&mgr->lock); mstb = mgr->mst_primary; + if (!mstb) + goto out; + for (i = 0; i < lct - 1; i++) { int shift = (i % 2) ? 0 : 4; int port_num = (rad[i / 2] >> shift) & 0xf; -- GitLab From b24df93f18880cde76a69532a23d1cb1f568b611 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 3 Oct 2018 17:49:51 +0300 Subject: [PATCH 1591/2269] drm/i915: Restore vblank interrupts earlier MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 7cada4d0b7a0fb813dbc9777fec092e9ed0546e9 upstream. Plane sanitation needs vblank interrupts (on account of CxSR disable). So let's restore vblank interrupts earlier. v2: Make it actually build v3: Add comment to explain why we need this (Daniel) Cc: stable@vger.kernel.org Cc: Dennis Tested-by: Dennis Tested-by: Peter Nowee Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105637 Fixes: b1e01595a66d ("drm/i915: Redo plane sanitation during readout") Signed-off-by: Ville Syrjälä Reviewed-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20181003144951.4397-1-ville.syrjala@linux.intel.com (cherry picked from commit 68bc30deac625b8be8d3950b30dc93d09a3645f5) Signed-off-by: Rodrigo Vivi Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/intel_display.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 2006ab44fbf9b..944cb3c2ba5c1 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -14829,13 +14829,9 @@ static void intel_sanitize_crtc(struct intel_crtc *crtc, I915_READ(reg) & ~PIPECONF_FRAME_START_DELAY_MASK); } - /* restore vblank interrupts to correct state */ - drm_crtc_vblank_reset(&crtc->base); if (crtc->active) { struct intel_plane *plane; - drm_crtc_vblank_on(&crtc->base); - /* Disable everything but the primary plane */ for_each_intel_plane_on_crtc(dev, crtc, plane) { const struct intel_plane_state *plane_state = @@ -15148,7 +15144,6 @@ intel_modeset_setup_hw_state(struct drm_device *dev, struct drm_modeset_acquire_ctx *ctx) { struct drm_i915_private *dev_priv = to_i915(dev); - enum pipe pipe; struct intel_crtc *crtc; struct intel_encoder *encoder; int i; @@ -15167,15 +15162,23 @@ intel_modeset_setup_hw_state(struct drm_device *dev, /* HW state is read out, now we need to sanitize this mess. */ get_encoder_power_domains(dev_priv); - intel_sanitize_plane_mapping(dev_priv); + /* + * intel_sanitize_plane_mapping() may need to do vblank + * waits, so we need vblank interrupts restored beforehand. + */ + for_each_intel_crtc(&dev_priv->drm, crtc) { + drm_crtc_vblank_reset(&crtc->base); - for_each_intel_encoder(dev, encoder) { - intel_sanitize_encoder(encoder); + if (crtc->active) + drm_crtc_vblank_on(&crtc->base); } - for_each_pipe(dev_priv, pipe) { - crtc = intel_get_crtc_for_pipe(dev_priv, pipe); + intel_sanitize_plane_mapping(dev_priv); + for_each_intel_encoder(dev, encoder) + intel_sanitize_encoder(encoder); + + for_each_intel_crtc(&dev_priv->drm, crtc) { intel_sanitize_crtc(crtc, ctx); intel_dump_pipe_config(crtc, crtc->config, "[setup_hw_state]"); -- GitLab From 9ab995dba88232317946124055eb89c915bd5f33 Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Mon, 8 Oct 2018 19:24:32 -0400 Subject: [PATCH 1592/2269] drm/i915: Don't unset intel_connector->mst_port commit 80c188695a77eddaa6e8885510ff4ef59fd478c3 upstream. Currently we set intel_connector->mst_port to NULL to signify that the MST port has been removed from the system so that we can prevent further action on the port such as connector probes, mode probing, etc. However, we're going to need access to intel_connector->mst_port in order to fixup ->best_encoder() so that it can always return the correct encoder for an MST port to prevent legacy DPMS prop changes from failing. This should be safe, so instead keep intel_connector->mst_port always set and instead just check the status of drm_connector->regustered to signify whether or not the connector has disappeared from the system. Changes since v2: - Add a comment to mst_port_gone (Jani Nikula) - Change mst_port_gone to a u8 instead of a bool, per the kernel bot. Apparently bool is discouraged in structs these days Changes since v4: - Don't use mst_port_gone at all! Just check if the connector is registered or not - Daniel Vetter Signed-off-by: Lyude Paul Reviewed-by: Daniel Vetter Cc: stable@vger.kernel.org Link: https://patchwork.freedesktop.org/patch/msgid/20181008232437.5571-4-lyude@redhat.com (cherry picked from commit 6ed5bb1fbad34382c8cfe9a9bf737e9a43053df5) Signed-off-by: Rodrigo Vivi Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/intel_dp_mst.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp_mst.c b/drivers/gpu/drm/i915/intel_dp_mst.c index 93fc8ab9bb318..8d73edc033fe6 100644 --- a/drivers/gpu/drm/i915/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/intel_dp_mst.c @@ -311,9 +311,8 @@ static int intel_dp_mst_get_ddc_modes(struct drm_connector *connector) struct edid *edid; int ret; - if (!intel_dp) { + if (!READ_ONCE(connector->registered)) return intel_connector_update_modes(connector, NULL); - } edid = drm_dp_mst_get_edid(connector, &intel_dp->mst_mgr, intel_connector->port); ret = intel_connector_update_modes(connector, edid); @@ -328,9 +327,10 @@ intel_dp_mst_detect(struct drm_connector *connector, bool force) struct intel_connector *intel_connector = to_intel_connector(connector); struct intel_dp *intel_dp = intel_connector->mst_port; - if (!intel_dp) + if (!READ_ONCE(connector->registered)) return connector_status_disconnected; - return drm_dp_mst_detect_port(connector, &intel_dp->mst_mgr, intel_connector->port); + return drm_dp_mst_detect_port(connector, &intel_dp->mst_mgr, + intel_connector->port); } static void @@ -370,7 +370,7 @@ intel_dp_mst_mode_valid(struct drm_connector *connector, int bpp = 24; /* MST uses fixed bpp */ int max_rate, mode_rate, max_lanes, max_link_clock; - if (!intel_dp) + if (!READ_ONCE(connector->registered)) return MODE_ERROR; max_link_clock = intel_dp_max_link_rate(intel_dp); @@ -399,7 +399,7 @@ static struct drm_encoder *intel_mst_atomic_best_encoder(struct drm_connector *c struct intel_dp *intel_dp = intel_connector->mst_port; struct intel_crtc *crtc = to_intel_crtc(state->crtc); - if (!intel_dp) + if (!READ_ONCE(connector->registered)) return NULL; return &intel_dp->mst_encoders[crtc->pipe]->base.base; } @@ -491,7 +491,6 @@ static void intel_dp_register_mst_connector(struct drm_connector *connector) static void intel_dp_destroy_mst_connector(struct drm_dp_mst_topology_mgr *mgr, struct drm_connector *connector) { - struct intel_connector *intel_connector = to_intel_connector(connector); struct drm_i915_private *dev_priv = to_i915(connector->dev); drm_connector_unregister(connector); @@ -499,10 +498,6 @@ static void intel_dp_destroy_mst_connector(struct drm_dp_mst_topology_mgr *mgr, if (dev_priv->fbdev) drm_fb_helper_remove_one_connector(&dev_priv->fbdev->helper, connector); - /* prevent race with the check in ->detect */ - drm_modeset_lock(&connector->dev->mode_config.connection_mutex, NULL); - intel_connector->mst_port = NULL; - drm_modeset_unlock(&connector->dev->mode_config.connection_mutex); drm_connector_unreference(connector); DRM_DEBUG_KMS("\n"); -- GitLab From 99e43c5415276df371aaab9765876cf4f71cdee4 Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Mon, 8 Oct 2018 19:24:33 -0400 Subject: [PATCH 1593/2269] drm/i915: Skip vcpi allocation for MSTB ports that are gone commit c02ba4ef16eefe663fdefcccaa57fad32d5481bf upstream. Since we need to be able to allow DPMS on->off prop changes after an MST port has disappeared from the system, we need to be able to make sure we can compute a config for the resulting atomic commit. Currently this is impossible when the port has disappeared, since the VCPI slot searching we try to do in intel_dp_mst_compute_config() will fail with -EINVAL. Since the only commits we want to allow on no-longer-present MST ports are ones that shut off display hardware, we already know that no VCPI allocations are needed. So, hardcode the VCPI slot count to 0 when intel_dp_mst_compute_config() is called on an MST port that's gone. Changes since V4: - Don't use mst_port_gone at all, just check whether or not the drm connector is registered - Daniel Vetter Signed-off-by: Lyude Paul Reviewed-by: Daniel Vetter Cc: stable@vger.kernel.org Link: https://patchwork.freedesktop.org/patch/msgid/20181008232437.5571-5-lyude@redhat.com (cherry picked from commit f67207d78ceaf98b7531bc22df6f21328559c8d4) Signed-off-by: Rodrigo Vivi Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/intel_dp_mst.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp_mst.c b/drivers/gpu/drm/i915/intel_dp_mst.c index 8d73edc033fe6..b83a8f2b2a3a7 100644 --- a/drivers/gpu/drm/i915/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/intel_dp_mst.c @@ -37,11 +37,11 @@ static bool intel_dp_mst_compute_config(struct intel_encoder *encoder, struct intel_dp_mst_encoder *intel_mst = enc_to_mst(&encoder->base); struct intel_digital_port *intel_dig_port = intel_mst->primary; struct intel_dp *intel_dp = &intel_dig_port->dp; - struct intel_connector *connector = - to_intel_connector(conn_state->connector); + struct drm_connector *connector = conn_state->connector; + void *port = to_intel_connector(connector)->port; struct drm_atomic_state *state = pipe_config->base.state; int bpp; - int lane_count, slots; + int lane_count, slots = 0; const struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode; int mst_pbn; bool reduce_m_n = drm_dp_has_quirk(&intel_dp->desc, @@ -66,17 +66,23 @@ static bool intel_dp_mst_compute_config(struct intel_encoder *encoder, pipe_config->port_clock = intel_dp_max_link_rate(intel_dp); - if (drm_dp_mst_port_has_audio(&intel_dp->mst_mgr, connector->port)) + if (drm_dp_mst_port_has_audio(&intel_dp->mst_mgr, port)) pipe_config->has_audio = true; mst_pbn = drm_dp_calc_pbn_mode(adjusted_mode->crtc_clock, bpp); pipe_config->pbn = mst_pbn; - slots = drm_dp_atomic_find_vcpi_slots(state, &intel_dp->mst_mgr, - connector->port, mst_pbn); - if (slots < 0) { - DRM_DEBUG_KMS("failed finding vcpi slots:%d\n", slots); - return false; + /* Zombie connectors can't have VCPI slots */ + if (READ_ONCE(connector->registered)) { + slots = drm_dp_atomic_find_vcpi_slots(state, + &intel_dp->mst_mgr, + port, + mst_pbn); + if (slots < 0) { + DRM_DEBUG_KMS("failed finding vcpi slots:%d\n", + slots); + return false; + } } intel_link_compute_m_n(bpp, lane_count, -- GitLab From 943f7d5be05c44999145c053b53ce330217d3992 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 12 Oct 2018 15:02:28 +0100 Subject: [PATCH 1594/2269] drm/i915: Large page offsets for pread/pwrite commit ab0d6a141843e0b4b2709dfd37b53468b5452c3a upstream. Handle integer overflow when computing the sub-page length for shmem backed pread/pwrite. Reported-by: Tvrtko Ursulin Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: stable@vger.kernel.org Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20181012140228.29783-1-chris@chris-wilson.co.uk (cherry picked from commit a5e856a5348f6cd50889d125c40bbeec7328e466) Signed-off-by: Rodrigo Vivi Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/i915_gem.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index f354cfe63f7be..1f19e6d9a7179 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -980,11 +980,7 @@ i915_gem_shmem_pread(struct drm_i915_gem_object *obj, offset = offset_in_page(args->offset); for (idx = args->offset >> PAGE_SHIFT; remain; idx++) { struct page *page = i915_gem_object_get_page(obj, idx); - int length; - - length = remain; - if (offset + length > PAGE_SIZE) - length = PAGE_SIZE - offset; + unsigned int length = min_t(u64, remain, PAGE_SIZE - offset); ret = shmem_pread(page, offset, length, user_data, page_to_phys(page) & obj_do_bit17_swizzling, @@ -1406,11 +1402,7 @@ i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj, offset = offset_in_page(args->offset); for (idx = args->offset >> PAGE_SHIFT; remain; idx++) { struct page *page = i915_gem_object_get_page(obj, idx); - int length; - - length = remain; - if (offset + length > PAGE_SIZE) - length = PAGE_SIZE - offset; + unsigned int length = min_t(u64, remain, PAGE_SIZE - offset); ret = shmem_pwrite(page, offset, length, user_data, page_to_phys(page) & obj_do_bit17_swizzling, -- GitLab From dfd13ef64df70fe49c7839b1ed3ee460759364de Mon Sep 17 00:00:00 2001 From: Clint Taylor Date: Thu, 25 Oct 2018 11:52:00 -0700 Subject: [PATCH 1595/2269] drm/i915/hdmi: Add HDMI 2.0 audio clock recovery N values commit 6503493145cba4413ecd3d4d153faeef4a1e9b85 upstream. HDMI 2.0 594Mhz modes were incorrectly selecting 25.200Mhz Automatic N value mode instead of HDMI specification values. V2: Fix 88.2 Hz N value Cc: Jani Nikula Cc: stable@vger.kernel.org Signed-off-by: Clint Taylor Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/1540493521-1746-2-git-send-email-clinton.a.taylor@intel.com (cherry picked from commit 5a400aa3c562c4a726b4da286e63c96db905ade1) Signed-off-by: Joonas Lahtinen Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/intel_audio.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_audio.c b/drivers/gpu/drm/i915/intel_audio.c index 9240fa79de7ca..bc9f735395bc2 100644 --- a/drivers/gpu/drm/i915/intel_audio.c +++ b/drivers/gpu/drm/i915/intel_audio.c @@ -134,6 +134,9 @@ static const struct { /* HDMI N/CTS table */ #define TMDS_297M 297000 #define TMDS_296M 296703 +#define TMDS_594M 594000 +#define TMDS_593M 593407 + static const struct { int sample_rate; int clock; @@ -154,6 +157,20 @@ static const struct { { 176400, TMDS_297M, 18816, 247500 }, { 192000, TMDS_296M, 23296, 281250 }, { 192000, TMDS_297M, 20480, 247500 }, + { 44100, TMDS_593M, 8918, 937500 }, + { 44100, TMDS_594M, 9408, 990000 }, + { 48000, TMDS_593M, 5824, 562500 }, + { 48000, TMDS_594M, 6144, 594000 }, + { 32000, TMDS_593M, 5824, 843750 }, + { 32000, TMDS_594M, 3072, 445500 }, + { 88200, TMDS_593M, 17836, 937500 }, + { 88200, TMDS_594M, 18816, 990000 }, + { 96000, TMDS_593M, 11648, 562500 }, + { 96000, TMDS_594M, 12288, 594000 }, + { 176400, TMDS_593M, 35672, 937500 }, + { 176400, TMDS_594M, 37632, 990000 }, + { 192000, TMDS_593M, 23296, 562500 }, + { 192000, TMDS_594M, 24576, 594000 }, }; /* get AUD_CONFIG_PIXEL_CLOCK_HDMI_* value for mode */ -- GitLab From 46c397cd043fce2a2248c9025211eb9c5a16f845 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 5 Nov 2018 21:46:04 +0200 Subject: [PATCH 1596/2269] drm/i915: Don't oops during modeset shutdown after lpe audio deinit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 6a8915d0f8cf323e1beb792a33095cf652db4056 upstream. We deinit the lpe audio device before we call drm_atomic_helper_shutdown(), which means the platform device may already be gone when it comes time to shut down the crtc. As we don't know when the last reference to the platform device gets dropped by the audio driver we can't assume that the device and its data are still around when turning off the crtc. Mark the platform device as gone as soon as we do the audio deinit. Cc: stable@vger.kernel.org Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20181105194604.6994-1-ville.syrjala@linux.intel.com Reviewed-by: Chris Wilson (cherry picked from commit f45a7977d1140c11f334e01a9f77177ed68e3bfa) Signed-off-by: Joonas Lahtinen Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/intel_lpe_audio.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_lpe_audio.c b/drivers/gpu/drm/i915/intel_lpe_audio.c index 8a541d0e3e80d..30280323e1d85 100644 --- a/drivers/gpu/drm/i915/intel_lpe_audio.c +++ b/drivers/gpu/drm/i915/intel_lpe_audio.c @@ -303,8 +303,10 @@ void intel_lpe_audio_teardown(struct drm_i915_private *dev_priv) lpe_audio_platdev_destroy(dev_priv); irq_free_desc(dev_priv->lpe_audio.irq); -} + dev_priv->lpe_audio.irq = -1; + dev_priv->lpe_audio.platdev = NULL; +} /** * intel_lpe_audio_notify() - notify lpe audio event -- GitLab From a5d74ffb65754b4fc751289a287f21b26838df40 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 2 Nov 2018 16:12:09 +0000 Subject: [PATCH 1597/2269] drm/i915: Mark pin flags as u64 commit 0014868b9c3c1dda1de6711cf58c3486fb422d07 upstream. Since the flags are being used to operate on a u64 variable, they too need to be marked as such so that the inverses are full width (and not zero extended on 32b kernels and bdw+). Reported-by: Sergii Romantsov Signed-off-by: Chris Wilson Cc: stable@vger.kernel.org Reviewed-by: Lionel Landwerlin Reviewed-by: Michal Wajdeczko Link: https://patchwork.freedesktop.org/patch/msgid/20181102161232.17742-2-chris@chris-wilson.co.uk (cherry picked from commit 83b466b1dc5f0b4d33f0a901e8b00197a8f3582d) Signed-off-by: Joonas Lahtinen Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/i915_gem_gtt.h | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index b4e3aa7c0ce19..0dbbe840f5f01 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -584,20 +584,20 @@ int i915_gem_gtt_insert(struct i915_address_space *vm, u64 start, u64 end, unsigned int flags); /* Flags used by pin/bind&friends. */ -#define PIN_NONBLOCK BIT(0) -#define PIN_MAPPABLE BIT(1) -#define PIN_ZONE_4G BIT(2) -#define PIN_NONFAULT BIT(3) -#define PIN_NOEVICT BIT(4) - -#define PIN_MBZ BIT(5) /* I915_VMA_PIN_OVERFLOW */ -#define PIN_GLOBAL BIT(6) /* I915_VMA_GLOBAL_BIND */ -#define PIN_USER BIT(7) /* I915_VMA_LOCAL_BIND */ -#define PIN_UPDATE BIT(8) - -#define PIN_HIGH BIT(9) -#define PIN_OFFSET_BIAS BIT(10) -#define PIN_OFFSET_FIXED BIT(11) +#define PIN_NONBLOCK BIT_ULL(0) +#define PIN_MAPPABLE BIT_ULL(1) +#define PIN_ZONE_4G BIT_ULL(2) +#define PIN_NONFAULT BIT_ULL(3) +#define PIN_NOEVICT BIT_ULL(4) + +#define PIN_MBZ BIT_ULL(5) /* I915_VMA_PIN_OVERFLOW */ +#define PIN_GLOBAL BIT_ULL(6) /* I915_VMA_GLOBAL_BIND */ +#define PIN_USER BIT_ULL(7) /* I915_VMA_LOCAL_BIND */ +#define PIN_UPDATE BIT_ULL(8) + +#define PIN_HIGH BIT_ULL(9) +#define PIN_OFFSET_BIAS BIT_ULL(10) +#define PIN_OFFSET_FIXED BIT_ULL(11) #define PIN_OFFSET_MASK (-I915_GTT_PAGE_SIZE) #endif -- GitLab From 6576364f6496f5a95f8384d57fcb2b5377765a44 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 8 Nov 2018 08:17:38 +0000 Subject: [PATCH 1598/2269] drm/i915/execlists: Force write serialisation into context image vs execution commit 0a823e8fd4fd67726697854578f3584ee3a49b1d upstream. Ensure that the writes into the context image are completed prior to the register mmio to trigger execution. Although previously we were assured by the SDM that all writes are flushed before an uncached memory transaction (our mmio write to submit the context to HW for execution), we have empirical evidence to believe that this is not actually the case. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=108656 References: https://bugs.freedesktop.org/show_bug.cgi?id=108315 References: https://bugs.freedesktop.org/show_bug.cgi?id=106887 Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Tvrtko Ursulin Acked-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20181108081740.25615-1-chris@chris-wilson.co.uk Cc: stable@vger.kernel.org (cherry picked from commit 987abd5c62f92ee4970b45aa077f47949974e615) Signed-off-by: Joonas Lahtinen Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/intel_lrc.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index d638b641b7601..0775e71ea95b9 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -325,7 +325,8 @@ static u64 execlists_update_context(struct drm_i915_gem_request *rq) reg_state[CTX_RING_TAIL+1] = intel_ring_set_tail(rq->ring, rq->tail); - /* True 32b PPGTT with dynamic page allocation: update PDP + /* + * True 32b PPGTT with dynamic page allocation: update PDP * registers and point the unallocated PDPs to scratch page. * PML4 is allocated during ppgtt init, so this is not needed * in 48-bit mode. @@ -333,6 +334,17 @@ static u64 execlists_update_context(struct drm_i915_gem_request *rq) if (ppgtt && !i915_vm_is_48bit(&ppgtt->base)) execlists_update_context_pdps(ppgtt, reg_state); + /* + * Make sure the context image is complete before we submit it to HW. + * + * Ostensibly, writes (including the WCB) should be flushed prior to + * an uncached write such as our mmio register access, the empirical + * evidence (esp. on Braswell) suggests that the WC write into memory + * may not be visible to the HW prior to the completion of the UC + * register write and that we may begin execution from the context + * before its image is complete leading to invalid PD chasing. + */ + wmb(); return ce->lrc_desc; } -- GitLab From 9891f6816000e7addacb6f88986cc2d4f9004be0 Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Wed, 31 Oct 2018 16:11:49 -0700 Subject: [PATCH 1599/2269] CONFIG_XEN_PV breaks xen_create_contiguous_region on ARM commit f9005571701920551bcf54a500973fb61f2e1eda upstream. xen_create_contiguous_region has now only an implementation if CONFIG_XEN_PV is defined. However, on ARM we never set CONFIG_XEN_PV but we do have an implementation of xen_create_contiguous_region which is required for swiotlb-xen to work correctly (although it just sets *dma_handle). [backport: remove change to xen_remap_pfn] Cc: # 4.12 Fixes: 16624390816c ("xen: create xen_create/destroy_contiguous_region() stubs for PVHVM only builds") Signed-off-by: Stefano Stabellini Reviewed-by: Juergen Gross CC: Jeff.Kubascik@dornerworks.com CC: Jarvis.Roach@dornerworks.com CC: Nathan.Studer@dornerworks.com CC: vkuznets@redhat.com CC: boris.ostrovsky@oracle.com CC: jgross@suse.com CC: julien.grall@arm.com Signed-off-by: Juergen Gross Signed-off-by: Greg Kroah-Hartman --- include/xen/xen-ops.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h index a95e65ec83c35..8d3786f290d93 100644 --- a/include/xen/xen-ops.h +++ b/include/xen/xen-ops.h @@ -40,7 +40,7 @@ int xen_setup_shutdown_event(void); extern unsigned long *xen_contiguous_bitmap; -#ifdef CONFIG_XEN_PV +#if defined(CONFIG_XEN_PV) || defined(CONFIG_ARM) || defined(CONFIG_ARM64) int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order, unsigned int address_bits, dma_addr_t *dma_handle); -- GitLab From cd09665ab60b6dcfd416999202f0e9f5e6157797 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 31 Oct 2018 12:15:23 +0100 Subject: [PATCH 1600/2269] ovl: check whiteout in ovl_create_over_whiteout() commit 5e1275808630ea3b2c97c776f40e475017535f72 upstream. Kaixuxia repors that it's possible to crash overlayfs by removing the whiteout on the upper layer before creating a directory over it. This is a reproducer: mkdir lower upper work merge touch lower/file mount -t overlay overlay -olowerdir=lower,upperdir=upper,workdir=work merge rm merge/file ls -al merge/file rm upper/file ls -al merge/ mkdir merge/file Before commencing with a vfs_rename(..., RENAME_EXCHANGE) verify that the lookup of "upper" is positive and is a whiteout, and return ESTALE otherwise. Reported by: kaixuxia Signed-off-by: Miklos Szeredi Fixes: e9be9d5e76e3 ("overlay filesystem") Cc: # v3.18 Signed-off-by: Greg Kroah-Hartman --- fs/overlayfs/dir.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index 0568898393f23..ef11fa7b869e7 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -392,6 +392,10 @@ static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode, if (IS_ERR(upper)) goto out_dput; + err = -ESTALE; + if (d_is_negative(upper) || !IS_WHITEOUT(d_inode(upper))) + goto out_dput2; + err = ovl_create_real(wdir, newdentry, cattr, hardlink, true); if (err) goto out_dput2; -- GitLab From c9b8d580b3fb0ab65d37c372aef19a318fda3199 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Tue, 16 Jan 2018 13:47:16 +0900 Subject: [PATCH 1601/2269] printk: Never set console_may_schedule in console_trylock() commit fd5f7cde1b85d4c8e09ca46ce948e008a2377f64 upstream. This patch, basically, reverts commit 6b97a20d3a79 ("printk: set may_schedule for some of console_trylock() callers"). That commit was a mistake, it introduced a big dependency on the scheduler, by enabling preemption under console_sem in printk()->console_unlock() path, which is rather too critical. The patch did not significantly reduce the possibilities of printk() lockups, but made it possible to stall printk(), as has been reported by Tetsuo Handa [1]. Another issues is that preemption under console_sem also messes up with Steven Rostedt's hand off scheme, by making it possible to sleep with console_sem both in console_unlock() and in vprintk_emit(), after acquiring the console_sem ownership (anywhere between printk_safe_exit_irqrestore() in console_trylock_spinning() and printk_safe_enter_irqsave() in console_unlock()). This makes hand off less likely and, at the same time, may result in a significant amount of pending logbuf messages. Preempted console_sem owner makes it impossible for other CPUs to emit logbuf messages, but does not make it impossible for other CPUs to append new messages to the logbuf. Reinstate the old behavior and make printk() non-preemptible. Should any printk() lockup reports arrive they must be handled in a different way. [1] http://lkml.kernel.org/r/201603022101.CAH73907.OVOOMFHFFtQJSL%20()%20I-love%20!%20SAKURA%20!%20ne%20!%20jp Fixes: 6b97a20d3a79 ("printk: set may_schedule for some of console_trylock() callers") Link: http://lkml.kernel.org/r/20180116044716.GE6607@jagdpanzerIV To: Tetsuo Handa Cc: Sergey Senozhatsky Cc: Tejun Heo Cc: akpm@linux-foundation.org Cc: linux-mm@kvack.org Cc: Cong Wang Cc: Dave Hansen Cc: Johannes Weiner Cc: Mel Gorman Cc: Michal Hocko Cc: Vlastimil Babka Cc: Peter Zijlstra Cc: Linus Torvalds Cc: Jan Kara Cc: Mathieu Desnoyers Cc: Byungchul Park Cc: Pavel Machek Cc: linux-kernel@vger.kernel.org Signed-off-by: Sergey Senozhatsky Reported-by: Tetsuo Handa Reviewed-by: Steven Rostedt (VMware) Signed-off-by: Petr Mladek Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- kernel/printk/printk.c | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 7161312593ddc..a9cf2e15f6a37 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -1762,6 +1762,12 @@ asmlinkage int vprintk_emit(int facility, int level, /* If called from the scheduler, we can not call up(). */ if (!in_sched) { + /* + * Disable preemption to avoid being preempted while holding + * console_sem which would prevent anyone from printing to + * console + */ + preempt_disable(); /* * Try to acquire and then immediately release the console * semaphore. The release will print out buffers and wake up @@ -1769,6 +1775,7 @@ asmlinkage int vprintk_emit(int facility, int level, */ if (console_trylock()) console_unlock(); + preempt_enable(); } return printed_len; @@ -2083,20 +2090,7 @@ int console_trylock(void) return 0; } console_locked = 1; - /* - * When PREEMPT_COUNT disabled we can't reliably detect if it's - * safe to schedule (e.g. calling printk while holding a spin_lock), - * because preempt_disable()/preempt_enable() are just barriers there - * and preempt_count() is always 0. - * - * RCU read sections have a separate preemption counter when - * PREEMPT_RCU enabled thus we must take extra care and check - * rcu_preempt_depth(), otherwise RCU read sections modify - * preempt_count(). - */ - console_may_schedule = !oops_in_progress && - preemptible() && - !rcu_preempt_depth(); + console_may_schedule = 0; return 1; } EXPORT_SYMBOL(console_trylock); -- GitLab From a8ba76fc043e1a34e98055f6e818c67ddc3c05c2 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 12 Apr 2018 09:16:04 -0600 Subject: [PATCH 1602/2269] nvme-loop: fix kernel oops in case of unhandled command commit 11d9ea6f2ca69237d35d6c55755beba3e006b106 upstream. When nvmet_req_init() fails, __nvmet_req_complete() is called to handle the target request via .queue_response(), so nvme_loop_queue_response() shouldn't be called again for handling the failure. This patch fixes this case by the following way: - move blk_mq_start_request() before nvmet_req_init(), so nvme_loop_queue_response() may work well to complete this host request - don't call nvme_cleanup_cmd() which is done in nvme_loop_complete_rq() - don't call nvme_loop_queue_response() which is done via .queue_response() Signed-off-by: Ming Lei Reviewed-by: Christoph Hellwig [trimmed changelog] Signed-off-by: Keith Busch Signed-off-by: Jens Axboe Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- drivers/nvme/target/loop.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index 02aff5cc48bf2..3388d2788fe05 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -183,15 +183,12 @@ static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx, if (ret) return ret; + blk_mq_start_request(req); iod->cmd.common.flags |= NVME_CMD_SGL_METABUF; iod->req.port = nvmet_loop_port; if (!nvmet_req_init(&iod->req, &queue->nvme_cq, - &queue->nvme_sq, &nvme_loop_ops)) { - nvme_cleanup_cmd(req); - blk_mq_start_request(req); - nvme_loop_queue_response(&iod->req); + &queue->nvme_sq, &nvme_loop_ops)) return BLK_STS_OK; - } if (blk_rq_bytes(req)) { iod->sg_table.sgl = iod->first_sgl; @@ -204,8 +201,6 @@ static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx, iod->req.sg_cnt = blk_rq_map_sg(req->q, req, iod->sg_table.sgl); } - blk_mq_start_request(req); - schedule_work(&iod->work); return BLK_STS_OK; } -- GitLab From dabaf70e3ad6ec7d327bfa98916163985a103e4a Mon Sep 17 00:00:00 2001 From: Doug Berger Date: Tue, 24 Oct 2017 12:54:47 -0700 Subject: [PATCH 1603/2269] gpio: brcmstb: release the bgpio lock during irq handlers commit 142c168e0e50164e67c9399c28dedd65a307cfe5 upstream. The basic memory-mapped GPIO controller lock must be released before calling the registered GPIO interrupt handlers to allow the interrupt handlers to access the hardware. Examples of why a GPIO interrupt handler might want to access the GPIO hardware include an interrupt that is configured to trigger on rising and falling edges that needs to read the current level of the input to know how to respond, or an interrupt that causes a change in a GPIO output in the same bank. If the lock is not released before enterring the handler the hardware accesses will deadlock when they attempt to grab the lock. Since the lock is only needed to protect the calculation of unmasked pending interrupts create a dedicated function to perform this and hide the complexity. Fixes: 19a7b6940b78 ("gpio: brcmstb: Add interrupt and wakeup source support") Signed-off-by: Doug Berger Reviewed-by: Florian Fainelli Acked-by: Gregory Fong Signed-off-by: Linus Walleij Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- drivers/gpio/gpio-brcmstb.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/drivers/gpio/gpio-brcmstb.c b/drivers/gpio/gpio-brcmstb.c index dd0308cc8bb0b..bfc9b1afa388f 100644 --- a/drivers/gpio/gpio-brcmstb.c +++ b/drivers/gpio/gpio-brcmstb.c @@ -63,6 +63,21 @@ brcmstb_gpio_gc_to_priv(struct gpio_chip *gc) return bank->parent_priv; } +static unsigned long +brcmstb_gpio_get_active_irqs(struct brcmstb_gpio_bank *bank) +{ + void __iomem *reg_base = bank->parent_priv->reg_base; + unsigned long status; + unsigned long flags; + + spin_lock_irqsave(&bank->gc.bgpio_lock, flags); + status = bank->gc.read_reg(reg_base + GIO_STAT(bank->id)) & + bank->gc.read_reg(reg_base + GIO_MASK(bank->id)); + spin_unlock_irqrestore(&bank->gc.bgpio_lock, flags); + + return status; +} + static void brcmstb_gpio_set_imask(struct brcmstb_gpio_bank *bank, unsigned int offset, bool enable) { @@ -205,11 +220,8 @@ static void brcmstb_gpio_irq_bank_handler(struct brcmstb_gpio_bank *bank) struct irq_domain *irq_domain = bank->gc.irqdomain; void __iomem *reg_base = priv->reg_base; unsigned long status; - unsigned long flags; - spin_lock_irqsave(&bank->gc.bgpio_lock, flags); - while ((status = bank->gc.read_reg(reg_base + GIO_STAT(bank->id)) & - bank->gc.read_reg(reg_base + GIO_MASK(bank->id)))) { + while ((status = brcmstb_gpio_get_active_irqs(bank))) { int bit; for_each_set_bit(bit, &status, 32) { @@ -224,7 +236,6 @@ static void brcmstb_gpio_irq_bank_handler(struct brcmstb_gpio_bank *bank) generic_handle_irq(irq_find_mapping(irq_domain, bit)); } } - spin_unlock_irqrestore(&bank->gc.bgpio_lock, flags); } /* Each UPG GIO block has one IRQ for all banks */ -- GitLab From a21f3c11b41beb0b3b7d5de28d3d5a0692c5b22b Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 21 Nov 2018 09:24:18 +0100 Subject: [PATCH 1604/2269] Linux 4.14.82 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 2fe1424d61d22..cac5323bc95de 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 81 +SUBLEVEL = 82 EXTRAVERSION = NAME = Petit Gorille -- GitLab From d8dce63b0484c92387f1b24678b11e8cdf973500 Mon Sep 17 00:00:00 2001 From: Chris Redpath Date: Wed, 21 Nov 2018 20:38:47 +0000 Subject: [PATCH 1605/2269] ANDROID: sched/debug: Make Energy Model read-only The energy model is exposed through the sched_domain entry in /proc/sys/kernel. This option only appears when SCHED_DEBUG is enabled, and is only writeable by the owner (root). Various userspace tools may read the energy model, including LISA (which uses it to brute-force calculate optimal energy placement for synthetic tasks), but none change it. Since there is no known use case for changing the energy model, change these procfs nodes to be read-only instead. This removes the need to add validation for values written into these files. Bug: 118694140 Change-Id: Ie997b5c4c9e751b21cc94d82b2b8b99640c524fa Signed-off-by: Chris Redpath --- kernel/sched/debug.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index 4e75fda646c06..5b0e1d785be64 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c @@ -275,14 +275,14 @@ sd_alloc_ctl_energy_table(struct sched_group_energy *sge) return NULL; set_table_entry(&table[0], "nr_idle_states", &sge->nr_idle_states, - sizeof(int), 0644, proc_dointvec_minmax, false); + sizeof(int), 0444, proc_dointvec_minmax, false); set_table_entry(&table[1], "idle_states", &sge->idle_states[0].power, - sge->nr_idle_states*sizeof(struct idle_state), 0644, + sge->nr_idle_states*sizeof(struct idle_state), 0444, proc_doulongvec_minmax, false); set_table_entry(&table[2], "nr_cap_states", &sge->nr_cap_states, - sizeof(int), 0644, proc_dointvec_minmax, false); + sizeof(int), 0444, proc_dointvec_minmax, false); set_table_entry(&table[3], "cap_states", &sge->cap_states[0].cap, - sge->nr_cap_states*sizeof(struct capacity_state), 0644, + sge->nr_cap_states*sizeof(struct capacity_state), 0444, proc_doulongvec_minmax, false); return table; -- GitLab From ceb85d878ddce8f28394f91b2fcb26b1f8e8006e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EB=B0=B0=EC=84=9D=EC=A7=84?= Date: Fri, 9 Nov 2018 16:53:06 -0800 Subject: [PATCH 1606/2269] flow_dissector: do not dissect l4 ports for fragments MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 62230715fd2453b3ba948c9d83cfb3ada9169169 ] Only first fragment has the sport/dport information, not the following ones. If we want consistent hash for all fragments, we need to ignore ports even for first fragment. This bug is visible for IPv6 traffic, if incoming fragments do not have a flow label, since skb_get_hash() will give different results for first fragment and following ones. It is also visible if any routing rule wants dissection and sport or dport. See commit 5e5d6fed3741 ("ipv6: route: dissect flow in input path if fib rules need it") for details. [edumazet] rewrote the changelog completely. Fixes: 06635a35d13d ("flow_dissect: use programable dissector in skb_flow_dissect and friends") Signed-off-by: 배석진 Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/flow_dissector.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index d8796a7874b66..e2e716003ede7 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -838,8 +838,8 @@ ip_proto_again: break; } - if (dissector_uses_key(flow_dissector, - FLOW_DISSECTOR_KEY_PORTS)) { + if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_PORTS) && + !(key_control->flags & FLOW_DIS_IS_FRAGMENT)) { key_ports = skb_flow_dissector_target(flow_dissector, FLOW_DISSECTOR_KEY_PORTS, target_container); -- GitLab From d6e976571c9f844872ba357408c7cec868bbde4a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Wed, 7 Nov 2018 17:50:52 +0100 Subject: [PATCH 1607/2269] ibmvnic: fix accelerated VLAN handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit e84b47941e15e6666afb8ee8b21d1c3fc1a013af ] Don't request tag insertion when it isn't present in outgoing skb. Signed-off-by: Michał Mirosław Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/ibm/ibmvnic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 046af22a37cbc..5c7134ccc1fdb 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -1259,7 +1259,7 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) tx_crq.v1.sge_len = cpu_to_be32(skb->len); tx_crq.v1.ioba = cpu_to_be64(data_dma_addr); - if (adapter->vlan_header_insertion) { + if (adapter->vlan_header_insertion && skb_vlan_tag_present(skb)) { tx_crq.v1.flags2 |= IBMVNIC_TX_VLAN_INSERT; tx_crq.v1.vlan_id = cpu_to_be16(skb->vlan_tci); } -- GitLab From dbd5f23bd962198b298d4589565c5c5ada62e3b8 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Fri, 16 Nov 2018 16:58:19 +0100 Subject: [PATCH 1608/2269] ip_tunnel: don't force DF when MTU is locked [ Upstream commit 16f7eb2b77b55da816c4e207f3f9440a8cafc00a ] The various types of tunnels running over IPv4 can ask to set the DF bit to do PMTU discovery. However, PMTU discovery is subject to the threshold set by the net.ipv4.route.min_pmtu sysctl, and is also disabled on routes with "mtu lock". In those cases, we shouldn't set the DF bit. This patch makes setting the DF bit conditional on the route's MTU locking state. This issue seems to be older than git history. Signed-off-by: Sabrina Dubroca Reviewed-by: Stefano Brivio Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ip_tunnel_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index 2f39479be92f4..423091727e15c 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -80,7 +80,7 @@ void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb, iph->version = 4; iph->ihl = sizeof(struct iphdr) >> 2; - iph->frag_off = df; + iph->frag_off = ip_mtu_locked(&rt->dst) ? 0 : df; iph->protocol = proto; iph->tos = tos; iph->daddr = dst; -- GitLab From 590f89a7ee6b73c928f886010fe2d7a08384874e Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sun, 18 Nov 2018 10:45:30 -0800 Subject: [PATCH 1609/2269] ipv6: Fix PMTU updates for UDP/raw sockets in presence of VRF [ Upstream commit 7ddacfa564870cdd97275fd87decb6174abc6380 ] Preethi reported that PMTU discovery for UDP/raw applications is not working in the presence of VRF when the socket is not bound to a device. The problem is that ip6_sk_update_pmtu does not consider the L3 domain of the skb device if the socket is not bound. Update the function to set oif to the L3 master device if relevant. Fixes: ca254490c8df ("net: Add VRF support to IPv6 stack") Reported-by: Preethi Ramachandra Signed-off-by: David Ahern Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/route.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 30204bc2fc480..74dd35d6567c9 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1547,10 +1547,13 @@ EXPORT_SYMBOL_GPL(ip6_update_pmtu); void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu) { + int oif = sk->sk_bound_dev_if; struct dst_entry *dst; - ip6_update_pmtu(skb, sock_net(sk), mtu, - sk->sk_bound_dev_if, sk->sk_mark, sk->sk_uid); + if (!oif && skb->dev) + oif = l3mdev_master_ifindex(skb->dev); + + ip6_update_pmtu(skb, sock_net(sk), mtu, oif, sk->sk_mark, sk->sk_uid); dst = __sk_dst_get(sk); if (!dst || !dst->obsolete || -- GitLab From 8b9c0eb1ff03e9a026a9f3ca2996c03fc33af182 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 17 Nov 2018 21:57:02 -0800 Subject: [PATCH 1610/2269] net-gro: reset skb->pkt_type in napi_reuse_skb() [ Upstream commit 33d9a2c72f086cbf1087b2fd2d1a15aa9df14a7f ] eth_type_trans() assumes initial value for skb->pkt_type is PACKET_HOST. This is indeed the value right after a fresh skb allocation. However, it is possible that GRO merged a packet with a different value (like PACKET_OTHERHOST in case macvlan is used), so we need to make sure napi->skb will have pkt_type set back to PACKET_HOST. Otherwise, valid packets might be dropped by the stack because their pkt_type is not PACKET_HOST. napi_reuse_skb() was added in commit 96e93eab2033 ("gro: Add internal interfaces for VLAN"), but this bug always has been there. Fixes: 96e93eab2033 ("gro: Add internal interfaces for VLAN") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/dev.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/core/dev.c b/net/core/dev.c index e8a66ad6d07c2..4337450a5fdbc 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4993,6 +4993,10 @@ static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb) skb->vlan_tci = 0; skb->dev = napi->dev; skb->skb_iif = 0; + + /* eth_type_trans() assumes pkt_type is PACKET_HOST */ + skb->pkt_type = PACKET_HOST; + skb->encapsulation = 0; skb_shinfo(skb)->gso_type = 0; skb->truesize = SKB_TRUESIZE(skb_end_offset(skb)); -- GitLab From c7051eb3acbe0908f29fa56b464a6c79c067f93f Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 18 Nov 2018 15:21:53 +0800 Subject: [PATCH 1611/2269] sctp: not allow to set asoc prsctp_enable by sockopt [ Upstream commit cc3ccf26f0649089b3a34a2781977755ea36e72c ] As rfc7496#section4.5 says about SCTP_PR_SUPPORTED: This socket option allows the enabling or disabling of the negotiation of PR-SCTP support for future associations. For existing associations, it allows one to query whether or not PR-SCTP support was negotiated on a particular association. It means only sctp sock's prsctp_enable can be set. Note that for the limitation of SCTP_{CURRENT|ALL}_ASSOC, we will add it when introducing SCTP_{FUTURE|CURRENT|ALL}_ASSOC for linux sctp in another patchset. v1->v2: - drop the params.assoc_id check as Neil suggested. Fixes: 28aa4c26fce2 ("sctp: add SCTP_PR_SUPPORTED on sctp sockopt") Reported-by: Ying Xu Signed-off-by: Xin Long Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sctp/socket.c | 26 +++++--------------------- 1 file changed, 5 insertions(+), 21 deletions(-) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index d87d56978b4c9..6a25323705459 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -3750,32 +3750,16 @@ static int sctp_setsockopt_pr_supported(struct sock *sk, unsigned int optlen) { struct sctp_assoc_value params; - struct sctp_association *asoc; - int retval = -EINVAL; if (optlen != sizeof(params)) - goto out; - - if (copy_from_user(¶ms, optval, optlen)) { - retval = -EFAULT; - goto out; - } - - asoc = sctp_id2assoc(sk, params.assoc_id); - if (asoc) { - asoc->prsctp_enable = !!params.assoc_value; - } else if (!params.assoc_id) { - struct sctp_sock *sp = sctp_sk(sk); + return -EINVAL; - sp->ep->prsctp_enable = !!params.assoc_value; - } else { - goto out; - } + if (copy_from_user(¶ms, optval, optlen)) + return -EFAULT; - retval = 0; + sctp_sk(sk)->ep->prsctp_enable = !!params.assoc_value; -out: - return retval; + return 0; } static int sctp_setsockopt_default_prinfo(struct sock *sk, -- GitLab From 8e201ff7140a23ea154cad8d348d1e6e79cf0bc7 Mon Sep 17 00:00:00 2001 From: Siva Reddy Kallam Date: Tue, 20 Nov 2018 10:04:04 +0530 Subject: [PATCH 1612/2269] tg3: Add PHY reset for 5717/5719/5720 in change ring and flow control paths [ Upstream commit 59663e42199c93d1d7314d1446f6782fc4b1eb81 ] This patch has the fix to avoid PHY lockup with 5717/5719/5720 in change ring and flow control paths. This patch solves the RX hang while doing continuous ring or flow control parameters with heavy traffic from peer. Signed-off-by: Siva Reddy Kallam Acked-by: Michael Chan Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/broadcom/tg3.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index 1b1d2a67f412c..bc0221eafe5c4 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -12395,6 +12395,7 @@ static int tg3_set_ringparam(struct net_device *dev, struct ethtool_ringparam *e { struct tg3 *tp = netdev_priv(dev); int i, irq_sync = 0, err = 0; + bool reset_phy = false; if ((ering->rx_pending > tp->rx_std_ring_mask) || (ering->rx_jumbo_pending > tp->rx_jmb_ring_mask) || @@ -12426,7 +12427,13 @@ static int tg3_set_ringparam(struct net_device *dev, struct ethtool_ringparam *e if (netif_running(dev)) { tg3_halt(tp, RESET_KIND_SHUTDOWN, 1); - err = tg3_restart_hw(tp, false); + /* Reset PHY to avoid PHY lock up */ + if (tg3_asic_rev(tp) == ASIC_REV_5717 || + tg3_asic_rev(tp) == ASIC_REV_5719 || + tg3_asic_rev(tp) == ASIC_REV_5720) + reset_phy = true; + + err = tg3_restart_hw(tp, reset_phy); if (!err) tg3_netif_start(tp); } @@ -12460,6 +12467,7 @@ static int tg3_set_pauseparam(struct net_device *dev, struct ethtool_pauseparam { struct tg3 *tp = netdev_priv(dev); int err = 0; + bool reset_phy = false; if (tp->link_config.autoneg == AUTONEG_ENABLE) tg3_warn_mgmt_link_flap(tp); @@ -12550,7 +12558,13 @@ static int tg3_set_pauseparam(struct net_device *dev, struct ethtool_pauseparam if (netif_running(dev)) { tg3_halt(tp, RESET_KIND_SHUTDOWN, 1); - err = tg3_restart_hw(tp, false); + /* Reset PHY to avoid PHY lock up */ + if (tg3_asic_rev(tp) == ASIC_REV_5717 || + tg3_asic_rev(tp) == ASIC_REV_5719 || + tg3_asic_rev(tp) == ASIC_REV_5720) + reset_phy = true; + + err = tg3_restart_hw(tp, reset_phy); if (!err) tg3_netif_start(tp); } -- GitLab From 453fcc4b536a6427829e0a89a41d605a2f29bc0e Mon Sep 17 00:00:00 2001 From: Matthew Cover Date: Sun, 18 Nov 2018 00:46:00 -0700 Subject: [PATCH 1613/2269] tuntap: fix multiqueue rx [ Upstream commit 8ebebcba559a1bfbaec7bbda64feb9870b9c58da ] When writing packets to a descriptor associated with a combined queue, the packets should end up on that queue. Before this change all packets written to any descriptor associated with a tap interface end up on rx-0, even when the descriptor is associated with a different queue. The rx traffic can be generated by either of the following. 1. a simple tap program which spins up multiple queues and writes packets to each of the file descriptors 2. tx from a qemu vm with a tap multiqueue netdev The queue for rx traffic can be observed by either of the following (done on the hypervisor in the qemu case). 1. a simple netmap program which opens and reads from per-queue descriptors 2. configuring RPS and doing per-cpu captures with rxtxcpu Alternatively, if you printk() the return value of skb_get_rx_queue() just before each instance of netif_receive_skb() in tun.c, you will get 65535 for every skb. Calling skb_record_rx_queue() to set the rx queue to the queue_index fixes the association between descriptor and rx queue. Signed-off-by: Matthew Cover Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/tun.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 7f8c7e3aa3569..0a008d136aae5 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1214,6 +1214,7 @@ static void tun_rx_batched(struct tun_struct *tun, struct tun_file *tfile, if (!rx_batched || (!more && skb_queue_empty(queue))) { local_bh_disable(); + skb_record_rx_queue(skb, tfile->queue_index); netif_receive_skb(skb); local_bh_enable(); return; @@ -1233,8 +1234,11 @@ static void tun_rx_batched(struct tun_struct *tun, struct tun_file *tfile, struct sk_buff *nskb; local_bh_disable(); - while ((nskb = __skb_dequeue(&process_queue))) + while ((nskb = __skb_dequeue(&process_queue))) { + skb_record_rx_queue(nskb, tfile->queue_index); netif_receive_skb(nskb); + } + skb_record_rx_queue(skb, tfile->queue_index); netif_receive_skb(skb); local_bh_enable(); } -- GitLab From 136ff9cad44a5da95187e561f473a2969d658b7e Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 1 Nov 2018 15:55:38 -0700 Subject: [PATCH 1614/2269] net: systemport: Protect stop from timeout [ Upstream commit 7cb6a2a2c72c1ed8f42fb01f1a661281b568dead ] A timing hazard exists when the network interface is stopped that allows a watchdog timeout to be processed by a separate core in parallel. This creates the potential for the timeout handler to wake the queues while the driver is shutting down, or access registers after their clocks have been removed. The more common case is that the watchdog timeout will produce a warning message which doesn't lead to a crash. The chances of this are greatly increased by the fact that bcm_sysport_netif_stop stops the transmit queues which can easily precipitate a watchdog time- out because of stale trans_start data in the queues. This commit corrects the behavior by ensuring that the watchdog timeout is disabled before enterring bcm_sysport_netif_stop. There are currently only two users of the bcm_sysport_netif_stop function: close and suspend. The close case already handles the issue by exiting the RUNNING state before invoking the driver close service. The suspend case now performs the netif_device_detach to exit the PRESENT state before the call to bcm_sysport_netif_stop rather than after it. These behaviors prevent any future scheduling of the driver timeout service during the window. The netif_tx_stop_all_queues function in bcm_sysport_netif_stop is replaced with netif_tx_disable to ensure synchronization with any transmit or timeout threads that may already be executing on other cores. For symmetry, the netif_device_attach call upon resume is moved to after the call to bcm_sysport_netif_start. Since it wakes the transmit queues it is not necessary to invoke netif_tx_start_all_queues from bcm_sysport_netif_start so it is moved into the driver open service. Fixes: 40755a0fce17 ("net: systemport: add suspend and resume support") Fixes: 80105befdb4b ("net: systemport: add Broadcom SYSTEMPORT Ethernet MAC driver") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/broadcom/bcmsysport.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index 6e7f9a470ea18..45462557e51c7 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -1774,9 +1774,6 @@ static void bcm_sysport_netif_start(struct net_device *dev) intrl2_1_mask_clear(priv, 0xffffffff); else intrl2_0_mask_clear(priv, INTRL2_0_TDMA_MBDONE_MASK); - - /* Last call before we start the real business */ - netif_tx_start_all_queues(dev); } static void rbuf_init(struct bcm_sysport_priv *priv) @@ -1922,6 +1919,8 @@ static int bcm_sysport_open(struct net_device *dev) bcm_sysport_netif_start(dev); + netif_tx_start_all_queues(dev); + return 0; out_clear_rx_int: @@ -1945,7 +1944,7 @@ static void bcm_sysport_netif_stop(struct net_device *dev) struct bcm_sysport_priv *priv = netdev_priv(dev); /* stop all software from updating hardware */ - netif_tx_stop_all_queues(dev); + netif_tx_disable(dev); napi_disable(&priv->napi); phy_stop(dev->phydev); @@ -2267,12 +2266,12 @@ static int bcm_sysport_suspend(struct device *d) if (!netif_running(dev)) return 0; + netif_device_detach(dev); + bcm_sysport_netif_stop(dev); phy_suspend(dev->phydev); - netif_device_detach(dev); - /* Disable UniMAC RX */ umac_enable_set(priv, CMD_RX_EN, 0); @@ -2356,8 +2355,6 @@ static int bcm_sysport_resume(struct device *d) goto out_free_rx_ring; } - netif_device_attach(dev); - /* RX pipe enable */ topctrl_writel(priv, 0, RX_FLUSH_CNTL); @@ -2402,6 +2399,8 @@ static int bcm_sysport_resume(struct device *d) bcm_sysport_netif_start(dev); + netif_device_attach(dev); + return 0; out_free_rx_ring: -- GitLab From 0ecbb0cff18453398b13de638fec1b82ad53e14a Mon Sep 17 00:00:00 2001 From: Subash Abhinov Kasiviswanathan Date: Fri, 9 Nov 2018 18:56:27 -0700 Subject: [PATCH 1615/2269] net: qualcomm: rmnet: Fix incorrect assignment of real_dev [ Upstream commit d02854dc1999ed3e7fd79ec700c64ac23ac0c458 ] A null dereference was observed when a sysctl was being set from userspace and rmnet was stuck trying to complete some actions in the NETDEV_REGISTER callback. This is because the real_dev is set only after the device registration handler completes. sysctl call stack - <6> Unable to handle kernel NULL pointer dereference at virtual address 00000108 <2> pc : rmnet_vnd_get_iflink+0x1c/0x28 <2> lr : dev_get_iflink+0x2c/0x40 <2> rmnet_vnd_get_iflink+0x1c/0x28 <2> inet6_fill_ifinfo+0x15c/0x234 <2> inet6_ifinfo_notify+0x68/0xd4 <2> ndisc_ifinfo_sysctl_change+0x1b8/0x234 <2> proc_sys_call_handler+0xac/0x100 <2> proc_sys_write+0x3c/0x4c <2> __vfs_write+0x54/0x14c <2> vfs_write+0xcc/0x188 <2> SyS_write+0x60/0xc0 <2> el0_svc_naked+0x34/0x38 device register call stack - <2> notifier_call_chain+0x84/0xbc <2> raw_notifier_call_chain+0x38/0x48 <2> call_netdevice_notifiers_info+0x40/0x70 <2> call_netdevice_notifiers+0x38/0x60 <2> register_netdevice+0x29c/0x3d8 <2> rmnet_vnd_newlink+0x68/0xe8 <2> rmnet_newlink+0xa0/0x160 <2> rtnl_newlink+0x57c/0x6c8 <2> rtnetlink_rcv_msg+0x1dc/0x328 <2> netlink_rcv_skb+0xac/0x118 <2> rtnetlink_rcv+0x24/0x30 <2> netlink_unicast+0x158/0x1f0 <2> netlink_sendmsg+0x32c/0x338 <2> sock_sendmsg+0x44/0x60 <2> SyS_sendto+0x150/0x1ac <2> el0_svc_naked+0x34/0x38 Fixes: b752eff5be24 ("net: qualcomm: rmnet: Implement ndo_get_iflink") Signed-off-by: Sean Tranchetti Signed-off-by: Subash Abhinov Kasiviswanathan Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c index 7f90d5587653b..fda7014190393 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c @@ -102,12 +102,14 @@ int rmnet_vnd_newlink(u8 id, struct net_device *rmnet_dev, struct rmnet_port *port, struct net_device *real_dev) { - struct rmnet_priv *priv; + struct rmnet_priv *priv = netdev_priv(rmnet_dev); int rc; if (port->rmnet_devices[id]) return -EINVAL; + priv->real_dev = real_dev; + rc = register_netdevice(rmnet_dev); if (!rc) { port->rmnet_devices[id] = rmnet_dev; @@ -115,9 +117,7 @@ int rmnet_vnd_newlink(u8 id, struct net_device *rmnet_dev, rmnet_dev->rtnl_link_ops = &rmnet_link_ops; - priv = netdev_priv(rmnet_dev); priv->mux_id = id; - priv->real_dev = real_dev; netdev_dbg(rmnet_dev, "rmnet dev created\n"); } -- GitLab From 3ae22cc948b00b12c18e2471ad32db366b113832 Mon Sep 17 00:00:00 2001 From: Tristram Ha Date: Fri, 2 Nov 2018 19:23:41 -0700 Subject: [PATCH 1616/2269] net: dsa: microchip: initialize mutex before use [ Upstream commit 284fb78ed7572117846f8e1d1d8e3dbfd16880c2 ] Initialize mutex before use. Avoid kernel complaint when CONFIG_DEBUG_LOCK_ALLOC is enabled. Fixes: b987e98e50ab90e5 ("dsa: add DSA switch driver for Microchip KSZ9477") Signed-off-by: Tristram Ha Reviewed-by: Pavel Machek Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/dsa/microchip/ksz_common.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c index 56cd6d365352e..6f4c9913f8f51 100644 --- a/drivers/net/dsa/microchip/ksz_common.c +++ b/drivers/net/dsa/microchip/ksz_common.c @@ -1104,11 +1104,6 @@ static int ksz_switch_init(struct ksz_device *dev) { int i; - mutex_init(&dev->reg_mutex); - mutex_init(&dev->stats_mutex); - mutex_init(&dev->alu_mutex); - mutex_init(&dev->vlan_mutex); - dev->ds->ops = &ksz_switch_ops; for (i = 0; i < ARRAY_SIZE(ksz_switch_chips); i++) { @@ -1193,6 +1188,11 @@ int ksz_switch_register(struct ksz_device *dev) if (dev->pdata) dev->chip_id = dev->pdata->chip_id; + mutex_init(&dev->reg_mutex); + mutex_init(&dev->stats_mutex); + mutex_init(&dev->alu_mutex); + mutex_init(&dev->vlan_mutex); + if (ksz_switch_detect(dev)) return -EINVAL; -- GitLab From 56782f53537aaa10c1323c86236959e62bb75330 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 3 Nov 2018 13:59:45 +0800 Subject: [PATCH 1617/2269] sctp: fix strchange_flags name for Stream Change Event [ Upstream commit fd82d61ba142f0b83463e47064bf5460aac57b6e ] As defined in rfc6525#section-6.1.3, SCTP_STREAM_CHANGE_DENIED and SCTP_STREAM_CHANGE_FAILED should be used instead of SCTP_ASSOC_CHANGE_DENIED and SCTP_ASSOC_CHANGE_FAILED. To keep the compatibility, fix it by adding two macros. Fixes: b444153fb5a6 ("sctp: add support for generating add stream change event notification") Reported-by: Jianwen Ji Signed-off-by: Xin Long Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/sctp.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h index cfe9712968350..e2554c1bcf58f 100644 --- a/include/uapi/linux/sctp.h +++ b/include/uapi/linux/sctp.h @@ -519,6 +519,8 @@ struct sctp_assoc_reset_event { #define SCTP_ASSOC_CHANGE_DENIED 0x0004 #define SCTP_ASSOC_CHANGE_FAILED 0x0008 +#define SCTP_STREAM_CHANGE_DENIED SCTP_ASSOC_CHANGE_DENIED +#define SCTP_STREAM_CHANGE_FAILED SCTP_ASSOC_CHANGE_FAILED struct sctp_stream_change_event { __u16 strchange_type; __u16 strchange_flags; -- GitLab From fd2e5f8ae1183d8b4c8dd49a87535dc97057d258 Mon Sep 17 00:00:00 2001 From: Martin Schiller Date: Fri, 16 Nov 2018 08:38:36 +0100 Subject: [PATCH 1618/2269] net: phy: mdio-gpio: Fix working over slow can_sleep GPIOs [ Upstream commit df5a8ec64eed7fe45b556cfff503acd6429ab817 ] Up until commit 7e5fbd1e0700 ("net: mdio-gpio: Convert to use gpiod functions where possible"), the _cansleep variants of the gpio_ API was used. After that commit and the change to gpiod_ API, the _cansleep() was dropped. This then results in WARN_ON() when used with GPIO devices which do sleep. Add back the _cansleep() to avoid this. Fixes: 7e5fbd1e0700 ("net: mdio-gpio: Convert to use gpiod functions where possible") Signed-off-by: Martin Schiller Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/phy/mdio-gpio.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/phy/mdio-gpio.c b/drivers/net/phy/mdio-gpio.c index 4333c6e14742b..638923a0ca448 100644 --- a/drivers/net/phy/mdio-gpio.c +++ b/drivers/net/phy/mdio-gpio.c @@ -79,7 +79,7 @@ static void mdio_dir(struct mdiobb_ctrl *ctrl, int dir) * assume the pin serves as pull-up. If direction is * output, the default value is high. */ - gpiod_set_value(bitbang->mdo, 1); + gpiod_set_value_cansleep(bitbang->mdo, 1); return; } @@ -94,7 +94,7 @@ static int mdio_get(struct mdiobb_ctrl *ctrl) struct mdio_gpio_info *bitbang = container_of(ctrl, struct mdio_gpio_info, ctrl); - return gpiod_get_value(bitbang->mdio); + return gpiod_get_value_cansleep(bitbang->mdio); } static void mdio_set(struct mdiobb_ctrl *ctrl, int what) @@ -103,9 +103,9 @@ static void mdio_set(struct mdiobb_ctrl *ctrl, int what) container_of(ctrl, struct mdio_gpio_info, ctrl); if (bitbang->mdo) - gpiod_set_value(bitbang->mdo, what); + gpiod_set_value_cansleep(bitbang->mdo, what); else - gpiod_set_value(bitbang->mdio, what); + gpiod_set_value_cansleep(bitbang->mdio, what); } static void mdc_set(struct mdiobb_ctrl *ctrl, int what) @@ -113,7 +113,7 @@ static void mdc_set(struct mdiobb_ctrl *ctrl, int what) struct mdio_gpio_info *bitbang = container_of(ctrl, struct mdio_gpio_info, ctrl); - gpiod_set_value(bitbang->mdc, what); + gpiod_set_value_cansleep(bitbang->mdc, what); } static const struct mdiobb_ops mdio_gpio_ops = { -- GitLab From eebe064ce49c7ea3b730885183d6b109b1b17418 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 18 Nov 2018 21:59:49 +0800 Subject: [PATCH 1619/2269] sctp: not increase stream's incnt before sending addstrm_in request [ Upstream commit e1e46479847e66f78f79d8c24d5169a5954b3fc2 ] Different from processing the addstrm_out request, The receiver handles an addstrm_in request by sending back an addstrm_out request to the sender who will increase its stream's in and incnt later. Now stream->incnt has been increased since it sent out the addstrm_in request in sctp_send_add_streams(), with the wrong stream->incnt will even cause crash when copying stream info from the old stream's in to the new one's in sctp_process_strreset_addstrm_out(). This patch is to fix it by simply removing the stream->incnt change from sctp_send_add_streams(). Fixes: 242bd2d519d7 ("sctp: implement sender-side procedures for Add Incoming/Outgoing Streams Request Parameter") Reported-by: Jianwen Ji Signed-off-by: Xin Long Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sctp/stream.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/sctp/stream.c b/net/sctp/stream.c index 9ea6057ed28b2..61273534ae105 100644 --- a/net/sctp/stream.c +++ b/net/sctp/stream.c @@ -310,7 +310,6 @@ int sctp_send_add_streams(struct sctp_association *asoc, goto out; } - stream->incnt = incnt; stream->outcnt = outcnt; asoc->strreset_outstanding = !!out + !!in; -- GitLab From 1ada2e1b289f719ea2e6b0dc5331f6c40b0341a7 Mon Sep 17 00:00:00 2001 From: Shalom Toledo Date: Fri, 2 Nov 2018 19:49:15 +0000 Subject: [PATCH 1620/2269] mlxsw: spectrum: Fix IP2ME CPU policer configuration [ Upstream commit 96801552f846460fe9ac10f1b189602992f004e1 ] The CPU policer used to police packets being trapped via a local route (IP2ME) was incorrectly configured to police based on bytes per second instead of packets per second. Change the policer to police based on packets per second and avoid packet loss under certain circumstances. Fixes: 9148e7cf73ce ("mlxsw: spectrum: Add policers for trap groups") Signed-off-by: Shalom Toledo Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 8b48338b4a70e..18bb6798937b1 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -3471,7 +3471,6 @@ static int mlxsw_sp_cpu_policers_set(struct mlxsw_core *mlxsw_core) burst_size = 7; break; case MLXSW_REG_HTGT_TRAP_GROUP_SP_IP2ME: - is_bytes = true; rate = 4 * 1024; burst_size = 4; break; -- GitLab From 61c6bb13506f0bc14b0e511167a248ba703f4b0d Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Thu, 8 Nov 2018 20:38:26 +0100 Subject: [PATCH 1621/2269] net: smsc95xx: Fix MTU range [ Upstream commit 85b18b0237ce9986a81a1b9534b5e2ee116f5504 ] The commit f77f0aee4da4 ("net: use core MTU range checking in USB NIC drivers") introduce a common MTU handling for usbnet. But it's missing the necessary changes for smsc95xx. So set the MTU range accordingly. This patch has been tested on a Raspberry Pi 3. Fixes: f77f0aee4da4 ("net: use core MTU range checking in USB NIC drivers") Signed-off-by: Stefan Wahren Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/smsc95xx.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c index 99e684e39d355..f74739664f895 100644 --- a/drivers/net/usb/smsc95xx.c +++ b/drivers/net/usb/smsc95xx.c @@ -1321,6 +1321,8 @@ static int smsc95xx_bind(struct usbnet *dev, struct usb_interface *intf) dev->net->ethtool_ops = &smsc95xx_ethtool_ops; dev->net->flags |= IFF_MULTICAST; dev->net->hard_header_len += SMSC95XX_TX_OVERHEAD_CSUM; + dev->net->min_mtu = ETH_MIN_MTU; + dev->net->max_mtu = ETH_DATA_LEN; dev->hard_mtu = dev->net->mtu + dev->net->hard_header_len; pdata->dev = dev; -- GitLab From 039df755bd84022be5b423fb866e871b87a34ad5 Mon Sep 17 00:00:00 2001 From: Frieder Schrempf Date: Wed, 31 Oct 2018 22:52:19 +0100 Subject: [PATCH 1622/2269] usbnet: smsc95xx: disable carrier check while suspending [ Upstream commit 7b900ead6cc66b2ee873cb042dfba169aa68b56c ] We need to make sure, that the carrier check polling is disabled while suspending. Otherwise we can end up with usbnet_read_cmd() being issued when only usbnet_read_cmd_nopm() is allowed. If this happens, read operations lock up. Fixes: d69d169493 ("usbnet: smsc95xx: fix link detection for disabled autonegotiation") Signed-off-by: Frieder Schrempf Reviewed-by: Raghuram Chary J Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/smsc95xx.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c index f74739664f895..2f65975a121f0 100644 --- a/drivers/net/usb/smsc95xx.c +++ b/drivers/net/usb/smsc95xx.c @@ -1600,6 +1600,8 @@ static int smsc95xx_suspend(struct usb_interface *intf, pm_message_t message) return ret; } + cancel_delayed_work_sync(&pdata->carrier_check); + if (pdata->suspend_flags) { netdev_warn(dev->net, "error during last resume\n"); pdata->suspend_flags = 0; @@ -1842,6 +1844,11 @@ done: */ if (ret && PMSG_IS_AUTO(message)) usbnet_resume(intf); + + if (ret) + schedule_delayed_work(&pdata->carrier_check, + CARRIER_CHECK_DELAY); + return ret; } -- GitLab From 59b830bf4436bdac3eace160fe1a9d7eb9e37392 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 8 Nov 2018 17:34:27 -0800 Subject: [PATCH 1623/2269] inet: frags: better deal with smp races MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 0d5b9311baf27bb545f187f12ecfd558220c607d ] Multiple cpus might attempt to insert a new fragment in rhashtable, if for example RPS is buggy, as reported by 배석진 in https://patchwork.ozlabs.org/patch/994601/ We use rhashtable_lookup_get_insert_key() instead of rhashtable_insert_fast() to let cpus losing the race free their own inet_frag_queue and use the one that was inserted by another cpu. Fixes: 648700f76b03 ("inet: frags: use rhashtables for reassembly units") Signed-off-by: Eric Dumazet Reported-by: 배석진 Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/inet_fragment.c | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index f6764537148ce..653be98fe3fb3 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -180,21 +180,22 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf, } static struct inet_frag_queue *inet_frag_create(struct netns_frags *nf, - void *arg) + void *arg, + struct inet_frag_queue **prev) { struct inet_frags *f = nf->f; struct inet_frag_queue *q; - int err; q = inet_frag_alloc(nf, f, arg); - if (!q) + if (!q) { + *prev = ERR_PTR(-ENOMEM); return NULL; - + } mod_timer(&q->timer, jiffies + nf->timeout); - err = rhashtable_insert_fast(&nf->rhashtable, &q->node, - f->rhash_params); - if (err < 0) { + *prev = rhashtable_lookup_get_insert_key(&nf->rhashtable, &q->key, + &q->node, f->rhash_params); + if (*prev) { q->flags |= INET_FRAG_COMPLETE; inet_frag_kill(q); inet_frag_destroy(q); @@ -206,19 +207,20 @@ static struct inet_frag_queue *inet_frag_create(struct netns_frags *nf, /* TODO : call from rcu_read_lock() and no longer use refcount_inc_not_zero() */ struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, void *key) { - struct inet_frag_queue *fq; + struct inet_frag_queue *fq = NULL, *prev; rcu_read_lock(); - fq = rhashtable_lookup(&nf->rhashtable, key, nf->f->rhash_params); - if (fq) { + prev = rhashtable_lookup(&nf->rhashtable, key, nf->f->rhash_params); + if (!prev) + fq = inet_frag_create(nf, key, &prev); + if (prev && !IS_ERR(prev)) { + fq = prev; if (!refcount_inc_not_zero(&fq->refcnt)) fq = NULL; - rcu_read_unlock(); - return fq; } rcu_read_unlock(); - return inet_frag_create(nf, key); + return fq; } EXPORT_SYMBOL(inet_frag_find); -- GitLab From c33b5cc7076b9140d5d393a2d750475b08d8efad Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 21 Nov 2018 19:11:06 +0100 Subject: [PATCH 1624/2269] Revert "x86/speculation: Enable cross-hyperthread spectre v2 STIBP mitigation" This reverts commit 8a13906ae519b3ed95cd0fb73f1098b46362f6c4 which is commit 53c613fe6349994f023245519265999eed75957f upstream. It's not ready for the stable trees as there are major slowdowns involved with this patch. Reported-by: Jiri Kosina Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: "WoodhouseDavid" Cc: Andi Kleen Cc: Tim Chen Cc: "SchauflerCasey" Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 57 ++++---------------------------------- kernel/cpu.c | 11 +------- 2 files changed, 7 insertions(+), 61 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index aa6e7f75bccc9..e92aedd938064 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -34,10 +34,12 @@ static void __init spectre_v2_select_mitigation(void); static void __init ssb_select_mitigation(void); static void __init l1tf_select_mitigation(void); -/* The base value of the SPEC_CTRL MSR that always has to be preserved. */ -u64 x86_spec_ctrl_base; +/* + * Our boot-time value of the SPEC_CTRL MSR. We read it once so that any + * writes to SPEC_CTRL contain whatever reserved bits have been set. + */ +u64 __ro_after_init x86_spec_ctrl_base; EXPORT_SYMBOL_GPL(x86_spec_ctrl_base); -static DEFINE_MUTEX(spec_ctrl_mutex); /* * The vendor and possibly platform specific bits which can be modified in @@ -321,46 +323,6 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) return cmd; } -static bool stibp_needed(void) -{ - if (spectre_v2_enabled == SPECTRE_V2_NONE) - return false; - - if (!boot_cpu_has(X86_FEATURE_STIBP)) - return false; - - return true; -} - -static void update_stibp_msr(void *info) -{ - wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); -} - -void arch_smt_update(void) -{ - u64 mask; - - if (!stibp_needed()) - return; - - mutex_lock(&spec_ctrl_mutex); - mask = x86_spec_ctrl_base; - if (cpu_smt_control == CPU_SMT_ENABLED) - mask |= SPEC_CTRL_STIBP; - else - mask &= ~SPEC_CTRL_STIBP; - - if (mask != x86_spec_ctrl_base) { - pr_info("Spectre v2 cross-process SMT mitigation: %s STIBP\n", - cpu_smt_control == CPU_SMT_ENABLED ? - "Enabling" : "Disabling"); - x86_spec_ctrl_base = mask; - on_each_cpu(update_stibp_msr, NULL, 1); - } - mutex_unlock(&spec_ctrl_mutex); -} - static void __init spectre_v2_select_mitigation(void) { enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline(); @@ -460,9 +422,6 @@ specv2_set_mode: setup_force_cpu_cap(X86_FEATURE_USE_IBRS_FW); pr_info("Enabling Restricted Speculation for firmware calls\n"); } - - /* Enable STIBP if appropriate */ - arch_smt_update(); } #undef pr_fmt @@ -855,8 +814,6 @@ static ssize_t l1tf_show_state(char *buf) static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr, char *buf, unsigned int bug) { - int ret; - if (!boot_cpu_has_bug(bug)) return sprintf(buf, "Not affected\n"); @@ -871,12 +828,10 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr return sprintf(buf, "Mitigation: __user pointer sanitization\n"); case X86_BUG_SPECTRE_V2: - ret = sprintf(buf, "%s%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], + return sprintf(buf, "%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "", boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "", - (x86_spec_ctrl_base & SPEC_CTRL_STIBP) ? ", STIBP" : "", spectre_v2_module_string()); - return ret; case X86_BUG_SPEC_STORE_BYPASS: return sprintf(buf, "%s\n", ssb_strings[ssb_mode]); diff --git a/kernel/cpu.c b/kernel/cpu.c index 90cf6a04e08a2..f3f389e333430 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -2045,12 +2045,6 @@ static void cpuhp_online_cpu_device(unsigned int cpu) kobject_uevent(&dev->kobj, KOBJ_ONLINE); } -/* - * Architectures that need SMT-specific errata handling during SMT hotplug - * should override this. - */ -void __weak arch_smt_update(void) { }; - static int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval) { int cpu, ret = 0; @@ -2077,10 +2071,8 @@ static int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval) */ cpuhp_offline_cpu_device(cpu); } - if (!ret) { + if (!ret) cpu_smt_control = ctrlval; - arch_smt_update(); - } cpu_maps_update_done(); return ret; } @@ -2091,7 +2083,6 @@ static int cpuhp_smt_enable(void) cpu_maps_update_begin(); cpu_smt_control = CPU_SMT_ENABLED; - arch_smt_update(); for_each_present_cpu(cpu) { /* Skip online CPUs and CPUs on offline nodes */ if (cpu_online(cpu) || !node_online(cpu_to_node(cpu))) -- GitLab From 8a1bd68b06ebc9fe5b20297c52a51c82fde744a6 Mon Sep 17 00:00:00 2001 From: Chris Paterson Date: Thu, 14 Dec 2017 09:08:40 +0000 Subject: [PATCH 1625/2269] ARM: dts: r8a7791: Correct critical CPU temperature MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit e4fdf59bcce3b490bbc7197145bcb9f9d5a18cd3 upstream. The R-Car M2W hardware manual states that Tc = –40°C to +105°C. The thermal sensor has an accuracy of ±5°C and there can be a temperature difference of 1 or 2 degrees between Tjmax and the thermal sensor due to the location of the latter. This means that 95°C is a safer value to use. Fixes: cac68a56e34b9810 ("ARM: dts: r8a7791: enable to use thermal-zone") Signed-off-by: Chris Paterson Reviewed-by: Geert Uytterhoeven Signed-off-by: Simon Horman Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/r8a7791.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/r8a7791.dtsi b/arch/arm/boot/dts/r8a7791.dtsi index f1d1a97721530..0aceb3736e5ca 100644 --- a/arch/arm/boot/dts/r8a7791.dtsi +++ b/arch/arm/boot/dts/r8a7791.dtsi @@ -91,7 +91,7 @@ trips { cpu-crit { - temperature = <115000>; + temperature = <95000>; hysteresis = <0>; type = "critical"; }; -- GitLab From 004ef8f590a464bb1977f38d3848b18df225267e Mon Sep 17 00:00:00 2001 From: Chris Paterson Date: Thu, 14 Dec 2017 09:08:41 +0000 Subject: [PATCH 1626/2269] ARM: dts: r8a7793: Correct critical CPU temperature MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 1dfc65cef481ac6af64380f26186d5cc585b46eb upstream. The R-Car M2N hardware manual states that Tc = –40°C to +105°C. The thermal sensor has an accuracy of ±5°C and there can be a temperature difference of 1 or 2 degrees between Tjmax and the thermal sensor due to the location of the latter. This means that 95°C is a safer value to use. Fixes: 57f9156bc620ac56 ("ARM: dts: r8a7793: enable to use thermal-zone") Signed-off-by: Chris Paterson Reviewed-by: Geert Uytterhoeven Signed-off-by: Simon Horman Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/r8a7793.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/r8a7793.dtsi b/arch/arm/boot/dts/r8a7793.dtsi index 497716b6fbe24..fd12564aabc39 100644 --- a/arch/arm/boot/dts/r8a7793.dtsi +++ b/arch/arm/boot/dts/r8a7793.dtsi @@ -88,7 +88,7 @@ trips { cpu-crit { - temperature = <115000>; + temperature = <95000>; hysteresis = <0>; type = "critical"; }; -- GitLab From 1d4bd2e4e1d26c5384d52ca4a1953cb0cfc934f1 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 23 Nov 2018 08:19:27 +0100 Subject: [PATCH 1627/2269] Linux 4.14.83 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index cac5323bc95de..0f42814095a4d 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 82 +SUBLEVEL = 83 EXTRAVERSION = NAME = Petit Gorille -- GitLab From 426477d9f68b56ab71f52a5ce2dd6da0699d89c3 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 1 Nov 2018 13:14:30 +0000 Subject: [PATCH 1628/2269] cifs: don't dereference smb_file_target before null check [ Upstream commit 8c6c9bed8773375b1d54ccca2911ec892c59db5d ] There is a null check on dst_file->private data which suggests it can be potentially null. However, before this check, pointer smb_file_target is derived from dst_file->private and dereferenced in the call to tlink_tcon, hence there is a potential null pointer deference. Fix this by assigning smb_file_target and target_tcon after the null pointer sanity checks. Detected by CoverityScan, CID#1475302 ("Dereference before null check") Fixes: 04b38d601239 ("vfs: pull btrfs clone API to vfs layer") Signed-off-by: Colin Ian King Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/cifs/cifsfs.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 44a7b2dea6882..c5fd5abf72069 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -933,8 +933,8 @@ static int cifs_clone_file_range(struct file *src_file, loff_t off, struct inode *src_inode = file_inode(src_file); struct inode *target_inode = file_inode(dst_file); struct cifsFileInfo *smb_file_src = src_file->private_data; - struct cifsFileInfo *smb_file_target = dst_file->private_data; - struct cifs_tcon *target_tcon = tlink_tcon(smb_file_target->tlink); + struct cifsFileInfo *smb_file_target; + struct cifs_tcon *target_tcon; unsigned int xid; int rc; @@ -948,6 +948,9 @@ static int cifs_clone_file_range(struct file *src_file, loff_t off, goto out; } + smb_file_target = dst_file->private_data; + target_tcon = tlink_tcon(smb_file_target->tlink); + /* * Note: cifs case is easier than btrfs since server responsible for * checks for proper open modes and file type and if it wants -- GitLab From bc32d90e4d45b4f544527efdce2ea57d83f77542 Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Thu, 25 Oct 2018 15:43:36 +1000 Subject: [PATCH 1629/2269] cifs: fix return value for cifs_listxattr [ Upstream commit 0c5d6cb6643f48ad3775322f3ebab6c7eb67484e ] If the application buffer was too small to fit all the names we would still count the number of bytes and return this for listxattr. This would then trigger a BUG in usercopy.c Fix the computation of the size so that we return -ERANGE correctly when the buffer is too small. This fixes the kernel BUG for xfstest generic/377 Signed-off-by: Ronnie Sahlberg Signed-off-by: Steve French Reviewed-by: Aurelien Aptel Signed-off-by: Sasha Levin --- fs/cifs/smb2ops.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 4e5b05263e4a3..3372eedaa94d7 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -441,6 +441,7 @@ move_smb2_ea_to_cifs(char *dst, size_t dst_size, int rc = 0; unsigned int ea_name_len = ea_name ? strlen(ea_name) : 0; char *name, *value; + size_t buf_size = dst_size; size_t name_len, value_len, user_name_len; while (src_size > 0) { @@ -476,9 +477,10 @@ move_smb2_ea_to_cifs(char *dst, size_t dst_size, /* 'user.' plus a terminating null */ user_name_len = 5 + 1 + name_len; - rc += user_name_len; - - if (dst_size >= user_name_len) { + if (buf_size == 0) { + /* skip copy - calc size only */ + rc += user_name_len; + } else if (dst_size >= user_name_len) { dst_size -= user_name_len; memcpy(dst, "user.", 5); dst += 5; @@ -486,8 +488,7 @@ move_smb2_ea_to_cifs(char *dst, size_t dst_size, dst += name_len; *dst = 0; ++dst; - } else if (dst_size == 0) { - /* skip copy - calc size only */ + rc += user_name_len; } else { /* stop before overrun buffer */ rc = -ERANGE; -- GitLab From aa9571b2015d270a3e73a66d3a5b27c37836fd6a Mon Sep 17 00:00:00 2001 From: Anders Roxell Date: Tue, 30 Oct 2018 12:38:50 +0100 Subject: [PATCH 1630/2269] arm64: kprobe: make page to RO mode when allocate it [ Upstream commit 966866892cf89d606544bca22d584ba2ef9ec208 ] Commit 1404d6f13e47 ("arm64: dump: Add checking for writable and exectuable pages") has successfully identified code that leaves a page with W+X permissions. [ 3.245140] arm64/mm: Found insecure W+X mapping at address (____ptrval____)/0xffff000000d90000 [ 3.245771] WARNING: CPU: 0 PID: 1 at ../arch/arm64/mm/dump.c:232 note_page+0x410/0x420 [ 3.246141] Modules linked in: [ 3.246653] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.19.0-rc5-next-20180928-00001-ge70ae259b853-dirty #62 [ 3.247008] Hardware name: linux,dummy-virt (DT) [ 3.247347] pstate: 80000005 (Nzcv daif -PAN -UAO) [ 3.247623] pc : note_page+0x410/0x420 [ 3.247898] lr : note_page+0x410/0x420 [ 3.248071] sp : ffff00000804bcd0 [ 3.248254] x29: ffff00000804bcd0 x28: ffff000009274000 [ 3.248578] x27: ffff00000921a000 x26: ffff80007dfff000 [ 3.248845] x25: ffff0000093f5000 x24: ffff000009526f6a [ 3.249109] x23: 0000000000000004 x22: ffff000000d91000 [ 3.249396] x21: ffff000000d90000 x20: 0000000000000000 [ 3.249661] x19: ffff00000804bde8 x18: 0000000000000400 [ 3.249924] x17: 0000000000000000 x16: 0000000000000000 [ 3.250271] x15: ffffffffffffffff x14: 295f5f5f5f6c6176 [ 3.250594] x13: 7274705f5f5f5f28 x12: 2073736572646461 [ 3.250941] x11: 20746120676e6970 x10: 70616d20582b5720 [ 3.251252] x9 : 6572756365736e69 x8 : 3039643030303030 [ 3.251519] x7 : 306666666678302f x6 : ffff0000095467b2 [ 3.251802] x5 : 0000000000000000 x4 : 0000000000000000 [ 3.252060] x3 : 0000000000000000 x2 : ffffffffffffffff [ 3.252323] x1 : 4d151327adc50b00 x0 : 0000000000000000 [ 3.252664] Call trace: [ 3.252953] note_page+0x410/0x420 [ 3.253186] walk_pgd+0x12c/0x238 [ 3.253417] ptdump_check_wx+0x68/0xf8 [ 3.253637] mark_rodata_ro+0x68/0x98 [ 3.253847] kernel_init+0x38/0x160 [ 3.254103] ret_from_fork+0x10/0x18 kprobes allocates a writable executable page with module_alloc() in order to store executable code. Reworked to that when allocate a page it sets mode RO. Inspired by commit 63fef14fc98a ("kprobes/x86: Make insn buffer always ROX and use text_poke()"). Suggested-by: Arnd Bergmann Suggested-by: Ard Biesheuvel Acked-by: Will Deacon Acked-by: Masami Hiramatsu Reviewed-by: Laura Abbott Signed-off-by: Anders Roxell [catalin.marinas@arm.com: removed unnecessary casts] Signed-off-by: Catalin Marinas Signed-off-by: Sasha Levin --- arch/arm64/kernel/probes/kprobes.c | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c index 22a5921562c7f..0417c929d21a0 100644 --- a/arch/arm64/kernel/probes/kprobes.c +++ b/arch/arm64/kernel/probes/kprobes.c @@ -23,7 +23,9 @@ #include #include #include +#include #include +#include #include #include #include @@ -42,10 +44,21 @@ DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); static void __kprobes post_kprobe_handler(struct kprobe_ctlblk *, struct pt_regs *); +static int __kprobes patch_text(kprobe_opcode_t *addr, u32 opcode) +{ + void *addrs[1]; + u32 insns[1]; + + addrs[0] = addr; + insns[0] = opcode; + + return aarch64_insn_patch_text(addrs, insns, 1); +} + static void __kprobes arch_prepare_ss_slot(struct kprobe *p) { /* prepare insn slot */ - p->ainsn.api.insn[0] = cpu_to_le32(p->opcode); + patch_text(p->ainsn.api.insn, p->opcode); flush_icache_range((uintptr_t) (p->ainsn.api.insn), (uintptr_t) (p->ainsn.api.insn) + @@ -118,15 +131,15 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) return 0; } -static int __kprobes patch_text(kprobe_opcode_t *addr, u32 opcode) +void *alloc_insn_page(void) { - void *addrs[1]; - u32 insns[1]; + void *page; - addrs[0] = (void *)addr; - insns[0] = (u32)opcode; + page = vmalloc_exec(PAGE_SIZE); + if (page) + set_memory_ro((unsigned long)page, 1); - return aarch64_insn_patch_text(addrs, insns, 1); + return page; } /* arm kprobe: install breakpoint in text */ -- GitLab From f54c21ee5dcede4164e8924a525d801f2fe855fd Mon Sep 17 00:00:00 2001 From: Radoslaw Tyl Date: Mon, 22 Oct 2018 08:44:31 +0200 Subject: [PATCH 1631/2269] ixgbe: fix MAC anti-spoofing filter after VFLR [ Upstream commit 6702185c1ffec3421181b5e24491e3fac920cb61 ] This change resolves a driver bug where the driver is logging a message that says "Spoofed packets detected". This can occur on the PF (host) when a VF has VLAN+MACVLAN enabled and is re-started with a different MAC address. MAC and VLAN anti-spoofing filters are to be enabled together. Signed-off-by: Radoslaw Tyl Tested-by: Andrew Bowers Acked-by: Piotr Skajewski Signed-off-by: Jeff Kirsher Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c index 112d24c6c9cea..4904a63b83ef1 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c @@ -760,8 +760,10 @@ static inline void ixgbe_vf_reset_event(struct ixgbe_adapter *adapter, u32 vf) ixgbe_set_vmvir(adapter, vfinfo->pf_vlan, adapter->default_up, vf); - if (vfinfo->spoofchk_enabled) + if (vfinfo->spoofchk_enabled) { hw->mac.ops.set_vlan_anti_spoofing(hw, true, vf); + hw->mac.ops.set_mac_anti_spoofing(hw, true, vf); + } } /* reset multicast table array for vf */ -- GitLab From 195169e26eaa18da05f0b63cc4b7dbfa50d70752 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Tue, 30 Oct 2018 15:06:38 -0700 Subject: [PATCH 1632/2269] reiserfs: propagate errors from fill_with_dentries() properly [ Upstream commit b10298d56c9623f9b173f19959732d3184b35f4f ] fill_with_dentries() failed to propagate errors up to reiserfs_for_each_xattr() properly. Plumb them through. Note that reiserfs_for_each_xattr() is only used by reiserfs_delete_xattrs() and reiserfs_chown_xattrs(). The result of reiserfs_delete_xattrs() is discarded anyway, the only difference there is whether a warning is printed to dmesg. The result of reiserfs_chown_xattrs() does matter because it can block chowning of the file to which the xattrs belong; but either way, the resulting state can have misaligned ownership, so my patch doesn't improve things greatly. Credit for making me look at this code goes to Al Viro, who pointed out that the ->actor calling convention is suboptimal and should be changed. Link: http://lkml.kernel.org/r/20180802163335.83312-1-jannh@google.com Signed-off-by: Jann Horn Reviewed-by: Andrew Morton Cc: Jeff Mahoney Cc: Eric Biggers Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- fs/reiserfs/xattr.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index 505f87a8c724f..83423192588cd 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -185,6 +185,7 @@ struct reiserfs_dentry_buf { struct dir_context ctx; struct dentry *xadir; int count; + int err; struct dentry *dentries[8]; }; @@ -207,6 +208,7 @@ fill_with_dentries(struct dir_context *ctx, const char *name, int namelen, dentry = lookup_one_len(name, dbuf->xadir, namelen); if (IS_ERR(dentry)) { + dbuf->err = PTR_ERR(dentry); return PTR_ERR(dentry); } else if (d_really_is_negative(dentry)) { /* A directory entry exists, but no file? */ @@ -215,6 +217,7 @@ fill_with_dentries(struct dir_context *ctx, const char *name, int namelen, "not found for file %pd.\n", dentry, dbuf->xadir); dput(dentry); + dbuf->err = -EIO; return -EIO; } @@ -262,6 +265,10 @@ static int reiserfs_for_each_xattr(struct inode *inode, err = reiserfs_readdir_inode(d_inode(dir), &buf.ctx); if (err) break; + if (buf.err) { + err = buf.err; + break; + } if (!buf.count) break; for (i = 0; !err && i < buf.count && buf.dentries[i]; i++) { -- GitLab From dc50dd3ac68b092c0bcb05cbd8b604e8d517185a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ernesto=20A=2E=20Fern=C3=A1ndez?= Date: Tue, 30 Oct 2018 15:06:07 -0700 Subject: [PATCH 1633/2269] hfs: prevent btree data loss on root split MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit d057c036672f33d43a5f7344acbb08cf3a8a0c09 ] This bug is triggered whenever hfs_brec_update_parent() needs to split the root node. The height of the btree is not increased, which leaves the new node orphaned and its records lost. It is not possible for this to happen on a valid hfs filesystem because the index nodes have fixed length keys. For reasons I ignore, the hfs module does have support for a number of hfsplus features. A corrupt btree header may report variable length keys and trigger this bug, so it's better to fix it. Link: http://lkml.kernel.org/r/9750b1415685c4adca10766895f6d5ef12babdb0.1535682463.git.ernesto.mnd.fernandez@gmail.com Signed-off-by: Ernesto A. Fernández Cc: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- fs/hfs/brec.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/hfs/brec.c b/fs/hfs/brec.c index 9a8772465a907..da25c49203cc5 100644 --- a/fs/hfs/brec.c +++ b/fs/hfs/brec.c @@ -425,6 +425,10 @@ skip: if (new_node) { __be32 cnid; + if (!new_node->parent) { + hfs_btree_inc_height(tree); + new_node->parent = tree->root; + } fd->bnode = hfs_bnode_find(tree, new_node->parent); /* create index key and entry */ hfs_bnode_read_key(new_node, fd->search_key, 14); -- GitLab From f2e71fed903fa6441188f4ac8b0df6528711cae3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ernesto=20A=2E=20Fern=C3=A1ndez?= Date: Tue, 30 Oct 2018 15:06:00 -0700 Subject: [PATCH 1634/2269] hfsplus: prevent btree data loss on root split MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 0a3021d4f5295aa073c7bf5c5e4de60a2e292578 ] Creating, renaming or deleting a file may cause catalog corruption and data loss. This bug is randomly triggered by xfstests generic/027, but here is a faster reproducer: truncate -s 50M fs.iso mkfs.hfsplus fs.iso mount fs.iso /mnt i=100 while [ $i -le 150 ]; do touch /mnt/$i &>/dev/null ((++i)) done i=100 while [ $i -le 150 ]; do mv /mnt/$i /mnt/$(perl -e "print $i x82") &>/dev/null ((++i)) done umount /mnt fsck.hfsplus -n fs.iso The bug is triggered whenever hfs_brec_update_parent() needs to split the root node. The height of the btree is not increased, which leaves the new node orphaned and its records lost. Link: http://lkml.kernel.org/r/26d882184fc43043a810114258f45277752186c7.1535682461.git.ernesto.mnd.fernandez@gmail.com Signed-off-by: Ernesto A. Fernández Cc: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- fs/hfsplus/brec.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/hfsplus/brec.c b/fs/hfsplus/brec.c index 808f4d8c859c9..d3f36982f6858 100644 --- a/fs/hfsplus/brec.c +++ b/fs/hfsplus/brec.c @@ -428,6 +428,10 @@ skip: if (new_node) { __be32 cnid; + if (!new_node->parent) { + hfs_btree_inc_height(tree); + new_node->parent = tree->root; + } fd->bnode = hfs_bnode_find(tree, new_node->parent); /* create index key and entry */ hfs_bnode_read_key(new_node, fd->search_key, 14); -- GitLab From a134b05a43ae2327036e860fb10c74389b5bd971 Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Fri, 15 Jun 2018 16:42:56 +0200 Subject: [PATCH 1635/2269] um: Give start_idle_thread() a return code [ Upstream commit 7ff1e34bbdc15acab823b1ee4240e94623d50ee8 ] Fixes: arch/um/os-Linux/skas/process.c:613:1: warning: control reaches end of non-void function [-Wreturn-type] longjmp() never returns but gcc still warns that the end of the function can be reached. Add a return code and debug aid to detect this impossible case. Signed-off-by: Richard Weinberger Signed-off-by: Sasha Levin --- arch/um/os-Linux/skas/process.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c index c94c3bd70ccd7..df4a985716eba 100644 --- a/arch/um/os-Linux/skas/process.c +++ b/arch/um/os-Linux/skas/process.c @@ -610,6 +610,11 @@ int start_idle_thread(void *stack, jmp_buf *switch_buf) fatal_sigsegv(); } longjmp(*switch_buf, 1); + + /* unreachable */ + printk(UM_KERN_ERR "impossible long jump!"); + fatal_sigsegv(); + return 0; } void initial_thread_cb_skas(void (*proc)(void *), void *arg) -- GitLab From a3e69ef80467644e7970fdc39452f2247994fc3f Mon Sep 17 00:00:00 2001 From: "Lee, Shawn C" Date: Sun, 28 Oct 2018 22:49:33 -0700 Subject: [PATCH 1636/2269] drm/edid: Add 6 bpc quirk for BOE panel. [ Upstream commit 922dceff8dc1fb4dafc9af78139ba65671408103 ] BOE panel (ID: 0x0771) that reports "DFP 1.x compliant TMDS". But it's 6bpc panel only instead of 8 bpc. Add panel ID to edid quirk list and set 6 bpc as default to work around this issue. Cc: Jani Nikula Cc: Maarten Lankhorst Cc: Gustavo Padovan Cc: Cooper Chiou Signed-off-by: Lee, Shawn C > Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/1540792173-7288-1-git-send-email-shawn.c.lee@intel.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/drm_edid.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index d1191ebed0720..ed01e3aae0e88 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -120,6 +120,9 @@ static const struct edid_quirk { /* SDC panel of Lenovo B50-80 reports 8 bpc, but is a 6 bpc panel */ { "SDC", 0x3652, EDID_QUIRK_FORCE_6BPC }, + /* BOE model 0x0771 reports 8 bpc, but is a 6 bpc panel */ + { "BOE", 0x0771, EDID_QUIRK_FORCE_6BPC }, + /* Belinea 10 15 55 */ { "MAX", 1516, EDID_QUIRK_PREFER_LARGE_60 }, { "MAX", 0x77e, EDID_QUIRK_PREFER_LARGE_60 }, -- GitLab From b36c78af0c4d210aa99b705c4d777adf6a44d67f Mon Sep 17 00:00:00 2001 From: Rajneesh Bhardwaj Date: Sat, 6 Oct 2018 12:21:13 +0530 Subject: [PATCH 1637/2269] platform/x86: intel_telemetry: report debugfs failure [ Upstream commit 8d98b1ef368feeb7720b8b9b6f3bd93f2ad892bc ] On some Goldmont based systems such as ASRock J3455M the BIOS may not enable the IPC1 device that provides access to the PMC and PUNIT. In such scenarios, the IOSS and PSS resources from the platform device can not be obtained and result in a invalid telemetry_plt_config which is an internal data structure that holds platform config and is maintained by the telemetry platform driver. This is also applicable to the platforms where the BIOS supports IPC1 device under debug configurations but IPC1 is disabled by user or the policy. This change allows user to know the reason for not seeing entries under /sys/kernel/debug/telemetry/* when there is no apparent failure at boot. Cc: Matt Turner Cc: Len Brown Cc: Souvik Kumar Chakravarty Cc: Kuppuswamy Sathyanarayanan Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=198779 Acked-by: Matt Turner Signed-off-by: Rajneesh Bhardwaj Signed-off-by: Andy Shevchenko Signed-off-by: Sasha Levin --- drivers/platform/x86/intel_telemetry_debugfs.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/platform/x86/intel_telemetry_debugfs.c b/drivers/platform/x86/intel_telemetry_debugfs.c index d4fc42b4cbebf..401bdc7a9d94a 100644 --- a/drivers/platform/x86/intel_telemetry_debugfs.c +++ b/drivers/platform/x86/intel_telemetry_debugfs.c @@ -968,12 +968,16 @@ static int __init telemetry_debugfs_init(void) debugfs_conf = (struct telemetry_debugfs_conf *)id->driver_data; err = telemetry_pltconfig_valid(); - if (err < 0) + if (err < 0) { + pr_info("Invalid pltconfig, ensure IPC1 device is enabled in BIOS\n"); return -ENODEV; + } err = telemetry_debugfs_check_evts(); - if (err < 0) + if (err < 0) { + pr_info("telemetry_debugfs_check_evts failed\n"); return -EINVAL; + } register_pm_notifier(&pm_notifier); -- GitLab From 0333b8fc793bdb01b454a9c2ad3bf2c63a8cb530 Mon Sep 17 00:00:00 2001 From: Alan Tull Date: Thu, 18 Oct 2018 14:54:11 -0500 Subject: [PATCH 1638/2269] clk: fixed-rate: fix of_node_get-put imbalance [ Upstream commit 52091c256bdcad0d01e2852a63f19cd2cce6af96 ] When the fixed rate clock is created by devicetree, of_clk_add_provider is called. Add a call to of_clk_del_provider in the remove function to balance it out. Signed-off-by: Alan Tull Fixes: 435779fe1336 ("clk: fixed-rate: Convert into a module platform driver") Signed-off-by: Stephen Boyd Signed-off-by: Sasha Levin --- drivers/clk/clk-fixed-rate.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/clk/clk-fixed-rate.c b/drivers/clk/clk-fixed-rate.c index b5c46b3f8764d..6d6475c32ee51 100644 --- a/drivers/clk/clk-fixed-rate.c +++ b/drivers/clk/clk-fixed-rate.c @@ -200,6 +200,7 @@ static int of_fixed_clk_remove(struct platform_device *pdev) { struct clk *clk = platform_get_drvdata(pdev); + of_clk_del_provider(pdev->dev.of_node); clk_unregister_fixed_rate(clk); return 0; -- GitLab From bff410ea7ec2541a38d353a9e012f7f4622e17c1 Mon Sep 17 00:00:00 2001 From: David Miller Date: Wed, 17 Oct 2018 12:08:59 -0700 Subject: [PATCH 1639/2269] perf symbols: Set PLT entry/header sizes properly on Sparc [ Upstream commit d6afa561e1471ccfdaf7191230c0c59a37e45a5b ] Using the sh_entsize for both values isn't correct. It happens to be correct on x86... For both 32-bit and 64-bit sparc, there are four PLT entries in the PLT section. Signed-off-by: David S. Miller Cc: Alexander Shishkin Cc: Alexis Berlemont Cc: David Tolnay Cc: Hanjun Guo Cc: Hemant Kumar Cc: Li Bin Cc: Masami Hiramatsu Cc: Milian Wolff Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wang Nan Cc: zhangmengting@huawei.com Fixes: b2f7605076d6 ("perf symbols: Fix plt entry calculation for ARM and AARCH64") Link: http://lkml.kernel.org/r/20181017.120859.2268840244308635255.davem@davemloft.net Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/util/symbol-elf.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 2de770511e705..8ad4296de98b0 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -338,7 +338,17 @@ int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss, struct map * plt_entry_size = 16; break; - default: /* FIXME: s390/alpha/mips/parisc/poperpc/sh/sparc/xtensa need to be checked */ + case EM_SPARC: + plt_header_size = 48; + plt_entry_size = 12; + break; + + case EM_SPARCV9: + plt_header_size = 128; + plt_entry_size = 32; + break; + + default: /* FIXME: s390/alpha/mips/parisc/poperpc/sh/xtensa need to be checked */ plt_header_size = shdr_plt.sh_entsize; plt_entry_size = shdr_plt.sh_entsize; break; -- GitLab From a088fde602b8dc3183fb40fa2e706455243bd900 Mon Sep 17 00:00:00 2001 From: Chengguang Xu Date: Wed, 13 Jun 2018 12:05:13 +0800 Subject: [PATCH 1640/2269] fs/exofs: fix potential memory leak in mount option parsing [ Upstream commit 515f1867addaba49c1c6ac73abfaffbc192c1db4 ] There are some cases can cause memory leak when parsing option 'osdname'. Signed-off-by: Chengguang Xu Signed-off-by: Al Viro Signed-off-by: Sasha Levin --- fs/exofs/super.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/exofs/super.c b/fs/exofs/super.c index 819624cfc8da4..c9ec652e2fcd2 100644 --- a/fs/exofs/super.c +++ b/fs/exofs/super.c @@ -100,6 +100,7 @@ static int parse_options(char *options, struct exofs_mountopt *opts) token = match_token(p, tokens, args); switch (token) { case Opt_name: + kfree(opts->dev_name); opts->dev_name = match_strdup(&args[0]); if (unlikely(!opts->dev_name)) { EXOFS_ERR("Error allocating dev_name"); @@ -863,8 +864,10 @@ static struct dentry *exofs_mount(struct file_system_type *type, int ret; ret = parse_options(data, &opts); - if (ret) + if (ret) { + kfree(opts.dev_name); return ERR_PTR(ret); + } if (!opts.dev_name) opts.dev_name = dev_name; -- GitLab From e2a11843feaac0a4c7f6047ab433a059f0122603 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Mon, 24 Sep 2018 13:01:20 +0200 Subject: [PATCH 1641/2269] clk: samsung: exynos5420: Enable PERIS clocks for suspend [ Upstream commit b33228029d842269e17bba591609e83ed422005d ] Ensure that clocks for core SoC modules (including TZPC0..9 modules) are enabled for suspend/resume cycle. This fixes suspend/resume support on Exynos5422-based Odroid XU3/XU4 boards. Suggested-by: Joonyoung Shim Signed-off-by: Marek Szyprowski Signed-off-by: Sylwester Nawrocki Signed-off-by: Sasha Levin --- drivers/clk/samsung/clk-exynos5420.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/clk/samsung/clk-exynos5420.c b/drivers/clk/samsung/clk-exynos5420.c index 25601967d1cd6..500a55415e900 100644 --- a/drivers/clk/samsung/clk-exynos5420.c +++ b/drivers/clk/samsung/clk-exynos5420.c @@ -280,6 +280,7 @@ static const struct samsung_clk_reg_dump exynos5420_set_clksrc[] = { { .offset = GATE_BUS_TOP, .value = 0xffffffff, }, { .offset = GATE_BUS_DISP1, .value = 0xffffffff, }, { .offset = GATE_IP_PERIC, .value = 0xffffffff, }, + { .offset = GATE_IP_PERIS, .value = 0xffffffff, }, }; static int exynos5420_clk_suspend(void) -- GitLab From d0a636aa44f9e7cbac2a90e191bc0ad840b10b9e Mon Sep 17 00:00:00 2001 From: Zubin Mithra Date: Thu, 27 Sep 2018 14:49:17 -0700 Subject: [PATCH 1642/2269] apparmor: Fix uninitialized value in aa_split_fqname [ Upstream commit 250f2da49cb8e582215a65c03f50e8ddf5cd119c ] Syzkaller reported a OOB-read with the stacktrace below. This occurs inside __aa_lookupn_ns as `n` is not initialized. `n` is obtained from aa_splitn_fqname. In cases where `name` is invalid, aa_splitn_fqname returns without initializing `ns_name` and `ns_len`. Fix this by always initializing `ns_name` and `ns_len`. __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x1c4/0x2b4 lib/dump_stack.c:113 print_address_description.cold.8+0x9/0x1ff mm/kasan/report.c:256 kasan_report_error mm/kasan/report.c:354 [inline] kasan_report.cold.9+0x242/0x309 mm/kasan/report.c:412 __asan_report_load1_noabort+0x14/0x20 mm/kasan/report.c:430 memcmp+0xe3/0x160 lib/string.c:861 strnstr+0x4b/0x70 lib/string.c:934 __aa_lookupn_ns+0xc1/0x570 security/apparmor/policy_ns.c:209 aa_lookupn_ns+0x88/0x1e0 security/apparmor/policy_ns.c:240 aa_fqlookupn_profile+0x1b9/0x1010 security/apparmor/policy.c:468 fqlookupn_profile+0x80/0xc0 security/apparmor/label.c:1844 aa_label_strn_parse+0xa3a/0x1230 security/apparmor/label.c:1908 aa_label_parse+0x42/0x50 security/apparmor/label.c:1943 aa_change_profile+0x513/0x3510 security/apparmor/domain.c:1362 apparmor_setprocattr+0xaa4/0x1150 security/apparmor/lsm.c:658 security_setprocattr+0x66/0xc0 security/security.c:1298 proc_pid_attr_write+0x301/0x540 fs/proc/base.c:2555 __vfs_write+0x119/0x9f0 fs/read_write.c:485 vfs_write+0x1fc/0x560 fs/read_write.c:549 ksys_write+0x101/0x260 fs/read_write.c:598 __do_sys_write fs/read_write.c:610 [inline] __se_sys_write fs/read_write.c:607 [inline] __x64_sys_write+0x73/0xb0 fs/read_write.c:607 do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe Fixes: 3b0aaf5866bf ("apparmor: add lib fn to find the "split" for fqnames") Reported-by: syzbot+61e4b490d9d2da591b50@syzkaller.appspotmail.com Signed-off-by: Zubin Mithra Reviewed-by: Kees Cook Signed-off-by: John Johansen Signed-off-by: Sasha Levin --- security/apparmor/lib.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/security/apparmor/lib.c b/security/apparmor/lib.c index 08ca26bcca770..451654372a76a 100644 --- a/security/apparmor/lib.c +++ b/security/apparmor/lib.c @@ -90,10 +90,12 @@ const char *aa_splitn_fqname(const char *fqname, size_t n, const char **ns_name, const char *end = fqname + n; const char *name = skipn_spaces(fqname, n); - if (!name) - return NULL; *ns_name = NULL; *ns_len = 0; + + if (!name) + return NULL; + if (name[0] == ':') { char *split = strnchr(&name[1], end - &name[1], ':'); *ns_name = skipn_spaces(&name[1], end - &name[1]); -- GitLab From ad2e60ff51a7bbcaf87cf66875e3a28aaefbaedd Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Wed, 3 Oct 2018 00:49:21 +0800 Subject: [PATCH 1643/2269] x86/earlyprintk: Add a force option for pciserial device [ Upstream commit d2266bbfa9e3e32e3b642965088ca461bd24a94f ] The "pciserial" earlyprintk variant helps much on many modern x86 platforms, but unfortunately there are still some platforms with PCI UART devices which have the wrong PCI class code. In that case, the current class code check does not allow for them to be used for logging. Add a sub-option "force" which overrides the class code check and thus the use of such device can be enforced. [ bp: massage formulations. ] Suggested-by: Borislav Petkov Signed-off-by: Feng Tang Signed-off-by: Borislav Petkov Cc: "H. Peter Anvin" Cc: "Stuart R . Anderson" Cc: Bjorn Helgaas Cc: David Rientjes Cc: Feng Tang Cc: Frederic Weisbecker Cc: Greg Kroah-Hartman Cc: H Peter Anvin Cc: Ingo Molnar Cc: Jiri Kosina Cc: Jonathan Corbet Cc: Kai-Heng Feng Cc: Kate Stewart Cc: Konrad Rzeszutek Wilk Cc: Peter Zijlstra Cc: Philippe Ombredanne Cc: Thomas Gleixner Cc: Thymo van Beers Cc: alan@linux.intel.com Cc: linux-doc@vger.kernel.org Link: http://lkml.kernel.org/r/20181002164921.25833-1-feng.tang@intel.com Signed-off-by: Sasha Levin --- .../admin-guide/kernel-parameters.txt | 6 +++- arch/x86/kernel/early_printk.c | 29 ++++++++++++------- 2 files changed, 24 insertions(+), 11 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 9841bad6f271f..99a08722124d6 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1011,7 +1011,7 @@ earlyprintk=serial[,0x...[,baudrate]] earlyprintk=ttySn[,baudrate] earlyprintk=dbgp[debugController#] - earlyprintk=pciserial,bus:device.function[,baudrate] + earlyprintk=pciserial[,force],bus:device.function[,baudrate] earlyprintk=xdbc[xhciController#] earlyprintk is useful when the kernel crashes before @@ -1043,6 +1043,10 @@ The sclp output can only be used on s390. + The optional "force" to "pciserial" enables use of a + PCI device even when its classcode is not of the + UART class. + edac_report= [HW,EDAC] Control how to report EDAC event Format: {"on" | "off" | "force"} on: enable EDAC to report H/W event. May be overridden diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c index 5e801c8c8ce7c..374a52fa52969 100644 --- a/arch/x86/kernel/early_printk.c +++ b/arch/x86/kernel/early_printk.c @@ -213,8 +213,9 @@ static unsigned int mem32_serial_in(unsigned long addr, int offset) * early_pci_serial_init() * * This function is invoked when the early_printk param starts with "pciserial" - * The rest of the param should be ",B:D.F,baud" where B, D & F describe the - * location of a PCI device that must be a UART device. + * The rest of the param should be "[force],B:D.F,baud", where B, D & F describe + * the location of a PCI device that must be a UART device. "force" is optional + * and overrides the use of an UART device with a wrong PCI class code. */ static __init void early_pci_serial_init(char *s) { @@ -224,17 +225,23 @@ static __init void early_pci_serial_init(char *s) u32 classcode, bar0; u16 cmdreg; char *e; + int force = 0; - - /* - * First, part the param to get the BDF values - */ if (*s == ',') ++s; if (*s == 0) return; + /* Force the use of an UART device with wrong class code */ + if (!strncmp(s, "force,", 6)) { + force = 1; + s += 6; + } + + /* + * Part the param to get the BDF values + */ bus = (u8)simple_strtoul(s, &e, 16); s = e; if (*s != ':') @@ -253,7 +260,7 @@ static __init void early_pci_serial_init(char *s) s++; /* - * Second, find the device from the BDF + * Find the device from the BDF */ cmdreg = read_pci_config(bus, slot, func, PCI_COMMAND); classcode = read_pci_config(bus, slot, func, PCI_CLASS_REVISION); @@ -264,8 +271,10 @@ static __init void early_pci_serial_init(char *s) */ if (((classcode >> 16 != PCI_CLASS_COMMUNICATION_MODEM) && (classcode >> 16 != PCI_CLASS_COMMUNICATION_SERIAL)) || - (((classcode >> 8) & 0xff) != 0x02)) /* 16550 I/F at BAR0 */ - return; + (((classcode >> 8) & 0xff) != 0x02)) /* 16550 I/F at BAR0 */ { + if (!force) + return; + } /* * Determine if it is IO or memory mapped @@ -289,7 +298,7 @@ static __init void early_pci_serial_init(char *s) } /* - * Lastly, initialize the hardware + * Initialize the hardware */ if (*s) { if (strcmp(s, "nocfg") == 0) -- GitLab From 4f9ffc2711d0d6651b67fa68ab75feec7201ecff Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Thu, 20 Sep 2018 21:44:19 -0400 Subject: [PATCH 1644/2269] platform/x86: acerhdf: Add BIOS entry for Gateway LT31 v1.3307 [ Upstream commit 684238d79ad85c5e19a71bb5818e77e329912fbc ] To fix: acerhdf: unknown (unsupported) BIOS version Gateway /LT31 /v1.3307 , please report, aborting! As can be seen in the context, the BIOS registers haven't changed in the previous versions, so the assumption is they won't have changed in this last update for this somewhat older platform either. Cc: Peter Feuerer Cc: Darren Hart Cc: Andy Shevchenko Signed-off-by: Paul Gortmaker Signed-off-by: Andy Shevchenko Reviewed-by: Peter Feuerer Signed-off-by: Sasha Levin --- drivers/platform/x86/acerhdf.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/platform/x86/acerhdf.c b/drivers/platform/x86/acerhdf.c index ea22591ee66fe..53dfe67807e39 100644 --- a/drivers/platform/x86/acerhdf.c +++ b/drivers/platform/x86/acerhdf.c @@ -233,6 +233,7 @@ static const struct bios_settings bios_tbl[] = { {"Gateway", "LT31", "v1.3201", 0x55, 0x58, {0x9e, 0x00}, 0}, {"Gateway", "LT31", "v1.3302", 0x55, 0x58, {0x9e, 0x00}, 0}, {"Gateway", "LT31", "v1.3303t", 0x55, 0x58, {0x9e, 0x00}, 0}, + {"Gateway", "LT31", "v1.3307", 0x55, 0x58, {0x9e, 0x00}, 0}, /* Packard Bell */ {"Packard Bell", "DOA150", "v0.3104", 0x55, 0x58, {0x21, 0x00}, 0}, {"Packard Bell", "DOA150", "v0.3105", 0x55, 0x58, {0x20, 0x00}, 0}, -- GitLab From 10c51fa85c9879f7d90b184f940e2e16e5e3b330 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 25 Sep 2018 12:44:59 -0700 Subject: [PATCH 1645/2269] arm64: percpu: Initialize ret in the default case [ Upstream commit b5bb425871186303e6936fa2581521bdd1964a58 ] Clang warns that if the default case is taken, ret will be uninitialized. ./arch/arm64/include/asm/percpu.h:196:2: warning: variable 'ret' is used uninitialized whenever switch default is taken [-Wsometimes-uninitialized] default: ^~~~~~~ ./arch/arm64/include/asm/percpu.h:200:9: note: uninitialized use occurs here return ret; ^~~ ./arch/arm64/include/asm/percpu.h:157:19: note: initialize the variable 'ret' to silence this warning unsigned long ret, loop; ^ = 0 This warning appears several times while building the erofs filesystem. While it's not strictly wrong, the BUILD_BUG will prevent this from becoming a true problem. Initialize ret to 0 in the default case right before the BUILD_BUG to silence all of these warnings. Reported-by: Prasad Sodagudi Signed-off-by: Nathan Chancellor Reviewed-by: Nick Desaulniers Signed-off-by: Dennis Zhou Signed-off-by: Sasha Levin --- arch/arm64/include/asm/percpu.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm64/include/asm/percpu.h b/arch/arm64/include/asm/percpu.h index 43393208229eb..d79eaa816f294 100644 --- a/arch/arm64/include/asm/percpu.h +++ b/arch/arm64/include/asm/percpu.h @@ -93,6 +93,7 @@ static inline unsigned long __percpu_##op(void *ptr, \ : [val] "Ir" (val)); \ break; \ default: \ + ret = 0; \ BUILD_BUG(); \ } \ \ @@ -122,6 +123,7 @@ static inline unsigned long __percpu_read(void *ptr, int size) ret = READ_ONCE(*(u64 *)ptr); break; default: + ret = 0; BUILD_BUG(); } @@ -191,6 +193,7 @@ static inline unsigned long __percpu_xchg(void *ptr, unsigned long val, : [val] "r" (val)); break; default: + ret = 0; BUILD_BUG(); } -- GitLab From 221609ce4ce547d2fbad0e7ecd68d66e12f358ec Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Fri, 19 Oct 2018 15:37:01 +0200 Subject: [PATCH 1646/2269] s390/vdso: add missing FORCE to build targets [ Upstream commit b44b136a3773d8a9c7853f8df716bd1483613cbb ] According to Documentation/kbuild/makefiles.txt all build targets using if_changed should use FORCE as well. Add missing FORCE to make sure vdso targets are rebuild properly when not just immediate prerequisites have changed but also when build command differs. Reviewed-by: Philipp Rudo Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky Signed-off-by: Sasha Levin --- arch/s390/kernel/vdso32/Makefile | 6 +++--- arch/s390/kernel/vdso64/Makefile | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/s390/kernel/vdso32/Makefile b/arch/s390/kernel/vdso32/Makefile index 308564b9bf686..101cadabfc89e 100644 --- a/arch/s390/kernel/vdso32/Makefile +++ b/arch/s390/kernel/vdso32/Makefile @@ -33,7 +33,7 @@ UBSAN_SANITIZE := n $(obj)/vdso32_wrapper.o : $(obj)/vdso32.so # link rule for the .so file, .lds has to be first -$(obj)/vdso32.so.dbg: $(src)/vdso32.lds $(obj-vdso32) +$(obj)/vdso32.so.dbg: $(src)/vdso32.lds $(obj-vdso32) FORCE $(call if_changed,vdso32ld) # strip rule for the .so file @@ -42,12 +42,12 @@ $(obj)/%.so: $(obj)/%.so.dbg FORCE $(call if_changed,objcopy) # assembly rules for the .S files -$(obj-vdso32): %.o: %.S +$(obj-vdso32): %.o: %.S FORCE $(call if_changed_dep,vdso32as) # actual build commands quiet_cmd_vdso32ld = VDSO32L $@ - cmd_vdso32ld = $(CC) $(c_flags) -Wl,-T $^ -o $@ + cmd_vdso32ld = $(CC) $(c_flags) -Wl,-T $(filter %.lds %.o,$^) -o $@ quiet_cmd_vdso32as = VDSO32A $@ cmd_vdso32as = $(CC) $(a_flags) -c -o $@ $< diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile index f81ae79988839..36bbafcf4a770 100644 --- a/arch/s390/kernel/vdso64/Makefile +++ b/arch/s390/kernel/vdso64/Makefile @@ -33,7 +33,7 @@ UBSAN_SANITIZE := n $(obj)/vdso64_wrapper.o : $(obj)/vdso64.so # link rule for the .so file, .lds has to be first -$(obj)/vdso64.so.dbg: $(src)/vdso64.lds $(obj-vdso64) +$(obj)/vdso64.so.dbg: $(src)/vdso64.lds $(obj-vdso64) FORCE $(call if_changed,vdso64ld) # strip rule for the .so file @@ -42,12 +42,12 @@ $(obj)/%.so: $(obj)/%.so.dbg FORCE $(call if_changed,objcopy) # assembly rules for the .S files -$(obj-vdso64): %.o: %.S +$(obj-vdso64): %.o: %.S FORCE $(call if_changed_dep,vdso64as) # actual build commands quiet_cmd_vdso64ld = VDSO64L $@ - cmd_vdso64ld = $(CC) $(c_flags) -Wl,-T $^ -o $@ + cmd_vdso64ld = $(CC) $(c_flags) -Wl,-T $(filter %.lds %.o,$^) -o $@ quiet_cmd_vdso64as = VDSO64A $@ cmd_vdso64as = $(CC) $(a_flags) -c -o $@ $< -- GitLab From a1e0ae82cfd5f05973a6459ec17ed61df5cf0f9f Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Sat, 14 Jul 2018 21:59:43 +0200 Subject: [PATCH 1647/2269] netfilter: ipset: list:set: Decrease refcount synchronously on deletion and replace [ Upstream commit 439cd39ea136d2c026805264d58a91f36b6b64ca ] Commit 45040978c899 ("netfilter: ipset: Fix set:list type crash when flush/dump set in parallel") postponed decreasing set reference counters to the RCU callback. An 'ipset del' command can terminate before the RCU grace period is elapsed, and if sets are listed before then, the reference counter shown in userspace will be wrong: # ipset create h hash:ip; ipset create l list:set; ipset add l # ipset del l h; ipset list h Name: h Type: hash:ip Revision: 4 Header: family inet hashsize 1024 maxelem 65536 Size in memory: 88 References: 1 Number of entries: 0 Members: # sleep 1; ipset list h Name: h Type: hash:ip Revision: 4 Header: family inet hashsize 1024 maxelem 65536 Size in memory: 88 References: 0 Number of entries: 0 Members: Fix this by making the reference count update synchronous again. As a result, when sets are listed, ip_set_name_byindex() might now fetch a set whose reference count is already zero. Instead of relying on the reference count to protect against concurrent set renaming, grab ip_set_ref_lock as reader and copy the name, while holding the same lock in ip_set_rename() as writer instead. Reported-by: Li Shuang Fixes: 45040978c899 ("netfilter: ipset: Fix set:list type crash when flush/dump set in parallel") Signed-off-by: Stefano Brivio Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- include/linux/netfilter/ipset/ip_set.h | 2 +- net/netfilter/ipset/ip_set_core.c | 23 +++++++++++------------ net/netfilter/ipset/ip_set_list_set.c | 17 +++++++++++------ 3 files changed, 23 insertions(+), 19 deletions(-) diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index 8e42253e5d4de..91a533bd3eb19 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -312,7 +312,7 @@ enum { extern ip_set_id_t ip_set_get_byname(struct net *net, const char *name, struct ip_set **set); extern void ip_set_put_byindex(struct net *net, ip_set_id_t index); -extern const char *ip_set_name_byindex(struct net *net, ip_set_id_t index); +extern void ip_set_name_byindex(struct net *net, ip_set_id_t index, char *name); extern ip_set_id_t ip_set_nfnl_get_byindex(struct net *net, ip_set_id_t index); extern void ip_set_nfnl_put(struct net *net, ip_set_id_t index); diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 9d2ce1459cec4..a3f1dc7cf5382 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -668,21 +668,20 @@ ip_set_put_byindex(struct net *net, ip_set_id_t index) EXPORT_SYMBOL_GPL(ip_set_put_byindex); /* Get the name of a set behind a set index. - * We assume the set is referenced, so it does exist and - * can't be destroyed. The set cannot be renamed due to - * the referencing either. - * + * Set itself is protected by RCU, but its name isn't: to protect against + * renaming, grab ip_set_ref_lock as reader (see ip_set_rename()) and copy the + * name. */ -const char * -ip_set_name_byindex(struct net *net, ip_set_id_t index) +void +ip_set_name_byindex(struct net *net, ip_set_id_t index, char *name) { - const struct ip_set *set = ip_set_rcu_get(net, index); + struct ip_set *set = ip_set_rcu_get(net, index); BUG_ON(!set); - BUG_ON(set->ref == 0); - /* Referenced, so it's safe */ - return set->name; + read_lock_bh(&ip_set_ref_lock); + strncpy(name, set->name, IPSET_MAXNAMELEN); + read_unlock_bh(&ip_set_ref_lock); } EXPORT_SYMBOL_GPL(ip_set_name_byindex); @@ -1128,7 +1127,7 @@ static int ip_set_rename(struct net *net, struct sock *ctnl, if (!set) return -ENOENT; - read_lock_bh(&ip_set_ref_lock); + write_lock_bh(&ip_set_ref_lock); if (set->ref != 0) { ret = -IPSET_ERR_REFERENCED; goto out; @@ -1145,7 +1144,7 @@ static int ip_set_rename(struct net *net, struct sock *ctnl, strncpy(set->name, name2, IPSET_MAXNAMELEN); out: - read_unlock_bh(&ip_set_ref_lock); + write_unlock_bh(&ip_set_ref_lock); return ret; } diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index 178d4eba013b4..75d52aed6fdb3 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -156,9 +156,7 @@ __list_set_del_rcu(struct rcu_head * rcu) { struct set_elem *e = container_of(rcu, struct set_elem, rcu); struct ip_set *set = e->set; - struct list_set *map = set->data; - ip_set_put_byindex(map->net, e->id); ip_set_ext_destroy(set, e); kfree(e); } @@ -166,15 +164,21 @@ __list_set_del_rcu(struct rcu_head * rcu) static inline void list_set_del(struct ip_set *set, struct set_elem *e) { + struct list_set *map = set->data; + set->elements--; list_del_rcu(&e->list); + ip_set_put_byindex(map->net, e->id); call_rcu(&e->rcu, __list_set_del_rcu); } static inline void -list_set_replace(struct set_elem *e, struct set_elem *old) +list_set_replace(struct ip_set *set, struct set_elem *e, struct set_elem *old) { + struct list_set *map = set->data; + list_replace_rcu(&old->list, &e->list); + ip_set_put_byindex(map->net, old->id); call_rcu(&old->rcu, __list_set_del_rcu); } @@ -306,7 +310,7 @@ list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext, INIT_LIST_HEAD(&e->list); list_set_init_extensions(set, ext, e); if (n) - list_set_replace(e, n); + list_set_replace(set, e, n); else if (next) list_add_tail_rcu(&e->list, &next->list); else if (prev) @@ -497,6 +501,7 @@ list_set_list(const struct ip_set *set, const struct list_set *map = set->data; struct nlattr *atd, *nested; u32 i = 0, first = cb->args[IPSET_CB_ARG0]; + char name[IPSET_MAXNAMELEN]; struct set_elem *e; int ret = 0; @@ -515,8 +520,8 @@ list_set_list(const struct ip_set *set, nested = ipset_nest_start(skb, IPSET_ATTR_DATA); if (!nested) goto nla_put_failure; - if (nla_put_string(skb, IPSET_ATTR_NAME, - ip_set_name_byindex(map->net, e->id))) + ip_set_name_byindex(map->net, e->id, name); + if (nla_put_string(skb, IPSET_ATTR_NAME, name)) goto nla_put_failure; if (ip_set_put_extensions(skb, set, e, true)) goto nla_put_failure; -- GitLab From 0d5d0b5c41f766355f2b42c47d13ea001f754c7d Mon Sep 17 00:00:00 2001 From: Eric Westbrook Date: Tue, 28 Aug 2018 15:14:42 -0600 Subject: [PATCH 1648/2269] netfilter: ipset: actually allow allowable CIDR 0 in hash:net,port,net [ Upstream commit 886503f34d63e681662057448819edb5b1057a97 ] Allow /0 as advertised for hash:net,port,net sets. For "hash:net,port,net", ipset(8) says that "either subnet is permitted to be a /0 should you wish to match port between all destinations." Make that statement true. Before: # ipset create cidrzero hash:net,port,net # ipset add cidrzero 0.0.0.0/0,12345,0.0.0.0/0 ipset v6.34: The value of the CIDR parameter of the IP address is invalid # ipset create cidrzero6 hash:net,port,net family inet6 # ipset add cidrzero6 ::/0,12345,::/0 ipset v6.34: The value of the CIDR parameter of the IP address is invalid After: # ipset create cidrzero hash:net,port,net # ipset add cidrzero 0.0.0.0/0,12345,0.0.0.0/0 # ipset test cidrzero 192.168.205.129,12345,172.16.205.129 192.168.205.129,tcp:12345,172.16.205.129 is in set cidrzero. # ipset create cidrzero6 hash:net,port,net family inet6 # ipset add cidrzero6 ::/0,12345,::/0 # ipset test cidrzero6 fe80::1,12345,ff00::1 fe80::1,tcp:12345,ff00::1 is in set cidrzero6. See also: https://bugzilla.kernel.org/show_bug.cgi?id=200897 https://github.com/ewestbrook/linux/commit/df7ff6efb0934ab6acc11f003ff1a7580d6c1d9c Signed-off-by: Eric Westbrook Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/ipset/ip_set_hash_netportnet.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/netfilter/ipset/ip_set_hash_netportnet.c b/net/netfilter/ipset/ip_set_hash_netportnet.c index 8602f2595a1a1..0e6e40c6f6520 100644 --- a/net/netfilter/ipset/ip_set_hash_netportnet.c +++ b/net/netfilter/ipset/ip_set_hash_netportnet.c @@ -213,13 +213,13 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_CIDR]) { e.cidr[0] = nla_get_u8(tb[IPSET_ATTR_CIDR]); - if (!e.cidr[0] || e.cidr[0] > HOST_MASK) + if (e.cidr[0] > HOST_MASK) return -IPSET_ERR_INVALID_CIDR; } if (tb[IPSET_ATTR_CIDR2]) { e.cidr[1] = nla_get_u8(tb[IPSET_ATTR_CIDR2]); - if (!e.cidr[1] || e.cidr[1] > HOST_MASK) + if (e.cidr[1] > HOST_MASK) return -IPSET_ERR_INVALID_CIDR; } @@ -492,13 +492,13 @@ hash_netportnet6_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_CIDR]) { e.cidr[0] = nla_get_u8(tb[IPSET_ATTR_CIDR]); - if (!e.cidr[0] || e.cidr[0] > HOST_MASK) + if (e.cidr[0] > HOST_MASK) return -IPSET_ERR_INVALID_CIDR; } if (tb[IPSET_ATTR_CIDR2]) { e.cidr[1] = nla_get_u8(tb[IPSET_ATTR_CIDR2]); - if (!e.cidr[1] || e.cidr[1] > HOST_MASK) + if (e.cidr[1] > HOST_MASK) return -IPSET_ERR_INVALID_CIDR; } -- GitLab From 28bd72a7262a2256d4e2fa35b8bba47cb43e8142 Mon Sep 17 00:00:00 2001 From: "Justin M. Forbes" Date: Wed, 31 Oct 2018 13:02:03 -0500 Subject: [PATCH 1649/2269] s390/mm: Fix ERROR: "__node_distance" undefined! [ Upstream commit a541f0ebcc08ed8bc0cc492eec9a86cb280a9f24 ] Fixes: ERROR: "__node_distance" [drivers/nvme/host/nvme-core.ko] undefined! make[1]: *** [scripts/Makefile.modpost:92: __modpost] Error 1 make: *** [Makefile:1275: modules] Error 2 + exit 1 Signed-off-by: Justin M. Forbes Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky Signed-off-by: Sasha Levin --- arch/s390/numa/numa.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/s390/numa/numa.c b/arch/s390/numa/numa.c index 5bd374491f946..6c151b42e65db 100644 --- a/arch/s390/numa/numa.c +++ b/arch/s390/numa/numa.c @@ -54,6 +54,7 @@ int __node_distance(int a, int b) { return mode->distance ? mode->distance(a, b) : 0; } +EXPORT_SYMBOL(__node_distance); int numa_debug_enabled; -- GitLab From 79f2eb5fde82c5264cb51cf52c1c24bd3cef6b47 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Fri, 19 Oct 2018 19:35:19 +0200 Subject: [PATCH 1650/2269] netfilter: ipset: Correct rcu_dereference() call in ip_set_put_comment() [ Upstream commit 17b8b74c0f8dbf9b9e3301f9ca5b65dd1c079951 ] The function is called when rcu_read_lock() is held and not when rcu_read_lock_bh() is held. Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- include/linux/netfilter/ipset/ip_set_comment.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/netfilter/ipset/ip_set_comment.h b/include/linux/netfilter/ipset/ip_set_comment.h index 8e2bab1e8e909..70877f8de7e91 100644 --- a/include/linux/netfilter/ipset/ip_set_comment.h +++ b/include/linux/netfilter/ipset/ip_set_comment.h @@ -43,11 +43,11 @@ ip_set_init_comment(struct ip_set *set, struct ip_set_comment *comment, rcu_assign_pointer(comment->c, c); } -/* Used only when dumping a set, protected by rcu_read_lock_bh() */ +/* Used only when dumping a set, protected by rcu_read_lock() */ static inline int ip_set_put_comment(struct sk_buff *skb, const struct ip_set_comment *comment) { - struct ip_set_comment_rcu *c = rcu_dereference_bh(comment->c); + struct ip_set_comment_rcu *c = rcu_dereference(comment->c); if (!c) return 0; -- GitLab From fdf4e678ea193ec9c2e099509994a118260f0d8c Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Sun, 21 Oct 2018 00:00:08 +0900 Subject: [PATCH 1651/2269] netfilter: xt_IDLETIMER: add sysfs filename checking routine [ Upstream commit 54451f60c8fa061af9051a53be9786393947367c ] When IDLETIMER rule is added, sysfs file is created under /sys/class/xt_idletimer/timers/ But some label name shouldn't be used. ".", "..", "power", "uevent", "subsystem", etc... So that sysfs filename checking routine is needed. test commands: %iptables -I INPUT -j IDLETIMER --timeout 1 --label "power" splat looks like: [95765.423132] sysfs: cannot create duplicate filename '/devices/virtual/xt_idletimer/timers/power' [95765.433418] CPU: 0 PID: 8446 Comm: iptables Not tainted 4.19.0-rc6+ #20 [95765.449755] Call Trace: [95765.449755] dump_stack+0xc9/0x16b [95765.449755] ? show_regs_print_info+0x5/0x5 [95765.449755] sysfs_warn_dup+0x74/0x90 [95765.449755] sysfs_add_file_mode_ns+0x352/0x500 [95765.449755] sysfs_create_file_ns+0x179/0x270 [95765.449755] ? sysfs_add_file_mode_ns+0x500/0x500 [95765.449755] ? idletimer_tg_checkentry+0x3e5/0xb1b [xt_IDLETIMER] [95765.449755] ? rcu_read_lock_sched_held+0x114/0x130 [95765.449755] ? __kmalloc_track_caller+0x211/0x2b0 [95765.449755] ? memcpy+0x34/0x50 [95765.449755] idletimer_tg_checkentry+0x4e2/0xb1b [xt_IDLETIMER] [ ... ] Fixes: 0902b469bd25 ("netfilter: xtables: idletimer target implementation") Signed-off-by: Taehee Yoo Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/xt_IDLETIMER.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/net/netfilter/xt_IDLETIMER.c b/net/netfilter/xt_IDLETIMER.c index 1141f08810b6f..3fef8c2e545df 100644 --- a/net/netfilter/xt_IDLETIMER.c +++ b/net/netfilter/xt_IDLETIMER.c @@ -116,6 +116,22 @@ static void idletimer_tg_expired(unsigned long data) schedule_work(&timer->work); } +static int idletimer_check_sysfs_name(const char *name, unsigned int size) +{ + int ret; + + ret = xt_check_proc_name(name, size); + if (ret < 0) + return ret; + + if (!strcmp(name, "power") || + !strcmp(name, "subsystem") || + !strcmp(name, "uevent")) + return -EINVAL; + + return 0; +} + static int idletimer_tg_create(struct idletimer_tg_info *info) { int ret; @@ -126,6 +142,10 @@ static int idletimer_tg_create(struct idletimer_tg_info *info) goto out; } + ret = idletimer_check_sysfs_name(info->label, sizeof(info->label)); + if (ret < 0) + goto out_free_timer; + sysfs_attr_init(&info->timer->attr.attr); info->timer->attr.attr.name = kstrdup(info->label, GFP_KERNEL); if (!info->timer->attr.attr.name) { -- GitLab From 8cab58117e72caa2234323ce399f77037fe4dea2 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Fri, 2 Nov 2018 19:04:09 +0100 Subject: [PATCH 1652/2269] s390/qeth: fix HiperSockets sniffer [ Upstream commit bd74a7f9cc033cf4d405788f80292268987dc0c5 ] Sniffing mode for L3 HiperSockets requires that no IP addresses are registered with the HW. The preferred way to achieve this is for userspace to delete all the IPs on the interface. But qeth is expected to also tolerate a configuration where that is not the case, by skipping the IP registration when in sniffer mode. Since commit 5f78e29ceebf ("qeth: optimize IP handling in rx_mode callback") reworked the IP registration logic in the L3 subdriver, this no longer works. When the qeth device is set online, qeth_l3_recover_ip() now unconditionally registers all unicast addresses from our internal IP table. While we could fix this particular problem by skipping qeth_l3_recover_ip() on a sniffer device, the more future-proof change is to skip the IP address registration at the lowest level. This way we a) catch any future code path that attempts to register an IP address without considering the sniffer scenario, and b) continue to build up our internal IP table, so that if sniffer mode is switched off later we can operate just like normal. Fixes: 5f78e29ceebf ("qeth: optimize IP handling in rx_mode callback") Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/s390/net/qeth_l3_main.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index cd73172bff477..a19f2dc69e8aa 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -358,9 +358,6 @@ static void qeth_l3_clear_ip_htable(struct qeth_card *card, int recover) QETH_CARD_TEXT(card, 4, "clearip"); - if (recover && card->options.sniffer) - return; - spin_lock_bh(&card->ip_lock); hash_for_each_safe(card->ip_htable, i, tmp, addr, hnode) { @@ -818,6 +815,8 @@ static int qeth_l3_register_addr_entry(struct qeth_card *card, int rc = 0; int cnt = 3; + if (card->options.sniffer) + return 0; if (addr->proto == QETH_PROT_IPV4) { QETH_CARD_TEXT(card, 2, "setaddr4"); @@ -853,6 +852,9 @@ static int qeth_l3_deregister_addr_entry(struct qeth_card *card, { int rc = 0; + if (card->options.sniffer) + return 0; + if (addr->proto == QETH_PROT_IPV4) { QETH_CARD_TEXT(card, 2, "deladdr4"); QETH_CARD_HEX(card, 3, &addr->u.a4.addr, sizeof(int)); -- GitLab From 21eb778e7dd55c4a690b662d988798a2eb988c1d Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Sun, 28 Oct 2018 18:16:51 +0100 Subject: [PATCH 1653/2269] hwmon: (ibmpowernv) Remove bogus __init annotations [ Upstream commit e3e61f01d755188cb6c2dcf5a244b9c0937c258e ] If gcc decides not to inline make_sensor_label(): WARNING: vmlinux.o(.text+0x4df549c): Section mismatch in reference from the function .create_device_attrs() to the function .init.text:.make_sensor_label() The function .create_device_attrs() references the function __init .make_sensor_label(). This is often because .create_device_attrs lacks a __init annotation or the annotation of .make_sensor_label is wrong. As .probe() can be called after freeing of __init memory, all __init annotiations in the driver are bogus, and should be removed. Signed-off-by: Geert Uytterhoeven Signed-off-by: Guenter Roeck Signed-off-by: Sasha Levin --- drivers/hwmon/ibmpowernv.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/hwmon/ibmpowernv.c b/drivers/hwmon/ibmpowernv.c index 5ccdd0b526506..b38f4951c94e4 100644 --- a/drivers/hwmon/ibmpowernv.c +++ b/drivers/hwmon/ibmpowernv.c @@ -126,7 +126,7 @@ static ssize_t show_label(struct device *dev, struct device_attribute *devattr, return sprintf(buf, "%s\n", sdata->label); } -static int __init get_logical_cpu(int hwcpu) +static int get_logical_cpu(int hwcpu) { int cpu; @@ -137,9 +137,8 @@ static int __init get_logical_cpu(int hwcpu) return -ENOENT; } -static void __init make_sensor_label(struct device_node *np, - struct sensor_data *sdata, - const char *label) +static void make_sensor_label(struct device_node *np, + struct sensor_data *sdata, const char *label) { u32 id; size_t n; -- GitLab From c6b1ef1110ce90b66dbd957b706d58fd1ce4f4b4 Mon Sep 17 00:00:00 2001 From: Inki Dae Date: Fri, 5 Oct 2018 11:50:20 +0900 Subject: [PATCH 1654/2269] Revert "drm/exynos/decon5433: implement frame counter" [ Upstream commit 6ca469e22a30992b4478d2ab88737c70667c1e00 ] This reverts commit 0586feba322e1de05075700eb4b835c8b683e62b This patch makes it to need get_vblank_counter callback in crtc to get frame counter from decon driver. However, drm_dev->max_vblank_count is a member unique to vendor's DRM driver but in case of ARM DRM, some CRTC devices don't provide the frame counter value. As a result, this patch made extension and clone mode not working. Instead of this patch, we may need separated max_vblank_count which belongs to each CRTC device, or need to implement frame counter emulation for them who don't support HW frame counter. Signed-off-by: Inki Dae Signed-off-by: Sasha Levin --- drivers/gpu/drm/exynos/exynos5433_drm_decon.c | 9 --------- drivers/gpu/drm/exynos/exynos_drm_crtc.c | 11 ----------- drivers/gpu/drm/exynos/exynos_drm_drv.h | 1 - 3 files changed, 21 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c index f905c214fdd0e..5a5b3535411f6 100644 --- a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c +++ b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c @@ -160,13 +160,6 @@ static u32 decon_get_frame_count(struct decon_context *ctx, bool end) return frm; } -static u32 decon_get_vblank_counter(struct exynos_drm_crtc *crtc) -{ - struct decon_context *ctx = crtc->ctx; - - return decon_get_frame_count(ctx, false); -} - static void decon_setup_trigger(struct decon_context *ctx) { if (!ctx->crtc->i80_mode && !(ctx->out_type & I80_HW_TRG)) @@ -532,7 +525,6 @@ static const struct exynos_drm_crtc_ops decon_crtc_ops = { .disable = decon_disable, .enable_vblank = decon_enable_vblank, .disable_vblank = decon_disable_vblank, - .get_vblank_counter = decon_get_vblank_counter, .atomic_begin = decon_atomic_begin, .update_plane = decon_update_plane, .disable_plane = decon_disable_plane, @@ -550,7 +542,6 @@ static int decon_bind(struct device *dev, struct device *master, void *data) int ret; ctx->drm_dev = drm_dev; - drm_dev->max_vblank_count = 0xffffffff; for (win = ctx->first_win; win < WINDOWS_NR; win++) { int tmp = (win == ctx->first_win) ? 0 : win; diff --git a/drivers/gpu/drm/exynos/exynos_drm_crtc.c b/drivers/gpu/drm/exynos/exynos_drm_crtc.c index 6ce0821590dfc..4787560bf93e7 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_crtc.c +++ b/drivers/gpu/drm/exynos/exynos_drm_crtc.c @@ -147,16 +147,6 @@ static void exynos_drm_crtc_disable_vblank(struct drm_crtc *crtc) exynos_crtc->ops->disable_vblank(exynos_crtc); } -static u32 exynos_drm_crtc_get_vblank_counter(struct drm_crtc *crtc) -{ - struct exynos_drm_crtc *exynos_crtc = to_exynos_crtc(crtc); - - if (exynos_crtc->ops->get_vblank_counter) - return exynos_crtc->ops->get_vblank_counter(exynos_crtc); - - return 0; -} - static const struct drm_crtc_funcs exynos_crtc_funcs = { .set_config = drm_atomic_helper_set_config, .page_flip = drm_atomic_helper_page_flip, @@ -166,7 +156,6 @@ static const struct drm_crtc_funcs exynos_crtc_funcs = { .atomic_destroy_state = drm_atomic_helper_crtc_destroy_state, .enable_vblank = exynos_drm_crtc_enable_vblank, .disable_vblank = exynos_drm_crtc_disable_vblank, - .get_vblank_counter = exynos_drm_crtc_get_vblank_counter, }; struct exynos_drm_crtc *exynos_drm_crtc_create(struct drm_device *drm_dev, diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.h b/drivers/gpu/drm/exynos/exynos_drm_drv.h index f8bae4cb48236..d228b5148dbc8 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_drv.h +++ b/drivers/gpu/drm/exynos/exynos_drm_drv.h @@ -133,7 +133,6 @@ struct exynos_drm_crtc_ops { void (*disable)(struct exynos_drm_crtc *crtc); int (*enable_vblank)(struct exynos_drm_crtc *crtc); void (*disable_vblank)(struct exynos_drm_crtc *crtc); - u32 (*get_vblank_counter)(struct exynos_drm_crtc *crtc); enum drm_mode_status (*mode_valid)(struct exynos_drm_crtc *crtc, const struct drm_display_mode *mode); int (*atomic_check)(struct exynos_drm_crtc *crtc, -- GitLab From 35b3e230a2b5dc9068a8779f1b77754b9c3edfde Mon Sep 17 00:00:00 2001 From: Ricardo Ribalda Delgado Date: Thu, 1 Nov 2018 14:15:49 +0100 Subject: [PATCH 1655/2269] clk: fixed-factor: fix of_node_get-put imbalance [ Upstream commit f98e8a572bddbf27032114127d2fcc78fa5e6a9d ] When the fixed factor clock is created by devicetree, of_clk_add_provider is called. Add a call to of_clk_del_provider in the remove function to balance it out. Reported-by: Alan Tull Fixes: 971451b3b15d ("clk: fixed-factor: Convert into a module platform driver") Signed-off-by: Ricardo Ribalda Delgado Signed-off-by: Stephen Boyd Signed-off-by: Sasha Levin --- drivers/clk/clk-fixed-factor.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/clk/clk-fixed-factor.c b/drivers/clk/clk-fixed-factor.c index 20724abd38bd1..7df6b5b1e7ee0 100644 --- a/drivers/clk/clk-fixed-factor.c +++ b/drivers/clk/clk-fixed-factor.c @@ -210,6 +210,7 @@ static int of_fixed_factor_clk_remove(struct platform_device *pdev) { struct clk *clk = platform_get_drvdata(pdev); + of_clk_del_provider(pdev->dev.of_node); clk_unregister_fixed_factor(clk); return 0; -- GitLab From 615a30e8dbc9c60bb35f3687072fb077160ea0b7 Mon Sep 17 00:00:00 2001 From: Jeremy Linton Date: Mon, 5 Nov 2018 18:14:41 -0600 Subject: [PATCH 1656/2269] lib/raid6: Fix arm64 test build [ Upstream commit 313a06e636808387822af24c507cba92703568b1 ] The lib/raid6/test fails to build the neon objects on arm64 because the correct machine type is 'aarch64'. Once this is correctly enabled, the neon recovery objects need to be added to the build. Reviewed-by: Ard Biesheuvel Signed-off-by: Jeremy Linton Signed-off-by: Catalin Marinas Signed-off-by: Sasha Levin --- lib/raid6/test/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/raid6/test/Makefile b/lib/raid6/test/Makefile index be1010bdc4358..565a77220fae1 100644 --- a/lib/raid6/test/Makefile +++ b/lib/raid6/test/Makefile @@ -27,7 +27,7 @@ ifeq ($(ARCH),arm) CFLAGS += -I../../../arch/arm/include -mfpu=neon HAS_NEON = yes endif -ifeq ($(ARCH),arm64) +ifeq ($(ARCH),aarch64) CFLAGS += -I../../../arch/arm64/include HAS_NEON = yes endif @@ -41,7 +41,7 @@ ifeq ($(IS_X86),yes) gcc -c -x assembler - >&/dev/null && \ rm ./-.o && echo -DCONFIG_AS_AVX512=1) else ifeq ($(HAS_NEON),yes) - OBJS += neon.o neon1.o neon2.o neon4.o neon8.o + OBJS += neon.o neon1.o neon2.o neon4.o neon8.o recov_neon.o recov_neon_inner.o CFLAGS += -DCONFIG_KERNEL_MODE_NEON=1 else HAS_ALTIVEC := $(shell printf '\#include \nvector int a;\n' |\ -- GitLab From 850e4ca7868046e13b66cb0d656398960efc4162 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Mon, 29 Oct 2018 08:11:33 +0000 Subject: [PATCH 1657/2269] s390/perf: Change CPUM_CF return code in event init function [ Upstream commit 0bb2ae1b26e1fb7543ec7474cdd374ac4b88c4da ] The function perf_init_event() creates a new event and assignes it to a PMU. This a done in a loop over all existing PMUs. For each listed PMU the event init function is called and if this function does return any other error than -ENOENT, the loop is terminated the creation of the event fails. If the event is invalid, return -ENOENT to try other PMUs. Signed-off-by: Thomas Richter Reviewed-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky Signed-off-by: Sasha Levin --- arch/s390/kernel/perf_cpum_cf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index 746d034233336..61e91fee8467c 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -376,7 +376,7 @@ static int __hw_perf_event_init(struct perf_event *event) return -ENOENT; if (ev > PERF_CPUM_CF_MAX_CTR) - return -EINVAL; + return -ENOENT; /* Obtain the counter set to which the specified counter belongs */ set = get_counter_set(ev); -- GitLab From ef8d2a5d990af5d658aafd37b41e29782b690bff Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Tue, 23 Oct 2018 14:37:31 +0100 Subject: [PATCH 1658/2269] sched/core: Take the hotplug lock in sched_init_smp() [ Upstream commit 40fa3780bac2b654edf23f6b13f4e2dd550aea10 ] When running on linux-next (8c60c36d0b8c ("Add linux-next specific files for 20181019")) + CONFIG_PROVE_LOCKING=y on a big.LITTLE system (e.g. Juno or HiKey960), we get the following report: [ 0.748225] Call trace: [ 0.750685] lockdep_assert_cpus_held+0x30/0x40 [ 0.755236] static_key_enable_cpuslocked+0x20/0xc8 [ 0.760137] build_sched_domains+0x1034/0x1108 [ 0.764601] sched_init_domains+0x68/0x90 [ 0.768628] sched_init_smp+0x30/0x80 [ 0.772309] kernel_init_freeable+0x278/0x51c [ 0.776685] kernel_init+0x10/0x108 [ 0.780190] ret_from_fork+0x10/0x18 The static_key in question is 'sched_asym_cpucapacity' introduced by commit: df054e8445a4 ("sched/topology: Add static_key for asymmetric CPU capacity optimizations") In this particular case, we enable it because smp_prepare_cpus() will end up fetching the capacity-dmips-mhz entry from the devicetree, so we already have some asymmetry detected when entering sched_init_smp(). This didn't get detected in tip/sched/core because we were missing: commit cb538267ea1e ("jump_label/lockdep: Assert we hold the hotplug lock for _cpuslocked() operations") Calls to build_sched_domains() post sched_init_smp() will hold the hotplug lock, it just so happens that this very first call is a special case. As stated by a comment in sched_init_smp(), "There's no userspace yet to cause hotplug operations" so this is a harmless warning. However, to both respect the semantics of underlying callees and make lockdep happy, take the hotplug lock in sched_init_smp(). This also satisfies the comment atop sched_init_domains() that says "Callers must hold the hotplug lock". Reported-by: Sudeep Holla Tested-by: Sudeep Holla Signed-off-by: Valentin Schneider Signed-off-by: Peter Zijlstra (Intel) Cc: Dietmar.Eggemann@arm.com Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: morten.rasmussen@arm.com Cc: quentin.perret@arm.com Link: http://lkml.kernel.org/r/1540301851-3048-1-git-send-email-valentin.schneider@arm.com Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin --- kernel/sched/core.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 4e89ed8a0fb21..3bc664662081c 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -5733,14 +5733,17 @@ void __init sched_init_smp(void) /* * There's no userspace yet to cause hotplug operations; hence all the * CPU masks are stable and all blatant races in the below code cannot - * happen. + * happen. The hotplug lock is nevertheless taken to satisfy lockdep, + * but there won't be any contention on it. */ + cpus_read_lock(); mutex_lock(&sched_domains_mutex); sched_init_domains(cpu_active_mask); cpumask_andnot(non_isolated_cpus, cpu_possible_mask, cpu_isolated_map); if (cpumask_empty(non_isolated_cpus)) cpumask_set_cpu(smp_processor_id(), non_isolated_cpus); mutex_unlock(&sched_domains_mutex); + cpus_read_unlock(); /* Move init over to a non-isolated CPU */ if (set_cpus_allowed_ptr(current, non_isolated_cpus) < 0) -- GitLab From fc88b3abe23fe3d4cb2e2b1f3f7036e7f68889fe Mon Sep 17 00:00:00 2001 From: Gustavo Romero Date: Thu, 1 Nov 2018 20:13:21 -0400 Subject: [PATCH 1659/2269] perf tools: Fix undefined symbol scnprintf in libperf-jvmti.so [ Upstream commit 6ac2226229d931153331a93d90655a3de05b9290 ] Currently jvmti agent can not be used because function scnprintf is not present in the agent libperf-jvmti.so. As a result the JVM when using such agent to record JITed code profiling information will fail on looking up scnprintf: java: symbol lookup error: lib/libperf-jvmti.so: undefined symbol: scnprintf This commit fixes that by reverting to the use of snprintf, that can be looked up, instead of scnprintf, adding a proper check for the returned value in order to print a better error message when the jitdump file pathname is too long. Checking the returned value also helps to comply with some recent gcc versions, like gcc8, which will fail due to truncated writing checks related to the -Werror=format-truncation= flag. Signed-off-by: Gustavo Romero Acked-by: Jiri Olsa LPU-Reference: 1541117601-18937-2-git-send-email-gromero@linux.vnet.ibm.com Link: https://lkml.kernel.org/n/tip-mvpxxxy7wnzaj74cq75muw3f@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/jvmti/jvmti_agent.c | 49 ++++++++++++++++++++++++++-------- 1 file changed, 38 insertions(+), 11 deletions(-) diff --git a/tools/perf/jvmti/jvmti_agent.c b/tools/perf/jvmti/jvmti_agent.c index c1d20d9514341..4ad9948fe594b 100644 --- a/tools/perf/jvmti/jvmti_agent.c +++ b/tools/perf/jvmti/jvmti_agent.c @@ -125,7 +125,7 @@ perf_get_timestamp(void) } static int -debug_cache_init(void) +create_jit_cache_dir(void) { char str[32]; char *base, *p; @@ -144,8 +144,13 @@ debug_cache_init(void) strftime(str, sizeof(str), JIT_LANG"-jit-%Y%m%d", &tm); - snprintf(jit_path, PATH_MAX - 1, "%s/.debug/", base); - + ret = snprintf(jit_path, PATH_MAX, "%s/.debug/", base); + if (ret >= PATH_MAX) { + warnx("jvmti: cannot generate jit cache dir because %s/.debug/" + " is too long, please check the cwd, JITDUMPDIR, and" + " HOME variables", base); + return -1; + } ret = mkdir(jit_path, 0755); if (ret == -1) { if (errno != EEXIST) { @@ -154,20 +159,32 @@ debug_cache_init(void) } } - snprintf(jit_path, PATH_MAX - 1, "%s/.debug/jit", base); + ret = snprintf(jit_path, PATH_MAX, "%s/.debug/jit", base); + if (ret >= PATH_MAX) { + warnx("jvmti: cannot generate jit cache dir because" + " %s/.debug/jit is too long, please check the cwd," + " JITDUMPDIR, and HOME variables", base); + return -1; + } ret = mkdir(jit_path, 0755); if (ret == -1) { if (errno != EEXIST) { - warn("cannot create jit cache dir %s", jit_path); + warn("jvmti: cannot create jit cache dir %s", jit_path); return -1; } } - snprintf(jit_path, PATH_MAX - 1, "%s/.debug/jit/%s.XXXXXXXX", base, str); - + ret = snprintf(jit_path, PATH_MAX, "%s/.debug/jit/%s.XXXXXXXX", base, str); + if (ret >= PATH_MAX) { + warnx("jvmti: cannot generate jit cache dir because" + " %s/.debug/jit/%s.XXXXXXXX is too long, please check" + " the cwd, JITDUMPDIR, and HOME variables", + base, str); + return -1; + } p = mkdtemp(jit_path); if (p != jit_path) { - warn("cannot create jit cache dir %s", jit_path); + warn("jvmti: cannot create jit cache dir %s", jit_path); return -1; } @@ -228,7 +245,7 @@ void *jvmti_open(void) { char dump_path[PATH_MAX]; struct jitheader header; - int fd; + int fd, ret; FILE *fp; init_arch_timestamp(); @@ -245,12 +262,22 @@ void *jvmti_open(void) memset(&header, 0, sizeof(header)); - debug_cache_init(); + /* + * jitdump file dir + */ + if (create_jit_cache_dir() < 0) + return NULL; /* * jitdump file name */ - scnprintf(dump_path, PATH_MAX, "%s/jit-%i.dump", jit_path, getpid()); + ret = snprintf(dump_path, PATH_MAX, "%s/jit-%i.dump", jit_path, getpid()); + if (ret >= PATH_MAX) { + warnx("jvmti: cannot generate jitdump file full path because" + " %s/jit-%i.dump is too long, please check the cwd," + " JITDUMPDIR, and HOME variables", jit_path, getpid()); + return NULL; + } fd = open(dump_path, O_CREAT|O_TRUNC|O_RDWR, 0666); if (fd == -1) -- GitLab From eddf9365687f829beeca47a3d0cfcdf6252e46a1 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Mon, 29 Oct 2018 10:52:42 -0700 Subject: [PATCH 1660/2269] i40e: restore NETIF_F_GSO_IPXIP[46] to netdev features [ Upstream commit ba766b8b99c30ad3c55ed8cf224d1185ecff1476 ] Since commit bacd75cfac8a ("i40e/i40evf: Add capability exchange for outer checksum", 2017-04-06) the i40e driver has not reported support for IP-in-IP offloads. This likely occurred due to a bad rebase, as the commit extracts hw_enc_features into its own variable. As part of this change, it dropped the NETIF_F_FSO_IPXIP flags from the netdev->hw_enc_features. This was unfortunately not caught during code review. Fix this by adding back the missing feature flags. For reference, NETIF_F_GSO_IPXIP4 was added in commit 7e13318daa4a ("net: define gso types for IPx over IPv4 and IPv6", 2016-05-20), replacing NETIF_F_GSO_IPIP and NETIF_F_GSO_SIT. NETIF_F_GSO_IPXIP6 was added in commit bf2d1df39502 ("intel: Add support for IPv6 IP-in-IP offload", 2016-05-20). Signed-off-by: Jacob Keller Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/i40e/i40e_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 04dbf64fb1cb8..176c99b8251d8 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -9688,6 +9688,8 @@ static int i40e_config_netdev(struct i40e_vsi *vsi) NETIF_F_GSO_GRE | NETIF_F_GSO_GRE_CSUM | NETIF_F_GSO_PARTIAL | + NETIF_F_GSO_IPXIP4 | + NETIF_F_GSO_IPXIP6 | NETIF_F_GSO_UDP_TUNNEL | NETIF_F_GSO_UDP_TUNNEL_CSUM | NETIF_F_SCTP_CRC | -- GitLab From 484ef227384f88c0b52e9dd43529eccf7c4025da Mon Sep 17 00:00:00 2001 From: Denis Bolotin Date: Thu, 8 Nov 2018 16:46:08 +0200 Subject: [PATCH 1661/2269] qed: Fix memory/entry leak in qed_init_sp_request() [ Upstream commit 39477551df940ddb1339203817de04f5caaacf7a ] Free the allocated SPQ entry or return the acquired SPQ entry to the free list in error flows. Signed-off-by: Denis Bolotin Signed-off-by: Michal Kalderon Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- .../net/ethernet/qlogic/qed/qed_sp_commands.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c b/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c index d7c5965328be8..b26578464469c 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c +++ b/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c @@ -80,7 +80,7 @@ int qed_sp_init_request(struct qed_hwfn *p_hwfn, case QED_SPQ_MODE_BLOCK: if (!p_data->p_comp_data) - return -EINVAL; + goto err; p_ent->comp_cb.cookie = p_data->p_comp_data->cookie; break; @@ -95,7 +95,7 @@ int qed_sp_init_request(struct qed_hwfn *p_hwfn, default: DP_NOTICE(p_hwfn, "Unknown SPQE completion mode %d\n", p_ent->comp_mode); - return -EINVAL; + goto err; } DP_VERBOSE(p_hwfn, QED_MSG_SPQ, @@ -109,6 +109,18 @@ int qed_sp_init_request(struct qed_hwfn *p_hwfn, memset(&p_ent->ramrod, 0, sizeof(p_ent->ramrod)); return 0; + +err: + /* qed_spq_get_entry() can either get an entry from the free_pool, + * or, if no entries are left, allocate a new entry and add it to + * the unlimited_pending list. + */ + if (p_ent->queue == &p_hwfn->p_spq->unlimited_pending) + kfree(p_ent); + else + qed_spq_return_entry(p_hwfn, p_ent); + + return -EINVAL; } static enum tunnel_clss qed_tunn_clss_to_fw_clss(u8 type) -- GitLab From 082810cb7f21c52dbe4c15f247e87ad22b88bbb8 Mon Sep 17 00:00:00 2001 From: Denis Bolotin Date: Thu, 8 Nov 2018 16:46:09 +0200 Subject: [PATCH 1662/2269] qed: Fix blocking/unlimited SPQ entries leak [ Upstream commit 2632f22ebd08da249c2017962a199a0cfb2324bf ] When there are no SPQ entries left in the free_pool, new entries are allocated and are added to the unlimited list. When an entry in the pool is available, the content is copied from the original entry, and the new entry is sent to the device. qed_spq_post() is not aware of that, so the additional entry is stored in the original entry as p_post_ent, which can later be returned to the pool. Signed-off-by: Denis Bolotin Signed-off-by: Michal Kalderon Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/qlogic/qed/qed_sp.h | 3 ++ drivers/net/ethernet/qlogic/qed/qed_spq.c | 57 ++++++++++++----------- 2 files changed, 33 insertions(+), 27 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_sp.h b/drivers/net/ethernet/qlogic/qed/qed_sp.h index ab4ad8a1e2a5e..01a213d4ee9c5 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sp.h +++ b/drivers/net/ethernet/qlogic/qed/qed_sp.h @@ -167,6 +167,9 @@ struct qed_spq_entry { enum spq_mode comp_mode; struct qed_spq_comp_cb comp_cb; struct qed_spq_comp_done comp_done; /* SPQ_MODE_EBLOCK */ + + /* Posted entry for unlimited list entry in EBLOCK mode */ + struct qed_spq_entry *post_ent; }; struct qed_eq { diff --git a/drivers/net/ethernet/qlogic/qed/qed_spq.c b/drivers/net/ethernet/qlogic/qed/qed_spq.c index be48d9abd0010..0313e9c469790 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_spq.c +++ b/drivers/net/ethernet/qlogic/qed/qed_spq.c @@ -687,6 +687,8 @@ static int qed_spq_add_entry(struct qed_hwfn *p_hwfn, /* EBLOCK responsible to free the allocated p_ent */ if (p_ent->comp_mode != QED_SPQ_MODE_EBLOCK) kfree(p_ent); + else + p_ent->post_ent = p_en2; p_ent = p_en2; } @@ -770,6 +772,25 @@ static int qed_spq_pend_post(struct qed_hwfn *p_hwfn) SPQ_HIGH_PRI_RESERVE_DEFAULT); } +/* Avoid overriding of SPQ entries when getting out-of-order completions, by + * marking the completions in a bitmap and increasing the chain consumer only + * for the first successive completed entries. + */ +static void qed_spq_comp_bmap_update(struct qed_hwfn *p_hwfn, __le16 echo) +{ + u16 pos = le16_to_cpu(echo) % SPQ_RING_SIZE; + struct qed_spq *p_spq = p_hwfn->p_spq; + + __set_bit(pos, p_spq->p_comp_bitmap); + while (test_bit(p_spq->comp_bitmap_idx, + p_spq->p_comp_bitmap)) { + __clear_bit(p_spq->comp_bitmap_idx, + p_spq->p_comp_bitmap); + p_spq->comp_bitmap_idx++; + qed_chain_return_produced(&p_spq->chain); + } +} + int qed_spq_post(struct qed_hwfn *p_hwfn, struct qed_spq_entry *p_ent, u8 *fw_return_code) { @@ -821,11 +842,12 @@ int qed_spq_post(struct qed_hwfn *p_hwfn, p_ent->queue == &p_spq->unlimited_pending); if (p_ent->queue == &p_spq->unlimited_pending) { - /* This is an allocated p_ent which does not need to - * return to pool. - */ + struct qed_spq_entry *p_post_ent = p_ent->post_ent; + kfree(p_ent); - return rc; + + /* Return the entry which was actually posted */ + p_ent = p_post_ent; } if (rc) @@ -839,7 +861,7 @@ int qed_spq_post(struct qed_hwfn *p_hwfn, spq_post_fail2: spin_lock_bh(&p_spq->lock); list_del(&p_ent->list); - qed_chain_return_produced(&p_spq->chain); + qed_spq_comp_bmap_update(p_hwfn, p_ent->elem.hdr.echo); spq_post_fail: /* return to the free pool */ @@ -871,25 +893,8 @@ int qed_spq_completion(struct qed_hwfn *p_hwfn, spin_lock_bh(&p_spq->lock); list_for_each_entry_safe(p_ent, tmp, &p_spq->completion_pending, list) { if (p_ent->elem.hdr.echo == echo) { - u16 pos = le16_to_cpu(echo) % SPQ_RING_SIZE; - list_del(&p_ent->list); - - /* Avoid overriding of SPQ entries when getting - * out-of-order completions, by marking the completions - * in a bitmap and increasing the chain consumer only - * for the first successive completed entries. - */ - __set_bit(pos, p_spq->p_comp_bitmap); - - while (test_bit(p_spq->comp_bitmap_idx, - p_spq->p_comp_bitmap)) { - __clear_bit(p_spq->comp_bitmap_idx, - p_spq->p_comp_bitmap); - p_spq->comp_bitmap_idx++; - qed_chain_return_produced(&p_spq->chain); - } - + qed_spq_comp_bmap_update(p_hwfn, echo); p_spq->comp_count++; found = p_ent; break; @@ -928,11 +933,9 @@ int qed_spq_completion(struct qed_hwfn *p_hwfn, QED_MSG_SPQ, "Got a completion without a callback function\n"); - if ((found->comp_mode != QED_SPQ_MODE_EBLOCK) || - (found->queue == &p_spq->unlimited_pending)) + if (found->comp_mode != QED_SPQ_MODE_EBLOCK) /* EBLOCK is responsible for returning its own entry into the - * free list, unless it originally added the entry into the - * unlimited pending list. + * free list. */ qed_spq_return_entry(p_hwfn, found); -- GitLab From fe2c4df2692de3c724ac8720d4479cbe16d89814 Mon Sep 17 00:00:00 2001 From: Sagiv Ozeri Date: Thu, 8 Nov 2018 16:46:11 +0200 Subject: [PATCH 1663/2269] qed: Fix potential memory corruption [ Upstream commit fa5c448d98f0df660bfcad3dd5facc027ef84cd3 ] A stuck ramrod should be deleted from the completion_pending list, otherwise it will be added again in the future and corrupt the list. Return error value to inform that ramrod is stuck and should be deleted. Signed-off-by: Sagiv Ozeri Signed-off-by: Denis Bolotin Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/qlogic/qed/qed_spq.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_spq.c b/drivers/net/ethernet/qlogic/qed/qed_spq.c index 0313e9c469790..467755b6dd0be 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_spq.c +++ b/drivers/net/ethernet/qlogic/qed/qed_spq.c @@ -144,6 +144,7 @@ static int qed_spq_block(struct qed_hwfn *p_hwfn, DP_INFO(p_hwfn, "Ramrod is stuck, requesting MCP drain\n"); rc = qed_mcp_drain(p_hwfn, p_ptt); + qed_ptt_release(p_hwfn, p_ptt); if (rc) { DP_NOTICE(p_hwfn, "MCP drain failed\n"); goto err; @@ -152,18 +153,15 @@ static int qed_spq_block(struct qed_hwfn *p_hwfn, /* Retry after drain */ rc = __qed_spq_block(p_hwfn, p_ent, p_fw_ret, true); if (!rc) - goto out; + return 0; comp_done = (struct qed_spq_comp_done *)p_ent->comp_cb.cookie; - if (comp_done->done == 1) + if (comp_done->done == 1) { if (p_fw_ret) *p_fw_ret = comp_done->fw_return_code; -out: - qed_ptt_release(p_hwfn, p_ptt); - return 0; - + return 0; + } err: - qed_ptt_release(p_hwfn, p_ptt); DP_NOTICE(p_hwfn, "Ramrod is stuck [CID %08x cmd %02x protocol %02x echo %04x]\n", le32_to_cpu(p_ent->elem.hdr.cid), -- GitLab From 47ff76ee9c9c938531ec97b7b4a74804a10f04fc Mon Sep 17 00:00:00 2001 From: Thor Thayer Date: Thu, 8 Nov 2018 11:42:14 -0600 Subject: [PATCH 1664/2269] net: stmmac: Fix RX packet size > 8191 [ Upstream commit 8137b6ef0ce469154e5cf19f8e7fe04d9a72ac5e ] Ping problems with packets > 8191 as shown: PING 192.168.1.99 (192.168.1.99) 8150(8178) bytes of data. 8158 bytes from 192.168.1.99: icmp_seq=1 ttl=64 time=0.669 ms wrong data byte 8144 should be 0xd0 but was 0x0 16 10 11 12 13 14 15 16 17 18 19 1a 1b 1c 1d 1e 1f 20 21 22 23 24 25 26 27 28 29 2a 2b 2c 2d 2e 2f %< ---------------snip-------------------------------------- 8112 b0 b1 b2 b3 b4 b5 b6 b7 b8 b9 ba bb bc bd be bf c0 c1 c2 c3 c4 c5 c6 c7 c8 c9 ca cb cc cd ce cf 8144 0 0 0 0 d0 d1 ^^^^^^^ Notice the 4 bytes of 0 before the expected byte of d0. Databook notes that the RX buffer must be a multiple of 4/8/16 bytes [1]. Update the DMA Buffer size define to 8188 instead of 8192. Remove the -1 from the RX buffer size allocations and use the new DMA Buffer size directly. [1] Synopsys DesignWare Cores Ethernet MAC Universal v3.70a [section 8.4.2 - Table 8-24] Tested on SoCFPGA Stratix10 with ping sweep from 100 to 8300 byte packets. Fixes: 286a83721720 ("stmmac: add CHAINED descriptor mode support (V4)") Suggested-by: Jose Abreu Signed-off-by: Thor Thayer Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/stmicro/stmmac/common.h | 3 ++- drivers/net/ethernet/stmicro/stmmac/descs_com.h | 2 +- drivers/net/ethernet/stmicro/stmmac/enh_desc.c | 2 +- drivers/net/ethernet/stmicro/stmmac/ring_mode.c | 2 +- 4 files changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h index 627fec210e2f6..8e2a19616bc93 100644 --- a/drivers/net/ethernet/stmicro/stmmac/common.h +++ b/drivers/net/ethernet/stmicro/stmmac/common.h @@ -340,7 +340,8 @@ struct dma_features { /* GMAC TX FIFO is 8K, Rx FIFO is 16K */ #define BUF_SIZE_16KiB 16384 -#define BUF_SIZE_8KiB 8192 +/* RX Buffer size must be < 8191 and multiple of 4/8/16 bytes */ +#define BUF_SIZE_8KiB 8188 #define BUF_SIZE_4KiB 4096 #define BUF_SIZE_2KiB 2048 diff --git a/drivers/net/ethernet/stmicro/stmmac/descs_com.h b/drivers/net/ethernet/stmicro/stmmac/descs_com.h index ca9d7e48034ce..40d6356a7e73c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/descs_com.h +++ b/drivers/net/ethernet/stmicro/stmmac/descs_com.h @@ -31,7 +31,7 @@ /* Enhanced descriptors */ static inline void ehn_desc_rx_set_on_ring(struct dma_desc *p, int end) { - p->des1 |= cpu_to_le32(((BUF_SIZE_8KiB - 1) + p->des1 |= cpu_to_le32((BUF_SIZE_8KiB << ERDES1_BUFFER2_SIZE_SHIFT) & ERDES1_BUFFER2_SIZE_MASK); diff --git a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c index 2a828a3128142..acd65a4f94d4f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c +++ b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c @@ -262,7 +262,7 @@ static void enh_desc_init_rx_desc(struct dma_desc *p, int disable_rx_ic, int mode, int end) { p->des0 |= cpu_to_le32(RDES0_OWN); - p->des1 |= cpu_to_le32((BUF_SIZE_8KiB - 1) & ERDES1_BUFFER1_SIZE_MASK); + p->des1 |= cpu_to_le32(BUF_SIZE_8KiB & ERDES1_BUFFER1_SIZE_MASK); if (mode == STMMAC_CHAIN_MODE) ehn_desc_rx_set_on_chain(p); diff --git a/drivers/net/ethernet/stmicro/stmmac/ring_mode.c b/drivers/net/ethernet/stmicro/stmmac/ring_mode.c index 28e4b5d50ce6c..1af7b078b94dc 100644 --- a/drivers/net/ethernet/stmicro/stmmac/ring_mode.c +++ b/drivers/net/ethernet/stmicro/stmmac/ring_mode.c @@ -143,7 +143,7 @@ static void stmmac_clean_desc3(void *priv_ptr, struct dma_desc *p) static int stmmac_set_16kib_bfsize(int mtu) { int ret = 0; - if (unlikely(mtu >= BUF_SIZE_8KiB)) + if (unlikely(mtu > BUF_SIZE_8KiB)) ret = BUF_SIZE_16KiB; return ret; } -- GitLab From c887029c11d0c2bc14da9eb02704669cebab89a9 Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Fri, 23 Nov 2018 15:25:50 +0900 Subject: [PATCH 1665/2269] zram: close udev startup race condition as default groups commit fef912bf860e upstream. commit 98af4d4df889 upstream. I got a report from Howard Chen that he saw zram and sysfs race(ie, zram block device file is created but sysfs for it isn't yet) when he tried to create new zram devices via hotadd knob. v4.20 kernel fixes it by [1, 2] but it's too large size to merge into -stable so this patch fixes the problem by registering defualt group by Greg KH's approach[3]. This patch should be applied to every stable tree [3.16+] currently existing from kernel.org because the problem was introduced at 2.6.37 by [4]. [1] fef912bf860e, block: genhd: add 'groups' argument to device_add_disk [2] 98af4d4df889, zram: register default groups with device_add_disk() [3] http://kroah.com/log/blog/2013/06/26/how-to-create-a-sysfs-file-correctly/ [4] 33863c21e69e9, Staging: zram: Replace ioctls with sysfs interface Cc: Sergey Senozhatsky Cc: Hannes Reinecke Tested-by: Howard Chen Signed-off-by: Minchan Kim Signed-off-by: Sasha Levin --- drivers/block/zram/zram_drv.c | 26 ++++++-------------------- 1 file changed, 6 insertions(+), 20 deletions(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 1e2648e4c2867..27b202c64c846 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -1491,6 +1491,11 @@ static const struct attribute_group zram_disk_attr_group = { .attrs = zram_disk_attrs, }; +static const struct attribute_group *zram_disk_attr_groups[] = { + &zram_disk_attr_group, + NULL, +}; + /* * Allocate and initialize new zram device. the function returns * '>= 0' device_id upon success, and negative value otherwise. @@ -1568,23 +1573,14 @@ static int zram_add(void) if (ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE) blk_queue_max_write_zeroes_sectors(zram->disk->queue, UINT_MAX); + disk_to_dev(zram->disk)->groups = zram_disk_attr_groups; add_disk(zram->disk); - ret = sysfs_create_group(&disk_to_dev(zram->disk)->kobj, - &zram_disk_attr_group); - if (ret < 0) { - pr_err("Error creating sysfs group for device %d\n", - device_id); - goto out_free_disk; - } strlcpy(zram->compressor, default_compressor, sizeof(zram->compressor)); pr_info("Added device: %s\n", zram->disk->disk_name); return device_id; -out_free_disk: - del_gendisk(zram->disk); - put_disk(zram->disk); out_free_queue: blk_cleanup_queue(queue); out_free_idr: @@ -1612,16 +1608,6 @@ static int zram_remove(struct zram *zram) zram->claim = true; mutex_unlock(&bdev->bd_mutex); - /* - * Remove sysfs first, so no one will perform a disksize - * store while we destroy the devices. This also helps during - * hot_remove -- zram_reset_device() is the last holder of - * ->init_lock, no later/concurrent disksize_store() or any - * other sysfs handlers are possible. - */ - sysfs_remove_group(&disk_to_dev(zram->disk)->kobj, - &zram_disk_attr_group); - /* Make sure all the pending I/O are finished */ fsync_bdev(bdev); zram_reset_device(zram); -- GitLab From 26eff850549b52e43d0bc61f4682952fe2da293d Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Thu, 8 Nov 2018 02:04:57 +0000 Subject: [PATCH 1666/2269] SUNRPC: drop pointless static qualifier in xdr_get_next_encode_buffer() [ Upstream commit 025911a5f4e36955498ed50806ad1b02f0f76288 ] There is no need to have the '__be32 *p' variable static since new value always be assigned before use it. Signed-off-by: YueHaibing Cc: stable@vger.kernel.org Signed-off-by: J. Bruce Fields Signed-off-by: Sasha Levin --- net/sunrpc/xdr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 13695ba8fc540..4f382805eb9c0 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -512,7 +512,7 @@ EXPORT_SYMBOL_GPL(xdr_commit_encode); static __be32 *xdr_get_next_encode_buffer(struct xdr_stream *xdr, size_t nbytes) { - static __be32 *p; + __be32 *p; int space_left; int frag1bytes, frag2bytes; -- GitLab From 8c209c38d8fa633bfd519476f6b616ee1bc8eb06 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Tue, 22 May 2018 14:16:50 +0300 Subject: [PATCH 1667/2269] ACPI / watchdog: Prefer iTCO_wdt always when WDAT table uses RTC SRAM [ Upstream commit 5a802a7a285c8877ca872e44eeb0f06afcb5212f ] After we added quirk for Lenovo Z50-70 it turns out there are at least two more systems where WDAT table includes instructions accessing RTC SRAM. Instead of quirking each system separately, look for such instructions in the table and automatically prefer iTCO_wdt if found. Link: https://bugzilla.kernel.org/show_bug.cgi?id=199033 Reported-by: Arnold Guy Reported-by: Alois Nespor Reported-by: Yury Pakin Reported-by: Ihor Chyhin Signed-off-by: Mika Westerberg Acked-by: Guenter Roeck Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- drivers/acpi/acpi_watchdog.c | 72 ++++++++++++++++++++++-------------- 1 file changed, 45 insertions(+), 27 deletions(-) diff --git a/drivers/acpi/acpi_watchdog.c b/drivers/acpi/acpi_watchdog.c index 4bde16fb97d88..95600309ce420 100644 --- a/drivers/acpi/acpi_watchdog.c +++ b/drivers/acpi/acpi_watchdog.c @@ -12,35 +12,51 @@ #define pr_fmt(fmt) "ACPI: watchdog: " fmt #include -#include #include #include #include "internal.h" -static const struct dmi_system_id acpi_watchdog_skip[] = { - { - /* - * On Lenovo Z50-70 there are two issues with the WDAT - * table. First some of the instructions use RTC SRAM - * to store persistent information. This does not work well - * with Linux RTC driver. Second, more important thing is - * that the instructions do not actually reset the system. - * - * On this particular system iTCO_wdt seems to work just - * fine so we prefer that over WDAT for now. - * - * See also https://bugzilla.kernel.org/show_bug.cgi?id=199033. - */ - .ident = "Lenovo Z50-70", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), - DMI_MATCH(DMI_PRODUCT_NAME, "20354"), - DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo Z50-70"), - }, - }, - {} -}; +#ifdef CONFIG_RTC_MC146818_LIB +#include + +/* + * There are several systems where the WDAT table is accessing RTC SRAM to + * store persistent information. This does not work well with the Linux RTC + * driver so on those systems we skip WDAT driver and prefer iTCO_wdt + * instead. + * + * See also https://bugzilla.kernel.org/show_bug.cgi?id=199033. + */ +static bool acpi_watchdog_uses_rtc(const struct acpi_table_wdat *wdat) +{ + const struct acpi_wdat_entry *entries; + int i; + + entries = (struct acpi_wdat_entry *)(wdat + 1); + for (i = 0; i < wdat->entries; i++) { + const struct acpi_generic_address *gas; + + gas = &entries[i].register_region; + if (gas->space_id == ACPI_ADR_SPACE_SYSTEM_IO) { + switch (gas->address) { + case RTC_PORT(0): + case RTC_PORT(1): + case RTC_PORT(2): + case RTC_PORT(3): + return true; + } + } + } + + return false; +} +#else +static bool acpi_watchdog_uses_rtc(const struct acpi_table_wdat *wdat) +{ + return false; +} +#endif static const struct acpi_table_wdat *acpi_watchdog_get_wdat(void) { @@ -50,9 +66,6 @@ static const struct acpi_table_wdat *acpi_watchdog_get_wdat(void) if (acpi_disabled) return NULL; - if (dmi_check_system(acpi_watchdog_skip)) - return NULL; - status = acpi_get_table(ACPI_SIG_WDAT, 0, (struct acpi_table_header **)&wdat); if (ACPI_FAILURE(status)) { @@ -60,6 +73,11 @@ static const struct acpi_table_wdat *acpi_watchdog_get_wdat(void) return NULL; } + if (acpi_watchdog_uses_rtc(wdat)) { + pr_info("Skipping WDAT on this system because it uses RTC SRAM\n"); + return NULL; + } + return wdat; } -- GitLab From e404a6294a31b2b68cecdd2e89d61cc6ed87dddd Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 21 Nov 2018 15:52:43 +0200 Subject: [PATCH 1668/2269] perf machine: Add machine__is() to identify machine arch commit dbbd34a666ee117d0e39e71a47f38f02c4a5c698 upstream. Add a function to identify the machine architecture. Signed-off-by: Adrian Hunter Tested-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Andy Lutomirski Cc: Dave Hansen Cc: H. Peter Anvin Cc: Joerg Roedel Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: x86@kernel.org Link: http://lkml.kernel.org/r/1526548928-20790-6-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/util/env.c | 19 +++++++++++++++++++ tools/perf/util/env.h | 3 +++ tools/perf/util/machine.c | 9 +++++++++ tools/perf/util/machine.h | 2 ++ 4 files changed, 33 insertions(+) diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c index 6276b340f893e..49f58921a9687 100644 --- a/tools/perf/util/env.c +++ b/tools/perf/util/env.c @@ -3,6 +3,7 @@ #include "env.h" #include "util.h" #include +#include struct perf_env perf_env; @@ -87,6 +88,24 @@ int perf_env__read_cpu_topology_map(struct perf_env *env) return 0; } +static int perf_env__read_arch(struct perf_env *env) +{ + struct utsname uts; + + if (env->arch) + return 0; + + if (!uname(&uts)) + env->arch = strdup(uts.machine); + + return env->arch ? 0 : -ENOMEM; +} + +const char *perf_env__raw_arch(struct perf_env *env) +{ + return env && !perf_env__read_arch(env) ? env->arch : "unknown"; +} + void cpu_cache_level__free(struct cpu_cache_level *cache) { free(cache->type); diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h index 1eb35b190b342..bd3869913907e 100644 --- a/tools/perf/util/env.h +++ b/tools/perf/util/env.h @@ -65,4 +65,7 @@ int perf_env__set_cmdline(struct perf_env *env, int argc, const char *argv[]); int perf_env__read_cpu_topology_map(struct perf_env *env); void cpu_cache_level__free(struct cpu_cache_level *cache); + +const char *perf_env__raw_arch(struct perf_env *env); + #endif /* __PERF_ENV_H */ diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index bd5d5b5e2218c..2af879693fbe4 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -2238,6 +2238,15 @@ int machine__set_current_tid(struct machine *machine, int cpu, pid_t pid, return 0; } +/* + * Compares the raw arch string. N.B. see instead perf_env__arch() if a + * normalized arch is needed. + */ +bool machine__is(struct machine *machine, const char *arch) +{ + return machine && !strcmp(perf_env__raw_arch(machine->env), arch); +} + int machine__get_kernel_start(struct machine *machine) { struct map *map = machine__kernel_map(machine); diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index d551aa80a59ba..fbc5133fb27ca 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -169,6 +169,8 @@ static inline bool machine__is_host(struct machine *machine) return machine ? machine->pid == HOST_KERNEL_ID : false; } +bool machine__is(struct machine *machine, const char *arch); + struct thread *__machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid); struct thread *machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid); -- GitLab From 4edc059bac38f8cb59a8e539fb36fe0b8d19c55e Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 21 Nov 2018 15:52:44 +0200 Subject: [PATCH 1669/2269] perf tools: Fix kernel_start for PTI on x86 commit 19422a9f2a3be7f3a046285ffae4cbb571aa853a upstream. On x86_64, PTI entry trampolines are less than the start of kernel text, but still above 2^63. So leave kernel_start = 1ULL << 63 for x86_64. Signed-off-by: Adrian Hunter Tested-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Andy Lutomirski Cc: Dave Hansen Cc: H. Peter Anvin Cc: Joerg Roedel Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: x86@kernel.org Link: http://lkml.kernel.org/r/1526548928-20790-7-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/util/machine.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 2af879693fbe4..af18c3d556421 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -2263,7 +2263,12 @@ int machine__get_kernel_start(struct machine *machine) machine->kernel_start = 1ULL << 63; if (map) { err = map__load(map); - if (!err) + /* + * On x86_64, PTI entry trampolines are less than the + * start of kernel text, but still above 2^63. So leave + * kernel_start = 1ULL << 63 for x86_64. + */ + if (!err && !machine__is(machine, "x86_64")) machine->kernel_start = map->start; } return err; -- GitLab From 5d390059eab4bc9b3bfcba01a214ab873514f578 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 21 Nov 2018 15:52:45 +0200 Subject: [PATCH 1670/2269] perf machine: Add nr_cpus_avail() commit 9cecca325ea879c84fcd31a5e609a514c1a1dbd1 upstream. Add a function to return the number of the machine's available CPUs. Signed-off-by: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Andy Lutomirski Cc: Dave Hansen Cc: H. Peter Anvin Cc: Jiri Olsa Cc: Joerg Roedel Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: x86@kernel.org Link: http://lkml.kernel.org/r/1526986485-6562-5-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/util/env.c | 13 +++++++++++++ tools/perf/util/env.h | 1 + tools/perf/util/machine.c | 5 +++++ tools/perf/util/machine.h | 1 + 4 files changed, 20 insertions(+) diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c index 49f58921a9687..b492cb974aa0d 100644 --- a/tools/perf/util/env.c +++ b/tools/perf/util/env.c @@ -101,11 +101,24 @@ static int perf_env__read_arch(struct perf_env *env) return env->arch ? 0 : -ENOMEM; } +static int perf_env__read_nr_cpus_avail(struct perf_env *env) +{ + if (env->nr_cpus_avail == 0) + env->nr_cpus_avail = cpu__max_present_cpu(); + + return env->nr_cpus_avail ? 0 : -ENOENT; +} + const char *perf_env__raw_arch(struct perf_env *env) { return env && !perf_env__read_arch(env) ? env->arch : "unknown"; } +int perf_env__nr_cpus_avail(struct perf_env *env) +{ + return env && !perf_env__read_nr_cpus_avail(env) ? env->nr_cpus_avail : 0; +} + void cpu_cache_level__free(struct cpu_cache_level *cache) { free(cache->type); diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h index bd3869913907e..9aace8452751c 100644 --- a/tools/perf/util/env.h +++ b/tools/perf/util/env.h @@ -67,5 +67,6 @@ int perf_env__read_cpu_topology_map(struct perf_env *env); void cpu_cache_level__free(struct cpu_cache_level *cache); const char *perf_env__raw_arch(struct perf_env *env); +int perf_env__nr_cpus_avail(struct perf_env *env); #endif /* __PERF_ENV_H */ diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index af18c3d556421..78aa1c5f19ca8 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -2247,6 +2247,11 @@ bool machine__is(struct machine *machine, const char *arch) return machine && !strcmp(perf_env__raw_arch(machine->env), arch); } +int machine__nr_cpus_avail(struct machine *machine) +{ + return machine ? perf_env__nr_cpus_avail(machine->env) : 0; +} + int machine__get_kernel_start(struct machine *machine) { struct map *map = machine__kernel_map(machine); diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index fbc5133fb27ca..245743d9ce63a 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -170,6 +170,7 @@ static inline bool machine__is_host(struct machine *machine) } bool machine__is(struct machine *machine, const char *arch); +int machine__nr_cpus_avail(struct machine *machine); struct thread *__machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid); struct thread *machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid); -- GitLab From ce41e5fc90d4039907046e10b8d8dd7206ec36a4 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 21 Nov 2018 15:52:46 +0200 Subject: [PATCH 1671/2269] perf machine: Workaround missing maps for x86 PTI entry trampolines commit 4d99e4136580d178e3523281a820be17bf814bf8 upstream. On x86_64 the PTI entry trampolines are not in the kernel map created by perf tools. That results in the addresses having no symbols and prevents annotation. It also causes Intel PT to have decoding errors at the trampoline addresses. Workaround that by creating maps for the trampolines. At present the kernel does not export information revealing where the trampolines are. Until that happens, the addresses are hardcoded. Signed-off-by: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Andy Lutomirski Cc: Dave Hansen Cc: H. Peter Anvin Cc: Jiri Olsa Cc: Joerg Roedel Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: x86@kernel.org Link: http://lkml.kernel.org/r/1526986485-6562-6-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/util/machine.c | 96 +++++++++++++++++++++++++++++++++++++++ tools/perf/util/machine.h | 3 ++ tools/perf/util/symbol.c | 12 +++-- 3 files changed, 106 insertions(+), 5 deletions(-) diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 78aa1c5f19ca8..968fd0454e6b6 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -818,6 +818,102 @@ static int machine__get_running_kernel_start(struct machine *machine, return 0; } +/* Kernel-space maps for symbols that are outside the main kernel map and module maps */ +struct extra_kernel_map { + u64 start; + u64 end; + u64 pgoff; +}; + +static int machine__create_extra_kernel_map(struct machine *machine, + struct dso *kernel, + struct extra_kernel_map *xm) +{ + struct kmap *kmap; + struct map *map; + + map = map__new2(xm->start, kernel, MAP__FUNCTION); + if (!map) + return -1; + + map->end = xm->end; + map->pgoff = xm->pgoff; + + kmap = map__kmap(map); + + kmap->kmaps = &machine->kmaps; + + map_groups__insert(&machine->kmaps, map); + + pr_debug2("Added extra kernel map %" PRIx64 "-%" PRIx64 "\n", + map->start, map->end); + + map__put(map); + + return 0; +} + +static u64 find_entry_trampoline(struct dso *dso) +{ + /* Duplicates are removed so lookup all aliases */ + const char *syms[] = { + "_entry_trampoline", + "__entry_trampoline_start", + "entry_SYSCALL_64_trampoline", + }; + struct symbol *sym = dso__first_symbol(dso, MAP__FUNCTION); + unsigned int i; + + for (; sym; sym = dso__next_symbol(sym)) { + if (sym->binding != STB_GLOBAL) + continue; + for (i = 0; i < ARRAY_SIZE(syms); i++) { + if (!strcmp(sym->name, syms[i])) + return sym->start; + } + } + + return 0; +} + +/* + * These values can be used for kernels that do not have symbols for the entry + * trampolines in kallsyms. + */ +#define X86_64_CPU_ENTRY_AREA_PER_CPU 0xfffffe0000000000ULL +#define X86_64_CPU_ENTRY_AREA_SIZE 0x2c000 +#define X86_64_ENTRY_TRAMPOLINE 0x6000 + +/* Map x86_64 PTI entry trampolines */ +int machine__map_x86_64_entry_trampolines(struct machine *machine, + struct dso *kernel) +{ + u64 pgoff = find_entry_trampoline(kernel); + int nr_cpus_avail, cpu; + + if (!pgoff) + return 0; + + nr_cpus_avail = machine__nr_cpus_avail(machine); + + /* Add a 1 page map for each CPU's entry trampoline */ + for (cpu = 0; cpu < nr_cpus_avail; cpu++) { + u64 va = X86_64_CPU_ENTRY_AREA_PER_CPU + + cpu * X86_64_CPU_ENTRY_AREA_SIZE + + X86_64_ENTRY_TRAMPOLINE; + struct extra_kernel_map xm = { + .start = va, + .end = va + page_size, + .pgoff = pgoff, + }; + + if (machine__create_extra_kernel_map(machine, kernel, &xm) < 0) + return -1; + } + + return 0; +} + int __machine__create_kernel_maps(struct machine *machine, struct dso *kernel) { int type; diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index 245743d9ce63a..13041b036a5b5 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -266,4 +266,7 @@ int machine__set_current_tid(struct machine *machine, int cpu, pid_t pid, */ char *machine__resolve_kernel_addr(void *vmachine, unsigned long long *addrp, char **modp); +int machine__map_x86_64_entry_trampolines(struct machine *machine, + struct dso *kernel); + #endif /* __PERF_MACHINE_H */ diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index ec40e47aa1987..3936f69f385c0 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1513,20 +1513,22 @@ int dso__load(struct dso *dso, struct map *map) goto out; } + if (map->groups && map->groups->machine) + machine = map->groups->machine; + else + machine = NULL; + if (dso->kernel) { if (dso->kernel == DSO_TYPE_KERNEL) ret = dso__load_kernel_sym(dso, map); else if (dso->kernel == DSO_TYPE_GUEST_KERNEL) ret = dso__load_guest_kernel_sym(dso, map); + if (machine__is(machine, "x86_64")) + machine__map_x86_64_entry_trampolines(machine, dso); goto out; } - if (map->groups && map->groups->machine) - machine = map->groups->machine; - else - machine = NULL; - dso->adjust_symbols = 0; if (perfmap) { -- GitLab From 09917457494163dec35a74745ff04184385c8d32 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 21 Nov 2018 15:52:47 +0200 Subject: [PATCH 1672/2269] perf test code-reading: Fix perf_env setup for PTI entry trampolines commit f6c66d73bb8192d357bb5fb8cd5826920f811d8c upstream. The "Object code reading" test will not create maps for the PTI entry trampolines unless the machine environment exists to show that the arch is x86_64. Signed-off-by: Adrian Hunter Reported-by: Arnaldo Carvalho de Melo Tested-by: Arnaldo Carvalho de Melo Cc: Jiri Olsa Link: http://lkml.kernel.org/r/1528183800-21577-1-git-send-email-adrian.hunter@intel.com [ split from a larger patch ] Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/tests/code-reading.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c index fcc8984bc329d..acad8ba06d77c 100644 --- a/tools/perf/tests/code-reading.c +++ b/tools/perf/tests/code-reading.c @@ -527,6 +527,7 @@ static int do_test_code_reading(bool try_kcore) pid = getpid(); machine = machine__new_host(); + machine->env = &perf_env; ret = machine__create_kernel_maps(machine); if (ret < 0) { -- GitLab From d412ab7cfc0de3c01befb5d32746eaf512929bde Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Fri, 26 Oct 2018 15:28:54 +0300 Subject: [PATCH 1673/2269] x86/mm: Move LDT remap out of KASLR region on 5-level paging commit d52888aa2753e3063a9d3a0c9f72f94aa9809c15 upstream On 5-level paging the LDT remap area is placed in the middle of the KASLR randomization region and it can overlap with the direct mapping, the vmalloc or the vmap area. The LDT mapping is per mm, so it cannot be moved into the P4D page table next to the CPU_ENTRY_AREA without complicating PGD table allocation for 5-level paging. The 4 PGD slot gap just before the direct mapping is reserved for hypervisors, so it cannot be used. Move the direct mapping one slot deeper and use the resulting gap for the LDT remap area. The resulting layout is the same for 4 and 5 level paging. [ tglx: Massaged changelog ] Fixes: f55f0501cbf6 ("x86/pti: Put the LDT in its own PGD if PTI is on") Signed-off-by: Kirill A. Shutemov Signed-off-by: Thomas Gleixner Reviewed-by: Andy Lutomirski Cc: bp@alien8.de Cc: hpa@zytor.com Cc: dave.hansen@linux.intel.com Cc: peterz@infradead.org Cc: boris.ostrovsky@oracle.com Cc: jgross@suse.com Cc: bhe@redhat.com Cc: willy@infradead.org Cc: linux-mm@kvack.org Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181026122856.66224-2-kirill.shutemov@linux.intel.com Signed-off-by: Sasha Levin --- Documentation/x86/x86_64/mm.txt | 10 ++++++---- arch/x86/include/asm/page_64_types.h | 12 +++++++----- arch/x86/include/asm/pgtable_64_types.h | 7 +++---- arch/x86/xen/mmu_pv.c | 6 +++--- 4 files changed, 19 insertions(+), 16 deletions(-) diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt index ea91cb61a6029..43f066cde67de 100644 --- a/Documentation/x86/x86_64/mm.txt +++ b/Documentation/x86/x86_64/mm.txt @@ -4,8 +4,9 @@ Virtual memory map with 4 level page tables: 0000000000000000 - 00007fffffffffff (=47 bits) user space, different per mm hole caused by [47:63] sign extension ffff800000000000 - ffff87ffffffffff (=43 bits) guard hole, reserved for hypervisor -ffff880000000000 - ffffc7ffffffffff (=64 TB) direct mapping of all phys. memory -ffffc80000000000 - ffffc8ffffffffff (=40 bits) hole +ffff880000000000 - ffff887fffffffff (=39 bits) LDT remap for PTI +ffff888000000000 - ffffc87fffffffff (=64 TB) direct mapping of all phys. memory +ffffc88000000000 - ffffc8ffffffffff (=39 bits) hole ffffc90000000000 - ffffe8ffffffffff (=45 bits) vmalloc/ioremap space ffffe90000000000 - ffffe9ffffffffff (=40 bits) hole ffffea0000000000 - ffffeaffffffffff (=40 bits) virtual memory map (1TB) @@ -30,8 +31,9 @@ Virtual memory map with 5 level page tables: 0000000000000000 - 00ffffffffffffff (=56 bits) user space, different per mm hole caused by [56:63] sign extension ff00000000000000 - ff0fffffffffffff (=52 bits) guard hole, reserved for hypervisor -ff10000000000000 - ff8fffffffffffff (=55 bits) direct mapping of all phys. memory -ff90000000000000 - ff9fffffffffffff (=52 bits) LDT remap for PTI +ff10000000000000 - ff10ffffffffffff (=48 bits) LDT remap for PTI +ff11000000000000 - ff90ffffffffffff (=55 bits) direct mapping of all phys. memory +ff91000000000000 - ff9fffffffffffff (=3840 TB) hole ffa0000000000000 - ffd1ffffffffffff (=54 bits) vmalloc/ioremap space (12800 TB) ffd2000000000000 - ffd3ffffffffffff (=49 bits) hole ffd4000000000000 - ffd5ffffffffffff (=49 bits) virtual memory map (512TB) diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h index e1407312c4122..74d531f6d5180 100644 --- a/arch/x86/include/asm/page_64_types.h +++ b/arch/x86/include/asm/page_64_types.h @@ -33,14 +33,16 @@ /* * Set __PAGE_OFFSET to the most negative possible address + - * PGDIR_SIZE*16 (pgd slot 272). The gap is to allow a space for a - * hypervisor to fit. Choosing 16 slots here is arbitrary, but it's - * what Xen requires. + * PGDIR_SIZE*17 (pgd slot 273). + * + * The gap is to allow a space for LDT remap for PTI (1 pgd slot) and space for + * a hypervisor (16 slots). Choosing 16 slots for a hypervisor is arbitrary, + * but it's what Xen requires. */ #ifdef CONFIG_X86_5LEVEL -#define __PAGE_OFFSET_BASE _AC(0xff10000000000000, UL) +#define __PAGE_OFFSET_BASE _AC(0xff11000000000000, UL) #else -#define __PAGE_OFFSET_BASE _AC(0xffff880000000000, UL) +#define __PAGE_OFFSET_BASE _AC(0xffff888000000000, UL) #endif #ifdef CONFIG_RANDOMIZE_MEMORY diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h index 6b8f73dcbc2c2..7764617b8f9c2 100644 --- a/arch/x86/include/asm/pgtable_64_types.h +++ b/arch/x86/include/asm/pgtable_64_types.h @@ -88,16 +88,15 @@ typedef struct { pteval_t pte; } pte_t; # define VMALLOC_SIZE_TB _AC(12800, UL) # define __VMALLOC_BASE _AC(0xffa0000000000000, UL) # define __VMEMMAP_BASE _AC(0xffd4000000000000, UL) -# define LDT_PGD_ENTRY _AC(-112, UL) -# define LDT_BASE_ADDR (LDT_PGD_ENTRY << PGDIR_SHIFT) #else # define VMALLOC_SIZE_TB _AC(32, UL) # define __VMALLOC_BASE _AC(0xffffc90000000000, UL) # define __VMEMMAP_BASE _AC(0xffffea0000000000, UL) -# define LDT_PGD_ENTRY _AC(-3, UL) -# define LDT_BASE_ADDR (LDT_PGD_ENTRY << PGDIR_SHIFT) #endif +#define LDT_PGD_ENTRY -240UL +#define LDT_BASE_ADDR (LDT_PGD_ENTRY << PGDIR_SHIFT) + #ifdef CONFIG_RANDOMIZE_MEMORY # define VMALLOC_START vmalloc_base # define VMEMMAP_START vmemmap_base diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index 8ed11a5b1a9d8..b33fa127a6131 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -1869,7 +1869,7 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) init_top_pgt[0] = __pgd(0); /* Pre-constructed entries are in pfn, so convert to mfn */ - /* L4[272] -> level3_ident_pgt */ + /* L4[273] -> level3_ident_pgt */ /* L4[511] -> level3_kernel_pgt */ convert_pfn_mfn(init_top_pgt); @@ -1889,8 +1889,8 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) addr[0] = (unsigned long)pgd; addr[1] = (unsigned long)l3; addr[2] = (unsigned long)l2; - /* Graft it onto L4[272][0]. Note that we creating an aliasing problem: - * Both L4[272][0] and L4[511][510] have entries that point to the same + /* Graft it onto L4[273][0]. Note that we creating an aliasing problem: + * Both L4[273][0] and L4[511][510] have entries that point to the same * L2 (PMD) tables. Meaning that if you modify it in __va space * it will be also modified in the __ka space! (But if you just * modify the PMD table to point to other PTE's or none, then you -- GitLab From a17989cb9bb03c75bb57dddc0f044100fd4d856e Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Fri, 26 Oct 2018 15:28:55 +0300 Subject: [PATCH 1674/2269] x86/ldt: Unmap PTEs for the slot before freeing LDT pages commit a0e6e0831c516860fc7f9be1db6c081fe902ebcf upstream modify_ldt(2) leaves the old LDT mapped after switching over to the new one. The old LDT gets freed and the pages can be re-used. Leaving the mapping in place can have security implications. The mapping is present in the userspace page tables and Meltdown-like attacks can read these freed and possibly reused pages. It's relatively simple to fix: unmap the old LDT and flush TLB before freeing the old LDT memory. This further allows to avoid flushing the TLB in map_ldt_struct() as the slot is unmapped and flushed by unmap_ldt_struct() or has never been mapped at all. [ tglx: Massaged changelog and removed the needless line breaks ] Fixes: f55f0501cbf6 ("x86/pti: Put the LDT in its own PGD if PTI is on") Signed-off-by: Kirill A. Shutemov Signed-off-by: Thomas Gleixner Cc: bp@alien8.de Cc: hpa@zytor.com Cc: dave.hansen@linux.intel.com Cc: luto@kernel.org Cc: peterz@infradead.org Cc: boris.ostrovsky@oracle.com Cc: jgross@suse.com Cc: bhe@redhat.com Cc: willy@infradead.org Cc: linux-mm@kvack.org Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181026122856.66224-3-kirill.shutemov@linux.intel.com Signed-off-by: Sasha Levin --- arch/x86/kernel/ldt.c | 49 +++++++++++++++++++++++++++++++------------ 1 file changed, 36 insertions(+), 13 deletions(-) diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index 26d713ecad34a..65df298d4e9e8 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c @@ -103,14 +103,6 @@ static struct ldt_struct *alloc_ldt_struct(unsigned int num_entries) /* * If PTI is enabled, this maps the LDT into the kernelmode and * usermode tables for the given mm. - * - * There is no corresponding unmap function. Even if the LDT is freed, we - * leave the PTEs around until the slot is reused or the mm is destroyed. - * This is harmless: the LDT is always in ordinary memory, and no one will - * access the freed slot. - * - * If we wanted to unmap freed LDTs, we'd also need to do a flush to make - * it useful, and the flush would slow down modify_ldt(). */ static int map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot) @@ -119,8 +111,8 @@ map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot) bool is_vmalloc, had_top_level_entry; unsigned long va; spinlock_t *ptl; + int i, nr_pages; pgd_t *pgd; - int i; if (!static_cpu_has(X86_FEATURE_PTI)) return 0; @@ -141,7 +133,9 @@ map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot) is_vmalloc = is_vmalloc_addr(ldt->entries); - for (i = 0; i * PAGE_SIZE < ldt->nr_entries * LDT_ENTRY_SIZE; i++) { + nr_pages = DIV_ROUND_UP(ldt->nr_entries * LDT_ENTRY_SIZE, PAGE_SIZE); + + for (i = 0; i < nr_pages; i++) { unsigned long offset = i << PAGE_SHIFT; const void *src = (char *)ldt->entries + offset; unsigned long pfn; @@ -189,14 +183,42 @@ map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot) } } - va = (unsigned long)ldt_slot_va(slot); - flush_tlb_mm_range(mm, va, va + LDT_SLOT_STRIDE, 0); - ldt->slot = slot; #endif return 0; } +static void unmap_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt) +{ +#ifdef CONFIG_PAGE_TABLE_ISOLATION + unsigned long va; + int i, nr_pages; + + if (!ldt) + return; + + /* LDT map/unmap is only required for PTI */ + if (!static_cpu_has(X86_FEATURE_PTI)) + return; + + nr_pages = DIV_ROUND_UP(ldt->nr_entries * LDT_ENTRY_SIZE, PAGE_SIZE); + + for (i = 0; i < nr_pages; i++) { + unsigned long offset = i << PAGE_SHIFT; + spinlock_t *ptl; + pte_t *ptep; + + va = (unsigned long)ldt_slot_va(ldt->slot) + offset; + ptep = get_locked_pte(mm, va, &ptl); + pte_clear(mm, va, ptep); + pte_unmap_unlock(ptep, ptl); + } + + va = (unsigned long)ldt_slot_va(ldt->slot); + flush_tlb_mm_range(mm, va, va + nr_pages * PAGE_SIZE, 0); +#endif /* CONFIG_PAGE_TABLE_ISOLATION */ +} + static void free_ldt_pgtables(struct mm_struct *mm) { #ifdef CONFIG_PAGE_TABLE_ISOLATION @@ -433,6 +455,7 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode) } install_ldt(mm, new_ldt); + unmap_ldt_struct(mm, old_ldt); free_ldt_struct(old_ldt); error = 0; -- GitLab From 817266cb8d45b74a695fb8a27e12b17cf087d7b1 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Mon, 5 Nov 2018 09:35:44 -0500 Subject: [PATCH 1675/2269] media: v4l: event: Add subscription to list before calling "add" operation commit 92539d3eda2c090b382699bbb896d4b54e9bdece upstream. Patch ad608fbcf166 changed how events were subscribed to address an issue elsewhere. As a side effect of that change, the "add" callback was called before the event subscription was added to the list of subscribed events, causing the first event queued by the add callback (and possibly other events arriving soon afterwards) to be lost. Fix this by adding the subscription to the list before calling the "add" callback, and clean up afterwards if that fails. Fixes: ad608fbcf166 ("media: v4l: event: Prevent freeing event subscriptions while accessed") Reported-by: Dave Stevenson Signed-off-by: Sakari Ailus Tested-by: Dave Stevenson Reviewed-by: Hans Verkuil Tested-by: Hans Verkuil Cc: stable@vger.kernel.org (for 4.14 and up) Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/v4l2-core/v4l2-event.c | 43 ++++++++++++++++------------ 1 file changed, 24 insertions(+), 19 deletions(-) diff --git a/drivers/media/v4l2-core/v4l2-event.c b/drivers/media/v4l2-core/v4l2-event.c index 568dd4affb337..011907eff6609 100644 --- a/drivers/media/v4l2-core/v4l2-event.c +++ b/drivers/media/v4l2-core/v4l2-event.c @@ -193,6 +193,22 @@ int v4l2_event_pending(struct v4l2_fh *fh) } EXPORT_SYMBOL_GPL(v4l2_event_pending); +static void __v4l2_event_unsubscribe(struct v4l2_subscribed_event *sev) +{ + struct v4l2_fh *fh = sev->fh; + unsigned int i; + + lockdep_assert_held(&fh->subscribe_lock); + assert_spin_locked(&fh->vdev->fh_lock); + + /* Remove any pending events for this subscription */ + for (i = 0; i < sev->in_use; i++) { + list_del(&sev->events[sev_pos(sev, i)].list); + fh->navailable--; + } + list_del(&sev->list); +} + int v4l2_event_subscribe(struct v4l2_fh *fh, const struct v4l2_event_subscription *sub, unsigned elems, const struct v4l2_subscribed_event_ops *ops) @@ -225,27 +241,23 @@ int v4l2_event_subscribe(struct v4l2_fh *fh, spin_lock_irqsave(&fh->vdev->fh_lock, flags); found_ev = v4l2_event_subscribed(fh, sub->type, sub->id); + if (!found_ev) + list_add(&sev->list, &fh->subscribed); spin_unlock_irqrestore(&fh->vdev->fh_lock, flags); if (found_ev) { /* Already listening */ kvfree(sev); - goto out_unlock; - } - - if (sev->ops && sev->ops->add) { + } else if (sev->ops && sev->ops->add) { ret = sev->ops->add(sev, elems); if (ret) { + spin_lock_irqsave(&fh->vdev->fh_lock, flags); + __v4l2_event_unsubscribe(sev); + spin_unlock_irqrestore(&fh->vdev->fh_lock, flags); kvfree(sev); - goto out_unlock; } } - spin_lock_irqsave(&fh->vdev->fh_lock, flags); - list_add(&sev->list, &fh->subscribed); - spin_unlock_irqrestore(&fh->vdev->fh_lock, flags); - -out_unlock: mutex_unlock(&fh->subscribe_lock); return ret; @@ -280,7 +292,6 @@ int v4l2_event_unsubscribe(struct v4l2_fh *fh, { struct v4l2_subscribed_event *sev; unsigned long flags; - int i; if (sub->type == V4L2_EVENT_ALL) { v4l2_event_unsubscribe_all(fh); @@ -292,14 +303,8 @@ int v4l2_event_unsubscribe(struct v4l2_fh *fh, spin_lock_irqsave(&fh->vdev->fh_lock, flags); sev = v4l2_event_subscribed(fh, sub->type, sub->id); - if (sev != NULL) { - /* Remove any pending events for this subscription */ - for (i = 0; i < sev->in_use; i++) { - list_del(&sev->events[sev_pos(sev, i)].list); - fh->navailable--; - } - list_del(&sev->list); - } + if (sev != NULL) + __v4l2_event_unsubscribe(sev); spin_unlock_irqrestore(&fh->vdev->fh_lock, flags); -- GitLab From 2fc77700daa9f2faf7e61f46c1d784fd85867dfc Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Sun, 11 Nov 2018 00:06:12 +0200 Subject: [PATCH 1676/2269] MIPS: OCTEON: cavium_octeon_defconfig: re-enable OCTEON USB driver commit 82fba2df7f7c019627f24c5036dc99f41731d770 upstream. Re-enable OCTEON USB driver which is needed on older hardware (e.g. EdgeRouter Lite) for mass storage etc. This got accidentally deleted when config options were changed for OCTEON2/3 USB. Signed-off-by: Aaro Koskinen Signed-off-by: Paul Burton Fixes: f922bc0ad08b ("MIPS: Octeon: cavium_octeon_defconfig: Enable more drivers") Patchwork: https://patchwork.linux-mips.org/patch/21077/ Cc: Ralf Baechle Cc: James Hogan Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Cc: stable@vger.kernel.org # 4.14+ Signed-off-by: Greg Kroah-Hartman --- arch/mips/configs/cavium_octeon_defconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/mips/configs/cavium_octeon_defconfig b/arch/mips/configs/cavium_octeon_defconfig index 490b12af103c1..c52d0efacd146 100644 --- a/arch/mips/configs/cavium_octeon_defconfig +++ b/arch/mips/configs/cavium_octeon_defconfig @@ -140,6 +140,7 @@ CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_DS1307=y CONFIG_STAGING=y CONFIG_OCTEON_ETHERNET=y +CONFIG_OCTEON_USB=y # CONFIG_IOMMU_SUPPORT is not set CONFIG_RAS=y CONFIG_EXT4_FS=y -- GitLab From f6a6ae4e0f345aa481535bfe2046cd33f4dc37b8 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 26 Oct 2018 10:19:51 +0300 Subject: [PATCH 1677/2269] uio: Fix an Oops on load commit 432798195bbce1f8cd33d1c0284d0538835e25fb upstream. I was trying to solve a double free but I introduced a more serious NULL dereference bug. The problem is that if there is an IRQ which triggers immediately, then we need "info->uio_dev" but it's not set yet. This patch puts the original initialization back to how it was and just sets info->uio_dev to NULL on the error path so it should solve both the Oops and the double free. Fixes: f019f07ecf6a ("uio: potential double frees if __uio_register_device() fails") Reported-by: Mathias Thore Signed-off-by: Dan Carpenter Cc: stable Tested-by: Mathias Thore Signed-off-by: Greg Kroah-Hartman --- drivers/uio/uio.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/uio/uio.c b/drivers/uio/uio.c index 0a730136646d5..654579bc1e54b 100644 --- a/drivers/uio/uio.c +++ b/drivers/uio/uio.c @@ -850,6 +850,8 @@ int __uio_register_device(struct module *owner, if (ret) goto err_uio_dev_add_attributes; + info->uio_dev = idev; + if (info->irq && (info->irq != UIO_IRQ_CUSTOM)) { /* * Note that we deliberately don't use devm_request_irq @@ -861,11 +863,12 @@ int __uio_register_device(struct module *owner, */ ret = request_irq(info->irq, uio_interrupt, info->irq_flags, info->name, idev); - if (ret) + if (ret) { + info->uio_dev = NULL; goto err_request_irq; + } } - info->uio_dev = idev; return 0; err_request_irq: -- GitLab From 578f05a757bf76e828fe244994436947187bcfe3 Mon Sep 17 00:00:00 2001 From: Maarten Jacobs Date: Mon, 19 Nov 2018 23:18:49 +0000 Subject: [PATCH 1678/2269] usb: cdc-acm: add entry for Hiro (Conexant) modem commit 63529eaa6164ef7ab4b907b25ac3648177e5e78f upstream. The cdc-acm kernel module currently does not support the Hiro (Conexant) H05228 USB modem. The patch below adds the device specific information: idVendor 0x0572 idProduct 0x1349 Signed-off-by: Maarten Jacobs Acked-by: Oliver Neukum Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/cdc-acm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index e41d00bc7e974..5a8ef83a5c5cc 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -1724,6 +1724,9 @@ static const struct usb_device_id acm_ids[] = { { USB_DEVICE(0x0572, 0x1328), /* Shiro / Aztech USB MODEM UM-3100 */ .driver_info = NO_UNION_NORMAL, /* has no union descriptor */ }, + { USB_DEVICE(0x0572, 0x1349), /* Hiro (Conexant) USB MODEM H50228 */ + .driver_info = NO_UNION_NORMAL, /* has no union descriptor */ + }, { USB_DEVICE(0x20df, 0x0001), /* Simtec Electronics Entropy Key */ .driver_info = QUIRK_CONTROL_LINE_STATE, }, { USB_DEVICE(0x2184, 0x001c) }, /* GW Instek AFG-2225 */ -- GitLab From d7385bcf1887642ac0f0c7c6b82efc0d3c3c5f45 Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Fri, 26 Oct 2018 13:33:15 +0800 Subject: [PATCH 1679/2269] USB: quirks: Add no-lpm quirk for Raydium touchscreens commit deefd24228a172d1b27d4a9adbfd2cdacd60ae64 upstream. Raydium USB touchscreen fails to set config if LPM is enabled: [ 2.030658] usb 1-8: New USB device found, idVendor=2386, idProduct=3119 [ 2.030659] usb 1-8: New USB device strings: Mfr=1, Product=2, SerialNumber=0 [ 2.030660] usb 1-8: Product: Raydium Touch System [ 2.030661] usb 1-8: Manufacturer: Raydium Corporation [ 7.132209] usb 1-8: can't set config #1, error -110 Same behavior can be observed on 2386:3114. Raydium claims the touchscreen supports LPM under Windows, so I used Microsoft USB Test Tools (MUTT) [1] to check its LPM status. MUTT shows that the LPM doesn't work under Windows, either. So let's just disable LPM for Raydium touchscreens. [1] https://docs.microsoft.com/en-us/windows-hardware/drivers/usbcon/usb-test-tools Signed-off-by: Kai-Heng Feng Cc: stable Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/quirks.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index 37a5e07b3488d..dc753a8b24187 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -263,6 +263,11 @@ static const struct usb_device_id usb_quirk_list[] = { { USB_DEVICE(0x2040, 0x7200), .driver_info = USB_QUIRK_CONFIG_INTF_STRINGS }, + /* Raydium Touchscreen */ + { USB_DEVICE(0x2386, 0x3114), .driver_info = USB_QUIRK_NO_LPM }, + + { USB_DEVICE(0x2386, 0x3119), .driver_info = USB_QUIRK_NO_LPM }, + /* DJI CineSSD */ { USB_DEVICE(0x2ca3, 0x0031), .driver_info = USB_QUIRK_NO_LPM }, -- GitLab From 041fa1f61997142e52331eee0eba84ffdbf85bd2 Mon Sep 17 00:00:00 2001 From: Emmanuel Pescosta Date: Fri, 26 Oct 2018 14:48:09 +0200 Subject: [PATCH 1680/2269] usb: quirks: Add delay-init quirk for Corsair K70 LUX RGB commit a77112577667cbda7c6292c52d909636aef31fd9 upstream. Following on from this patch: https://lkml.org/lkml/2017/11/3/516, Corsair K70 LUX RGB keyboards also require the DELAY_INIT quirk to start correctly at boot. Dmesg output: usb 1-6: string descriptor 0 read error: -110 usb 1-6: New USB device found, idVendor=1b1c, idProduct=1b33 usb 1-6: New USB device strings: Mfr=1, Product=2, SerialNumber=3 usb 1-6: can't set config #1, error -110 Signed-off-by: Emmanuel Pescosta Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/quirks.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index dc753a8b24187..1e8f68960014b 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -243,6 +243,9 @@ static const struct usb_device_id usb_quirk_list[] = { { USB_DEVICE(0x1b1c, 0x1b20), .driver_info = USB_QUIRK_DELAY_INIT | USB_QUIRK_DELAY_CTRL_MSG }, + /* Corsair K70 LUX RGB */ + { USB_DEVICE(0x1b1c, 0x1b33), .driver_info = USB_QUIRK_DELAY_INIT }, + /* Corsair K70 LUX */ { USB_DEVICE(0x1b1c, 0x1b36), .driver_info = USB_QUIRK_DELAY_INIT }, -- GitLab From 939936e6015d007f6ef2e344f5e2ec809074c987 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Wed, 17 Oct 2018 10:09:02 -0700 Subject: [PATCH 1681/2269] misc: atmel-ssc: Fix section annotation on atmel_ssc_get_driver_data commit 7c97301285b62a41d6bceded7d964085fc8cc50f upstream. After building the kernel with Clang, the following section mismatch warning appears: WARNING: vmlinux.o(.text+0x3bf19a6): Section mismatch in reference from the function ssc_probe() to the function .init.text:atmel_ssc_get_driver_data() The function ssc_probe() references the function __init atmel_ssc_get_driver_data(). This is often because ssc_probe lacks a __init annotation or the annotation of atmel_ssc_get_driver_data is wrong. Remove __init from atmel_ssc_get_driver_data to get rid of the mismatch. Signed-off-by: Nathan Chancellor Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/misc/atmel-ssc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/atmel-ssc.c b/drivers/misc/atmel-ssc.c index b2a0340f277e2..d8e3cc2dc7470 100644 --- a/drivers/misc/atmel-ssc.c +++ b/drivers/misc/atmel-ssc.c @@ -132,7 +132,7 @@ static const struct of_device_id atmel_ssc_dt_ids[] = { MODULE_DEVICE_TABLE(of, atmel_ssc_dt_ids); #endif -static inline const struct atmel_ssc_platform_data * __init +static inline const struct atmel_ssc_platform_data * atmel_ssc_get_driver_data(struct platform_device *pdev) { if (pdev->dev.of_node) { -- GitLab From fb37c765e28d778f53c66c7727254f8ac2f047ea Mon Sep 17 00:00:00 2001 From: Mattias Jacobsson <2pi@mok.nu> Date: Sun, 21 Oct 2018 11:25:37 +0200 Subject: [PATCH 1682/2269] USB: misc: appledisplay: add 20" Apple Cinema Display commit f6501f49199097b99e4e263644d88c90d1ec1060 upstream. Add another Apple Cinema Display to the list of supported displays Signed-off-by: Mattias Jacobsson <2pi@mok.nu> Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/appledisplay.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/misc/appledisplay.c b/drivers/usb/misc/appledisplay.c index 8efdc500e790b..288fe3e69d52d 100644 --- a/drivers/usb/misc/appledisplay.c +++ b/drivers/usb/misc/appledisplay.c @@ -63,6 +63,7 @@ static const struct usb_device_id appledisplay_table[] = { { APPLEDISPLAY_DEVICE(0x9219) }, { APPLEDISPLAY_DEVICE(0x921c) }, { APPLEDISPLAY_DEVICE(0x921d) }, + { APPLEDISPLAY_DEVICE(0x9222) }, { APPLEDISPLAY_DEVICE(0x9236) }, /* Terminating entry */ -- GitLab From 16c233b9b6c8758207f0079737bf36bda9cd9094 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 16 Oct 2018 12:59:44 +0200 Subject: [PATCH 1683/2269] drivers/misc/sgi-gru: fix Spectre v1 vulnerability commit fee05f455ceb5c670cbe48e2f9454ebc4a388554 upstream. req.gid can be indirectly controlled by user-space, hence leading to a potential exploitation of the Spectre variant 1 vulnerability. This issue was detected with the help of Smatch: vers/misc/sgi-gru/grukdump.c:200 gru_dump_chiplet_request() warn: potential spectre issue 'gru_base' [w] Fix this by sanitizing req.gid before calling macro GID_TO_GRU, which uses it to index gru_base. Notice that given that speculation windows are large, the policy is to kill the speculation on the first load and not worry if it can be completed with a dependent load/store [1]. [1] https://marc.info/?l=linux-kernel&m=152449131114778&w=2 Cc: stable@vger.kernel.org Signed-off-by: Gustavo A. R. Silva Signed-off-by: Greg Kroah-Hartman --- drivers/misc/sgi-gru/grukdump.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/misc/sgi-gru/grukdump.c b/drivers/misc/sgi-gru/grukdump.c index 313da31502626..1540a7785e147 100644 --- a/drivers/misc/sgi-gru/grukdump.c +++ b/drivers/misc/sgi-gru/grukdump.c @@ -27,6 +27,9 @@ #include #include #include + +#include + #include "gru.h" #include "grutables.h" #include "gruhandles.h" @@ -196,6 +199,7 @@ int gru_dump_chiplet_request(unsigned long arg) /* Currently, only dump by gid is implemented */ if (req.gid >= gru_max_gids) return -EINVAL; + req.gid = array_index_nospec(req.gid, gru_max_gids); gru = GID_TO_GRU(req.gid); ubuf = req.buf; -- GitLab From 3cf1a2b4e0b3eae21abd25d535142e7b6ac3209c Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 19 Nov 2018 19:06:01 +0100 Subject: [PATCH 1684/2269] ACPI / platform: Add SMB0001 HID to forbidden_id_list commit 2bbb5fa37475d7aa5fa62f34db1623f3da2dfdfa upstream. Many HP AMD based laptops contain an SMB0001 device like this: Device (SMBD) { Name (_HID, "SMB0001") // _HID: Hardware ID Name (_CRS, ResourceTemplate () // _CRS: Current Resource Settings { IO (Decode16, 0x0B20, // Range Minimum 0x0B20, // Range Maximum 0x20, // Alignment 0x20, // Length ) IRQ (Level, ActiveLow, Shared, ) {7} }) } The legacy style IRQ resource here causes acpi_dev_get_irqresource() to be called with legacy=true and this message to show in dmesg: ACPI: IRQ 7 override to edge, high This causes issues when later on the AMD0030 GPIO device gets enumerated: Device (GPIO) { Name (_HID, "AMDI0030") // _HID: Hardware ID Name (_CID, "AMDI0030") // _CID: Compatible ID Name (_UID, Zero) // _UID: Unique ID Method (_CRS, 0, NotSerialized) // _CRS: Current Resource Settings { Name (RBUF, ResourceTemplate () { Interrupt (ResourceConsumer, Level, ActiveLow, Shared, ,, ) { 0x00000007, } Memory32Fixed (ReadWrite, 0xFED81500, // Address Base 0x00000400, // Address Length ) }) Return (RBUF) /* \_SB_.GPIO._CRS.RBUF */ } } Now acpi_dev_get_irqresource() gets called with legacy=false, but because of the earlier override of the trigger-type acpi_register_gsi() returns -EBUSY (because we try to register the same interrupt with a different trigger-type) and we end up setting IORESOURCE_DISABLED in the flags. The setting of IORESOURCE_DISABLED causes platform_get_irq() to call acpi_irq_get() which is not implemented on x86 and returns -EINVAL. resulting in the following in dmesg: amd_gpio AMDI0030:00: Failed to get gpio IRQ: -22 amd_gpio: probe of AMDI0030:00 failed with error -22 The SMB0001 is a "virtual" device in the sense that the only way the OS interacts with it is through calling a couple of methods to do SMBus transfers. As such it is weird that it has IO and IRQ resources at all, because the driver for it is not expected to ever access the hardware directly. The Linux driver for the SMB0001 device directly binds to the acpi_device through the acpi_bus, so we do not need to instantiate a platform_device for this ACPI device. This commit adds the SMB0001 HID to the forbidden_id_list, avoiding the instantiating of a platform_device for it. Not instantiating a platform_device means we will no longer call acpi_dev_get_irqresource() for the legacy IRQ resource fixing the probe of the AMDI0030 device failing. BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1644013 BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=198715 BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=199523 Reported-by: Lukas Kahnert Tested-by: Marc Cc: All applicable Signed-off-by: Hans de Goede Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/acpi_platform.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/acpi/acpi_platform.c b/drivers/acpi/acpi_platform.c index 88cd949003f3e..ecd84d910ed2e 100644 --- a/drivers/acpi/acpi_platform.c +++ b/drivers/acpi/acpi_platform.c @@ -30,6 +30,7 @@ static const struct acpi_device_id forbidden_id_list[] = { {"PNP0200", 0}, /* AT DMA Controller */ {"ACPI0009", 0}, /* IOxAPIC */ {"ACPI000A", 0}, /* IOAPIC */ + {"SMB0001", 0}, /* ACPI SMBUS virtual device */ {"", 0}, }; -- GitLab From 540f89376ba2949f53ed6f057fea321be848cd43 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 14 Nov 2018 13:55:09 -0800 Subject: [PATCH 1685/2269] HID: uhid: forbid UHID_CREATE under KERNEL_DS or elevated privileges commit 8c01db7619f07c85c5cd81ec5eb83608b56c88f5 upstream. When a UHID_CREATE command is written to the uhid char device, a copy_from_user() is done from a user pointer embedded in the command. When the address limit is KERNEL_DS, e.g. as is the case during sys_sendfile(), this can read from kernel memory. Alternatively, information can be leaked from a setuid binary that is tricked to write to the file descriptor. Therefore, forbid UHID_CREATE in these cases. No other commands in uhid_char_write() are affected by this bug and UHID_CREATE is marked as "obsolete", so apply the restriction to UHID_CREATE only rather than to uhid_char_write() entirely. Thanks to Dmitry Vyukov for adding uhid definitions to syzkaller and to Jann Horn for commit 9da3f2b740544 ("x86/fault: BUG() when uaccess helpers fault on kernel addresses"), allowing this bug to be found. Reported-by: syzbot+72473edc9bf4eb1c6556@syzkaller.appspotmail.com Fixes: d365c6cfd337 ("HID: uhid: add UHID_CREATE and UHID_DESTROY events") Cc: # v3.6+ Cc: Jann Horn Cc: Andy Lutomirski Signed-off-by: Eric Biggers Reviewed-by: Jann Horn Signed-off-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- drivers/hid/uhid.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/hid/uhid.c b/drivers/hid/uhid.c index 6f819f144cb4f..6f67d73b184e5 100644 --- a/drivers/hid/uhid.c +++ b/drivers/hid/uhid.c @@ -12,6 +12,7 @@ #include #include +#include #include #include #include @@ -722,6 +723,17 @@ static ssize_t uhid_char_write(struct file *file, const char __user *buffer, switch (uhid->input_buf.type) { case UHID_CREATE: + /* + * 'struct uhid_create_req' contains a __user pointer which is + * copied from, so it's unsafe to allow this with elevated + * privileges (e.g. from a setuid binary) or via kernel_write(). + */ + if (file->f_cred != current_cred() || uaccess_kernel()) { + pr_err_once("UHID_CREATE from different security context by process %d (%s), this is not allowed.\n", + task_tgid_vnr(current), current->comm); + ret = -EACCES; + goto unlock; + } ret = uhid_dev_create(uhid, &uhid->input_buf); break; case UHID_CREATE2: -- GitLab From 3c7f1671af5f1bef6fac4e9ad7416e7a5a4ed295 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 8 Nov 2018 15:55:37 +0100 Subject: [PATCH 1686/2269] libceph: fall back to sendmsg for slab pages commit 7e241f647dc7087a0401418a187f3f5b527cc690 upstream. skb_can_coalesce() allows coalescing neighboring slab objects into a single frag: return page == skb_frag_page(frag) && off == frag->page_offset + skb_frag_size(frag); ceph_tcp_sendpage() can be handed slab pages. One example of this is XFS: it passes down sector sized slab objects for its metadata I/O. If the kernel client is co-located on the OSD node, the skb may go through loopback and pop on the receive side with the exact same set of frags. When tcp_recvmsg() attempts to copy out such a frag, hardened usercopy complains because the size exceeds the object's allocated size: usercopy: kernel memory exposure attempt detected from ffff9ba917f20a00 (kmalloc-512) (1024 bytes) Although skb_can_coalesce() could be taught to return false if the resulting frag would cross a slab object boundary, we already have a fallback for non-refcounted pages. Utilize it for slab pages too. Cc: stable@vger.kernel.org # 4.8+ Signed-off-by: Ilya Dryomov Signed-off-by: Greg Kroah-Hartman --- net/ceph/messenger.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 5c4e85296cf68..5281da82371a6 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -594,9 +594,15 @@ static int ceph_tcp_sendpage(struct socket *sock, struct page *page, struct bio_vec bvec; int ret; - /* sendpage cannot properly handle pages with page_count == 0, - * we need to fallback to sendmsg if that's the case */ - if (page_count(page) >= 1) + /* + * sendpage cannot properly handle pages with page_count == 0, + * we need to fall back to sendmsg if that's the case. + * + * Same goes for slab pages: skb_can_coalesce() allows + * coalescing neighboring slab objects into a single frag which + * triggers one of hardened usercopy checks. + */ + if (page_count(page) >= 1 && !PageSlab(page)) return __ceph_tcp_sendpage(sock, page, offset, size, more); bvec.bv_page = page; -- GitLab From 4201a586f1fa63de5a965a1ebc3b99e0e1a4912c Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 27 Nov 2018 16:10:52 +0100 Subject: [PATCH 1687/2269] Linux 4.14.84 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 0f42814095a4d..874d72a3e6a71 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 83 +SUBLEVEL = 84 EXTRAVERSION = NAME = Petit Gorille -- GitLab From eff0c5c06eb119d7ddc397928fd26228ad436fc0 Mon Sep 17 00:00:00 2001 From: Alistair Strachan Date: Wed, 6 Jun 2018 13:46:44 -0700 Subject: [PATCH 1688/2269] Revert "proc: Convert proc_mount to use mount_ns." This reverts commit e94591d0d90c13166cb6eb54ce5f96ed13d81b55. This cleanup broke the parsing of procfs mount parameters. Bug: 79705088 Change-Id: I0f07180ef9a994c884abfa269ffb273ee0bcbc0d Signed-off-by: Alistair Strachan --- fs/proc/inode.c | 9 ++------ fs/proc/internal.h | 3 +-- fs/proc/root.c | 52 ++++++++++++++++++++++++++++++++++++++++++---- 3 files changed, 51 insertions(+), 13 deletions(-) diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 225f541f7078c..c5f154be0ab29 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -472,17 +472,12 @@ struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de) return inode; } -int proc_fill_super(struct super_block *s, void *data, int silent) +int proc_fill_super(struct super_block *s) { - struct pid_namespace *ns = get_pid_ns(s->s_fs_info); struct inode *root_inode; int ret; - if (!proc_parse_options(data, ns)) - return -EINVAL; - - /* User space would break if executables or devices appear on proc */ - s->s_iflags |= SB_I_USERNS_VISIBLE | SB_I_NOEXEC | SB_I_NODEV; + s->s_iflags |= SB_I_USERNS_VISIBLE | SB_I_NODEV; s->s_flags |= MS_NODIRATIME | MS_NOSUID | MS_NOEXEC; s->s_blocksize = 1024; s->s_blocksize_bits = 10; diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 5e55186858c96..02e0d9e9b3fed 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -203,7 +203,7 @@ extern const struct inode_operations proc_pid_link_inode_operations; extern void proc_init_inodecache(void); void set_proc_pid_nlink(void); extern struct inode *proc_get_inode(struct super_block *, struct proc_dir_entry *); -extern int proc_fill_super(struct super_block *, void *data, int flags); +extern int proc_fill_super(struct super_block *); extern void proc_entry_rundown(struct proc_dir_entry *); /* @@ -270,7 +270,6 @@ static inline void proc_tty_init(void) {} * root.c */ extern struct proc_dir_entry proc_root; -extern int proc_parse_options(char *options, struct pid_namespace *pid); extern void proc_self_init(void); extern int proc_remount(struct super_block *, int *, char *); diff --git a/fs/proc/root.c b/fs/proc/root.c index eafe87e010eca..ef6840c64e95a 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -26,6 +26,21 @@ #include "internal.h" +static int proc_test_super(struct super_block *sb, void *data) +{ + return sb->s_fs_info == data; +} + +static int proc_set_super(struct super_block *sb, void *data) +{ + int err = set_anon_super(sb, NULL); + if (!err) { + struct pid_namespace *ns = (struct pid_namespace *)data; + sb->s_fs_info = get_pid_ns(ns); + } + return err; +} + enum { Opt_gid, Opt_hidepid, Opt_err, }; @@ -36,7 +51,7 @@ static const match_table_t tokens = { {Opt_err, NULL}, }; -int proc_parse_options(char *options, struct pid_namespace *pid) +static int proc_parse_options(char *options, struct pid_namespace *pid) { char *p; substring_t args[MAX_OPT_ARGS]; @@ -89,16 +104,45 @@ int proc_remount(struct super_block *sb, int *flags, char *data) static struct dentry *proc_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { + int err; + struct super_block *sb; struct pid_namespace *ns; + char *options; if (flags & MS_KERNMOUNT) { - ns = data; - data = NULL; + ns = (struct pid_namespace *)data; + options = NULL; } else { ns = task_active_pid_ns(current); + options = data; + + /* Does the mounter have privilege over the pid namespace? */ + if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN)) + return ERR_PTR(-EPERM); + } + + sb = sget(fs_type, proc_test_super, proc_set_super, flags, ns); + if (IS_ERR(sb)) + return ERR_CAST(sb); + + if (!proc_parse_options(options, ns)) { + deactivate_locked_super(sb); + return ERR_PTR(-EINVAL); + } + + if (!sb->s_root) { + err = proc_fill_super(sb); + if (err) { + deactivate_locked_super(sb); + return ERR_PTR(err); + } + + sb->s_flags |= MS_ACTIVE; + /* User space would break if executables appear on proc */ + sb->s_iflags |= SB_I_NOEXEC; } - return mount_ns(fs_type, flags, data, ns, ns->user_ns, proc_fill_super); + return dget(sb->s_root); } static void proc_kill_sb(struct super_block *sb) -- GitLab From d9fe221bbf84cf096f2977bb0c9fb28662e3885b Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Thu, 25 Oct 2018 16:22:50 -0700 Subject: [PATCH 1689/2269] ANDROID: sdcardfs: Add sandbox Android/sandbox is treated the same as Android/data Bug: 27915347 Test: ls -l /sdcard/Android/sandbox/*somepackage* after creating the folder. Signed-off-by: Daniel Rosenberg Change-Id: I7ef440a88df72198303c419e1f2f7c4657f9c170 --- fs/sdcardfs/derived_perm.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/sdcardfs/derived_perm.c b/fs/sdcardfs/derived_perm.c index 0b3b22334e54a..ea0d9900eb6a4 100644 --- a/fs/sdcardfs/derived_perm.c +++ b/fs/sdcardfs/derived_perm.c @@ -62,6 +62,7 @@ void get_derived_permission_new(struct dentry *parent, struct dentry *dentry, int err; struct qstr q_Android = QSTR_LITERAL("Android"); struct qstr q_data = QSTR_LITERAL("data"); + struct qstr q_sandbox = QSTR_LITERAL("sandbox"); struct qstr q_obb = QSTR_LITERAL("obb"); struct qstr q_media = QSTR_LITERAL("media"); struct qstr q_cache = QSTR_LITERAL("cache"); @@ -110,6 +111,9 @@ void get_derived_permission_new(struct dentry *parent, struct dentry *dentry, if (qstr_case_eq(name, &q_data)) { /* App-specific directories inside; let anyone traverse */ info->data->perm = PERM_ANDROID_DATA; + } else if (qstr_case_eq(name, &q_sandbox)) { + /* App-specific directories inside; let anyone traverse */ + info->data->perm = PERM_ANDROID_DATA; } else if (qstr_case_eq(name, &q_obb)) { /* App-specific directories inside; let anyone traverse */ info->data->perm = PERM_ANDROID_OBB; -- GitLab From f544ad0b154702daed947248cf75b840181c1aa2 Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Thu, 25 Oct 2018 16:25:15 -0700 Subject: [PATCH 1690/2269] ANDROID: sdcardfs: Add option to not link obb Add mount option unshared_obb to not link the obb folders of multiple users together. Bug: 27915347 Test: mount with option. Check if altering one obb alters the other Signed-off-by: Daniel Rosenberg Change-Id: I3956e06bd0a222b0bbb2768c9a8a8372ada85e1e --- fs/sdcardfs/derived_perm.c | 3 ++- fs/sdcardfs/main.c | 10 +++++++++- fs/sdcardfs/sdcardfs.h | 1 + 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/fs/sdcardfs/derived_perm.c b/fs/sdcardfs/derived_perm.c index ea0d9900eb6a4..78a669c8a4d62 100644 --- a/fs/sdcardfs/derived_perm.c +++ b/fs/sdcardfs/derived_perm.c @@ -360,7 +360,8 @@ int need_graft_path(struct dentry *dentry) struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb); struct qstr obb = QSTR_LITERAL("obb"); - if (parent_info->data->perm == PERM_ANDROID && + if (!sbi->options.unshared_obb && + parent_info->data->perm == PERM_ANDROID && qstr_case_eq(&dentry->d_name, &obb)) { /* /Android/obb is the base obbpath of DERIVED_UNIFIED */ diff --git a/fs/sdcardfs/main.c b/fs/sdcardfs/main.c index ba52af8644cc8..d890c57119078 100644 --- a/fs/sdcardfs/main.c +++ b/fs/sdcardfs/main.c @@ -35,6 +35,7 @@ enum { Opt_gid_derivation, Opt_default_normal, Opt_nocache, + Opt_unshared_obb, Opt_err, }; @@ -48,6 +49,7 @@ static const match_table_t sdcardfs_tokens = { {Opt_multiuser, "multiuser"}, {Opt_gid_derivation, "derive_gid"}, {Opt_default_normal, "default_normal"}, + {Opt_unshared_obb, "unshared_obb"}, {Opt_reserved_mb, "reserved_mb=%u"}, {Opt_nocache, "nocache"}, {Opt_err, NULL} @@ -134,6 +136,9 @@ static int parse_options(struct super_block *sb, char *options, int silent, case Opt_nocache: opts->nocache = true; break; + case Opt_unshared_obb: + opts->unshared_obb = true; + break; /* unknown option */ default: if (!silent) @@ -187,13 +192,16 @@ int parse_options_remount(struct super_block *sb, char *options, int silent, return 0; vfsopts->mask = option; break; + case Opt_unshared_obb: case Opt_default_normal: case Opt_multiuser: case Opt_userid: case Opt_fsuid: case Opt_fsgid: case Opt_reserved_mb: - pr_warn("Option \"%s\" can't be changed during remount\n", p); + case Opt_gid_derivation: + if (!silent) + pr_warn("Option \"%s\" can't be changed during remount\n", p); break; /* unknown option */ default: diff --git a/fs/sdcardfs/sdcardfs.h b/fs/sdcardfs/sdcardfs.h index c2a16a023f2cf..6219771ed71c3 100644 --- a/fs/sdcardfs/sdcardfs.h +++ b/fs/sdcardfs/sdcardfs.h @@ -197,6 +197,7 @@ struct sdcardfs_mount_options { bool multiuser; bool gid_derivation; bool default_normal; + bool unshared_obb; unsigned int reserved_mb; bool nocache; }; -- GitLab From 9ff6ab2fbc0052a9e81d1eeb168cf78832dcbb6b Mon Sep 17 00:00:00 2001 From: Greg Hackmann Date: Tue, 25 Oct 2016 13:59:59 -0700 Subject: [PATCH 1691/2269] ANDROID: Kbuild, LLVMLinux: allow overriding clang target triple Android has an unusual setup where the kernel needs to target [arch]-linux-gnu to avoid Android userspace-specific flags and optimizations, but AOSP doesn't ship a matching binutils. Add a new variable CLANG_TRIPLE which can override the "-target" triple used to compile the kernel, while using a different CROSS_COMPILE to pick the binutils/gcc installation. For Android you'd do something like: export CLANG_TRIPLE=aarch64-linux-gnu- export CROSS_COMPILE=aarch64-linux-android- If you don't need something like this, leave CLANG_TRIPLE unset and it will default to CROSS_COMPILE. Change-Id: I85d63599c6ab8ed458071cdf9197d85b1f7f150b Signed-off-by: Greg Hackmann [astrachan: Added a script to check for incorrectly falling back to the default when CLANG_TRIPLE is unset] Bug: 118439987 Signed-off-by: Alistair Strachan --- Makefile | 6 +++++- scripts/clang-android.sh | 4 ++++ 2 files changed, 9 insertions(+), 1 deletion(-) create mode 100755 scripts/clang-android.sh diff --git a/Makefile b/Makefile index e7585b87582b4..663373f80220a 100644 --- a/Makefile +++ b/Makefile @@ -480,7 +480,11 @@ endif ifeq ($(cc-name),clang) ifneq ($(CROSS_COMPILE),) -CLANG_TARGET := --target=$(notdir $(CROSS_COMPILE:%-=%)) +CLANG_TRIPLE ?= $(CROSS_COMPILE) +CLANG_TARGET := --target=$(notdir $(CLANG_TRIPLE:%-=%)) +ifeq ($(shell $(srctree)/scripts/clang-android.sh $(CC) $(CLANG_TARGET)), y) +$(error "Clang with Android --target detected. Did you specify CLANG_TRIPLE?") +endif GCC_TOOLCHAIN := $(realpath $(dir $(shell which $(LD)))/..) endif ifneq ($(GCC_TOOLCHAIN),) diff --git a/scripts/clang-android.sh b/scripts/clang-android.sh new file mode 100755 index 0000000000000..9186c4f48576e --- /dev/null +++ b/scripts/clang-android.sh @@ -0,0 +1,4 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 + +$* -dM -E - &1 | grep -q __ANDROID__ && echo "y" -- GitLab From def7f54726120e29482b49c79846f75ae432294c Mon Sep 17 00:00:00 2001 From: Alistair Strachan Date: Wed, 24 Oct 2018 17:24:14 -0700 Subject: [PATCH 1692/2269] ANDROID: arm64 defconfig / build config for cuttlefish Add an arm64 defconfig with the Android and cuttlefish feature sets merged in. This has been boot tested only on the QEMU virt model for AArch64. $ qemu-system-aarch64 -M virt -cpu cortex-a57 \ -kernel arch/arm64/boot/Image.gz -serial stdio Bug: 118442619 Change-Id: I99f3b78af85de8d051226f202351bd852a032248 Signed-off-by: Alistair Strachan --- arch/arm64/configs/cuttlefish_defconfig | 430 ++++++++++++++++++++++++ build.config.cuttlefish.aarch64 | 16 + 2 files changed, 446 insertions(+) create mode 100644 arch/arm64/configs/cuttlefish_defconfig create mode 100644 build.config.cuttlefish.aarch64 diff --git a/arch/arm64/configs/cuttlefish_defconfig b/arch/arm64/configs/cuttlefish_defconfig new file mode 100644 index 0000000000000..a5c6506f4bbb3 --- /dev/null +++ b/arch/arm64/configs/cuttlefish_defconfig @@ -0,0 +1,430 @@ +# CONFIG_FHANDLE is not set +CONFIG_AUDIT=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_TASKSTATS=y +CONFIG_TASK_DELAY_ACCT=y +CONFIG_TASK_XACCT=y +CONFIG_TASK_IO_ACCOUNTING=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_MEMCG=y +CONFIG_MEMCG_SWAP=y +CONFIG_RT_GROUP_SCHED=y +CONFIG_CGROUP_FREEZER=y +CONFIG_CPUSETS=y +# CONFIG_PROC_PID_CPUSET is not set +CONFIG_CGROUP_CPUACCT=y +CONFIG_CGROUP_BPF=y +CONFIG_SCHED_AUTOGROUP=y +CONFIG_SCHED_TUNE=y +CONFIG_DEFAULT_USE_ENERGY_AWARE=y +CONFIG_BLK_DEV_INITRD=y +# CONFIG_RD_BZIP2 is not set +# CONFIG_RD_LZMA is not set +# CONFIG_RD_XZ is not set +# CONFIG_RD_LZO is not set +# CONFIG_RD_LZ4 is not set +CONFIG_SGETMASK_SYSCALL=y +# CONFIG_SYSFS_SYSCALL is not set +CONFIG_KALLSYMS_ALL=y +CONFIG_BPF_SYSCALL=y +CONFIG_EMBEDDED=y +# CONFIG_VM_EVENT_COUNTERS is not set +# CONFIG_COMPAT_BRK is not set +# CONFIG_SLAB_MERGE_DEFAULT is not set +CONFIG_PROFILING=y +CONFIG_KPROBES=y +CONFIG_JUMP_LABEL=y +CONFIG_CC_STACKPROTECTOR_STRONG=y +CONFIG_REFCOUNT_FULL=y +CONFIG_MODULES=y +CONFIG_MODULE_UNLOAD=y +CONFIG_MODVERSIONS=y +CONFIG_PCI=y +CONFIG_PCI_HOST_GENERIC=y +CONFIG_PREEMPT=y +CONFIG_HZ_100=y +# CONFIG_SPARSEMEM_VMEMMAP is not set +CONFIG_KSM=y +CONFIG_TRANSPARENT_HUGEPAGE=y +CONFIG_ZSMALLOC=y +CONFIG_SECCOMP=y +CONFIG_PARAVIRT=y +CONFIG_ARMV8_DEPRECATED=y +CONFIG_SWP_EMULATION=y +CONFIG_CP15_BARRIER_EMULATION=y +CONFIG_SETEND_EMULATION=y +CONFIG_ARM64_SW_TTBR0_PAN=y +CONFIG_RANDOMIZE_BASE=y +CONFIG_CMDLINE="console=ttyAMA0" +CONFIG_CMDLINE_EXTEND=y +# CONFIG_EFI is not set +# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set +CONFIG_COMPAT=y +CONFIG_PM_WAKELOCKS=y +CONFIG_PM_WAKELOCKS_LIMIT=0 +# CONFIG_PM_WAKELOCKS_GC is not set +CONFIG_PM_DEBUG=y +CONFIG_CPU_IDLE=y +CONFIG_ARM_CPUIDLE=y +CONFIG_CPU_FREQ=y +CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL=y +CONFIG_CPU_FREQ_GOV_POWERSAVE=y +CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y +CONFIG_CPUFREQ_DT=y +CONFIG_ARM_BIG_LITTLE_CPUFREQ=y +CONFIG_ARM_DT_BL_CPUFREQ=y +CONFIG_ARM_SCPI_CPUFREQ=y +CONFIG_NET=y +CONFIG_PACKET=y +CONFIG_UNIX=y +CONFIG_XFRM_USER=y +CONFIG_NET_KEY=y +CONFIG_INET=y +CONFIG_IP_MULTICAST=y +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_NET_IPGRE_DEMUX=y +CONFIG_NET_IPVTI=y +CONFIG_INET_ESP=y +# CONFIG_INET_XFRM_MODE_BEET is not set +CONFIG_INET_UDP_DIAG=y +CONFIG_INET_DIAG_DESTROY=y +CONFIG_TCP_CONG_ADVANCED=y +# CONFIG_TCP_CONG_BIC is not set +# CONFIG_TCP_CONG_WESTWOOD is not set +# CONFIG_TCP_CONG_HTCP is not set +CONFIG_IPV6_ROUTER_PREF=y +CONFIG_IPV6_ROUTE_INFO=y +CONFIG_IPV6_OPTIMISTIC_DAD=y +CONFIG_INET6_ESP=y +CONFIG_INET6_IPCOMP=y +CONFIG_IPV6_MIP6=y +CONFIG_IPV6_VTI=y +CONFIG_IPV6_MULTIPLE_TABLES=y +CONFIG_NETFILTER=y +CONFIG_NF_CONNTRACK=y +CONFIG_NF_CONNTRACK_SECMARK=y +CONFIG_NF_CONNTRACK_EVENTS=y +CONFIG_NF_CONNTRACK_AMANDA=y +CONFIG_NF_CONNTRACK_FTP=y +CONFIG_NF_CONNTRACK_H323=y +CONFIG_NF_CONNTRACK_IRC=y +CONFIG_NF_CONNTRACK_NETBIOS_NS=y +CONFIG_NF_CONNTRACK_PPTP=y +CONFIG_NF_CONNTRACK_SANE=y +CONFIG_NF_CONNTRACK_TFTP=y +CONFIG_NF_CT_NETLINK=y +CONFIG_NETFILTER_XT_TARGET_CLASSIFY=y +CONFIG_NETFILTER_XT_TARGET_CONNMARK=y +CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=y +CONFIG_NETFILTER_XT_TARGET_IDLETIMER=y +CONFIG_NETFILTER_XT_TARGET_MARK=y +CONFIG_NETFILTER_XT_TARGET_NFLOG=y +CONFIG_NETFILTER_XT_TARGET_NFQUEUE=y +CONFIG_NETFILTER_XT_TARGET_TPROXY=y +CONFIG_NETFILTER_XT_TARGET_TRACE=y +CONFIG_NETFILTER_XT_TARGET_SECMARK=y +CONFIG_NETFILTER_XT_TARGET_TCPMSS=y +CONFIG_NETFILTER_XT_MATCH_BPF=y +CONFIG_NETFILTER_XT_MATCH_COMMENT=y +CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=y +CONFIG_NETFILTER_XT_MATCH_CONNMARK=y +CONFIG_NETFILTER_XT_MATCH_CONNTRACK=y +CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=y +CONFIG_NETFILTER_XT_MATCH_HELPER=y +CONFIG_NETFILTER_XT_MATCH_IPRANGE=y +CONFIG_NETFILTER_XT_MATCH_LENGTH=y +CONFIG_NETFILTER_XT_MATCH_LIMIT=y +CONFIG_NETFILTER_XT_MATCH_MAC=y +CONFIG_NETFILTER_XT_MATCH_MARK=y +CONFIG_NETFILTER_XT_MATCH_POLICY=y +CONFIG_NETFILTER_XT_MATCH_PKTTYPE=y +CONFIG_NETFILTER_XT_MATCH_QUOTA=y +CONFIG_NETFILTER_XT_MATCH_QUOTA2=y +CONFIG_NETFILTER_XT_MATCH_SOCKET=y +CONFIG_NETFILTER_XT_MATCH_STATE=y +CONFIG_NETFILTER_XT_MATCH_STATISTIC=y +CONFIG_NETFILTER_XT_MATCH_STRING=y +CONFIG_NETFILTER_XT_MATCH_TIME=y +CONFIG_NETFILTER_XT_MATCH_U32=y +CONFIG_NF_CONNTRACK_IPV4=y +CONFIG_NF_SOCKET_IPV4=y +CONFIG_IP_NF_IPTABLES=y +CONFIG_IP_NF_MATCH_ECN=y +CONFIG_IP_NF_MATCH_TTL=y +CONFIG_IP_NF_FILTER=y +CONFIG_IP_NF_TARGET_REJECT=y +CONFIG_IP_NF_NAT=y +CONFIG_IP_NF_TARGET_MASQUERADE=y +CONFIG_IP_NF_TARGET_NETMAP=y +CONFIG_IP_NF_TARGET_REDIRECT=y +CONFIG_IP_NF_MANGLE=y +CONFIG_IP_NF_RAW=y +CONFIG_IP_NF_SECURITY=y +CONFIG_IP_NF_ARPTABLES=y +CONFIG_IP_NF_ARPFILTER=y +CONFIG_IP_NF_ARP_MANGLE=y +CONFIG_NF_CONNTRACK_IPV6=y +CONFIG_NF_SOCKET_IPV6=y +CONFIG_IP6_NF_IPTABLES=y +CONFIG_IP6_NF_MATCH_RPFILTER=y +CONFIG_IP6_NF_FILTER=y +CONFIG_IP6_NF_TARGET_REJECT=y +CONFIG_IP6_NF_MANGLE=y +CONFIG_IP6_NF_RAW=y +CONFIG_L2TP=y +CONFIG_NET_SCHED=y +CONFIG_NET_SCH_HTB=y +CONFIG_NET_CLS_U32=y +CONFIG_NET_EMATCH=y +CONFIG_NET_EMATCH_U32=y +CONFIG_NET_CLS_ACT=y +CONFIG_CFG80211=y +# CONFIG_CFG80211_DEFAULT_PS is not set +CONFIG_MAC80211=y +# CONFIG_MAC80211_RC_MINSTREL is not set +CONFIG_RFKILL=y +# CONFIG_UEVENT_HELPER is not set +CONFIG_DEVTMPFS=y +# CONFIG_ALLOW_DEV_COREDUMP is not set +CONFIG_DEBUG_DEVRES=y +CONFIG_OF_UNITTEST=y +CONFIG_ZRAM=y +CONFIG_BLK_DEV_LOOP=y +CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_RAM_SIZE=8192 +CONFIG_VIRTIO_BLK=y +CONFIG_UID_SYS_STATS=y +CONFIG_SCSI=y +# CONFIG_SCSI_PROC_FS is not set +CONFIG_BLK_DEV_SD=y +CONFIG_SCSI_VIRTIO=y +CONFIG_MD=y +CONFIG_BLK_DEV_DM=y +CONFIG_DM_CRYPT=y +CONFIG_DM_UEVENT=y +CONFIG_DM_VERITY=y +CONFIG_DM_VERITY_FEC=y +CONFIG_DM_VERITY_AVB=y +CONFIG_NETDEVICES=y +CONFIG_NETCONSOLE=y +CONFIG_NETCONSOLE_DYNAMIC=y +CONFIG_TUN=y +CONFIG_VIRTIO_NET=y +# CONFIG_ETHERNET is not set +CONFIG_PHYLIB=y +CONFIG_PPP=y +CONFIG_PPP_BSDCOMP=y +CONFIG_PPP_DEFLATE=y +CONFIG_PPP_MPPE=y +CONFIG_PPTP=y +CONFIG_PPPOL2TP=y +CONFIG_USB_USBNET=y +# CONFIG_USB_NET_AX8817X is not set +# CONFIG_USB_NET_AX88179_178A is not set +# CONFIG_USB_NET_CDCETHER is not set +# CONFIG_USB_NET_CDC_NCM is not set +# CONFIG_USB_NET_NET1080 is not set +# CONFIG_USB_NET_CDC_SUBSET is not set +# CONFIG_USB_NET_ZAURUS is not set +# CONFIG_WLAN_VENDOR_ADMTEK is not set +# CONFIG_WLAN_VENDOR_ATH is not set +# CONFIG_WLAN_VENDOR_ATMEL is not set +# CONFIG_WLAN_VENDOR_BROADCOM is not set +# CONFIG_WLAN_VENDOR_CISCO is not set +# CONFIG_WLAN_VENDOR_INTEL is not set +# CONFIG_WLAN_VENDOR_INTERSIL is not set +# CONFIG_WLAN_VENDOR_MARVELL is not set +# CONFIG_WLAN_VENDOR_MEDIATEK is not set +# CONFIG_WLAN_VENDOR_RALINK is not set +# CONFIG_WLAN_VENDOR_REALTEK is not set +# CONFIG_WLAN_VENDOR_RSI is not set +# CONFIG_WLAN_VENDOR_ST is not set +# CONFIG_WLAN_VENDOR_TI is not set +# CONFIG_WLAN_VENDOR_ZYDAS is not set +# CONFIG_WLAN_VENDOR_QUANTENNA is not set +CONFIG_INPUT_EVDEV=y +CONFIG_INPUT_KEYRESET=y +# CONFIG_INPUT_KEYBOARD is not set +# CONFIG_INPUT_MOUSE is not set +CONFIG_INPUT_JOYSTICK=y +CONFIG_JOYSTICK_XPAD=y +CONFIG_JOYSTICK_XPAD_FF=y +CONFIG_JOYSTICK_XPAD_LEDS=y +CONFIG_INPUT_TABLET=y +CONFIG_TABLET_USB_ACECAD=y +CONFIG_TABLET_USB_AIPTEK=y +CONFIG_TABLET_USB_GTCO=y +CONFIG_TABLET_USB_HANWANG=y +CONFIG_TABLET_USB_KBTAB=y +CONFIG_INPUT_MISC=y +CONFIG_INPUT_UINPUT=y +CONFIG_INPUT_GPIO=y +# CONFIG_VT is not set +# CONFIG_LEGACY_PTYS is not set +# CONFIG_DEVMEM is not set +CONFIG_SERIAL_8250=y +# CONFIG_SERIAL_8250_DEPRECATED_OPTIONS is not set +CONFIG_SERIAL_8250_CONSOLE=y +# CONFIG_SERIAL_8250_EXAR is not set +CONFIG_SERIAL_8250_NR_UARTS=48 +CONFIG_SERIAL_8250_EXTENDED=y +CONFIG_SERIAL_8250_MANY_PORTS=y +CONFIG_SERIAL_8250_SHARE_IRQ=y +CONFIG_SERIAL_AMBA_PL011=y +CONFIG_SERIAL_AMBA_PL011_CONSOLE=y +CONFIG_VIRTIO_CONSOLE=y +CONFIG_HW_RANDOM=y +CONFIG_HW_RANDOM_VIRTIO=y +# CONFIG_HW_RANDOM_CAVIUM is not set +# CONFIG_DEVPORT is not set +# CONFIG_I2C_COMPAT is not set +# CONFIG_I2C_HELPER_AUTO is not set +# CONFIG_HWMON is not set +CONFIG_THERMAL=y +CONFIG_CPU_THERMAL=y +CONFIG_MEDIA_SUPPORT=y +# CONFIG_VGA_ARB is not set +CONFIG_DRM=y +# CONFIG_DRM_FBDEV_EMULATION is not set +CONFIG_DRM_VIRTIO_GPU=y +CONFIG_SOUND=y +CONFIG_SND=y +CONFIG_HIDRAW=y +CONFIG_UHID=y +CONFIG_HID_A4TECH=y +CONFIG_HID_ACRUX=y +CONFIG_HID_ACRUX_FF=y +CONFIG_HID_APPLE=y +CONFIG_HID_BELKIN=y +CONFIG_HID_CHERRY=y +CONFIG_HID_CHICONY=y +CONFIG_HID_PRODIKEYS=y +CONFIG_HID_CYPRESS=y +CONFIG_HID_DRAGONRISE=y +CONFIG_DRAGONRISE_FF=y +CONFIG_HID_EMS_FF=y +CONFIG_HID_ELECOM=y +CONFIG_HID_EZKEY=y +CONFIG_HID_HOLTEK=y +CONFIG_HID_KEYTOUCH=y +CONFIG_HID_KYE=y +CONFIG_HID_UCLOGIC=y +CONFIG_HID_WALTOP=y +CONFIG_HID_GYRATION=y +CONFIG_HID_TWINHAN=y +CONFIG_HID_KENSINGTON=y +CONFIG_HID_LCPOWER=y +CONFIG_HID_LOGITECH=y +CONFIG_HID_LOGITECH_DJ=y +CONFIG_LOGITECH_FF=y +CONFIG_LOGIRUMBLEPAD2_FF=y +CONFIG_LOGIG940_FF=y +CONFIG_HID_MAGICMOUSE=y +CONFIG_HID_MICROSOFT=y +CONFIG_HID_MONTEREY=y +CONFIG_HID_MULTITOUCH=y +CONFIG_HID_NTRIG=y +CONFIG_HID_ORTEK=y +CONFIG_HID_PANTHERLORD=y +CONFIG_PANTHERLORD_FF=y +CONFIG_HID_PETALYNX=y +CONFIG_HID_PICOLCD=y +CONFIG_HID_PRIMAX=y +CONFIG_HID_ROCCAT=y +CONFIG_HID_SAITEK=y +CONFIG_HID_SAMSUNG=y +CONFIG_HID_SONY=y +CONFIG_HID_SPEEDLINK=y +CONFIG_HID_SUNPLUS=y +CONFIG_HID_GREENASIA=y +CONFIG_GREENASIA_FF=y +CONFIG_HID_SMARTJOYPLUS=y +CONFIG_SMARTJOYPLUS_FF=y +CONFIG_HID_TIVO=y +CONFIG_HID_TOPSEED=y +CONFIG_HID_THRUSTMASTER=y +CONFIG_HID_WACOM=y +CONFIG_HID_WIIMOTE=y +CONFIG_HID_ZEROPLUS=y +CONFIG_HID_ZYDACRON=y +CONFIG_USB_HIDDEV=y +CONFIG_USB_ANNOUNCE_NEW_DEVICES=y +CONFIG_USB_EHCI_HCD=y +CONFIG_USB_GADGET=y +CONFIG_USB_CONFIGFS=y +CONFIG_USB_CONFIGFS_F_FS=y +CONFIG_USB_CONFIGFS_F_ACC=y +CONFIG_USB_CONFIGFS_F_AUDIO_SRC=y +CONFIG_USB_CONFIGFS_UEVENT=y +CONFIG_USB_CONFIGFS_F_MIDI=y +CONFIG_MMC=y +# CONFIG_PWRSEQ_EMMC is not set +# CONFIG_PWRSEQ_SIMPLE is not set +# CONFIG_MMC_BLOCK is not set +CONFIG_RTC_CLASS=y +# CONFIG_RTC_HCTOSYS is not set +# CONFIG_RTC_SYSTOHC is not set +CONFIG_RTC_DRV_PL031=y +CONFIG_VIRTIO_PCI=y +# CONFIG_VIRTIO_PCI_LEGACY is not set +CONFIG_VIRTIO_BALLOON=y +CONFIG_VIRTIO_MMIO=y +CONFIG_VIRTIO_MMIO_CMDLINE_DEVICES=y +CONFIG_STAGING=y +CONFIG_ASHMEM=y +CONFIG_ANDROID_VSOC=y +CONFIG_ION=y +CONFIG_COMMON_CLK_SCPI=y +# CONFIG_COMMON_CLK_XGENE is not set +CONFIG_MAILBOX=y +# CONFIG_IOMMU_SUPPORT is not set +CONFIG_ANDROID=y +CONFIG_ANDROID_BINDER_IPC=y +CONFIG_ARM_SCPI_PROTOCOL=y +# CONFIG_ARM_SCPI_POWER_DOMAIN is not set +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_SECURITY=y +CONFIG_EXT4_ENCRYPTION=y +CONFIG_F2FS_FS=y +CONFIG_F2FS_FS_SECURITY=y +CONFIG_F2FS_FS_ENCRYPTION=y +# CONFIG_DNOTIFY is not set +CONFIG_QUOTA=y +CONFIG_QFMT_V2=y +CONFIG_MSDOS_FS=y +CONFIG_VFAT_FS=y +CONFIG_TMPFS=y +CONFIG_TMPFS_POSIX_ACL=y +CONFIG_SDCARD_FS=y +CONFIG_PSTORE=y +CONFIG_PSTORE_CONSOLE=y +CONFIG_PSTORE_RAM=y +CONFIG_PRINTK_TIME=y +CONFIG_DEBUG_INFO=y +# CONFIG_ENABLE_MUST_CHECK is not set +CONFIG_FRAME_WARN=1024 +# CONFIG_SECTION_MISMATCH_WARN_ONLY is not set +CONFIG_MAGIC_SYSRQ=y +CONFIG_DEBUG_STACK_USAGE=y +CONFIG_DEBUG_MEMORY_INIT=y +CONFIG_SOFTLOCKUP_DETECTOR=y +# CONFIG_DETECT_HUNG_TASK is not set +CONFIG_PANIC_TIMEOUT=5 +CONFIG_SCHEDSTATS=y +CONFIG_RCU_CPU_STALL_TIMEOUT=60 +CONFIG_ENABLE_DEFAULT_TRACERS=y +CONFIG_SECURITY_PERF_EVENTS_RESTRICT=y +CONFIG_SECURITY=y +CONFIG_SECURITY_NETWORK=y +CONFIG_LSM_MMAP_MIN_ADDR=65536 +CONFIG_HARDENED_USERCOPY=y +CONFIG_SECURITY_SELINUX=y +CONFIG_CRYPTO_SHA512=y +CONFIG_CRYPTO_LZ4=y +CONFIG_CRYPTO_ZSTD=y +CONFIG_CRYPTO_ANSI_CPRNG=y +CONFIG_CRYPTO_DEV_VIRTIO=y +CONFIG_XZ_DEC=y diff --git a/build.config.cuttlefish.aarch64 b/build.config.cuttlefish.aarch64 new file mode 100644 index 0000000000000..d2dcc966ee68b --- /dev/null +++ b/build.config.cuttlefish.aarch64 @@ -0,0 +1,16 @@ +ARCH=arm64 +BRANCH=android-4.14 +CLANG_TRIPLE=aarch64-linux-gnu- +CROSS_COMPILE=aarch64-linux-androidkernel- +DEFCONFIG=cuttlefish_defconfig +EXTRA_CMDS='' +KERNEL_DIR=common +POST_DEFCONFIG_CMDS="check_defconfig" +CLANG_PREBUILT_BIN=prebuilts-master/clang/host/linux-x86/clang-r328903/bin +LINUX_GCC_CROSS_COMPILE_PREBUILTS_BIN=prebuilts/gcc/linux-x86/aarch64/aarch64-linux-android-4.9/bin +FILES=" +arch/arm64/boot/Image.gz +vmlinux +System.map +" +STOP_SHIP_TRACEPRINTK=1 -- GitLab From 51164110fa6e8a0326c90f1dd8062f7179840403 Mon Sep 17 00:00:00 2001 From: Chris Redpath Date: Wed, 14 Nov 2018 10:46:34 +0000 Subject: [PATCH 1693/2269] BACKPORT: sched/fair: Fix cpu_util_wake() for 'execl' type workloads A performance regression was fixed upstream in util_est, but since 4.14 does not contain util_est, this fix will not be backported to android through any stable updates. Original Commit log: A ~10% regression has been reported for UnixBench's execl throughput test by Aaron Lu and Ye Xiaolong: https://lkml.org/lkml/2018/10/30/765 That test is pretty simple, it does a "recursive" execve() syscall on the same binary. Starting from the syscall, this sequence is possible: do_execve() do_execveat_common() __do_execve_file() sched_exec() select_task_rq_fair() <==| Task already enqueued find_idlest_cpu() find_idlest_group() capacity_spare_wake() <==| Functions not called from cpu_util_wake() | the wakeup path which means we can end up calling cpu_util_wake() not only from the "wakeup path", as its name would suggest. Indeed, the task doing an execve() syscall is already enqueued on the CPU we want to get the cpu_util_wake() for. The estimated utilization for a CPU computed in cpu_util_wake() was written under the assumption that function can be called only from the wakeup path. If instead the task is already enqueued, we end up with a utilization which does not remove the current task's contribution from the estimated utilization of the CPU. This will wrongly assume a reduced spare capacity on the current CPU and increase the chances to migrate the task on execve. The regression is tracked down to: commit d519329f72a6 ("sched/fair: Update util_est only on util_avg updates") because in that patch we turn on by default the UTIL_EST sched feature. However, the real issue is introduced by: commit f9be3e5961c5 ("sched/fair: Use util_est in LB and WU paths") Let's fix this by ensuring to always discount the task estimated utilization from the CPU's estimated utilization when the task is also the current one. The same benchmark of the bug report, executed on a dual socket 40 CPUs Intel(R) Xeon(R) CPU E5-2690 v2 @ 3.00GHz machine, reports these "Execl Throughput" figures (higher the better): mainline : 48136.5 lps mainline+fix : 55376.5 lps which correspond to a 15% speedup. Moreover, since {cpu_util,capacity_spare}_wake() are not really only used from the wakeup path, let's remove this ambiguity by using a better matching name: {cpu_util,capacity_spare}_without(). Since we are at that, let's also improve the existing documentation. Reported-by: Aaron Lu Reported-by: Ye Xiaolong Tested-by: Aaron Lu Signed-off-by: Patrick Bellasi Signed-off-by: Peter Zijlstra (Intel) Cc: Dietmar Eggemann Cc: Juri Lelli Cc: Linus Torvalds Cc: Morten Rasmussen Cc: Peter Zijlstra Cc: Quentin Perret Cc: Steve Muckle Cc: Suren Baghdasaryan Cc: Thomas Gleixner Cc: Todd Kjos Cc: Vincent Guittot Fixes: f9be3e5961c5 (sched/fair: Use util_est in LB and WU paths) Link: https://lore.kernel.org/lkml/20181025093100.GB13236@e110439-lin/ Signed-off-by: Ingo Molnar (backported to android-4.14 from c469933e772132 in tip/sched/urgent) Signed-off-by: Chris Redpath Change-Id: Ifce5d54b73f352d137ea70d1d880a7ea92db2309 --- kernel/sched/fair.c | 68 ++++++++++++++++++++++++++++++++++----------- 1 file changed, 52 insertions(+), 16 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 9fd3bd0ba2fe5..30cb205512627 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5942,10 +5942,19 @@ static inline unsigned long cpu_util_freq(int cpu) } /* - * cpu_util_wake: Compute CPU utilization with any contributions from - * the waking task p removed. + * cpu_util_without: compute cpu utilization without any contributions from *p + * @cpu: the CPU which utilization is requested + * @p: the task which utilization should be discounted + * + * The utilization of a CPU is defined by the utilization of tasks currently + * enqueued on that CPU as well as tasks which are currently sleeping after an + * execution on that CPU. + * + * This method returns the utilization of the specified CPU by discounting the + * utilization of the specified task, whenever the task is currently + * contributing to the CPU utilization. */ -static unsigned long cpu_util_wake(int cpu, struct task_struct *p) +static unsigned long cpu_util_without(int cpu, struct task_struct *p) { struct cfs_rq *cfs_rq; unsigned int util; @@ -5968,7 +5977,7 @@ static unsigned long cpu_util_wake(int cpu, struct task_struct *p) cfs_rq = &cpu_rq(cpu)->cfs; util = READ_ONCE(cfs_rq->avg.util_avg); - /* Discount task's blocked util from CPU's util */ + /* Discount task's util from CPU's util */ util -= min_t(unsigned int, util, task_util(p)); /* @@ -5977,14 +5986,14 @@ static unsigned long cpu_util_wake(int cpu, struct task_struct *p) * a) if *p is the only task sleeping on this CPU, then: * cpu_util (== task_util) > util_est (== 0) * and thus we return: - * cpu_util_wake = (cpu_util - task_util) = 0 + * cpu_util_without = (cpu_util - task_util) = 0 * * b) if other tasks are SLEEPING on this CPU, which is now exiting * IDLE, then: * cpu_util >= task_util * cpu_util > util_est (== 0) * and thus we discount *p's blocked utilization to return: - * cpu_util_wake = (cpu_util - task_util) >= 0 + * cpu_util_without = (cpu_util - task_util) >= 0 * * c) if other tasks are RUNNABLE on that CPU and * util_est > cpu_util @@ -5997,8 +6006,33 @@ static unsigned long cpu_util_wake(int cpu, struct task_struct *p) * covered by the following code when estimated utilization is * enabled. */ - if (sched_feat(UTIL_EST)) - util = max(util, READ_ONCE(cfs_rq->avg.util_est.enqueued)); + if (sched_feat(UTIL_EST)) { + unsigned int estimated = + READ_ONCE(cfs_rq->avg.util_est.enqueued); + + /* + * Despite the following checks we still have a small window + * for a possible race, when an execl's select_task_rq_fair() + * races with LB's detach_task(): + * + * detach_task() + * p->on_rq = TASK_ON_RQ_MIGRATING; + * ---------------------------------- A + * deactivate_task() \ + * dequeue_task() + RaceTime + * util_est_dequeue() / + * ---------------------------------- B + * + * The additional check on "current == p" it's required to + * properly fix the execl regression and it helps in further + * reducing the chances for the above race. + */ + if (unlikely(task_on_rq_queued(p) || current == p)) { + estimated -= min_t(unsigned int, estimated, + (_task_util_est(p) | UTIL_AVG_UNCHANGED)); + } + util = max(util, estimated); + } /* * Utilization (estimated) can exceed the CPU capacity, thus let's @@ -6015,7 +6049,7 @@ static unsigned long group_max_util(struct energy_env *eenv, int cpu_idx) int cpu; for_each_cpu(cpu, sched_group_span(eenv->sg_cap)) { - util = cpu_util_wake(cpu, eenv->p); + util = cpu_util_without(cpu, eenv->p); /* * If we are looking at the target CPU specified by the eenv, @@ -6048,7 +6082,7 @@ long group_norm_util(struct energy_env *eenv, int cpu_idx) int cpu; for_each_cpu(cpu, sched_group_span(eenv->sg)) { - util = cpu_util_wake(cpu, eenv->p); + util = cpu_util_without(cpu, eenv->p); /* * If we are looking at the target CPU specified by the eenv, @@ -6709,9 +6743,11 @@ boosted_task_util(struct task_struct *task) return util + margin; } -static unsigned long capacity_spare_wake(int cpu, struct task_struct *p) +static unsigned long cpu_util_without(int cpu, struct task_struct *p); + +static unsigned long capacity_spare_without(int cpu, struct task_struct *p) { - return max_t(long, capacity_of(cpu) - cpu_util_wake(cpu, p), 0); + return max_t(long, capacity_of(cpu) - cpu_util_without(cpu, p), 0); } /* @@ -6771,7 +6807,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, avg_load += cfs_rq_load_avg(&cpu_rq(i)->cfs); - spare_cap = capacity_spare_wake(i, p); + spare_cap = capacity_spare_without(i, p); if (spare_cap > max_spare_cap) max_spare_cap = spare_cap; @@ -7310,7 +7346,7 @@ static inline int find_best_target(struct task_struct *p, int *backup_cpu, * so prev_cpu will receive a negative bias due to the double * accounting. However, the blocked utilization may be zero. */ - wake_util = cpu_util_wake(i, p); + wake_util = cpu_util_without(i, p); new_util = wake_util + task_util_est(p); /* @@ -7767,7 +7803,7 @@ static int find_energy_efficient_cpu(struct sched_domain *sd, * Consider only CPUs where the task is expected to * fit without making the CPU overutilized. */ - spare = capacity_spare_wake(cpu_iter, p); + spare = capacity_spare_without(cpu_iter, p); if (spare * 1024 < capacity_margin * task_util_est(p)) continue; @@ -7963,7 +7999,7 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f if (sd && !(sd_flag & SD_BALANCE_FORK)) { /* - * We're going to need the task's util for capacity_spare_wake + * We're going to need the task's util for capacity_spare_without * in find_idlest_group. Sync it up to prev_cpu's * last_update_time. */ -- GitLab From ed482390c8dbf51484bfe69d0f4952cf29a4dc6b Mon Sep 17 00:00:00 2001 From: Alistair Strachan Date: Tue, 27 Nov 2018 15:33:17 -0800 Subject: [PATCH 1694/2269] efi/libstub: arm: support building with clang (commit 41f1c48420709470c51ee0e54b6fb28b956bb4e0 upstream) When building with CONFIG_EFI and CONFIG_EFI_STUB on ARM, the libstub Makefile would use -mno-single-pic-base without checking it was supported by the compiler. As the ARM (32-bit) clang backend does not support this flag, the build would fail. This changes the Makefile to check the compiler's support for -mno-single-pic-base before using it, similar to c1c386681bd7 ("ARM: 8767/1: add support for building ARM kernel with clang"). Signed-off-by: Alistair Strachan Reviewed-by: Stefan Agner Signed-off-by: Ard Biesheuvel [ND: adjusted due to missing commit ce279d374ff3 ("efi/libstub: Only disable stackleak plugin for arm64")] Signed-off-by: Nick Desaulniers Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/efi/libstub/Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile index adaa4a964f0c5..678bc910e080d 100644 --- a/drivers/firmware/efi/libstub/Makefile +++ b/drivers/firmware/efi/libstub/Makefile @@ -13,7 +13,8 @@ cflags-$(CONFIG_X86) += -m$(BITS) -D__KERNEL__ -O2 \ cflags-$(CONFIG_ARM64) := $(subst -pg,,$(KBUILD_CFLAGS)) -fpie cflags-$(CONFIG_ARM) := $(subst -pg,,$(KBUILD_CFLAGS)) \ - -fno-builtin -fpic -mno-single-pic-base + -fno-builtin -fpic \ + $(call cc-option,-mno-single-pic-base) cflags-$(CONFIG_EFI_ARMSTUB) += -I$(srctree)/scripts/dtc/libfdt -- GitLab From f1222c5f7f6036cbab74abefdbce0bffe85d3eea Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Tue, 8 May 2018 22:49:49 +0100 Subject: [PATCH 1695/2269] ARM: 8766/1: drop no-thumb-interwork in EABI mode (commit 22905a24306c8c312c2d66da9f90d09af0414f81 upstream) According to GCC documentation -m(no-)thumb-interwork is meaningless in AAPCS configurations. Also clang does not support the flag: clang-5.0: error: unknown argument: '-mno-thumb-interwork' Just drop -mno-thumb-interwork in AEABI configuration. Signed-off-by: Stefan Agner Signed-off-by: Russell King Signed-off-by: Nick Desaulniers Signed-off-by: Greg Kroah-Hartman --- arch/arm/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/Makefile b/arch/arm/Makefile index 36ae4454554ce..17e80f4832816 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -106,7 +106,7 @@ tune-$(CONFIG_CPU_V6K) =$(call cc-option,-mtune=arm1136j-s,-mtune=strongarm) tune-y := $(tune-y) ifeq ($(CONFIG_AEABI),y) -CFLAGS_ABI :=-mabi=aapcs-linux -mno-thumb-interwork -mfpu=vfp +CFLAGS_ABI :=-mabi=aapcs-linux -mfpu=vfp else CFLAGS_ABI :=$(call cc-option,-mapcs-32,-mabi=apcs-gnu) $(call cc-option,-mno-thumb-interwork,) endif -- GitLab From 764ecc201217f676fdc7f0878e74ca034716d9ee Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Tue, 8 May 2018 22:50:38 +0100 Subject: [PATCH 1696/2269] ARM: 8767/1: add support for building ARM kernel with clang (commit c1c386681bd73c4fc28eb5cc91cf8b7be9b409ba upstream) Use cc-options call for compiler options which are not available in clang. With this patch an ARMv7 multi platform kernel can be successfully build using clang (tested with version 5.0.1). Based-on-patches-by: Behan Webster Signed-off-by: Stefan Agner Signed-off-by: Russell King Signed-off-by: Nick Desaulniers Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/compressed/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile index a5889238fc9fc..746c8c575f987 100644 --- a/arch/arm/boot/compressed/Makefile +++ b/arch/arm/boot/compressed/Makefile @@ -113,7 +113,7 @@ CFLAGS_fdt_ro.o := $(nossp_flags) CFLAGS_fdt_rw.o := $(nossp_flags) CFLAGS_fdt_wip.o := $(nossp_flags) -ccflags-y := -fpic -mno-single-pic-base -fno-builtin -I$(obj) +ccflags-y := -fpic $(call cc-option,-mno-single-pic-base,) -fno-builtin -I$(obj) asflags-y := -DZIMAGE # Supply kernel BSS size to the decompressor via a linker symbol. -- GitLab From 8d7925ad96c682dd1a0b15943a5d545c5191107e Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Tue, 8 May 2018 16:27:26 +0200 Subject: [PATCH 1697/2269] bus: arm-cci: remove unnecessary unreachable() (commit 10d8713429d345867fc8998d6193b233c0cab28c upstream) Mixing asm and C code is not recommended in a naked function by gcc and leads to an error when using clang: drivers/bus/arm-cci.c:2107:2: error: non-ASM statement in naked function is not supported unreachable(); ^ While the function is marked __naked it actually properly return in asm. There is no need for the unreachable() call. GCC 7.2 generates identical object files before and after, other than (for obvious reasons) the line numbers generated by WANT_WARN_ON_SLOWPATH for all the WARN()s appearing later in the file. Suggested-by: Russell King Signed-off-by: Stefan Agner Acked-by: Nicolas Pitre Reviewed-by: Robin Murphy Signed-off-by: Olof Johansson Signed-off-by: Nick Desaulniers Signed-off-by: Greg Kroah-Hartman --- drivers/bus/arm-cci.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/bus/arm-cci.c b/drivers/bus/arm-cci.c index 5426c04fe24bc..fc2da3a617ac4 100644 --- a/drivers/bus/arm-cci.c +++ b/drivers/bus/arm-cci.c @@ -2103,8 +2103,6 @@ asmlinkage void __naked cci_enable_port_for_self(void) [sizeof_struct_cpu_port] "i" (sizeof(struct cpu_port)), [sizeof_struct_ace_port] "i" (sizeof(struct cci_ace_port)), [offsetof_port_phys] "i" (offsetof(struct cci_ace_port, phys)) ); - - unreachable(); } /** -- GitLab From 5d47e129a7107d6f35ac4ce7f32ff9376b555bfe Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Sun, 25 Mar 2018 20:09:56 +0200 Subject: [PATCH 1698/2269] ARM: trusted_foundations: do not use naked function (commit 4ea7bdc6b5b33427bbd3f41c333e21c1825462a3 upstream) As documented in GCC naked functions should only use basic ASM syntax. The extended ASM or mixture of basic ASM and "C" code is not guaranteed. Currently this works because it was hard coded to follow and check GCC behavior for arguments and register placement. Furthermore with clang using parameters in Extended asm in a naked function is not supported: arch/arm/firmware/trusted_foundations.c:47:10: error: parameter references not allowed in naked functions : "r" (type), "r" (arg1), "r" (arg2) ^ Use a regular function to be more portable. This aligns also with the other SMC call implementations e.g. in qcom_scm-32.c and bcm_kona_smc.c. Cc: Dmitry Osipenko Cc: Stephen Warren Cc: Thierry Reding Signed-off-by: Stefan Agner Signed-off-by: Thierry Reding Signed-off-by: Nick Desaulniers Signed-off-by: Greg Kroah-Hartman --- arch/arm/firmware/trusted_foundations.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/arch/arm/firmware/trusted_foundations.c b/arch/arm/firmware/trusted_foundations.c index 3fb1b5a1dce96..689e6565abfc4 100644 --- a/arch/arm/firmware/trusted_foundations.c +++ b/arch/arm/firmware/trusted_foundations.c @@ -31,21 +31,25 @@ static unsigned long cpu_boot_addr; -static void __naked tf_generic_smc(u32 type, u32 arg1, u32 arg2) +static void tf_generic_smc(u32 type, u32 arg1, u32 arg2) { + register u32 r0 asm("r0") = type; + register u32 r1 asm("r1") = arg1; + register u32 r2 asm("r2") = arg2; + asm volatile( ".arch_extension sec\n\t" - "stmfd sp!, {r4 - r11, lr}\n\t" + "stmfd sp!, {r4 - r11}\n\t" __asmeq("%0", "r0") __asmeq("%1", "r1") __asmeq("%2", "r2") "mov r3, #0\n\t" "mov r4, #0\n\t" "smc #0\n\t" - "ldmfd sp!, {r4 - r11, pc}" + "ldmfd sp!, {r4 - r11}\n\t" : - : "r" (type), "r" (arg1), "r" (arg2) - : "memory"); + : "r" (r0), "r" (r1), "r" (r2) + : "memory", "r3", "r12", "lr"); } static int tf_set_cpu_boot_addr(int cpu, unsigned long boot_addr) -- GitLab From 427d4e4c27442ed2329a9672f65af49cd5a56389 Mon Sep 17 00:00:00 2001 From: Dennis Wassenberg Date: Tue, 13 Nov 2018 14:40:34 +0100 Subject: [PATCH 1699/2269] usb: core: Fix hub port connection events lost commit 22454b79e6de05fa61a2a72d00d2eed798abbb75 upstream. This will clear the USB_PORT_FEAT_C_CONNECTION bit in case of a hub port reset only if a device is was attached to the hub port before resetting the hub port. Using a Lenovo T480s attached to the ultra dock it was not possible to detect some usb-c devices at the dock usb-c ports because the hub_port_reset code will clear the USB_PORT_FEAT_C_CONNECTION bit after the actual hub port reset. Using this device combo the USB_PORT_FEAT_C_CONNECTION bit was set between the actual hub port reset and the clear of the USB_PORT_FEAT_C_CONNECTION bit. This ends up with clearing the USB_PORT_FEAT_C_CONNECTION bit after the new device was attached such that it was not detected. This patch will not clear the USB_PORT_FEAT_C_CONNECTION bit if there is currently no device attached to the port before the hub port reset. This will avoid clearing the connection bit for new attached devices. Signed-off-by: Dennis Wassenberg Acked-by: Mathias Nyman Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hub.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index a9db0887edca5..638dc6f66d70d 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -2815,7 +2815,9 @@ static int hub_port_reset(struct usb_hub *hub, int port1, USB_PORT_FEAT_C_BH_PORT_RESET); usb_clear_port_feature(hub->hdev, port1, USB_PORT_FEAT_C_PORT_LINK_STATE); - usb_clear_port_feature(hub->hdev, port1, + + if (udev) + usb_clear_port_feature(hub->hdev, port1, USB_PORT_FEAT_C_CONNECTION); /* -- GitLab From 47631d837acccf5532a32b47943621ce62a3a477 Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Wed, 1 Aug 2018 09:37:34 +0300 Subject: [PATCH 1700/2269] usb: dwc3: gadget: fix ISOC TRB type on unaligned transfers commit 2fc6d4be35fb1e262f209758e25bfe2b7a113a7f upstream. When chaining ISOC TRBs together, only the first ISOC TRB should be of type ISOC_FIRST, all others should be of type ISOC. This patch fixes that. Fixes: c6267a51639b ("usb: dwc3: gadget: align transfers to wMaxPacketSize") Cc: # v4.11+ Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/gadget.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index d7fae66a06811..e28d2904e3a36 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -1088,7 +1088,7 @@ static void dwc3_prepare_one_trb_sg(struct dwc3_ep *dep, /* Now prepare one extra TRB to align transfer size */ trb = &dep->trb_pool[dep->trb_enqueue]; __dwc3_prepare_one_trb(dep, trb, dwc->bounce_addr, - maxp - rem, false, 0, + maxp - rem, false, 1, req->request.stream_id, req->request.short_not_ok, req->request.no_interrupt); @@ -1120,7 +1120,7 @@ static void dwc3_prepare_one_trb_linear(struct dwc3_ep *dep, /* Now prepare one extra TRB to align transfer size */ trb = &dep->trb_pool[dep->trb_enqueue]; __dwc3_prepare_one_trb(dep, trb, dwc->bounce_addr, maxp - rem, - false, 0, req->request.stream_id, + false, 1, req->request.stream_id, req->request.short_not_ok, req->request.no_interrupt); } else if (req->request.zero && req->request.length && @@ -1136,7 +1136,7 @@ static void dwc3_prepare_one_trb_linear(struct dwc3_ep *dep, /* Now prepare one extra TRB to handle ZLP */ trb = &dep->trb_pool[dep->trb_enqueue]; __dwc3_prepare_one_trb(dep, trb, dwc->bounce_addr, 0, - false, 0, req->request.stream_id, + false, 1, req->request.stream_id, req->request.short_not_ok, req->request.no_interrupt); } else { -- GitLab From 474e39f53816ef70224cdb6fae27893d679fb65f Mon Sep 17 00:00:00 2001 From: Thinh Nguyen Date: Thu, 2 Aug 2018 20:17:16 -0700 Subject: [PATCH 1701/2269] usb: dwc3: gadget: Properly check last unaligned/zero chain TRB commit ba3a51ac32ebcf8d0a54b37f1af268ad8a31c52f upstream. Current check for the last extra TRB for zero and unaligned transfers does not account for isoc OUT. The last TRB of the Buffer Descriptor for isoc OUT transfers will be retired with HWO=0. As a result, we won't return early. The req->remaining will be updated to include the BUFSIZ count of the extra TRB, and the actual number of transferred bytes calculation will be wrong. To fix this, check whether it's a short or zero packet and the last TRB chain bit to return early. Fixes: c6267a51639b ("usb: dwc3: gadget: align transfers to wMaxPacketSize") Cc: Signed-off-by: Thinh Nguyen Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/gadget.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index e28d2904e3a36..ac8d619ff8876 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -2249,7 +2249,7 @@ static int __dwc3_cleanup_done_trbs(struct dwc3 *dwc, struct dwc3_ep *dep, * with one TRB pending in the ring. We need to manually clear HWO bit * from that TRB. */ - if ((req->zero || req->unaligned) && (trb->ctrl & DWC3_TRB_CTRL_HWO)) { + if ((req->zero || req->unaligned) && !(trb->ctrl & DWC3_TRB_CTRL_CHN)) { trb->ctrl &= ~DWC3_TRB_CTRL_HWO; return 1; } -- GitLab From 2c3e97edbb19a4a47e1aa47101b1e2553e3bdad0 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 27 Aug 2018 18:30:16 +0300 Subject: [PATCH 1702/2269] usb: dwc3: core: Clean up ULPI device commit 08fd9a82fda86529bb2f2af3c2f7cb657b4d3066 upstream. If dwc3_core_init_mode() fails with deferred probe, next probe fails on sysfs with sysfs: cannot create duplicate filename '/devices/pci0000:00/0000:00:11.0/dwc3.0.auto/dwc3.0.auto.ulpi' To avoid this failure, clean up ULPI device. Cc: Signed-off-by: Andy Shevchenko Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c index 8b323a360e037..783d16a534661 100644 --- a/drivers/usb/dwc3/core.c +++ b/drivers/usb/dwc3/core.c @@ -1276,6 +1276,7 @@ static int dwc3_probe(struct platform_device *pdev) err5: dwc3_event_buffers_cleanup(dwc); + dwc3_ulpi_exit(dwc); err4: dwc3_free_scratch_buffers(dwc); -- GitLab From 0902c4e7d6dbdeaa73fa6fc635391624de6e9a5f Mon Sep 17 00:00:00 2001 From: Sandeep Singh Date: Fri, 9 Nov 2018 17:21:19 +0200 Subject: [PATCH 1703/2269] xhci: Add check for invalid byte size error when UAS devices are connected. commit d9193efba84fe4c4aa22a569fade5e6ca971f8af upstream. Observed "TRB completion code (27)" error which corresponds to Stopped - Length Invalid error(xhci spec section 4.17.4) while connecting USB to SATA bridge. Looks like this case was not considered when the following patch[1] was committed. Hence adding this new check which can prevent the invalid byte size error. [1] ade2e3a xhci: handle transfer events without TRB pointer Cc: Signed-off-by: Sandeep Singh cc: Nehal Shah cc: Shyam Sundar S K Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-ring.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 6996235e34a95..2d64cb024d002 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -2335,6 +2335,7 @@ static int handle_tx_event(struct xhci_hcd *xhci, goto cleanup; case COMP_RING_UNDERRUN: case COMP_RING_OVERRUN: + case COMP_STOPPED_LENGTH_INVALID: goto cleanup; default: xhci_err(xhci, "ERROR Transfer event for unknown stream ring slot %u ep %u\n", -- GitLab From 494427532654fc2b3c6791bdd997587643717684 Mon Sep 17 00:00:00 2001 From: Aaron Ma Date: Fri, 9 Nov 2018 17:21:21 +0200 Subject: [PATCH 1704/2269] usb: xhci: fix timeout for transition from RExit to U0 commit a5baeaeabcca3244782a9b6382ebab6f8a58f583 upstream. This definition is used by msecs_to_jiffies in milliseconds. According to the comments, max rexit timeout should be 20ms. Align with the comments to properly calculate the delay. Verified on Sunrise Point-LP and Cannon Lake. Cc: stable@vger.kernel.org Signed-off-by: Aaron Ma Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-hub.c | 4 ++-- drivers/usb/host/xhci.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c index d2a9767a8e9c5..4f612b5d0f944 100644 --- a/drivers/usb/host/xhci-hub.c +++ b/drivers/usb/host/xhci-hub.c @@ -941,7 +941,7 @@ static u32 xhci_get_port_status(struct usb_hcd *hcd, time_left = wait_for_completion_timeout( &bus_state->rexit_done[wIndex], msecs_to_jiffies( - XHCI_MAX_REXIT_TIMEOUT)); + XHCI_MAX_REXIT_TIMEOUT_MS)); spin_lock_irqsave(&xhci->lock, flags); if (time_left) { @@ -955,7 +955,7 @@ static u32 xhci_get_port_status(struct usb_hcd *hcd, } else { int port_status = readl(port_array[wIndex]); xhci_warn(xhci, "Port resume took longer than %i msec, port status = 0x%x\n", - XHCI_MAX_REXIT_TIMEOUT, + XHCI_MAX_REXIT_TIMEOUT_MS, port_status); status |= USB_PORT_STAT_SUSPEND; clear_bit(wIndex, &bus_state->rexit_ports); diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index 11232e62b8985..29dc6160c9ebe 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -1684,7 +1684,7 @@ struct xhci_bus_state { * It can take up to 20 ms to transition from RExit to U0 on the * Intel Lynx Point LP xHCI host. */ -#define XHCI_MAX_REXIT_TIMEOUT (20 * 1000) +#define XHCI_MAX_REXIT_TIMEOUT_MS 20 static inline unsigned int hcd_index(struct usb_hcd *hcd) { -- GitLab From 1129534f73336addfcea53a65c7847d1aa1d2175 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 9 Nov 2018 11:59:45 +0100 Subject: [PATCH 1705/2269] ALSA: oss: Use kvzalloc() for local buffer allocations commit 65766ee0bf7fe8b3be80e2e1c3ef54ad59b29476 upstream. PCM OSS layer may allocate a few temporary buffers, one for the core read/write and another for the conversions via plugins. Currently both are allocated via vmalloc(). But as the allocation size is equivalent with the PCM period size, the required size might be quite small, depending on the application. This patch replaces these vmalloc() calls with kvzalloc() for covering small period sizes better. Also, we use "z"-alloc variant here for addressing the possible uninitialized access reported by syzkaller. Reported-by: syzbot+1cb36954e127c98dd037@syzkaller.appspotmail.com Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/core/oss/pcm_oss.c | 6 +++--- sound/core/oss/pcm_plugin.c | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/sound/core/oss/pcm_oss.c b/sound/core/oss/pcm_oss.c index b4f954e6d2db7..df358e838b5bd 100644 --- a/sound/core/oss/pcm_oss.c +++ b/sound/core/oss/pcm_oss.c @@ -1062,8 +1062,8 @@ static int snd_pcm_oss_change_params_locked(struct snd_pcm_substream *substream) runtime->oss.channels = params_channels(params); runtime->oss.rate = params_rate(params); - vfree(runtime->oss.buffer); - runtime->oss.buffer = vmalloc(runtime->oss.period_bytes); + kvfree(runtime->oss.buffer); + runtime->oss.buffer = kvzalloc(runtime->oss.period_bytes, GFP_KERNEL); if (!runtime->oss.buffer) { err = -ENOMEM; goto failure; @@ -2328,7 +2328,7 @@ static void snd_pcm_oss_release_substream(struct snd_pcm_substream *substream) { struct snd_pcm_runtime *runtime; runtime = substream->runtime; - vfree(runtime->oss.buffer); + kvfree(runtime->oss.buffer); runtime->oss.buffer = NULL; #ifdef CONFIG_SND_PCM_OSS_PLUGINS snd_pcm_oss_plugin_clear(substream); diff --git a/sound/core/oss/pcm_plugin.c b/sound/core/oss/pcm_plugin.c index 85a56af104bd6..617845d4a811b 100644 --- a/sound/core/oss/pcm_plugin.c +++ b/sound/core/oss/pcm_plugin.c @@ -66,8 +66,8 @@ static int snd_pcm_plugin_alloc(struct snd_pcm_plugin *plugin, snd_pcm_uframes_t return -ENXIO; size /= 8; if (plugin->buf_frames < frames) { - vfree(plugin->buf); - plugin->buf = vmalloc(size); + kvfree(plugin->buf); + plugin->buf = kvzalloc(size, GFP_KERNEL); plugin->buf_frames = frames; } if (!plugin->buf) { @@ -191,7 +191,7 @@ int snd_pcm_plugin_free(struct snd_pcm_plugin *plugin) if (plugin->private_free) plugin->private_free(plugin); kfree(plugin->buf_channels); - vfree(plugin->buf); + kvfree(plugin->buf); kfree(plugin); return 0; } -- GitLab From d34af066f084defffd77f33778217fef8889e063 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 15 Nov 2018 15:03:24 -0800 Subject: [PATCH 1706/2269] MAINTAINERS: Add Sasha as a stable branch maintainer commit cb5d21946d2a2f4687c482ab4604af1d29dac35a upstream. Sasha has somehow been convinced into helping me with the stable kernel maintenance. Codify this slip in good judgement before he realizes what he really signed up for :) Signed-off-by: Greg Kroah-Hartman Acked-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 546beb6b01762..6cb70b8533237 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -12662,6 +12662,7 @@ F: arch/alpha/kernel/srm_env.c STABLE BRANCH M: Greg Kroah-Hartman +M: Sasha Levin L: stable@vger.kernel.org S: Supported F: Documentation/process/stable-kernel-rules.rst -- GitLab From 7c9f55c5a58a8a7131e1f5a82a7f85bf06c62a63 Mon Sep 17 00:00:00 2001 From: Rajat Jain Date: Mon, 29 Oct 2018 15:17:01 -0700 Subject: [PATCH 1707/2269] mmc: sdhci-pci: Try "cd" for card-detect lookup before using NULL commit cdcefe6bd9df754f528ffc339d3cc143cea4ddf6 upstream. Problem: The card detect IRQ does not work with modern BIOS (that want to use _DSD to provide the card detect GPIO to the driver). Details: The mmc core provides the mmc_gpiod_request_cd() API to let host drivers request the gpio descriptor for the "card detect" pin. This pin is specified in the ACPI for the SDHC device: * Either as a resource using _CRS. This is a method used by legacy BIOS. (The driver needs to tell which resource index). * Or as a named property ("cd-gpios"/"cd-gpio") in _DSD (which internally points to an entry in _CRS). This way, the driver can lookup using a string. This is what modern BIOS prefer to use. This API finally results in a call to the following code: struct gpio_desc *acpi_find_gpio(..., const char *con_id,...) { ... /* Lookup gpio (using "-gpio") in the _DSD */ ... if (!acpi_can_fallback_to_crs(adev, con_id)) return ERR_PTR(-ENOENT); ... /* Falling back to _CRS is allowed, Lookup gpio in the _CRS */ ... } Note that this means that if the ACPI has _DSD properties, the kernel will never use _CRS for the lookup (Because acpi_can_fallback_to_crs() will always be false for any device hat has _DSD entries). The SDHCI driver is thus currently broken on a modern BIOS, even if BIOS provides both _CRS (for index based lookup) and _DSD entries (for string based lookup). Ironically, none of these will be used for the lookup currently because: * Since the con_id is NULL, acpi_find_gpio() does not find a matching entry in DSDT. (The _DSDT entry has the property name = "cd-gpios") * Because ACPI contains DSDT entries, thus acpi_can_fallback_to_crs() returns false (because device properties have been populated from _DSD), thus the _CRS is never used for the lookup. Fix: Try "cd" for lookup in the _DSD before falling back to using NULL so as to try looking up in the _CRS. I've tested this patch successfully with both Legacy BIOS (that provide only _CRS method) as well as modern BIOS (that provide both _CRS and _DSD). Also the use of "cd" appears to be fairly consistent across other users of this API (other MMC host controller drivers). Link: https://lkml.org/lkml/2018/9/25/1113 Signed-off-by: Rajat Jain Acked-by: Adrian Hunter Fixes: f10e4bf6632b ("gpio: acpi: Even more tighten up ACPI GPIO lookups") Cc: stable@vger.kernel.org Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci-pci-core.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/host/sdhci-pci-core.c b/drivers/mmc/host/sdhci-pci-core.c index 44da037b13ba1..0e386f5cc8365 100644 --- a/drivers/mmc/host/sdhci-pci-core.c +++ b/drivers/mmc/host/sdhci-pci-core.c @@ -1607,8 +1607,13 @@ static struct sdhci_pci_slot *sdhci_pci_probe_slot( host->mmc->caps2 |= MMC_CAP2_NO_PRESCAN_POWERUP; if (slot->cd_idx >= 0) { - ret = mmc_gpiod_request_cd(host->mmc, NULL, slot->cd_idx, + ret = mmc_gpiod_request_cd(host->mmc, "cd", slot->cd_idx, slot->cd_override_level, 0, NULL); + if (ret && ret != -EPROBE_DEFER) + ret = mmc_gpiod_request_cd(host->mmc, NULL, + slot->cd_idx, + slot->cd_override_level, + 0, NULL); if (ret == -EPROBE_DEFER) goto remove; -- GitLab From 6a5f25e30045bb2c3622136807d76be3bd4fb0ba Mon Sep 17 00:00:00 2001 From: Vladimir Zapolskiy Date: Fri, 2 Nov 2018 15:39:43 +0200 Subject: [PATCH 1708/2269] gpio: don't free unallocated ida on gpiochip_add_data_with_key() error path commit a05a14049999598a3bb6fab12db6b768a0215522 upstream. The change corrects the error path in gpiochip_add_data_with_key() by avoiding to call ida_simple_remove(), if ida_simple_get() returns an error. Note that ida_simple_remove()/ida_free() throws a BUG(), if id argument is negative, it allows to easily check the correctness of the fix by fuzzing the return value from ida_simple_get(). Fixes: ff2b13592299 ("gpio: make the gpiochip a real device") Cc: stable@vger.kernel.org # v4.6+ Signed-off-by: Vladimir Zapolskiy Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/gpio/gpiolib.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 7d5de4ef4f22b..21062cb6b85f4 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -1166,7 +1166,7 @@ int gpiochip_add_data(struct gpio_chip *chip, void *data) gdev->descs = kcalloc(chip->ngpio, sizeof(gdev->descs[0]), GFP_KERNEL); if (!gdev->descs) { status = -ENOMEM; - goto err_free_gdev; + goto err_free_ida; } if (chip->ngpio == 0) { @@ -1298,8 +1298,9 @@ err_free_label: kfree(gdev->label); err_free_descs: kfree(gdev->descs); -err_free_gdev: +err_free_ida: ida_simple_remove(&gpio_ida, gdev->id); +err_free_gdev: /* failures here can mean systems won't boot... */ pr_err("%s: GPIOs %d..%d (%s) failed to register\n", __func__, gdev->base, gdev->base + gdev->ngpio - 1, -- GitLab From bc176d7a68bef23e1db1b0ec6e4ad4d75aa63958 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Thu, 16 Aug 2018 13:25:48 +0300 Subject: [PATCH 1709/2269] iwlwifi: mvm: support sta_statistics() even on older firmware commit ec484d03ef0df8d34086b95710e355a259cbe1f2 upstream. The oldest firmware supported by iwlmvm do support getting the average beacon RSSI. Enable the sta_statistics() call from mac80211 even on older firmware versions. Fixes: 33cef9256342 ("iwlwifi: mvm: support beacon statistics for BSS client") Cc: stable@vger.kernel.org # 4.2+ Signed-off-by: Emmanuel Grumbach Signed-off-by: Luca Coelho Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index 80a653950e86b..f161fa55d2021 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -4189,10 +4189,6 @@ static void iwl_mvm_mac_sta_statistics(struct ieee80211_hw *hw, sinfo->filled |= BIT(NL80211_STA_INFO_SIGNAL_AVG); } - if (!fw_has_capa(&mvm->fw->ucode_capa, - IWL_UCODE_TLV_CAPA_RADIO_BEACON_STATS)) - return; - /* if beacon filtering isn't on mac80211 does it anyway */ if (!(vif->driver_flags & IEEE80211_VIF_BEACON_FILTER)) return; -- GitLab From b58f0659f7c6c9957a528aa15cb0189055570672 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Wed, 3 Oct 2018 11:16:54 +0300 Subject: [PATCH 1710/2269] iwlwifi: mvm: fix regulatory domain update when the firmware starts commit 82715ac71e6b94a2c2136e31f3a8e6748e33aa8c upstream. When the firmware starts, it doesn't have any regulatory information, hence it uses the world wide limitations. The driver can feed the firmware with previous knowledge that was kept in the driver, but the firmware may still not update its internal tables. This happens when we start a BSS interface, and then the firmware can change the regulatory tables based on our location and it'll use more lenient, location specific rules. Then, if the firmware is shut down (when the interface is brought down), and then an AP interface is created, the firmware will forget the country specific rules. The host will think that we are in a certain country that may allow channels and will try to teach the firmware about our location, but the firmware may still not allow to drop the world wide limitations and apply country specific rules because it was just re-started. In this case, the firmware will reply with MCC_RESP_ILLEGAL to the MCC_UPDATE_CMD. In that case, iwlwifi needs to let the upper layers (cfg80211 / hostapd) know that the channel list they know about has been updated. This fixes https://bugzilla.kernel.org/show_bug.cgi?id=201105 Cc: stable@vger.kernel.org Signed-off-by: Emmanuel Grumbach Signed-off-by: Luca Coelho Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c | 8 ++++++-- drivers/net/wireless/intel/iwlwifi/mvm/nvm.c | 5 ++--- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index f161fa55d2021..77ed6ecf5ee54 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -328,8 +328,12 @@ struct ieee80211_regdomain *iwl_mvm_get_regdomain(struct wiphy *wiphy, goto out; } - if (changed) - *changed = (resp->status == MCC_RESP_NEW_CHAN_PROFILE); + if (changed) { + u32 status = le32_to_cpu(resp->status); + + *changed = (status == MCC_RESP_NEW_CHAN_PROFILE || + status == MCC_RESP_ILLEGAL); + } regd = iwl_parse_nvm_mcc_info(mvm->trans->dev, mvm->cfg, __le32_to_cpu(resp->n_channels), diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c b/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c index fb25b6f293238..ca2d66ce84247 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c @@ -732,9 +732,8 @@ iwl_mvm_update_mcc(struct iwl_mvm *mvm, const char *alpha2, } IWL_DEBUG_LAR(mvm, - "MCC response status: 0x%x. new MCC: 0x%x ('%c%c') change: %d n_chans: %d\n", - status, mcc, mcc >> 8, mcc & 0xff, - !!(status == MCC_RESP_NEW_CHAN_PROFILE), n_channels); + "MCC response status: 0x%x. new MCC: 0x%x ('%c%c') n_chans: %d\n", + status, mcc, mcc >> 8, mcc & 0xff, n_channels); exit: iwl_free_resp(&cmd); -- GitLab From bf02eaf57a328b7213520396f4920902286e8cd8 Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Wed, 17 Oct 2018 08:35:15 +0300 Subject: [PATCH 1711/2269] iwlwifi: mvm: don't use SAR Geo if basic SAR is not used commit 5d041c46ccb9b48acc110e214beff5e2789311df upstream. We can't use SAR Geo if basic SAR is not enabled, since the SAR Geo tables define offsets in relation to the basic SAR table in use. To fix this, make iwl_mvm_sar_init() return one in case WRDS is not available, so we can skip reading WGDS entirely. Fixes: a6bff3cb19b7 ("iwlwifi: mvm: add GEO_TX_POWER_LIMIT cmd for geographic tx power table") Cc: stable@vger.kernel.org # 4.12+ Signed-off-by: Luca Coelho Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/intel/iwlwifi/mvm/fw.c | 36 ++++++++++++++++----- 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c index b71a9d11a50f1..11319675f2d9e 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c @@ -997,6 +997,11 @@ static int iwl_mvm_sar_get_ewrd_table(struct iwl_mvm *mvm) return -ENOENT; } +static int iwl_mvm_sar_get_wgds_table(struct iwl_mvm *mvm) +{ + return -ENOENT; +} + static int iwl_mvm_sar_geo_init(struct iwl_mvm *mvm) { return 0; @@ -1023,8 +1028,11 @@ static int iwl_mvm_sar_init(struct iwl_mvm *mvm) IWL_DEBUG_RADIO(mvm, "WRDS SAR BIOS table invalid or unavailable. (%d)\n", ret); - /* if not available, don't fail and don't bother with EWRD */ - return 0; + /* + * If not available, don't fail and don't bother with EWRD. + * Return 1 to tell that we can't use WGDS either. + */ + return 1; } ret = iwl_mvm_sar_get_ewrd_table(mvm); @@ -1037,9 +1045,13 @@ static int iwl_mvm_sar_init(struct iwl_mvm *mvm) /* choose profile 1 (WRDS) as default for both chains */ ret = iwl_mvm_sar_select_profile(mvm, 1, 1); - /* if we don't have profile 0 from BIOS, just skip it */ + /* + * If we don't have profile 0 from BIOS, just skip it. This + * means that SAR Geo will not be enabled either, even if we + * have other valid profiles. + */ if (ret == -ENOENT) - return 0; + return 1; return ret; } @@ -1229,11 +1241,19 @@ int iwl_mvm_up(struct iwl_mvm *mvm) iwl_mvm_unref(mvm, IWL_MVM_REF_UCODE_DOWN); ret = iwl_mvm_sar_init(mvm); - if (ret) - goto error; + if (ret == 0) { + ret = iwl_mvm_sar_geo_init(mvm); + } else if (ret > 0 && !iwl_mvm_sar_get_wgds_table(mvm)) { + /* + * If basic SAR is not available, we check for WGDS, + * which should *not* be available either. If it is + * available, issue an error, because we can't use SAR + * Geo without basic SAR. + */ + IWL_ERR(mvm, "BIOS contains WGDS but no WRDS\n"); + } - ret = iwl_mvm_sar_geo_init(mvm); - if (ret) + if (ret < 0) goto error; iwl_mvm_leds_sync(mvm); -- GitLab From a0b9ddf4906e6340c4ce920c9450de05021e73ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Thu, 8 Nov 2018 16:08:29 +0100 Subject: [PATCH 1712/2269] brcmfmac: fix reporting support for 160 MHz channels MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit d1fe6ad6f6bd61c84788d3a7b11e459a439c6169 upstream. Driver can report IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ so it's important to provide valid & complete info about supported bands for each channel. By default no support for 160 MHz should be assumed unless firmware reports it for a given channel later. This fixes info passed to the userspace. Without that change userspace could try to use invalid channel and fail to start an interface. Signed-off-by: Rafał Miłecki Cc: stable@vger.kernel.org Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c index 083e5ce7eac73..cd6c5ece9a5d3 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c @@ -6098,7 +6098,8 @@ static int brcmf_construct_chaninfo(struct brcmf_cfg80211_info *cfg, * for subsequent chanspecs. */ channel->flags = IEEE80211_CHAN_NO_HT40 | - IEEE80211_CHAN_NO_80MHZ; + IEEE80211_CHAN_NO_80MHZ | + IEEE80211_CHAN_NO_160MHZ; ch.bw = BRCMU_CHAN_BW_20; cfg->d11inf.encchspec(&ch); chaninfo = ch.chspec; -- GitLab From 3b99dcd4026a08fc9d1fd5cc0a9fc50529803fca Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Tue, 16 Oct 2018 11:56:26 +0300 Subject: [PATCH 1713/2269] tools/power/cpupower: fix compilation with STATIC=true commit 9de9aa45e9bd67232e000cca42ceb134b8ae51b6 upstream. Rename duplicate sysfs_read_file into cpupower_read_sysfs and fix linking. Signed-off-by: Konstantin Khlebnikov Acked-by: Thomas Renninger Cc: Signed-off-by: Shuah Khan (Samsung OSG) Signed-off-by: Greg Kroah-Hartman --- tools/power/cpupower/bench/Makefile | 2 +- tools/power/cpupower/lib/cpufreq.c | 2 +- tools/power/cpupower/lib/cpuidle.c | 2 +- tools/power/cpupower/lib/cpupower.c | 4 ++-- tools/power/cpupower/lib/cpupower_intern.h | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tools/power/cpupower/bench/Makefile b/tools/power/cpupower/bench/Makefile index d79ab161cc75f..f68b4bc552739 100644 --- a/tools/power/cpupower/bench/Makefile +++ b/tools/power/cpupower/bench/Makefile @@ -9,7 +9,7 @@ endif ifeq ($(strip $(STATIC)),true) LIBS = -L../ -L$(OUTPUT) -lm OBJS = $(OUTPUT)main.o $(OUTPUT)parse.o $(OUTPUT)system.o $(OUTPUT)benchmark.o \ - $(OUTPUT)../lib/cpufreq.o $(OUTPUT)../lib/sysfs.o + $(OUTPUT)../lib/cpufreq.o $(OUTPUT)../lib/cpupower.o else LIBS = -L../ -L$(OUTPUT) -lm -lcpupower OBJS = $(OUTPUT)main.o $(OUTPUT)parse.o $(OUTPUT)system.o $(OUTPUT)benchmark.o diff --git a/tools/power/cpupower/lib/cpufreq.c b/tools/power/cpupower/lib/cpufreq.c index 1b993fe1ce237..0c0f3e3f0d803 100644 --- a/tools/power/cpupower/lib/cpufreq.c +++ b/tools/power/cpupower/lib/cpufreq.c @@ -28,7 +28,7 @@ static unsigned int sysfs_cpufreq_read_file(unsigned int cpu, const char *fname, snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u/cpufreq/%s", cpu, fname); - return sysfs_read_file(path, buf, buflen); + return cpupower_read_sysfs(path, buf, buflen); } /* helper function to write a new value to a /sys file */ diff --git a/tools/power/cpupower/lib/cpuidle.c b/tools/power/cpupower/lib/cpuidle.c index 9bd4c7655fdb2..852d25462388c 100644 --- a/tools/power/cpupower/lib/cpuidle.c +++ b/tools/power/cpupower/lib/cpuidle.c @@ -319,7 +319,7 @@ static unsigned int sysfs_cpuidle_read_file(const char *fname, char *buf, snprintf(path, sizeof(path), PATH_TO_CPU "cpuidle/%s", fname); - return sysfs_read_file(path, buf, buflen); + return cpupower_read_sysfs(path, buf, buflen); } diff --git a/tools/power/cpupower/lib/cpupower.c b/tools/power/cpupower/lib/cpupower.c index 9c395ec924def..9711d628b0f44 100644 --- a/tools/power/cpupower/lib/cpupower.c +++ b/tools/power/cpupower/lib/cpupower.c @@ -15,7 +15,7 @@ #include "cpupower.h" #include "cpupower_intern.h" -unsigned int sysfs_read_file(const char *path, char *buf, size_t buflen) +unsigned int cpupower_read_sysfs(const char *path, char *buf, size_t buflen) { int fd; ssize_t numread; @@ -95,7 +95,7 @@ static int sysfs_topology_read_file(unsigned int cpu, const char *fname, int *re snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u/topology/%s", cpu, fname); - if (sysfs_read_file(path, linebuf, MAX_LINE_LEN) == 0) + if (cpupower_read_sysfs(path, linebuf, MAX_LINE_LEN) == 0) return -1; *result = strtol(linebuf, &endp, 0); if (endp == linebuf || errno == ERANGE) diff --git a/tools/power/cpupower/lib/cpupower_intern.h b/tools/power/cpupower/lib/cpupower_intern.h index 92affdfbe4174..4887c76d23f86 100644 --- a/tools/power/cpupower/lib/cpupower_intern.h +++ b/tools/power/cpupower/lib/cpupower_intern.h @@ -3,4 +3,4 @@ #define MAX_LINE_LEN 4096 #define SYSFS_PATH_MAX 255 -unsigned int sysfs_read_file(const char *path, char *buf, size_t buflen); +unsigned int cpupower_read_sysfs(const char *path, char *buf, size_t buflen); -- GitLab From 335f31342699f9015f0ab5a8d41d6c911085f033 Mon Sep 17 00:00:00 2001 From: Dominique Martinet Date: Mon, 27 Aug 2018 15:12:05 +0900 Subject: [PATCH 1714/2269] v9fs_dir_readdir: fix double-free on p9stat_read error commit 81c99089bce693b94b775b6eb888115d2d540086 upstream. p9stat_read will call p9stat_free on error, we should only free the struct content on success. There also is no need to "p9stat_init" st as the read function will zero the whole struct for us anyway, so clean up the code a bit while we are here. Link: http://lkml.kernel.org/r/1535410108-20650-1-git-send-email-asmadeus@codewreck.org Signed-off-by: Dominique Martinet Reported-by: syzbot+d4252148d198410b864f@syzkaller.appspotmail.com Signed-off-by: Greg Kroah-Hartman --- fs/9p/vfs_dir.c | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c index b0405d6aac854..48db9a9f13f9e 100644 --- a/fs/9p/vfs_dir.c +++ b/fs/9p/vfs_dir.c @@ -76,15 +76,6 @@ static inline int dt_type(struct p9_wstat *mistat) return rettype; } -static void p9stat_init(struct p9_wstat *stbuf) -{ - stbuf->name = NULL; - stbuf->uid = NULL; - stbuf->gid = NULL; - stbuf->muid = NULL; - stbuf->extension = NULL; -} - /** * v9fs_alloc_rdir_buf - Allocate buffer used for read and readdir * @filp: opened file structure @@ -145,12 +136,10 @@ static int v9fs_dir_readdir(struct file *file, struct dir_context *ctx) rdir->tail = n; } while (rdir->head < rdir->tail) { - p9stat_init(&st); err = p9stat_read(fid->clnt, rdir->buf + rdir->head, rdir->tail - rdir->head, &st); if (err) { p9_debug(P9_DEBUG_VFS, "returned %d\n", err); - p9stat_free(&st); return -EIO; } reclen = st.size+2; -- GitLab From 9520db16756e24873677d867a6a0f9cab3b6726d Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Sat, 8 Sep 2018 01:42:58 +0900 Subject: [PATCH 1715/2269] selinux: Add __GFP_NOWARN to allocation at str_read() commit 4458bba09788e70e8fb39ad003f087cd9dfbd6ac upstream. syzbot is hitting warning at str_read() [1] because len parameter can become larger than KMALLOC_MAX_SIZE. We don't need to emit warning for this case. [1] https://syzkaller.appspot.com/bug?id=7f2f5aad79ea8663c296a2eedb81978401a908f0 Signed-off-by: Tetsuo Handa Reported-by: syzbot Signed-off-by: Paul Moore Signed-off-by: Greg Kroah-Hartman --- security/selinux/ss/policydb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/selinux/ss/policydb.c b/security/selinux/ss/policydb.c index 6e8c8056d7adf..6688ac5b991ee 100644 --- a/security/selinux/ss/policydb.c +++ b/security/selinux/ss/policydb.c @@ -1099,7 +1099,7 @@ static int str_read(char **strp, gfp_t flags, void *fp, u32 len) if ((len == 0) || (len == (u32)-1)) return -EINVAL; - str = kmalloc(len + 1, flags); + str = kmalloc(len + 1, flags | __GFP_NOWARN); if (!str) return -ENOMEM; -- GitLab From 922fad66a961a34d970f9d0d0e961813489c556c Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Tue, 16 Oct 2018 17:07:35 -0700 Subject: [PATCH 1716/2269] Input: synaptics - avoid using uninitialized variable when probing commit f39f8688888ae74fa8deae2d01289b69b4727394 upstream. synaptics_detect() does not check whether sending commands to the device succeeds and instead relies on getting unique data from the device. Let's make sure we seed entire buffer with zeroes to make sure we will not use garbage on stack that just happen to be 0x47. Reported-by: syzbot+13cb3b01d0784e4ffc3f@syzkaller.appspotmail.com Reviewed-by: Benjamin Tissoires Reviewed-by: Peter Hutterer Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/mouse/synaptics.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c index 6c4bbd38700e2..6f36e2d01e2e9 100644 --- a/drivers/input/mouse/synaptics.c +++ b/drivers/input/mouse/synaptics.c @@ -99,9 +99,7 @@ static int synaptics_mode_cmd(struct psmouse *psmouse, u8 mode) int synaptics_detect(struct psmouse *psmouse, bool set_properties) { struct ps2dev *ps2dev = &psmouse->ps2dev; - u8 param[4]; - - param[0] = 0; + u8 param[4] = { 0 }; ps2_command(ps2dev, param, PSMOUSE_CMD_SETRES); ps2_command(ps2dev, param, PSMOUSE_CMD_SETRES); -- GitLab From bc94cd6d869ea0ac78936b14321d37bdea657ba4 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Fri, 2 Nov 2018 15:48:42 -0700 Subject: [PATCH 1717/2269] bfs: add sanity check at bfs_fill_super() commit 9f2df09a33aa2c76ce6385d382693f98d7f2f07e upstream. syzbot is reporting too large memory allocation at bfs_fill_super() [1]. Since file system image is corrupted such that bfs_sb->s_start == 0, bfs_fill_super() is trying to allocate 8MB of continuous memory. Fix this by adding a sanity check on bfs_sb->s_start, __GFP_NOWARN and printf(). [1] https://syzkaller.appspot.com/bug?id=16a87c236b951351374a84c8a32f40edbc034e96 Link: http://lkml.kernel.org/r/1525862104-3407-1-git-send-email-penguin-kernel@I-love.SAKURA.ne.jp Signed-off-by: Tetsuo Handa Reported-by: syzbot Reviewed-by: Andrew Morton Cc: Tigran Aivazian Cc: Matthew Wilcox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/bfs/inode.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c index 9a69392f1fb37..d81c148682e71 100644 --- a/fs/bfs/inode.c +++ b/fs/bfs/inode.c @@ -350,7 +350,8 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent) s->s_magic = BFS_MAGIC; - if (le32_to_cpu(bfs_sb->s_start) > le32_to_cpu(bfs_sb->s_end)) { + if (le32_to_cpu(bfs_sb->s_start) > le32_to_cpu(bfs_sb->s_end) || + le32_to_cpu(bfs_sb->s_start) < BFS_BSIZE) { printf("Superblock is corrupted\n"); goto out1; } @@ -359,9 +360,11 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent) sizeof(struct bfs_inode) + BFS_ROOT_INO - 1; imap_len = (info->si_lasti / 8) + 1; - info->si_imap = kzalloc(imap_len, GFP_KERNEL); - if (!info->si_imap) + info->si_imap = kzalloc(imap_len, GFP_KERNEL | __GFP_NOWARN); + if (!info->si_imap) { + printf("Cannot allocate %u bytes\n", imap_len); goto out1; + } for (i = 0; i < BFS_ROOT_INO; i++) set_bit(i, info->si_imap); -- GitLab From 8376fdc999be008f0e9918db52f1ed8c08f5a1c9 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 29 Oct 2018 23:10:29 +0800 Subject: [PATCH 1718/2269] sctp: clear the transport of some out_chunk_list chunks in sctp_assoc_rm_peer commit df132eff463873e14e019a07f387b4d577d6d1f9 upstream. If a transport is removed by asconf but there still are some chunks with this transport queuing on out_chunk_list, later an use-after-free issue will be caused when accessing this transport from these chunks in sctp_outq_flush(). This is an old bug, we fix it by clearing the transport of these chunks in out_chunk_list when removing a transport in sctp_assoc_rm_peer(). Reported-by: syzbot+56a40ceee5fb35932f4d@syzkaller.appspotmail.com Signed-off-by: Xin Long Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sctp/associola.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 58f7d8cfd748c..4982b31fec8ef 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -497,8 +497,9 @@ void sctp_assoc_set_primary(struct sctp_association *asoc, void sctp_assoc_rm_peer(struct sctp_association *asoc, struct sctp_transport *peer) { - struct list_head *pos; - struct sctp_transport *transport; + struct sctp_transport *transport; + struct list_head *pos; + struct sctp_chunk *ch; pr_debug("%s: association:%p addr:%pISpc\n", __func__, asoc, &peer->ipaddr.sa); @@ -562,7 +563,6 @@ void sctp_assoc_rm_peer(struct sctp_association *asoc, */ if (!list_empty(&peer->transmitted)) { struct sctp_transport *active = asoc->peer.active_path; - struct sctp_chunk *ch; /* Reset the transport of each chunk on this list */ list_for_each_entry(ch, &peer->transmitted, @@ -584,6 +584,10 @@ void sctp_assoc_rm_peer(struct sctp_association *asoc, sctp_transport_hold(active); } + list_for_each_entry(ch, &asoc->outqueue.out_chunk_list, list) + if (ch->transport == peer) + ch->transport = NULL; + asoc->peer.transport_count--; sctp_transport_free(peer); -- GitLab From 49ee6220be1b9c2dfd00a4a7adc55c2d7ffc7faf Mon Sep 17 00:00:00 2001 From: Andrew Price Date: Mon, 8 Oct 2018 07:52:43 -0500 Subject: [PATCH 1719/2269] gfs2: Don't leave s_fs_info pointing to freed memory in init_sbd commit 4c62bd9cea7bcf10292f7e4c57a2bca332942697 upstream. When alloc_percpu() fails, sdp gets freed but sb->s_fs_info still points to the same address. Move the assignment after that error check so that s_fs_info can only point to a valid sdp or NULL, which is checked for later in the error path, in gfs2_kill_super(). Reported-by: syzbot+dcb8b3587445007f5808@syzkaller.appspotmail.com Signed-off-by: Andrew Price Signed-off-by: Bob Peterson Signed-off-by: Greg Kroah-Hartman --- fs/gfs2/ops_fstype.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 28d6c65c8bb38..057be88eb1b42 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -72,13 +72,13 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb) if (!sdp) return NULL; - sb->s_fs_info = sdp; sdp->sd_vfs = sb; sdp->sd_lkstats = alloc_percpu(struct gfs2_pcpu_lkstats); if (!sdp->sd_lkstats) { kfree(sdp); return NULL; } + sb->s_fs_info = sdp; set_bit(SDF_NOJOURNALID, &sdp->sd_flags); gfs2_tune_init(&sdp->sd_tune); -- GitLab From 8a37895d1e35884c160fbdd2bb90aeab8daafd87 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 22 Oct 2018 09:24:27 -0700 Subject: [PATCH 1720/2269] llc: do not use sk_eat_skb() commit 604d415e2bd642b7e02c80e719e0396b9d4a77a6 upstream. syzkaller triggered a use-after-free [1], caused by a combination of skb_get() in llc_conn_state_process() and usage of sk_eat_skb() sk_eat_skb() is assuming the skb about to be freed is only used by the current thread. TCP/DCCP stacks enforce this because current thread holds the socket lock. llc_conn_state_process() wants to make sure skb does not disappear, and holds a reference on the skb it manipulates. But as soon as this skb is added to socket receive queue, another thread can consume it. This means that llc must use regular skb_unlink() and kfree_skb() so that both producer and consumer can safely work on the same skb. [1] BUG: KASAN: use-after-free in atomic_read include/asm-generic/atomic-instrumented.h:21 [inline] BUG: KASAN: use-after-free in refcount_read include/linux/refcount.h:43 [inline] BUG: KASAN: use-after-free in skb_unref include/linux/skbuff.h:967 [inline] BUG: KASAN: use-after-free in kfree_skb+0xb7/0x580 net/core/skbuff.c:655 Read of size 4 at addr ffff8801d1f6fba4 by task ksoftirqd/1/18 CPU: 1 PID: 18 Comm: ksoftirqd/1 Not tainted 4.19.0-rc8+ #295 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x1c4/0x2b6 lib/dump_stack.c:113 print_address_description.cold.8+0x9/0x1ff mm/kasan/report.c:256 kasan_report_error mm/kasan/report.c:354 [inline] kasan_report.cold.9+0x242/0x309 mm/kasan/report.c:412 check_memory_region_inline mm/kasan/kasan.c:260 [inline] check_memory_region+0x13e/0x1b0 mm/kasan/kasan.c:267 kasan_check_read+0x11/0x20 mm/kasan/kasan.c:272 atomic_read include/asm-generic/atomic-instrumented.h:21 [inline] refcount_read include/linux/refcount.h:43 [inline] skb_unref include/linux/skbuff.h:967 [inline] kfree_skb+0xb7/0x580 net/core/skbuff.c:655 llc_sap_state_process+0x9b/0x550 net/llc/llc_sap.c:224 llc_sap_rcv+0x156/0x1f0 net/llc/llc_sap.c:297 llc_sap_handler+0x65e/0xf80 net/llc/llc_sap.c:438 llc_rcv+0x79e/0xe20 net/llc/llc_input.c:208 __netif_receive_skb_one_core+0x14d/0x200 net/core/dev.c:4913 __netif_receive_skb+0x2c/0x1e0 net/core/dev.c:5023 process_backlog+0x218/0x6f0 net/core/dev.c:5829 napi_poll net/core/dev.c:6249 [inline] net_rx_action+0x7c5/0x1950 net/core/dev.c:6315 __do_softirq+0x30c/0xb03 kernel/softirq.c:292 run_ksoftirqd+0x94/0x100 kernel/softirq.c:653 smpboot_thread_fn+0x68b/0xa00 kernel/smpboot.c:164 kthread+0x35a/0x420 kernel/kthread.c:246 ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:413 Allocated by task 18: save_stack+0x43/0xd0 mm/kasan/kasan.c:448 set_track mm/kasan/kasan.c:460 [inline] kasan_kmalloc+0xc7/0xe0 mm/kasan/kasan.c:553 kasan_slab_alloc+0x12/0x20 mm/kasan/kasan.c:490 kmem_cache_alloc_node+0x144/0x730 mm/slab.c:3644 __alloc_skb+0x119/0x770 net/core/skbuff.c:193 alloc_skb include/linux/skbuff.h:995 [inline] llc_alloc_frame+0xbc/0x370 net/llc/llc_sap.c:54 llc_station_ac_send_xid_r net/llc/llc_station.c:52 [inline] llc_station_rcv+0x1dc/0x1420 net/llc/llc_station.c:111 llc_rcv+0xc32/0xe20 net/llc/llc_input.c:220 __netif_receive_skb_one_core+0x14d/0x200 net/core/dev.c:4913 __netif_receive_skb+0x2c/0x1e0 net/core/dev.c:5023 process_backlog+0x218/0x6f0 net/core/dev.c:5829 napi_poll net/core/dev.c:6249 [inline] net_rx_action+0x7c5/0x1950 net/core/dev.c:6315 __do_softirq+0x30c/0xb03 kernel/softirq.c:292 Freed by task 16383: save_stack+0x43/0xd0 mm/kasan/kasan.c:448 set_track mm/kasan/kasan.c:460 [inline] __kasan_slab_free+0x102/0x150 mm/kasan/kasan.c:521 kasan_slab_free+0xe/0x10 mm/kasan/kasan.c:528 __cache_free mm/slab.c:3498 [inline] kmem_cache_free+0x83/0x290 mm/slab.c:3756 kfree_skbmem+0x154/0x230 net/core/skbuff.c:582 __kfree_skb+0x1d/0x20 net/core/skbuff.c:642 sk_eat_skb include/net/sock.h:2366 [inline] llc_ui_recvmsg+0xec2/0x1610 net/llc/af_llc.c:882 sock_recvmsg_nosec net/socket.c:794 [inline] sock_recvmsg+0xd0/0x110 net/socket.c:801 ___sys_recvmsg+0x2b6/0x680 net/socket.c:2278 __sys_recvmmsg+0x303/0xb90 net/socket.c:2390 do_sys_recvmmsg+0x181/0x1a0 net/socket.c:2466 __do_sys_recvmmsg net/socket.c:2484 [inline] __se_sys_recvmmsg net/socket.c:2480 [inline] __x64_sys_recvmmsg+0xbe/0x150 net/socket.c:2480 do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe The buggy address belongs to the object at ffff8801d1f6fac0 which belongs to the cache skbuff_head_cache of size 232 The buggy address is located 228 bytes inside of 232-byte region [ffff8801d1f6fac0, ffff8801d1f6fba8) The buggy address belongs to the page: page:ffffea000747dbc0 count:1 mapcount:0 mapping:ffff8801d9be7680 index:0xffff8801d1f6fe80 flags: 0x2fffc0000000100(slab) raw: 02fffc0000000100 ffffea0007346e88 ffffea000705b108 ffff8801d9be7680 raw: ffff8801d1f6fe80 ffff8801d1f6f0c0 000000010000000b 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff8801d1f6fa80: fc fc fc fc fc fc fc fc fb fb fb fb fb fb fb fb ffff8801d1f6fb00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb >ffff8801d1f6fb80: fb fb fb fb fb fc fc fc fc fc fc fc fc fc fc fc ^ ffff8801d1f6fc00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff8801d1f6fc80: fb fb fb fb fb fb fb fb fb fb fb fb fb fc fc fc Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/llc/af_llc.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index b49f5afab405f..2e472d5c3ea41 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -730,7 +730,6 @@ static int llc_ui_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, struct sk_buff *skb = NULL; struct sock *sk = sock->sk; struct llc_sock *llc = llc_sk(sk); - unsigned long cpu_flags; size_t copied = 0; u32 peek_seq = 0; u32 *seq, skb_len; @@ -855,9 +854,8 @@ static int llc_ui_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, goto copy_uaddr; if (!(flags & MSG_PEEK)) { - spin_lock_irqsave(&sk->sk_receive_queue.lock, cpu_flags); - sk_eat_skb(sk, skb); - spin_unlock_irqrestore(&sk->sk_receive_queue.lock, cpu_flags); + skb_unlink(skb, &sk->sk_receive_queue); + kfree_skb(skb); *seq = 0; } @@ -878,9 +876,8 @@ copy_uaddr: llc_cmsg_rcv(msg, skb); if (!(flags & MSG_PEEK)) { - spin_lock_irqsave(&sk->sk_receive_queue.lock, cpu_flags); - sk_eat_skb(sk, skb); - spin_unlock_irqrestore(&sk->sk_receive_queue.lock, cpu_flags); + skb_unlink(skb, &sk->sk_receive_queue); + kfree_skb(skb); *seq = 0; } -- GitLab From 97764043885cd2aa6017891948a6705e4f1086e8 Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Fri, 26 Oct 2018 15:03:12 -0700 Subject: [PATCH 1721/2269] mm: don't warn about large allocations for slab commit 61448479a9f2c954cde0cfe778cb6bec5d0a748d upstream. Slub does not call kmalloc_slab() for sizes > KMALLOC_MAX_CACHE_SIZE, instead it falls back to kmalloc_large(). For slab KMALLOC_MAX_CACHE_SIZE == KMALLOC_MAX_SIZE and it calls kmalloc_slab() for all allocations relying on NULL return value for over-sized allocations. This inconsistency leads to unwanted warnings from kmalloc_slab() for over-sized allocations for slab. Returning NULL for failed allocations is the expected behavior. Make slub and slab code consistent by checking size > KMALLOC_MAX_CACHE_SIZE in slab before calling kmalloc_slab(). While we are here also fix the check in kmalloc_slab(). We should check against KMALLOC_MAX_CACHE_SIZE rather than KMALLOC_MAX_SIZE. It all kinda worked because for slab the constants are the same, and slub always checks the size against KMALLOC_MAX_CACHE_SIZE before kmalloc_slab(). But if we get there with size > KMALLOC_MAX_CACHE_SIZE anyhow bad things will happen. For example, in case of a newly introduced bug in slub code. Also move the check in kmalloc_slab() from function entry to the size > 192 case. This partially compensates for the additional check in slab code and makes slub code a bit faster (at least theoretically). Also drop __GFP_NOWARN in the warning check. This warning means a bug in slab code itself, user-passed flags have nothing to do with it. Nothing of this affects slob. Link: http://lkml.kernel.org/r/20180927171502.226522-1-dvyukov@gmail.com Signed-off-by: Dmitry Vyukov Reported-by: syzbot+87829a10073277282ad1@syzkaller.appspotmail.com Reported-by: syzbot+ef4e8fc3a06e9019bb40@syzkaller.appspotmail.com Reported-by: syzbot+6e438f4036df52cbb863@syzkaller.appspotmail.com Reported-by: syzbot+8574471d8734457d98aa@syzkaller.appspotmail.com Reported-by: syzbot+af1504df0807a083dbd9@syzkaller.appspotmail.com Acked-by: Christoph Lameter Acked-by: Vlastimil Babka Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/slab.c | 4 ++++ mm/slab_common.c | 12 ++++++------ 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/mm/slab.c b/mm/slab.c index 198c1e2c53585..68ab88e2920e2 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -3670,6 +3670,8 @@ __do_kmalloc_node(size_t size, gfp_t flags, int node, unsigned long caller) struct kmem_cache *cachep; void *ret; + if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) + return NULL; cachep = kmalloc_slab(size, flags); if (unlikely(ZERO_OR_NULL_PTR(cachep))) return cachep; @@ -3705,6 +3707,8 @@ static __always_inline void *__do_kmalloc(size_t size, gfp_t flags, struct kmem_cache *cachep; void *ret; + if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) + return NULL; cachep = kmalloc_slab(size, flags); if (unlikely(ZERO_OR_NULL_PTR(cachep))) return cachep; diff --git a/mm/slab_common.c b/mm/slab_common.c index 91d271b90600c..f6764cf162b8c 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -971,18 +971,18 @@ struct kmem_cache *kmalloc_slab(size_t size, gfp_t flags) { int index; - if (unlikely(size > KMALLOC_MAX_SIZE)) { - WARN_ON_ONCE(!(flags & __GFP_NOWARN)); - return NULL; - } - if (size <= 192) { if (!size) return ZERO_SIZE_PTR; index = size_index[size_index_elem(size)]; - } else + } else { + if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) { + WARN_ON(1); + return NULL; + } index = fls(size - 1); + } #ifdef CONFIG_ZONE_DMA if (unlikely((flags & GFP_DMA))) -- GitLab From 6b43a9978a6b000cf05d19f8765eba0434817f94 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Fri, 26 Oct 2018 15:09:01 -0700 Subject: [PATCH 1722/2269] mm/memory.c: recheck page table entry with page table lock held commit ff09d7ec9786be4ad7589aa987d7dc66e2dd9160 upstream. We clear the pte temporarily during read/modify/write update of the pte. If we take a page fault while the pte is cleared, the application can get SIGBUS. One such case is with remap_pfn_range without a backing vm_ops->fault callback. do_fault will return SIGBUS in that case. cpu 0 cpu1 mprotect() ptep_modify_prot_start()/pte cleared. . . page fault. . . prep_modify_prot_commit() Fix this by taking page table lock and rechecking for pte_none. [aneesh.kumar@linux.ibm.com: fix crash observed with syzkaller run] Link: http://lkml.kernel.org/r/87va6bwlfg.fsf@linux.ibm.com Link: http://lkml.kernel.org/r/20180926031858.9692-1-aneesh.kumar@linux.ibm.com Signed-off-by: Aneesh Kumar K.V Acked-by: Kirill A. Shutemov Cc: Willem de Bruijn Cc: Eric Dumazet Cc: Ido Schimmel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/memory.c | 34 ++++++++++++++++++++++++++++++---- 1 file changed, 30 insertions(+), 4 deletions(-) diff --git a/mm/memory.c b/mm/memory.c index 93d5d324904b3..b6cfe0cf0ead3 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3697,10 +3697,36 @@ static int do_fault(struct vm_fault *vmf) struct vm_area_struct *vma = vmf->vma; int ret; - /* The VMA was not fully populated on mmap() or missing VM_DONTEXPAND */ - if (!vma->vm_ops->fault) - ret = VM_FAULT_SIGBUS; - else if (!(vmf->flags & FAULT_FLAG_WRITE)) + /* + * The VMA was not fully populated on mmap() or missing VM_DONTEXPAND + */ + if (!vma->vm_ops->fault) { + /* + * If we find a migration pmd entry or a none pmd entry, which + * should never happen, return SIGBUS + */ + if (unlikely(!pmd_present(*vmf->pmd))) + ret = VM_FAULT_SIGBUS; + else { + vmf->pte = pte_offset_map_lock(vmf->vma->vm_mm, + vmf->pmd, + vmf->address, + &vmf->ptl); + /* + * Make sure this is not a temporary clearing of pte + * by holding ptl and checking again. A R/M/W update + * of pte involves: take ptl, clearing the pte so that + * we don't have concurrent modification by hardware + * followed by an update. + */ + if (unlikely(pte_none(*vmf->pte))) + ret = VM_FAULT_SIGBUS; + else + ret = VM_FAULT_NOPAGE; + + pte_unmap_unlock(vmf->pte, vmf->ptl); + } + } else if (!(vmf->flags & FAULT_FLAG_WRITE)) ret = do_read_fault(vmf); else if (!(vma->vm_flags & VM_SHARED)) ret = do_cow_fault(vmf); -- GitLab From e6ddc2c3d89c8ed3266d83254a0c11798f33502e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 1 Oct 2018 23:24:26 -0700 Subject: [PATCH 1723/2269] tcp: do not release socket ownership in tcp_close() commit 8873c064d1de579ea23412a6d3eee972593f142b upstream. syzkaller was able to hit the WARN_ON(sock_owned_by_user(sk)); in tcp_close() While a socket is being closed, it is very possible other threads find it in rtnetlink dump. tcp_get_info() will acquire the socket lock for a short amount of time (slow = lock_sock_fast(sk)/unlock_sock_fast(sk, slow);), enough to trigger the warning. Fixes: 67db3e4bfbc9 ("tcp: no longer hold ehash lock while calling tcp_get_info()") Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/sock.h | 1 + net/core/sock.c | 2 +- net/ipv4/tcp.c | 11 +++-------- 3 files changed, 5 insertions(+), 9 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index 9bd5d68076d9f..64a330544dad5 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1452,6 +1452,7 @@ static inline void lock_sock(struct sock *sk) lock_sock_nested(sk, 0); } +void __release_sock(struct sock *sk); void release_sock(struct sock *sk); /* BH context may only use the following locking interface. */ diff --git a/net/core/sock.c b/net/core/sock.c index 68d08ed5521e5..36f19458e2fef 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2242,7 +2242,7 @@ static void __lock_sock(struct sock *sk) finish_wait(&sk->sk_lock.wq, &wait); } -static void __release_sock(struct sock *sk) +void __release_sock(struct sock *sk) __releases(&sk->sk_lock.slock) __acquires(&sk->sk_lock.slock) { diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index f9c985460faa3..8109985e78a1d 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2217,16 +2217,10 @@ adjudge_to_death: sock_hold(sk); sock_orphan(sk); - /* It is the last release_sock in its life. It will remove backlog. */ - release_sock(sk); - - - /* Now socket is owned by kernel and we acquire BH lock - * to finish close. No need to check for user refs. - */ local_bh_disable(); bh_lock_sock(sk); - WARN_ON(sock_owned_by_user(sk)); + /* remove backlog if any, without releasing ownership. */ + __release_sock(sk); percpu_counter_inc(sk->sk_prot->orphan_count); @@ -2295,6 +2289,7 @@ adjudge_to_death: out: bh_unlock_sock(sk); local_bh_enable(); + release_sock(sk); sock_put(sk); } EXPORT_SYMBOL(tcp_close); -- GitLab From c271b660b798998c926d2d62b47d0cd4d97d8d2e Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Tue, 9 Jan 2018 15:24:50 +0200 Subject: [PATCH 1724/2269] IB/core: Perform modify QP on real one commit b2bedfb39541a7e14798d066b6f8685d84c8fcf5 upstream. Currently qp->port stores the port number whenever IB_QP_PORT QP attribute mask is set (during QP state transition to INIT state). This port number should be stored for the real QP when XRC target QP is used. Follow the ib_modify_qp() implementation and hide the access to ->real_qp. Fixes: a512c2fbef9c ("IB/core: Introduce modify QP operation with udata") Signed-off-by: Parav Pandit Reviewed-by: Daniel Jurgens Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/core/verbs.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index feb80dbb59487..6d59af07d3387 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -1285,7 +1285,7 @@ EXPORT_SYMBOL(ib_resolve_eth_dmac); /** * ib_modify_qp_with_udata - Modifies the attributes for the specified QP. - * @qp: The QP to modify. + * @ib_qp: The QP to modify. * @attr: On input, specifies the QP attributes to modify. On output, * the current values of selected QP attributes are returned. * @attr_mask: A bit-mask used to specify which attributes of the QP @@ -1294,9 +1294,10 @@ EXPORT_SYMBOL(ib_resolve_eth_dmac); * are being modified. * It returns 0 on success and returns appropriate error code on error. */ -int ib_modify_qp_with_udata(struct ib_qp *qp, struct ib_qp_attr *attr, +int ib_modify_qp_with_udata(struct ib_qp *ib_qp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata) { + struct ib_qp *qp = ib_qp->real_qp; int ret; if (attr_mask & IB_QP_AV) { -- GitLab From d84bb18ed7a29a3a571feed8ed36ee2c3e58d1bc Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Thu, 15 Nov 2018 11:38:41 +0200 Subject: [PATCH 1725/2269] usb: xhci: Prevent bus suspend if a port connect change or polling state is detected commit 2f31a67f01a8beb22cae754c53522cb61a005750 upstream. USB3 roothub might autosuspend before a plugged USB3 device is detected, causing USB3 device enumeration failure. USB3 devices don't show up as connected and enabled until USB3 link trainig completes. On a fast booting platform with a slow USB3 link training the link might reach the connected enabled state just as the bus is suspending. If this device is discovered first time by the xhci_bus_suspend() routine it will be put to U3 suspended state like the other ports which failed to suspend earlier. The hub thread will notice the connect change and resume the bus, moving the port back to U0 This U0 -> U3 -> U0 transition right after being connected seems to be too much for some devices, causing them to first go to SS.Inactive state, and finally end up stuck in a polling state with reset asserted Fix this by failing the bus suspend if a port has a connect change or is in a polling state in xhci_bus_suspend(). Don't do any port changes until all ports are checked, buffer all port changes and only write them in the end if suspend can proceed Cc: stable@vger.kernel.org Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-hub.c | 60 ++++++++++++++++++++++++++++--------- 1 file changed, 46 insertions(+), 14 deletions(-) diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c index 4f612b5d0f944..a2e350d28c05a 100644 --- a/drivers/usb/host/xhci-hub.c +++ b/drivers/usb/host/xhci-hub.c @@ -1481,13 +1481,16 @@ int xhci_bus_suspend(struct usb_hcd *hcd) __le32 __iomem **port_array; struct xhci_bus_state *bus_state; unsigned long flags; + u32 portsc_buf[USB_MAXCHILDREN]; + bool wake_enabled; max_ports = xhci_get_ports(hcd, &port_array); bus_state = &xhci->bus_state[hcd_index(hcd)]; + wake_enabled = hcd->self.root_hub->do_remote_wakeup; spin_lock_irqsave(&xhci->lock, flags); - if (hcd->self.root_hub->do_remote_wakeup) { + if (wake_enabled) { if (bus_state->resuming_ports || /* USB2 */ bus_state->port_remote_wakeup) { /* USB3 */ spin_unlock_irqrestore(&xhci->lock, flags); @@ -1495,26 +1498,36 @@ int xhci_bus_suspend(struct usb_hcd *hcd) return -EBUSY; } } - - port_index = max_ports; + /* + * Prepare ports for suspend, but don't write anything before all ports + * are checked and we know bus suspend can proceed + */ bus_state->bus_suspended = 0; + port_index = max_ports; while (port_index--) { - /* suspend the port if the port is not suspended */ u32 t1, t2; - int slot_id; t1 = readl(port_array[port_index]); t2 = xhci_port_state_to_neutral(t1); + portsc_buf[port_index] = 0; - if ((t1 & PORT_PE) && !(t1 & PORT_PLS_MASK)) { - xhci_dbg(xhci, "port %d not suspended\n", port_index); - slot_id = xhci_find_slot_id_by_port(hcd, xhci, - port_index + 1); - if (slot_id) { + /* Bail out if a USB3 port has a new device in link training */ + if ((t1 & PORT_PLS_MASK) == XDEV_POLLING) { + bus_state->bus_suspended = 0; + spin_unlock_irqrestore(&xhci->lock, flags); + xhci_dbg(xhci, "Bus suspend bailout, port in polling\n"); + return -EBUSY; + } + + /* suspend ports in U0, or bail out for new connect changes */ + if ((t1 & PORT_PE) && (t1 & PORT_PLS_MASK) == XDEV_U0) { + if ((t1 & PORT_CSC) && wake_enabled) { + bus_state->bus_suspended = 0; spin_unlock_irqrestore(&xhci->lock, flags); - xhci_stop_device(xhci, slot_id, 1); - spin_lock_irqsave(&xhci->lock, flags); + xhci_dbg(xhci, "Bus suspend bailout, port connect change\n"); + return -EBUSY; } + xhci_dbg(xhci, "port %d not suspended\n", port_index); t2 &= ~PORT_PLS_MASK; t2 |= PORT_LINK_STROBE | XDEV_U3; set_bit(port_index, &bus_state->bus_suspended); @@ -1523,7 +1536,7 @@ int xhci_bus_suspend(struct usb_hcd *hcd) * including the USB 3.0 roothub, but only if CONFIG_PM * is enabled, so also enable remote wake here. */ - if (hcd->self.root_hub->do_remote_wakeup) { + if (wake_enabled) { if (t1 & PORT_CONNECT) { t2 |= PORT_WKOC_E | PORT_WKDISC_E; t2 &= ~PORT_WKCONN_E; @@ -1543,7 +1556,26 @@ int xhci_bus_suspend(struct usb_hcd *hcd) t1 = xhci_port_state_to_neutral(t1); if (t1 != t2) - writel(t2, port_array[port_index]); + portsc_buf[port_index] = t2; + } + + /* write port settings, stopping and suspending ports if needed */ + port_index = max_ports; + while (port_index--) { + if (!portsc_buf[port_index]) + continue; + if (test_bit(port_index, &bus_state->bus_suspended)) { + int slot_id; + + slot_id = xhci_find_slot_id_by_port(hcd, xhci, + port_index + 1); + if (slot_id) { + spin_unlock_irqrestore(&xhci->lock, flags); + xhci_stop_device(xhci, slot_id, 1); + spin_lock_irqsave(&xhci->lock, flags); + } + } + writel(portsc_buf[port_index], port_array[port_index]); } hcd->state = HC_STATE_SUSPENDED; bus_state->next_statechange = jiffies + msecs_to_jiffies(10); -- GitLab From e51b0b1c1f015c72315a2706e043cd9665af73cc Mon Sep 17 00:00:00 2001 From: "Y.C. Chen" Date: Wed, 3 Oct 2018 14:57:47 +0800 Subject: [PATCH 1726/2269] drm/ast: change resolution may cause screen blurred commit 1a37bd823891568f8721989aed0615835632d81a upstream. The value of pitches is not correct while calling mode_set. The issue we found so far on following system: - Debian8 with XFCE Desktop - Ubuntu with KDE Desktop - SUSE15 with KDE Desktop Signed-off-by: Y.C. Chen Cc: Tested-by: Jean Delvare Reviewed-by: Jean Delvare Signed-off-by: Dave Airlie Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/ast/ast_mode.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/ast/ast_mode.c b/drivers/gpu/drm/ast/ast_mode.c index e9f1e6fe7b943..604ff42121f8d 100644 --- a/drivers/gpu/drm/ast/ast_mode.c +++ b/drivers/gpu/drm/ast/ast_mode.c @@ -568,6 +568,7 @@ static int ast_crtc_do_set_base(struct drm_crtc *crtc, } ast_bo_unreserve(bo); + ast_set_offset_reg(crtc); ast_set_start_address_crt1(crtc, (u32)gpu_addr); return 0; -- GitLab From ffc3c0ffbb93be9b1b75f0a71ba38501039f5180 Mon Sep 17 00:00:00 2001 From: "Y.C. Chen" Date: Tue, 30 Oct 2018 11:34:46 +0800 Subject: [PATCH 1727/2269] drm/ast: fixed cursor may disappear sometimes commit 7989b9ee8bafe5cc625381dd0c3c4586de27ca26 upstream. Signed-off-by: Y.C. Chen Cc: Reviewed-by: Dave Airlie Signed-off-by: Dave Airlie Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/ast/ast_mode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/ast/ast_mode.c b/drivers/gpu/drm/ast/ast_mode.c index 604ff42121f8d..fae1176b24722 100644 --- a/drivers/gpu/drm/ast/ast_mode.c +++ b/drivers/gpu/drm/ast/ast_mode.c @@ -1255,7 +1255,7 @@ static int ast_cursor_move(struct drm_crtc *crtc, ast_set_index_reg(ast, AST_IO_CRTC_PORT, 0xc7, ((y >> 8) & 0x07)); /* dummy write to fire HWC */ - ast_set_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xCB, 0xFF, 0x00); + ast_show_cursor(crtc); return 0; } -- GitLab From dca24b512763930963408ea5c78a2f223a73a2a8 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Thu, 15 Nov 2018 11:42:16 +0100 Subject: [PATCH 1728/2269] drm/ast: Remove existing framebuffers before loading driver commit 5478ad10e7850ce3d8b7056db05ddfa3c9ddad9a upstream. If vesafb attaches to the AST device, it configures the framebuffer memory for uncached access by default. When ast.ko later tries to attach itself to the device, it wants to use write-combining on the framebuffer memory, but vesefb's existing configuration for uncached access takes precedence. This results in reduced performance. Removing the framebuffer's configuration before loding the AST driver fixes the problem. Other DRM drivers already contain equivalent code. Link: https://bugzilla.opensuse.org/show_bug.cgi?id=1112963 Signed-off-by: Thomas Zimmermann Cc: Tested-by: Y.C. Chen Reviewed-by: Jean Delvare Tested-by: Jean Delvare Signed-off-by: Dave Airlie Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/ast/ast_drv.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/drivers/gpu/drm/ast/ast_drv.c b/drivers/gpu/drm/ast/ast_drv.c index 69dab82a37714..bf589c53b908d 100644 --- a/drivers/gpu/drm/ast/ast_drv.c +++ b/drivers/gpu/drm/ast/ast_drv.c @@ -60,8 +60,29 @@ static const struct pci_device_id pciidlist[] = { MODULE_DEVICE_TABLE(pci, pciidlist); +static void ast_kick_out_firmware_fb(struct pci_dev *pdev) +{ + struct apertures_struct *ap; + bool primary = false; + + ap = alloc_apertures(1); + if (!ap) + return; + + ap->ranges[0].base = pci_resource_start(pdev, 0); + ap->ranges[0].size = pci_resource_len(pdev, 0); + +#ifdef CONFIG_X86 + primary = pdev->resource[PCI_ROM_RESOURCE].flags & IORESOURCE_ROM_SHADOW; +#endif + drm_fb_helper_remove_conflicting_framebuffers(ap, "astdrmfb", primary); + kfree(ap); +} + static int ast_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { + ast_kick_out_firmware_fb(pdev); + return drm_get_pci_dev(pdev, ent, &driver); } -- GitLab From b3f1f493529e060fd3e9d4fe5b406f8379b24d71 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Wed, 31 Oct 2018 10:37:46 +0100 Subject: [PATCH 1729/2269] can: dev: can_get_echo_skb(): factor out non sending code to __can_get_echo_skb() commit a4310fa2f24687888ce80fdb0e88583561a23700 upstream. This patch factors out all non sending parts of can_get_echo_skb() into a seperate function __can_get_echo_skb(), so that it can be re-used in an upcoming patch. Cc: linux-stable Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/dev.c | 36 +++++++++++++++++++++++++----------- include/linux/can/dev.h | 1 + 2 files changed, 26 insertions(+), 11 deletions(-) diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c index b6a681bce4000..ae08bc6e7267b 100644 --- a/drivers/net/can/dev.c +++ b/drivers/net/can/dev.c @@ -476,14 +476,7 @@ void can_put_echo_skb(struct sk_buff *skb, struct net_device *dev, } EXPORT_SYMBOL_GPL(can_put_echo_skb); -/* - * Get the skb from the stack and loop it back locally - * - * The function is typically called when the TX done interrupt - * is handled in the device driver. The driver must protect - * access to priv->echo_skb, if necessary. - */ -unsigned int can_get_echo_skb(struct net_device *dev, unsigned int idx) +struct sk_buff *__can_get_echo_skb(struct net_device *dev, unsigned int idx, u8 *len_ptr) { struct can_priv *priv = netdev_priv(dev); @@ -494,13 +487,34 @@ unsigned int can_get_echo_skb(struct net_device *dev, unsigned int idx) struct can_frame *cf = (struct can_frame *)skb->data; u8 dlc = cf->can_dlc; - netif_rx(priv->echo_skb[idx]); + *len_ptr = dlc; priv->echo_skb[idx] = NULL; - return dlc; + return skb; } - return 0; + return NULL; +} + +/* + * Get the skb from the stack and loop it back locally + * + * The function is typically called when the TX done interrupt + * is handled in the device driver. The driver must protect + * access to priv->echo_skb, if necessary. + */ +unsigned int can_get_echo_skb(struct net_device *dev, unsigned int idx) +{ + struct sk_buff *skb; + u8 len; + + skb = __can_get_echo_skb(dev, idx, &len); + if (!skb) + return 0; + + netif_rx(skb); + + return len; } EXPORT_SYMBOL_GPL(can_get_echo_skb); diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index 61f1cf2d9f440..c0c0b992210e9 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -163,6 +163,7 @@ void can_change_state(struct net_device *dev, struct can_frame *cf, void can_put_echo_skb(struct sk_buff *skb, struct net_device *dev, unsigned int idx); +struct sk_buff *__can_get_echo_skb(struct net_device *dev, unsigned int idx, u8 *len_ptr); unsigned int can_get_echo_skb(struct net_device *dev, unsigned int idx); void can_free_echo_skb(struct net_device *dev, unsigned int idx); -- GitLab From 3f6b2bbbee20a5aa52eed1aaea65cdab98aed8c6 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Wed, 31 Oct 2018 11:08:21 +0100 Subject: [PATCH 1730/2269] can: dev: __can_get_echo_skb(): replace struct can_frame by canfd_frame to access frame length commit 200f5c49f7a2cd694436bfc6cb0662b794c96736 upstream. This patch replaces the use of "struct can_frame::can_dlc" by "struct canfd_frame::len" to access the frame's length. As it is ensured that both structures have a compatible memory layout for this member this is no functional change. Futher, this compatibility is documented in a comment. Cc: linux-stable Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/dev.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c index ae08bc6e7267b..7900d5a399894 100644 --- a/drivers/net/can/dev.c +++ b/drivers/net/can/dev.c @@ -483,11 +483,14 @@ struct sk_buff *__can_get_echo_skb(struct net_device *dev, unsigned int idx, u8 BUG_ON(idx >= priv->echo_skb_max); if (priv->echo_skb[idx]) { + /* Using "struct canfd_frame::len" for the frame + * length is supported on both CAN and CANFD frames. + */ struct sk_buff *skb = priv->echo_skb[idx]; - struct can_frame *cf = (struct can_frame *)skb->data; - u8 dlc = cf->can_dlc; + struct canfd_frame *cf = (struct canfd_frame *)skb->data; + u8 len = cf->len; - *len_ptr = dlc; + *len_ptr = len; priv->echo_skb[idx] = NULL; return skb; -- GitLab From bf991335b02d62e067e09471b15cf2216d54d5cf Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Wed, 31 Oct 2018 14:05:26 +0100 Subject: [PATCH 1731/2269] can: dev: __can_get_echo_skb(): Don't crash the kernel if can_priv::echo_skb is accessed out of bounds commit e7a6994d043a1e31d5b17706a22ce33d2a3e4cdc upstream. If the "struct can_priv::echo_skb" is accessed out of bounds would lead to a kernel crash. Better print a sensible warning message instead and try to recover. Cc: linux-stable Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/dev.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c index 7900d5a399894..7d61d8801220e 100644 --- a/drivers/net/can/dev.c +++ b/drivers/net/can/dev.c @@ -480,7 +480,11 @@ struct sk_buff *__can_get_echo_skb(struct net_device *dev, unsigned int idx, u8 { struct can_priv *priv = netdev_priv(dev); - BUG_ON(idx >= priv->echo_skb_max); + if (idx >= priv->echo_skb_max) { + netdev_err(dev, "%s: BUG! Trying to access can_priv::echo_skb out of bounds (%u/max %u)\n", + __func__, idx, priv->echo_skb_max); + return NULL; + } if (priv->echo_skb[idx]) { /* Using "struct canfd_frame::len" for the frame -- GitLab From 906ed1bdf850851b3433e4f5521b406651643b27 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Wed, 31 Oct 2018 14:15:13 +0100 Subject: [PATCH 1732/2269] can: dev: __can_get_echo_skb(): print error message, if trying to echo non existing skb commit 7da11ba5c5066dadc2e96835a6233d56d7b7764a upstream. Prior to echoing a successfully transmitted CAN frame (by calling can_get_echo_skb()), CAN drivers have to put the CAN frame (by calling can_put_echo_skb() in the transmit function). These put and get function take an index as parameter, which is used to identify the CAN frame. A driver calling can_get_echo_skb() with a index not pointing to a skb is a BUG, so add an appropriate error message. Cc: linux-stable Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/dev.c | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c index 7d61d8801220e..035daca631682 100644 --- a/drivers/net/can/dev.c +++ b/drivers/net/can/dev.c @@ -479,6 +479,8 @@ EXPORT_SYMBOL_GPL(can_put_echo_skb); struct sk_buff *__can_get_echo_skb(struct net_device *dev, unsigned int idx, u8 *len_ptr) { struct can_priv *priv = netdev_priv(dev); + struct sk_buff *skb = priv->echo_skb[idx]; + struct canfd_frame *cf; if (idx >= priv->echo_skb_max) { netdev_err(dev, "%s: BUG! Trying to access can_priv::echo_skb out of bounds (%u/max %u)\n", @@ -486,21 +488,20 @@ struct sk_buff *__can_get_echo_skb(struct net_device *dev, unsigned int idx, u8 return NULL; } - if (priv->echo_skb[idx]) { - /* Using "struct canfd_frame::len" for the frame - * length is supported on both CAN and CANFD frames. - */ - struct sk_buff *skb = priv->echo_skb[idx]; - struct canfd_frame *cf = (struct canfd_frame *)skb->data; - u8 len = cf->len; - - *len_ptr = len; - priv->echo_skb[idx] = NULL; - - return skb; + if (!skb) { + netdev_err(dev, "%s: BUG! Trying to echo non existing skb: can_priv::echo_skb[%u]\n", + __func__, idx); + return NULL; } - return NULL; + /* Using "struct canfd_frame::len" for the frame + * length is supported on both CAN and CANFD frames. + */ + cf = (struct canfd_frame *)skb->data; + *len_ptr = cf->len; + priv->echo_skb[idx] = NULL; + + return skb; } /* -- GitLab From bb8813be5cc7a0134e5bd8584005b189223f237c Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Tue, 18 Sep 2018 11:40:38 +0200 Subject: [PATCH 1733/2269] can: rx-offload: introduce can_rx_offload_get_echo_skb() and can_rx_offload_queue_sorted() functions commit 55059f2b7f868cd43b3ad30e28e18347e1b46ace upstream. Current CAN framework can't guarantee proper/chronological order of RX and TX-ECHO messages. To make this possible, drivers should use this functions instead of can_get_echo_skb(). Signed-off-by: Oleksij Rempel Cc: linux-stable Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/rx-offload.c | 46 ++++++++++++++++++++++++++++++++++ include/linux/can/rx-offload.h | 4 +++ 2 files changed, 50 insertions(+) diff --git a/drivers/net/can/rx-offload.c b/drivers/net/can/rx-offload.c index f394f77d75289..61b30bfffaccc 100644 --- a/drivers/net/can/rx-offload.c +++ b/drivers/net/can/rx-offload.c @@ -209,6 +209,52 @@ int can_rx_offload_irq_offload_fifo(struct can_rx_offload *offload) } EXPORT_SYMBOL_GPL(can_rx_offload_irq_offload_fifo); +int can_rx_offload_queue_sorted(struct can_rx_offload *offload, + struct sk_buff *skb, u32 timestamp) +{ + struct can_rx_offload_cb *cb; + unsigned long flags; + + if (skb_queue_len(&offload->skb_queue) > + offload->skb_queue_len_max) + return -ENOMEM; + + cb = can_rx_offload_get_cb(skb); + cb->timestamp = timestamp; + + spin_lock_irqsave(&offload->skb_queue.lock, flags); + __skb_queue_add_sort(&offload->skb_queue, skb, can_rx_offload_compare); + spin_unlock_irqrestore(&offload->skb_queue.lock, flags); + + can_rx_offload_schedule(offload); + + return 0; +} +EXPORT_SYMBOL_GPL(can_rx_offload_queue_sorted); + +unsigned int can_rx_offload_get_echo_skb(struct can_rx_offload *offload, + unsigned int idx, u32 timestamp) +{ + struct net_device *dev = offload->dev; + struct net_device_stats *stats = &dev->stats; + struct sk_buff *skb; + u8 len; + int err; + + skb = __can_get_echo_skb(dev, idx, &len); + if (!skb) + return 0; + + err = can_rx_offload_queue_sorted(offload, skb, timestamp); + if (err) { + stats->rx_errors++; + stats->tx_fifo_errors++; + } + + return len; +} +EXPORT_SYMBOL_GPL(can_rx_offload_get_echo_skb); + int can_rx_offload_irq_queue_err_skb(struct can_rx_offload *offload, struct sk_buff *skb) { if (skb_queue_len(&offload->skb_queue) > diff --git a/include/linux/can/rx-offload.h b/include/linux/can/rx-offload.h index cb31683bbe154..01a7c9e5d8d85 100644 --- a/include/linux/can/rx-offload.h +++ b/include/linux/can/rx-offload.h @@ -41,6 +41,10 @@ int can_rx_offload_add_timestamp(struct net_device *dev, struct can_rx_offload * int can_rx_offload_add_fifo(struct net_device *dev, struct can_rx_offload *offload, unsigned int weight); int can_rx_offload_irq_offload_timestamp(struct can_rx_offload *offload, u64 reg); int can_rx_offload_irq_offload_fifo(struct can_rx_offload *offload); +int can_rx_offload_queue_sorted(struct can_rx_offload *offload, + struct sk_buff *skb, u32 timestamp); +unsigned int can_rx_offload_get_echo_skb(struct can_rx_offload *offload, + unsigned int idx, u32 timestamp); int can_rx_offload_irq_queue_err_skb(struct can_rx_offload *offload, struct sk_buff *skb); void can_rx_offload_reset(struct can_rx_offload *offload); void can_rx_offload_del(struct can_rx_offload *offload); -- GitLab From 30b996ac6e61e9a39d6bec70ec4e39daeeeff8b6 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Tue, 18 Sep 2018 11:40:40 +0200 Subject: [PATCH 1734/2269] can: rx-offload: rename can_rx_offload_irq_queue_err_skb() to can_rx_offload_queue_tail() commit 4530ec36bb1e0d24f41c33229694adacda3d5d89 upstream. This function has nothing todo with error. Signed-off-by: Oleksij Rempel Cc: linux-stable Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/flexcan.c | 4 ++-- drivers/net/can/rx-offload.c | 5 +++-- include/linux/can/rx-offload.h | 3 ++- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c index ed8a2a7ce500a..9ef501fd153f0 100644 --- a/drivers/net/can/flexcan.c +++ b/drivers/net/can/flexcan.c @@ -599,7 +599,7 @@ static void flexcan_irq_bus_err(struct net_device *dev, u32 reg_esr) if (tx_errors) dev->stats.tx_errors++; - can_rx_offload_irq_queue_err_skb(&priv->offload, skb); + can_rx_offload_queue_tail(&priv->offload, skb); } static void flexcan_irq_state(struct net_device *dev, u32 reg_esr) @@ -639,7 +639,7 @@ static void flexcan_irq_state(struct net_device *dev, u32 reg_esr) if (unlikely(new_state == CAN_STATE_BUS_OFF)) can_bus_off(dev); - can_rx_offload_irq_queue_err_skb(&priv->offload, skb); + can_rx_offload_queue_tail(&priv->offload, skb); } static inline struct flexcan_priv *rx_offload_to_priv(struct can_rx_offload *offload) diff --git a/drivers/net/can/rx-offload.c b/drivers/net/can/rx-offload.c index 61b30bfffaccc..d227db45fec97 100644 --- a/drivers/net/can/rx-offload.c +++ b/drivers/net/can/rx-offload.c @@ -255,7 +255,8 @@ unsigned int can_rx_offload_get_echo_skb(struct can_rx_offload *offload, } EXPORT_SYMBOL_GPL(can_rx_offload_get_echo_skb); -int can_rx_offload_irq_queue_err_skb(struct can_rx_offload *offload, struct sk_buff *skb) +int can_rx_offload_queue_tail(struct can_rx_offload *offload, + struct sk_buff *skb) { if (skb_queue_len(&offload->skb_queue) > offload->skb_queue_len_max) @@ -266,7 +267,7 @@ int can_rx_offload_irq_queue_err_skb(struct can_rx_offload *offload, struct sk_b return 0; } -EXPORT_SYMBOL_GPL(can_rx_offload_irq_queue_err_skb); +EXPORT_SYMBOL_GPL(can_rx_offload_queue_tail); static int can_rx_offload_init_queue(struct net_device *dev, struct can_rx_offload *offload, unsigned int weight) { diff --git a/include/linux/can/rx-offload.h b/include/linux/can/rx-offload.h index 01a7c9e5d8d85..8268811a697e2 100644 --- a/include/linux/can/rx-offload.h +++ b/include/linux/can/rx-offload.h @@ -45,7 +45,8 @@ int can_rx_offload_queue_sorted(struct can_rx_offload *offload, struct sk_buff *skb, u32 timestamp); unsigned int can_rx_offload_get_echo_skb(struct can_rx_offload *offload, unsigned int idx, u32 timestamp); -int can_rx_offload_irq_queue_err_skb(struct can_rx_offload *offload, struct sk_buff *skb); +int can_rx_offload_queue_tail(struct can_rx_offload *offload, + struct sk_buff *skb); void can_rx_offload_reset(struct can_rx_offload *offload); void can_rx_offload_del(struct can_rx_offload *offload); void can_rx_offload_enable(struct can_rx_offload *offload); -- GitLab From 9af977aeef27001698133948fd44fa7e59a5901d Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Wed, 24 Oct 2018 10:27:12 +0200 Subject: [PATCH 1735/2269] can: raw: check for CAN FD capable netdev in raw_sendmsg() commit a43608fa77213ad5ac5f75994254b9f65d57cfa0 upstream. When the socket is CAN FD enabled it can handle CAN FD frame transmissions. Add an additional check in raw_sendmsg() as a CAN2.0 CAN driver (non CAN FD) should never see a CAN FD frame. Due to the commonly used can_dropped_invalid_skb() function the CAN 2.0 driver would drop that CAN FD frame anyway - but with this patch the user gets a proper -EINVAL return code. Signed-off-by: Oliver Hartkopp Cc: linux-stable Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- net/can/raw.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/net/can/raw.c b/net/can/raw.c index 864c80dbdb72d..e1f26441b49ac 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -745,18 +745,19 @@ static int raw_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) } else ifindex = ro->ifindex; - if (ro->fd_frames) { + dev = dev_get_by_index(sock_net(sk), ifindex); + if (!dev) + return -ENXIO; + + err = -EINVAL; + if (ro->fd_frames && dev->mtu == CANFD_MTU) { if (unlikely(size != CANFD_MTU && size != CAN_MTU)) - return -EINVAL; + goto put_dev; } else { if (unlikely(size != CAN_MTU)) - return -EINVAL; + goto put_dev; } - dev = dev_get_by_index(sock_net(sk), ifindex); - if (!dev) - return -ENXIO; - skb = sock_alloc_send_skb(sk, size + sizeof(struct can_skb_priv), msg->msg_flags & MSG_DONTWAIT, &err); if (!skb) -- GitLab From 7d0724cecb0ebfcf161277fc4c44616a2f9d1e0c Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Sat, 27 Oct 2018 10:36:54 +0200 Subject: [PATCH 1736/2269] can: hi311x: Use level-triggered interrupt commit f164d0204b1156a7e0d8d1622c1a8d25752befec upstream. If the hi3110 shares the SPI bus with another traffic-intensive device and packets are received in high volume (by a separate machine sending with "cangen -g 0 -i -x"), reception stops after a few minutes and the counter in /proc/interrupts stops incrementing. Bus state is "active". Bringing the interface down and back up reconvenes the reception. The issue is not observed when the hi3110 is the sole device on the SPI bus. Using a level-triggered interrupt makes the issue go away and lets the hi3110 successfully receive 2 GByte over the course of 5 days while a ks8851 Ethernet chip on the same SPI bus handles 6 GByte of traffic. Unfortunately the hi3110 datasheet is mum on the trigger type. The pin description on page 3 only specifies the polarity (active high): http://www.holtic.com/documents/371-hi-3110_v-rev-kpdf.do Cc: Mathias Duckeck Cc: Akshay Bhat Cc: Casey Fitzpatrick Signed-off-by: Lukas Wunner Cc: linux-stable Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- Documentation/devicetree/bindings/net/can/holt_hi311x.txt | 2 +- drivers/net/can/spi/hi311x.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/net/can/holt_hi311x.txt b/Documentation/devicetree/bindings/net/can/holt_hi311x.txt index 23aa94eab2076..4e0ec14f7abf6 100644 --- a/Documentation/devicetree/bindings/net/can/holt_hi311x.txt +++ b/Documentation/devicetree/bindings/net/can/holt_hi311x.txt @@ -18,7 +18,7 @@ Example: reg = <1>; clocks = <&clk32m>; interrupt-parent = <&gpio4>; - interrupts = <13 IRQ_TYPE_EDGE_RISING>; + interrupts = <13 IRQ_TYPE_LEVEL_HIGH>; vdd-supply = <®5v0>; xceiver-supply = <®5v0>; }; diff --git a/drivers/net/can/spi/hi311x.c b/drivers/net/can/spi/hi311x.c index 53e320c92a8be..ddaf46239e39e 100644 --- a/drivers/net/can/spi/hi311x.c +++ b/drivers/net/can/spi/hi311x.c @@ -760,7 +760,7 @@ static int hi3110_open(struct net_device *net) { struct hi3110_priv *priv = netdev_priv(net); struct spi_device *spi = priv->spi; - unsigned long flags = IRQF_ONESHOT | IRQF_TRIGGER_RISING; + unsigned long flags = IRQF_ONESHOT | IRQF_TRIGGER_HIGH; int ret; ret = open_candev(net); -- GitLab From 240ec6ca4b9a2f05bf1190e37c637820fcfbeecd Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Mon, 10 Sep 2018 09:39:03 -0700 Subject: [PATCH 1737/2269] IB/hfi1: Eliminate races in the SDMA send error path commit a0e0cb82804a6a21d9067022c2dfdf80d11da429 upstream. pq_update() can only be called in two places: from the completion function when the complete (npkts) sequence of packets has been submitted and processed, or from setup function if a subset of the packets were submitted (i.e. the error path). Currently both paths can call pq_update() if an error occurrs. This race will cause the n_req value to go negative, hanging file_close(), or cause a crash by freeing the txlist more than once. Several variables are used to determine SDMA send state. Most of these are unnecessary, and have code inspectible races between the setup function and the completion function, in both the send path and the error path. The request 'status' value can be set by the setup or by the completion function. This is code inspectibly racy. Since the status is not needed in the completion code or by the caller it has been removed. The request 'done' value races between usage by the setup and the completion function. The completion function does not need this. When the number of processed packets matches npkts, it is done. The 'has_error' value races between usage of the setup and the completion function. This can cause incorrect error handling and leave the n_req in an incorrect value (i.e. negative). Simplify the code by removing all of the unneeded state checks and variables. Clean up iovs node when it is freed. Eliminate race conditions in the error path: If all packets are submitted, the completion handler will set the completion status correctly (ok or aborted). If all packets are not submitted, the caller must wait until the submitted packets have completed, and then set the completion status. These two change eliminate the race condition in the error path. Reviewed-by: Mitko Haralanov Reviewed-by: Mike Marciniszyn Signed-off-by: Michael J. Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/hfi1/user_sdma.c | 87 ++++++++++++-------------- drivers/infiniband/hw/hfi1/user_sdma.h | 3 - 2 files changed, 39 insertions(+), 51 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index 8c954a0ae3b69..c14ec04f2a895 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -328,7 +328,6 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd, u8 opcode, sc, vl; u16 pkey; u32 slid; - int req_queued = 0; u16 dlid; u32 selector; @@ -392,7 +391,6 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd, req->data_len = 0; req->pq = pq; req->cq = cq; - req->status = -1; req->ahg_idx = -1; req->iov_idx = 0; req->sent = 0; @@ -400,12 +398,14 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd, req->seqcomp = 0; req->seqsubmitted = 0; req->tids = NULL; - req->done = 0; req->has_error = 0; INIT_LIST_HEAD(&req->txps); memcpy(&req->info, &info, sizeof(info)); + /* The request is initialized, count it */ + atomic_inc(&pq->n_reqs); + if (req_opcode(info.ctrl) == EXPECTED) { /* expected must have a TID info and at least one data vector */ if (req->data_iovs < 2) { @@ -500,7 +500,6 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd, ret = pin_vector_pages(req, &req->iovs[i]); if (ret) { req->data_iovs = i; - req->status = ret; goto free_req; } req->data_len += req->iovs[i].iov.iov_len; @@ -561,14 +560,10 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd, req->ahg_idx = sdma_ahg_alloc(req->sde); set_comp_state(pq, cq, info.comp_idx, QUEUED, 0); - atomic_inc(&pq->n_reqs); - req_queued = 1; /* Send the first N packets in the request to buy us some time */ ret = user_sdma_send_pkts(req, pcount); - if (unlikely(ret < 0 && ret != -EBUSY)) { - req->status = ret; + if (unlikely(ret < 0 && ret != -EBUSY)) goto free_req; - } /* * It is possible that the SDMA engine would have processed all the @@ -588,14 +583,8 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd, while (req->seqsubmitted != req->info.npkts) { ret = user_sdma_send_pkts(req, pcount); if (ret < 0) { - if (ret != -EBUSY) { - req->status = ret; - WRITE_ONCE(req->has_error, 1); - if (ACCESS_ONCE(req->seqcomp) == - req->seqsubmitted - 1) - goto free_req; - return ret; - } + if (ret != -EBUSY) + goto free_req; wait_event_interruptible_timeout( pq->busy.wait_dma, (pq->state == SDMA_PKT_Q_ACTIVE), @@ -606,10 +595,19 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd, *count += idx; return 0; free_req: - user_sdma_free_request(req, true); - if (req_queued) + /* + * If the submitted seqsubmitted == npkts, the completion routine + * controls the final state. If sequbmitted < npkts, wait for any + * outstanding packets to finish before cleaning up. + */ + if (req->seqsubmitted < req->info.npkts) { + if (req->seqsubmitted) + wait_event(pq->busy.wait_dma, + (req->seqcomp == req->seqsubmitted - 1)); + user_sdma_free_request(req, true); pq_update(pq); - set_comp_state(pq, cq, info.comp_idx, ERROR, req->status); + set_comp_state(pq, cq, info.comp_idx, ERROR, ret); + } return ret; } @@ -917,7 +915,6 @@ dosend: ret = sdma_send_txlist(req->sde, &pq->busy, &req->txps, &count); req->seqsubmitted += count; if (req->seqsubmitted == req->info.npkts) { - WRITE_ONCE(req->done, 1); /* * The txreq has already been submitted to the HW queue * so we can free the AHG entry now. Corruption will not @@ -1347,11 +1344,15 @@ static int set_txreq_header_ahg(struct user_sdma_request *req, return diff; } -/* - * SDMA tx request completion callback. Called when the SDMA progress - * state machine gets notification that the SDMA descriptors for this - * tx request have been processed by the DMA engine. Called in - * interrupt context. +/** + * user_sdma_txreq_cb() - SDMA tx request completion callback. + * @txreq: valid sdma tx request + * @status: success/failure of request + * + * Called when the SDMA progress state machine gets notification that + * the SDMA descriptors for this tx request have been processed by the + * DMA engine. Called in interrupt context. + * Only do work on completed sequences. */ static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status) { @@ -1360,7 +1361,7 @@ static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status) struct user_sdma_request *req; struct hfi1_user_sdma_pkt_q *pq; struct hfi1_user_sdma_comp_q *cq; - u16 idx; + enum hfi1_sdma_comp_state state = COMPLETE; if (!tx->req) return; @@ -1373,31 +1374,19 @@ static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status) SDMA_DBG(req, "SDMA completion with error %d", status); WRITE_ONCE(req->has_error, 1); + state = ERROR; } req->seqcomp = tx->seqnum; kmem_cache_free(pq->txreq_cache, tx); - tx = NULL; - - idx = req->info.comp_idx; - if (req->status == -1 && status == SDMA_TXREQ_S_OK) { - if (req->seqcomp == req->info.npkts - 1) { - req->status = 0; - user_sdma_free_request(req, false); - pq_update(pq); - set_comp_state(pq, cq, idx, COMPLETE, 0); - } - } else { - if (status != SDMA_TXREQ_S_OK) - req->status = status; - if (req->seqcomp == (ACCESS_ONCE(req->seqsubmitted) - 1) && - (READ_ONCE(req->done) || - READ_ONCE(req->has_error))) { - user_sdma_free_request(req, false); - pq_update(pq); - set_comp_state(pq, cq, idx, ERROR, req->status); - } - } + + /* sequence isn't complete? We are done */ + if (req->seqcomp != req->info.npkts - 1) + return; + + user_sdma_free_request(req, false); + set_comp_state(pq, cq, req->info.comp_idx, state, status); + pq_update(pq); } static inline void pq_update(struct hfi1_user_sdma_pkt_q *pq) @@ -1430,6 +1419,8 @@ static void user_sdma_free_request(struct user_sdma_request *req, bool unpin) if (!node) continue; + req->iovs[i].node = NULL; + if (unpin) hfi1_mmu_rb_remove(req->pq->handler, &node->rb); diff --git a/drivers/infiniband/hw/hfi1/user_sdma.h b/drivers/infiniband/hw/hfi1/user_sdma.h index 9b8bb5634c0d5..5af52334b7dca 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.h +++ b/drivers/infiniband/hw/hfi1/user_sdma.h @@ -196,8 +196,6 @@ struct user_sdma_request { /* Writeable fields shared with interrupt */ u64 seqcomp ____cacheline_aligned_in_smp; u64 seqsubmitted; - /* status of the last txreq completed */ - int status; /* Send side fields */ struct list_head txps ____cacheline_aligned_in_smp; @@ -219,7 +217,6 @@ struct user_sdma_request { u16 tididx; /* progress index moving along the iovs array */ u8 iov_idx; - u8 done; u8 has_error; struct user_sdma_iovec iovs[MAX_VECTORS_PER_REQ]; -- GitLab From 6dce186b48bbf6abbe207a9f18deaaa6b6363d22 Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Tue, 23 Oct 2018 18:03:19 +0200 Subject: [PATCH 1738/2269] pinctrl: meson: fix pinconf bias disable [ Upstream commit e39f9dd8206ad66992ac0e6218ef1ba746f2cce9 ] If a bias is enabled on a pin of an Amlogic SoC, calling .pin_config_set() with PIN_CONFIG_BIAS_DISABLE will not disable the bias. Instead it will force a pull-down bias on the pin. Instead of the pull type register bank, the driver should access the pull enable register bank. Fixes: 6ac730951104 ("pinctrl: add driver for Amlogic Meson SoCs") Signed-off-by: Jerome Brunet Acked-by: Neil Armstrong Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- drivers/pinctrl/meson/pinctrl-meson.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/meson/pinctrl-meson.c b/drivers/pinctrl/meson/pinctrl-meson.c index 66ed70c127331..6c43322dbb97c 100644 --- a/drivers/pinctrl/meson/pinctrl-meson.c +++ b/drivers/pinctrl/meson/pinctrl-meson.c @@ -273,7 +273,7 @@ static int meson_pinconf_set(struct pinctrl_dev *pcdev, unsigned int pin, dev_dbg(pc->dev, "pin %u: disable bias\n", pin); meson_calc_reg_and_bit(bank, pin, REG_PULL, ®, &bit); - ret = regmap_update_bits(pc->reg_pull, reg, + ret = regmap_update_bits(pc->reg_pullen, reg, BIT(bit), 0); if (ret) return ret; -- GitLab From 6af32ab13852790718ed3fb63cd09fc4d3e8afa3 Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Tue, 6 Nov 2018 19:49:34 -0600 Subject: [PATCH 1739/2269] KVM: PPC: Move and undef TRACE_INCLUDE_PATH/FILE [ Upstream commit 28c5bcf74fa07c25d5bd118d1271920f51ce2a98 ] TRACE_INCLUDE_PATH and TRACE_INCLUDE_FILE are used by , so like that #include, they should be outside #ifdef protection. They also need to be #undefed before defining, in case multiple trace headers are included by the same C file. This became the case on book3e after commit cf4a6085151a ("powerpc/mm: Add missing tracepoint for tlbie"), leading to the following build error: CC arch/powerpc/kvm/powerpc.o In file included from arch/powerpc/kvm/powerpc.c:51:0: arch/powerpc/kvm/trace.h:9:0: error: "TRACE_INCLUDE_PATH" redefined [-Werror] #define TRACE_INCLUDE_PATH . ^ In file included from arch/powerpc/kvm/../mm/mmu_decl.h:25:0, from arch/powerpc/kvm/powerpc.c:48: ./arch/powerpc/include/asm/trace.h:224:0: note: this is the location of the previous definition #define TRACE_INCLUDE_PATH asm ^ cc1: all warnings being treated as errors Reported-by: Christian Zigotzky Signed-off-by: Scott Wood Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin --- arch/powerpc/kvm/trace.h | 8 ++++++-- arch/powerpc/kvm/trace_booke.h | 9 +++++++-- arch/powerpc/kvm/trace_hv.h | 9 +++++++-- arch/powerpc/kvm/trace_pr.h | 9 +++++++-- 4 files changed, 27 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/kvm/trace.h b/arch/powerpc/kvm/trace.h index 491b0f715d6bc..ea1d7c8083190 100644 --- a/arch/powerpc/kvm/trace.h +++ b/arch/powerpc/kvm/trace.h @@ -6,8 +6,6 @@ #undef TRACE_SYSTEM #define TRACE_SYSTEM kvm -#define TRACE_INCLUDE_PATH . -#define TRACE_INCLUDE_FILE trace /* * Tracepoint for guest mode entry. @@ -120,4 +118,10 @@ TRACE_EVENT(kvm_check_requests, #endif /* _TRACE_KVM_H */ /* This part must be outside protection */ +#undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE + +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE trace + #include diff --git a/arch/powerpc/kvm/trace_booke.h b/arch/powerpc/kvm/trace_booke.h index ac640e81fdc5f..3837842986aa4 100644 --- a/arch/powerpc/kvm/trace_booke.h +++ b/arch/powerpc/kvm/trace_booke.h @@ -6,8 +6,6 @@ #undef TRACE_SYSTEM #define TRACE_SYSTEM kvm_booke -#define TRACE_INCLUDE_PATH . -#define TRACE_INCLUDE_FILE trace_booke #define kvm_trace_symbol_exit \ {0, "CRITICAL"}, \ @@ -218,4 +216,11 @@ TRACE_EVENT(kvm_booke_queue_irqprio, #endif /* This part must be outside protection */ + +#undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE + +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE trace_booke + #include diff --git a/arch/powerpc/kvm/trace_hv.h b/arch/powerpc/kvm/trace_hv.h index bcfe8a987f6a9..8a1e3b0047f19 100644 --- a/arch/powerpc/kvm/trace_hv.h +++ b/arch/powerpc/kvm/trace_hv.h @@ -9,8 +9,6 @@ #undef TRACE_SYSTEM #define TRACE_SYSTEM kvm_hv -#define TRACE_INCLUDE_PATH . -#define TRACE_INCLUDE_FILE trace_hv #define kvm_trace_symbol_hcall \ {H_REMOVE, "H_REMOVE"}, \ @@ -497,4 +495,11 @@ TRACE_EVENT(kvmppc_run_vcpu_exit, #endif /* _TRACE_KVM_HV_H */ /* This part must be outside protection */ + +#undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE + +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE trace_hv + #include diff --git a/arch/powerpc/kvm/trace_pr.h b/arch/powerpc/kvm/trace_pr.h index 85785a370c0ea..256530eb1354e 100644 --- a/arch/powerpc/kvm/trace_pr.h +++ b/arch/powerpc/kvm/trace_pr.h @@ -8,8 +8,6 @@ #undef TRACE_SYSTEM #define TRACE_SYSTEM kvm_pr -#define TRACE_INCLUDE_PATH . -#define TRACE_INCLUDE_FILE trace_pr TRACE_EVENT(kvm_book3s_reenter, TP_PROTO(int r, struct kvm_vcpu *vcpu), @@ -272,4 +270,11 @@ TRACE_EVENT(kvm_unmap_hva, #endif /* _TRACE_KVM_H */ /* This part must be outside protection */ + +#undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE + +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE trace_pr + #include -- GitLab From abaf1eb8132120f80071137cf5421a1c1eb7d720 Mon Sep 17 00:00:00 2001 From: Anson Huang Date: Mon, 5 Nov 2018 00:59:28 +0000 Subject: [PATCH 1740/2269] cpufreq: imx6q: add return value check for voltage scale [ Upstream commit 6ef28a04d1ccf718eee069b72132ce4aa1e52ab9 ] Add return value check for voltage scale when ARM clock rate change fail. Signed-off-by: Anson Huang Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- drivers/cpufreq/imx6q-cpufreq.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/cpufreq/imx6q-cpufreq.c b/drivers/cpufreq/imx6q-cpufreq.c index 14466a9b01c0b..63d28323a29c5 100644 --- a/drivers/cpufreq/imx6q-cpufreq.c +++ b/drivers/cpufreq/imx6q-cpufreq.c @@ -135,8 +135,13 @@ static int imx6q_set_target(struct cpufreq_policy *policy, unsigned int index) /* Ensure the arm clock divider is what we expect */ ret = clk_set_rate(arm_clk, new_freq * 1000); if (ret) { + int ret1; + dev_err(cpu_dev, "failed to set clock rate: %d\n", ret); - regulator_set_voltage_tol(arm_reg, volt_old, 0); + ret1 = regulator_set_voltage_tol(arm_reg, volt_old, 0); + if (ret1) + dev_warn(cpu_dev, + "failed to restore vddarm voltage: %d\n", ret1); return ret; } -- GitLab From 6e4fbdc7a308957c98d4599255a7a700ec92ca1c Mon Sep 17 00:00:00 2001 From: Xulin Sun Date: Tue, 6 Nov 2018 16:42:19 +0800 Subject: [PATCH 1741/2269] rtc: pcf2127: fix a kmemleak caused in pcf2127_i2c_gather_write [ Upstream commit 9bde0afb7a906f1dabdba37162551565740b862d ] pcf2127_i2c_gather_write() allocates memory as local variable for i2c_master_send(), after finishing the master transfer, the allocated memory should be freed. The kmemleak is reported: unreferenced object 0xffff80231e7dba80 (size 64): comm "hwclock", pid 27762, jiffies 4296880075 (age 356.944s) hex dump (first 32 bytes): 03 00 12 03 19 02 11 13 00 80 98 18 00 00 ff ff ................ 00 50 00 00 00 00 00 00 02 00 00 00 00 00 00 00 .P.............. backtrace: [] create_object+0xf8/0x278 [] kmemleak_alloc+0x74/0xa0 [] __kmalloc+0x1ac/0x348 [] pcf2127_i2c_gather_write+0x54/0xf8 [] _regmap_raw_write+0x464/0x850 [] regmap_bulk_write+0x1a4/0x348 [] pcf2127_rtc_set_time+0xac/0xe8 [] rtc_set_time+0x80/0x138 [] rtc_dev_ioctl+0x398/0x610 [] do_vfs_ioctl+0xb0/0x848 [] SyS_ioctl+0x8c/0xa8 [] el0_svc_naked+0x34/0x38 [] 0xffffffffffffffff Signed-off-by: Xulin Sun Signed-off-by: Alexandre Belloni Signed-off-by: Sasha Levin --- drivers/rtc/rtc-pcf2127.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/rtc/rtc-pcf2127.c b/drivers/rtc/rtc-pcf2127.c index f33447c5db85e..9f1b14bf91aed 100644 --- a/drivers/rtc/rtc-pcf2127.c +++ b/drivers/rtc/rtc-pcf2127.c @@ -248,6 +248,9 @@ static int pcf2127_i2c_gather_write(void *context, memcpy(buf + 1, val, val_size); ret = i2c_master_send(client, buf, val_size + 1); + + kfree(buf); + if (ret != val_size + 1) return ret < 0 ? ret : -EIO; -- GitLab From 66da887d8732b75b9dccc92b75d3972381fe62c7 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 8 Nov 2018 23:55:16 +0100 Subject: [PATCH 1742/2269] crypto: simd - correctly take reqsize of wrapped skcipher into account [ Upstream commit 508a1c4df085a547187eed346f1bfe5e381797f1 ] The simd wrapper's skcipher request context structure consists of a single subrequest whose size is taken from the subordinate skcipher. However, in simd_skcipher_init(), the reqsize that is retrieved is not from the subordinate skcipher but from the cryptd request structure, whose size is completely unrelated to the actual wrapped skcipher. Reported-by: Qian Cai Signed-off-by: Ard Biesheuvel Tested-by: Qian Cai Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- crypto/simd.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/crypto/simd.c b/crypto/simd.c index 88203370a62f9..894c629441066 100644 --- a/crypto/simd.c +++ b/crypto/simd.c @@ -126,8 +126,9 @@ static int simd_skcipher_init(struct crypto_skcipher *tfm) ctx->cryptd_tfm = cryptd_tfm; - reqsize = sizeof(struct skcipher_request); - reqsize += crypto_skcipher_reqsize(&cryptd_tfm->base); + reqsize = crypto_skcipher_reqsize(cryptd_skcipher_child(cryptd_tfm)); + reqsize = max(reqsize, crypto_skcipher_reqsize(&cryptd_tfm->base)); + reqsize += sizeof(struct skcipher_request); crypto_skcipher_set_reqsize(tfm, reqsize); -- GitLab From 5565c30a0bc770764cd085db881bc7a9f5e9d63c Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 9 Nov 2018 15:58:40 -0700 Subject: [PATCH 1743/2269] floppy: fix race condition in __floppy_read_block_0() [ Upstream commit de7b75d82f70c5469675b99ad632983c50b6f7e7 ] LKP recently reported a hang at bootup in the floppy code: [ 245.678853] INFO: task mount:580 blocked for more than 120 seconds. [ 245.679906] Tainted: G T 4.19.0-rc6-00172-ga9f38e1 #1 [ 245.680959] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [ 245.682181] mount D 6372 580 1 0x00000004 [ 245.683023] Call Trace: [ 245.683425] __schedule+0x2df/0x570 [ 245.683975] schedule+0x2d/0x80 [ 245.684476] schedule_timeout+0x19d/0x330 [ 245.685090] ? wait_for_common+0xa5/0x170 [ 245.685735] wait_for_common+0xac/0x170 [ 245.686339] ? do_sched_yield+0x90/0x90 [ 245.686935] wait_for_completion+0x12/0x20 [ 245.687571] __floppy_read_block_0+0xfb/0x150 [ 245.688244] ? floppy_resume+0x40/0x40 [ 245.688844] floppy_revalidate+0x20f/0x240 [ 245.689486] check_disk_change+0x43/0x60 [ 245.690087] floppy_open+0x1ea/0x360 [ 245.690653] __blkdev_get+0xb4/0x4d0 [ 245.691212] ? blkdev_get+0x1db/0x370 [ 245.691777] blkdev_get+0x1f3/0x370 [ 245.692351] ? path_put+0x15/0x20 [ 245.692871] ? lookup_bdev+0x4b/0x90 [ 245.693539] blkdev_get_by_path+0x3d/0x80 [ 245.694165] mount_bdev+0x2a/0x190 [ 245.694695] squashfs_mount+0x10/0x20 [ 245.695271] ? squashfs_alloc_inode+0x30/0x30 [ 245.695960] mount_fs+0xf/0x90 [ 245.696451] vfs_kern_mount+0x43/0x130 [ 245.697036] do_mount+0x187/0xc40 [ 245.697563] ? memdup_user+0x28/0x50 [ 245.698124] ksys_mount+0x60/0xc0 [ 245.698639] sys_mount+0x19/0x20 [ 245.699167] do_int80_syscall_32+0x61/0x130 [ 245.699813] entry_INT80_32+0xc7/0xc7 showing that we never complete that read request. The reason is that the completion setup is racy - it initializes the completion event AFTER submitting the IO, which means that the IO could complete before/during the init. If it does, we are passing garbage to complete() and we may sleep forever waiting for the event to occur. Fixes: 7b7b68bba5ef ("floppy: bail out in open() if drive is not responding to block0 read") Reviewed-by: Omar Sandoval Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/block/floppy.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 3d0287e212fe5..a7f212ea17bf1 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -4146,10 +4146,11 @@ static int __floppy_read_block_0(struct block_device *bdev, int drive) bio.bi_end_io = floppy_rb0_cb; bio_set_op_attrs(&bio, REQ_OP_READ, 0); + init_completion(&cbdata.complete); + submit_bio(&bio); process_fd_request(); - init_completion(&cbdata.complete); wait_for_completion(&cbdata.complete); __free_page(page); -- GitLab From d5e236ba5bcdd95751980558b7af5519d05e19d1 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 6 Nov 2018 23:37:58 +1100 Subject: [PATCH 1744/2269] powerpc/io: Fix the IO workarounds code to work with Radix [ Upstream commit 43c6494fa1499912c8177e71450c0279041152a6 ] Back in 2006 Ben added some workarounds for a misbehaviour in the Spider IO bridge used on early Cell machines, see commit 014da7ff47b5 ("[POWERPC] Cell "Spider" MMIO workarounds"). Later these were made to be generic, ie. not tied specifically to Spider. The code stashes a token in the high bits (59-48) of virtual addresses used for IO (eg. returned from ioremap()). This works fine when using the Hash MMU, but when we're using the Radix MMU the bits used for the token overlap with some of the bits of the virtual address. This is because the maximum virtual address is larger with Radix, up to c00fffffffffffff, and in fact we use that high part of the address range for ioremap(), see RADIX_KERN_IO_START. As it happens the bits that are used overlap with the bits that differentiate an IO address vs a linear map address. If the resulting address lies outside the linear mapping we will crash (see below), if not we just corrupt memory. virtio-pci 0000:00:00.0: Using 64-bit direct DMA at offset 800000000000000 Unable to handle kernel paging request for data at address 0xc000000080000014 ... CFAR: c000000000626b98 DAR: c000000080000014 DSISR: 42000000 IRQMASK: 0 GPR00: c0000000006c54fc c00000003e523378 c0000000016de600 0000000000000000 GPR04: c00c000080000014 0000000000000007 0fffffff000affff 0000000000000030 ^^^^ ... NIP [c000000000626c5c] .iowrite8+0xec/0x100 LR [c0000000006c992c] .vp_reset+0x2c/0x90 Call Trace: .pci_bus_read_config_dword+0xc4/0x120 (unreliable) .register_virtio_device+0x13c/0x1c0 .virtio_pci_probe+0x148/0x1f0 .local_pci_probe+0x68/0x140 .pci_device_probe+0x164/0x220 .really_probe+0x274/0x3b0 .driver_probe_device+0x80/0x170 .__driver_attach+0x14c/0x150 .bus_for_each_dev+0xb8/0x130 .driver_attach+0x34/0x50 .bus_add_driver+0x178/0x2f0 .driver_register+0x90/0x1a0 .__pci_register_driver+0x6c/0x90 .virtio_pci_driver_init+0x2c/0x40 .do_one_initcall+0x64/0x280 .kernel_init_freeable+0x36c/0x474 .kernel_init+0x24/0x160 .ret_from_kernel_thread+0x58/0x7c This hasn't been a problem because CONFIG_PPC_IO_WORKAROUNDS which enables this code is usually not enabled. It is only enabled when it's selected by PPC_CELL_NATIVE which is only selected by PPC_IBM_CELL_BLADE and that in turn depends on BIG_ENDIAN. So in order to hit the bug you need to build a big endian kernel, with IBM Cell Blade support enabled, as well as Radix MMU support, and then boot that on Power9 using Radix MMU. Still we can fix the bug, so let's do that. We simply use fewer bits for the token, taking the union of the restrictions on the address from both Hash and Radix, we end up with 8 bits we can use for the token. The only user of the token is iowa_mem_find_bus() which only supports 8 token values, so 8 bits is plenty for that. Fixes: 566ca99af026 ("powerpc/mm/radix: Add dummy radix_enabled()") Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin --- arch/powerpc/include/asm/io.h | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h index 422f99cf99248..e6d33eed82021 100644 --- a/arch/powerpc/include/asm/io.h +++ b/arch/powerpc/include/asm/io.h @@ -287,19 +287,13 @@ extern void _memcpy_toio(volatile void __iomem *dest, const void *src, * their hooks, a bitfield is reserved for use by the platform near the * top of MMIO addresses (not PIO, those have to cope the hard way). * - * This bit field is 12 bits and is at the top of the IO virtual - * addresses PCI_IO_INDIRECT_TOKEN_MASK. + * The highest address in the kernel virtual space are: * - * The kernel virtual space is thus: + * d0003fffffffffff # with Hash MMU + * c00fffffffffffff # with Radix MMU * - * 0xD000000000000000 : vmalloc - * 0xD000080000000000 : PCI PHB IO space - * 0xD000080080000000 : ioremap - * 0xD0000fffffffffff : end of ioremap region - * - * Since the top 4 bits are reserved as the region ID, we use thus - * the next 12 bits and keep 4 bits available for the future if the - * virtual address space is ever to be extended. + * The top 4 bits are reserved as the region ID on hash, leaving us 8 bits + * that can be used for the field. * * The direct IO mapping operations will then mask off those bits * before doing the actual access, though that only happen when @@ -311,8 +305,8 @@ extern void _memcpy_toio(volatile void __iomem *dest, const void *src, */ #ifdef CONFIG_PPC_INDIRECT_MMIO -#define PCI_IO_IND_TOKEN_MASK 0x0fff000000000000ul -#define PCI_IO_IND_TOKEN_SHIFT 48 +#define PCI_IO_IND_TOKEN_SHIFT 52 +#define PCI_IO_IND_TOKEN_MASK (0xfful << PCI_IO_IND_TOKEN_SHIFT) #define PCI_FIX_ADDR(addr) \ ((PCI_IO_ADDR)(((unsigned long)(addr)) & ~PCI_IO_IND_TOKEN_MASK)) #define PCI_GET_ADDR_TOKEN(addr) \ -- GitLab From e1f0f55f27883820021a6cbc69deba0576098d0d Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Fri, 19 Oct 2018 10:04:18 -0700 Subject: [PATCH 1745/2269] perf/x86/intel/uncore: Add more IMC PCI IDs for KabyLake and CoffeeLake CPUs [ Upstream commit c10a8de0d32e95b0b8c7c17b6dc09baea5a5a899 ] KabyLake and CoffeeLake CPUs have the same client uncore events as SkyLake. Add the PCI IDs for the KabyLake Y, U, S processor lines and CoffeeLake U, H, S processor lines. Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Link: http://lkml.kernel.org/r/20181019170419.378-1-kan.liang@linux.intel.com Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin --- arch/x86/events/intel/uncore_snb.c | 115 ++++++++++++++++++++++++++++- 1 file changed, 114 insertions(+), 1 deletion(-) diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c index aee5e8496be4b..aa4e6f4e6a01c 100644 --- a/arch/x86/events/intel/uncore_snb.c +++ b/arch/x86/events/intel/uncore_snb.c @@ -15,6 +15,25 @@ #define PCI_DEVICE_ID_INTEL_SKL_HQ_IMC 0x1910 #define PCI_DEVICE_ID_INTEL_SKL_SD_IMC 0x190f #define PCI_DEVICE_ID_INTEL_SKL_SQ_IMC 0x191f +#define PCI_DEVICE_ID_INTEL_KBL_Y_IMC 0x590c +#define PCI_DEVICE_ID_INTEL_KBL_U_IMC 0x5904 +#define PCI_DEVICE_ID_INTEL_KBL_UQ_IMC 0x5914 +#define PCI_DEVICE_ID_INTEL_KBL_SD_IMC 0x590f +#define PCI_DEVICE_ID_INTEL_KBL_SQ_IMC 0x591f +#define PCI_DEVICE_ID_INTEL_CFL_2U_IMC 0x3ecc +#define PCI_DEVICE_ID_INTEL_CFL_4U_IMC 0x3ed0 +#define PCI_DEVICE_ID_INTEL_CFL_4H_IMC 0x3e10 +#define PCI_DEVICE_ID_INTEL_CFL_6H_IMC 0x3ec4 +#define PCI_DEVICE_ID_INTEL_CFL_2S_D_IMC 0x3e0f +#define PCI_DEVICE_ID_INTEL_CFL_4S_D_IMC 0x3e1f +#define PCI_DEVICE_ID_INTEL_CFL_6S_D_IMC 0x3ec2 +#define PCI_DEVICE_ID_INTEL_CFL_8S_D_IMC 0x3e30 +#define PCI_DEVICE_ID_INTEL_CFL_4S_W_IMC 0x3e18 +#define PCI_DEVICE_ID_INTEL_CFL_6S_W_IMC 0x3ec6 +#define PCI_DEVICE_ID_INTEL_CFL_8S_W_IMC 0x3e31 +#define PCI_DEVICE_ID_INTEL_CFL_4S_S_IMC 0x3e33 +#define PCI_DEVICE_ID_INTEL_CFL_6S_S_IMC 0x3eca +#define PCI_DEVICE_ID_INTEL_CFL_8S_S_IMC 0x3e32 /* SNB event control */ #define SNB_UNC_CTL_EV_SEL_MASK 0x000000ff @@ -632,7 +651,82 @@ static const struct pci_device_id skl_uncore_pci_ids[] = { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_SQ_IMC), .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), }, - + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBL_Y_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBL_U_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBL_UQ_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBL_SD_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBL_SQ_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_2U_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_4U_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_4H_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_6H_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_2S_D_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_4S_D_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_6S_D_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_8S_D_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_4S_W_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_6S_W_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_8S_W_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_4S_S_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_6S_S_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_8S_S_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, { /* end: all zeroes */ }, }; @@ -681,6 +775,25 @@ static const struct imc_uncore_pci_dev desktop_imc_pci_ids[] = { IMC_DEV(SKL_HQ_IMC, &skl_uncore_pci_driver), /* 6th Gen Core H Quad Core */ IMC_DEV(SKL_SD_IMC, &skl_uncore_pci_driver), /* 6th Gen Core S Dual Core */ IMC_DEV(SKL_SQ_IMC, &skl_uncore_pci_driver), /* 6th Gen Core S Quad Core */ + IMC_DEV(KBL_Y_IMC, &skl_uncore_pci_driver), /* 7th Gen Core Y */ + IMC_DEV(KBL_U_IMC, &skl_uncore_pci_driver), /* 7th Gen Core U */ + IMC_DEV(KBL_UQ_IMC, &skl_uncore_pci_driver), /* 7th Gen Core U Quad Core */ + IMC_DEV(KBL_SD_IMC, &skl_uncore_pci_driver), /* 7th Gen Core S Dual Core */ + IMC_DEV(KBL_SQ_IMC, &skl_uncore_pci_driver), /* 7th Gen Core S Quad Core */ + IMC_DEV(CFL_2U_IMC, &skl_uncore_pci_driver), /* 8th Gen Core U 2 Cores */ + IMC_DEV(CFL_4U_IMC, &skl_uncore_pci_driver), /* 8th Gen Core U 4 Cores */ + IMC_DEV(CFL_4H_IMC, &skl_uncore_pci_driver), /* 8th Gen Core H 4 Cores */ + IMC_DEV(CFL_6H_IMC, &skl_uncore_pci_driver), /* 8th Gen Core H 6 Cores */ + IMC_DEV(CFL_2S_D_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 2 Cores Desktop */ + IMC_DEV(CFL_4S_D_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 4 Cores Desktop */ + IMC_DEV(CFL_6S_D_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 6 Cores Desktop */ + IMC_DEV(CFL_8S_D_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 8 Cores Desktop */ + IMC_DEV(CFL_4S_W_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 4 Cores Work Station */ + IMC_DEV(CFL_6S_W_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 6 Cores Work Station */ + IMC_DEV(CFL_8S_W_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 8 Cores Work Station */ + IMC_DEV(CFL_4S_S_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 4 Cores Server */ + IMC_DEV(CFL_6S_S_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 6 Cores Server */ + IMC_DEV(CFL_8S_S_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 8 Cores Server */ { /* end marker */ } }; -- GitLab From 677805a95934eb37bcc2482766d6160e6d216d98 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 12 Nov 2018 16:06:51 -0500 Subject: [PATCH 1746/2269] SUNRPC: Fix a bogus get/put in generic_key_to_expire() [ Upstream commit e3d5e573a54dabdc0f9f3cb039d799323372b251 ] Signed-off-by: Trond Myklebust Signed-off-by: Sasha Levin --- net/sunrpc/auth_generic.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/net/sunrpc/auth_generic.c b/net/sunrpc/auth_generic.c index f1df9837f1aca..1ac08dcbf85db 100644 --- a/net/sunrpc/auth_generic.c +++ b/net/sunrpc/auth_generic.c @@ -281,13 +281,7 @@ static bool generic_key_to_expire(struct rpc_cred *cred) { struct auth_cred *acred = &container_of(cred, struct generic_cred, gc_base)->acred; - bool ret; - - get_rpccred(cred); - ret = test_bit(RPC_CRED_KEY_EXPIRE_SOON, &acred->ac_flags); - put_rpccred(cred); - - return ret; + return test_bit(RPC_CRED_KEY_EXPIRE_SOON, &acred->ac_flags); } static const struct rpc_credops generic_credops = { -- GitLab From 281a4f41c6be4f6c09859f1877ab7a8b294ead62 Mon Sep 17 00:00:00 2001 From: Prarit Bhargava Date: Thu, 20 Sep 2018 08:59:14 -0400 Subject: [PATCH 1747/2269] kdb: Use strscpy with destination buffer size MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit c2b94c72d93d0929f48157eef128c4f9d2e603ce ] gcc 8.1.0 warns with: kernel/debug/kdb/kdb_support.c: In function ‘kallsyms_symbol_next’: kernel/debug/kdb/kdb_support.c:239:4: warning: ‘strncpy’ specified bound depends on the length of the source argument [-Wstringop-overflow=] strncpy(prefix_name, name, strlen(name)+1); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ kernel/debug/kdb/kdb_support.c:239:31: note: length computed here Use strscpy() with the destination buffer size, and use ellipses when displaying truncated symbols. v2: Use strscpy() Signed-off-by: Prarit Bhargava Cc: Jonathan Toppins Cc: Jason Wessel Cc: Daniel Thompson Cc: kgdb-bugreport@lists.sourceforge.net Reviewed-by: Daniel Thompson Signed-off-by: Daniel Thompson Signed-off-by: Sasha Levin --- kernel/debug/kdb/kdb_io.c | 15 +++++++++------ kernel/debug/kdb/kdb_private.h | 2 +- kernel/debug/kdb/kdb_support.c | 10 +++++----- 3 files changed, 15 insertions(+), 12 deletions(-) diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c index ed5d34925ad06..6a4b41484afe6 100644 --- a/kernel/debug/kdb/kdb_io.c +++ b/kernel/debug/kdb/kdb_io.c @@ -216,7 +216,7 @@ static char *kdb_read(char *buffer, size_t bufsize) int count; int i; int diag, dtab_count; - int key; + int key, buf_size, ret; diag = kdbgetintenv("DTABCOUNT", &dtab_count); @@ -336,9 +336,8 @@ poll_again: else p_tmp = tmpbuffer; len = strlen(p_tmp); - count = kallsyms_symbol_complete(p_tmp, - sizeof(tmpbuffer) - - (p_tmp - tmpbuffer)); + buf_size = sizeof(tmpbuffer) - (p_tmp - tmpbuffer); + count = kallsyms_symbol_complete(p_tmp, buf_size); if (tab == 2 && count > 0) { kdb_printf("\n%d symbols are found.", count); if (count > dtab_count) { @@ -350,9 +349,13 @@ poll_again: } kdb_printf("\n"); for (i = 0; i < count; i++) { - if (WARN_ON(!kallsyms_symbol_next(p_tmp, i))) + ret = kallsyms_symbol_next(p_tmp, i, buf_size); + if (WARN_ON(!ret)) break; - kdb_printf("%s ", p_tmp); + if (ret != -E2BIG) + kdb_printf("%s ", p_tmp); + else + kdb_printf("%s... ", p_tmp); *(p_tmp + len) = '\0'; } if (i >= dtab_count) diff --git a/kernel/debug/kdb/kdb_private.h b/kernel/debug/kdb/kdb_private.h index fc224fbcf9549..f2158e463a0f6 100644 --- a/kernel/debug/kdb/kdb_private.h +++ b/kernel/debug/kdb/kdb_private.h @@ -83,7 +83,7 @@ typedef struct __ksymtab { unsigned long sym_start; unsigned long sym_end; } kdb_symtab_t; -extern int kallsyms_symbol_next(char *prefix_name, int flag); +extern int kallsyms_symbol_next(char *prefix_name, int flag, int buf_size); extern int kallsyms_symbol_complete(char *prefix_name, int max_len); /* Exported Symbols for kernel loadable modules to use. */ diff --git a/kernel/debug/kdb/kdb_support.c b/kernel/debug/kdb/kdb_support.c index 84422d2b95c05..014f6fbb38327 100644 --- a/kernel/debug/kdb/kdb_support.c +++ b/kernel/debug/kdb/kdb_support.c @@ -221,11 +221,13 @@ int kallsyms_symbol_complete(char *prefix_name, int max_len) * Parameters: * prefix_name prefix of a symbol name to lookup * flag 0 means search from the head, 1 means continue search. + * buf_size maximum length that can be written to prefix_name + * buffer * Returns: * 1 if a symbol matches the given prefix. * 0 if no string found */ -int kallsyms_symbol_next(char *prefix_name, int flag) +int kallsyms_symbol_next(char *prefix_name, int flag, int buf_size) { int prefix_len = strlen(prefix_name); static loff_t pos; @@ -235,10 +237,8 @@ int kallsyms_symbol_next(char *prefix_name, int flag) pos = 0; while ((name = kdb_walk_kallsyms(&pos))) { - if (strncmp(name, prefix_name, prefix_len) == 0) { - strncpy(prefix_name, name, strlen(name)+1); - return 1; - } + if (!strncmp(name, prefix_name, prefix_len)) + return strscpy(prefix_name, name, buf_size); } return 0; } -- GitLab From 2a968a024116dc75b9488744a4711b0166ac3172 Mon Sep 17 00:00:00 2001 From: Satheesh Rajendran Date: Thu, 8 Nov 2018 10:47:56 +0530 Subject: [PATCH 1748/2269] powerpc/numa: Suppress "VPHN is not supported" messages [ Upstream commit 437ccdc8ce629470babdda1a7086e2f477048cbd ] When VPHN function is not supported and during cpu hotplug event, kernel prints message 'VPHN function not supported. Disabling polling...'. Currently it prints on every hotplug event, it floods dmesg when a KVM guest tries to hotplug huge number of vcpus, let's just print once and suppress further kernel prints. Signed-off-by: Satheesh Rajendran Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin --- arch/powerpc/mm/numa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 9fead0796364b..40fb9a8835fe3 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -1261,7 +1261,7 @@ static long vphn_get_associativity(unsigned long cpu, switch (rc) { case H_FUNCTION: - printk(KERN_INFO + printk_once(KERN_INFO "VPHN is not supported. Disabling polling...\n"); stop_topology_update(); break; -- GitLab From d6bfb89267efe3ae5eef1fc11984fae3dcd95137 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 14 Nov 2018 09:55:41 -0800 Subject: [PATCH 1749/2269] efi/arm: Revert deferred unmap of early memmap mapping [ Upstream commit 33412b8673135b18ea42beb7f5117ed0091798b6 ] Commit: 3ea86495aef2 ("efi/arm: preserve early mapping of UEFI memory map longer for BGRT") deferred the unmap of the early mapping of the UEFI memory map to accommodate the ACPI BGRT code, which looks up the memory type that backs the BGRT table to validate it against the requirements of the UEFI spec. Unfortunately, this causes problems on ARM, which does not permit early mappings to persist after paging_init() is called, resulting in a WARN() splat. Since we don't support the BGRT table on ARM anway, let's revert ARM to the old behaviour, which is to take down the early mapping at the end of efi_init(). Signed-off-by: Ard Biesheuvel Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Fixes: 3ea86495aef2 ("efi/arm: preserve early mapping of UEFI memory ...") Link: http://lkml.kernel.org/r/20181114175544.12860-3-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin --- drivers/firmware/efi/arm-init.c | 4 ++++ drivers/firmware/efi/arm-runtime.c | 2 +- drivers/firmware/efi/memmap.c | 3 +++ 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/firmware/efi/arm-init.c b/drivers/firmware/efi/arm-init.c index a7c522eac640f..312f9f32e1683 100644 --- a/drivers/firmware/efi/arm-init.c +++ b/drivers/firmware/efi/arm-init.c @@ -265,6 +265,10 @@ void __init efi_init(void) (params.mmap & ~PAGE_MASK))); init_screen_info(); + + /* ARM does not permit early mappings to persist across paging_init() */ + if (IS_ENABLED(CONFIG_ARM)) + efi_memmap_unmap(); } static int __init register_gop_device(void) diff --git a/drivers/firmware/efi/arm-runtime.c b/drivers/firmware/efi/arm-runtime.c index 8995a48bd0676..ad1530aff633f 100644 --- a/drivers/firmware/efi/arm-runtime.c +++ b/drivers/firmware/efi/arm-runtime.c @@ -122,7 +122,7 @@ static int __init arm_enable_runtime_services(void) { u64 mapsize; - if (!efi_enabled(EFI_BOOT) || !efi_enabled(EFI_MEMMAP)) { + if (!efi_enabled(EFI_BOOT)) { pr_info("EFI services will not be available.\n"); return 0; } diff --git a/drivers/firmware/efi/memmap.c b/drivers/firmware/efi/memmap.c index 5fc70520e04c4..1907db2b38d81 100644 --- a/drivers/firmware/efi/memmap.c +++ b/drivers/firmware/efi/memmap.c @@ -118,6 +118,9 @@ int __init efi_memmap_init_early(struct efi_memory_map_data *data) void __init efi_memmap_unmap(void) { + if (!efi_enabled(EFI_MEMMAP)) + return; + if (!efi.memmap.late) { unsigned long size; -- GitLab From e28ae7aaa9920570820c1af795663df317946ad7 Mon Sep 17 00:00:00 2001 From: Vitaly Wool Date: Fri, 16 Nov 2018 15:07:56 -0800 Subject: [PATCH 1750/2269] z3fold: fix possible reclaim races [ Upstream commit ca0246bb97c23da9d267c2107c07fb77e38205c9 ] Reclaim and free can race on an object which is basically fine but in order for reclaim to be able to map "freed" object we need to encode object length in the handle. handle_to_chunks() is then introduced to extract object length from a handle and use it during mapping. Moreover, to avoid racing on a z3fold "headless" page release, we should not try to free that page in z3fold_free() if the reclaim bit is set. Also, in the unlikely case of trying to reclaim a page being freed, we should not proceed with that page. While at it, fix the page accounting in reclaim function. This patch supersedes "[PATCH] z3fold: fix reclaim lock-ups". Link: http://lkml.kernel.org/r/20181105162225.74e8837d03583a9b707cf559@gmail.com Signed-off-by: Vitaly Wool Signed-off-by: Jongseok Kim Reported-by-by: Jongseok Kim Reviewed-by: Snild Dolkow Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- mm/z3fold.c | 101 ++++++++++++++++++++++++++++++++-------------------- 1 file changed, 62 insertions(+), 39 deletions(-) diff --git a/mm/z3fold.c b/mm/z3fold.c index f33403d718ac9..2813cdfa46b98 100644 --- a/mm/z3fold.c +++ b/mm/z3fold.c @@ -99,6 +99,7 @@ struct z3fold_header { #define NCHUNKS ((PAGE_SIZE - ZHDR_SIZE_ALIGNED) >> CHUNK_SHIFT) #define BUDDY_MASK (0x3) +#define BUDDY_SHIFT 2 /** * struct z3fold_pool - stores metadata for each z3fold pool @@ -145,7 +146,7 @@ enum z3fold_page_flags { MIDDLE_CHUNK_MAPPED, NEEDS_COMPACTING, PAGE_STALE, - UNDER_RECLAIM + PAGE_CLAIMED, /* by either reclaim or free */ }; /***************** @@ -174,7 +175,7 @@ static struct z3fold_header *init_z3fold_page(struct page *page, clear_bit(MIDDLE_CHUNK_MAPPED, &page->private); clear_bit(NEEDS_COMPACTING, &page->private); clear_bit(PAGE_STALE, &page->private); - clear_bit(UNDER_RECLAIM, &page->private); + clear_bit(PAGE_CLAIMED, &page->private); spin_lock_init(&zhdr->page_lock); kref_init(&zhdr->refcount); @@ -223,8 +224,11 @@ static unsigned long encode_handle(struct z3fold_header *zhdr, enum buddy bud) unsigned long handle; handle = (unsigned long)zhdr; - if (bud != HEADLESS) - handle += (bud + zhdr->first_num) & BUDDY_MASK; + if (bud != HEADLESS) { + handle |= (bud + zhdr->first_num) & BUDDY_MASK; + if (bud == LAST) + handle |= (zhdr->last_chunks << BUDDY_SHIFT); + } return handle; } @@ -234,6 +238,12 @@ static struct z3fold_header *handle_to_z3fold_header(unsigned long handle) return (struct z3fold_header *)(handle & PAGE_MASK); } +/* only for LAST bud, returns zero otherwise */ +static unsigned short handle_to_chunks(unsigned long handle) +{ + return (handle & ~PAGE_MASK) >> BUDDY_SHIFT; +} + /* * (handle & BUDDY_MASK) < zhdr->first_num is possible in encode_handle * but that doesn't matter. because the masking will result in the @@ -717,37 +727,39 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle) page = virt_to_page(zhdr); if (test_bit(PAGE_HEADLESS, &page->private)) { - /* HEADLESS page stored */ - bud = HEADLESS; - } else { - z3fold_page_lock(zhdr); - bud = handle_to_buddy(handle); - - switch (bud) { - case FIRST: - zhdr->first_chunks = 0; - break; - case MIDDLE: - zhdr->middle_chunks = 0; - zhdr->start_middle = 0; - break; - case LAST: - zhdr->last_chunks = 0; - break; - default: - pr_err("%s: unknown bud %d\n", __func__, bud); - WARN_ON(1); - z3fold_page_unlock(zhdr); - return; + /* if a headless page is under reclaim, just leave. + * NB: we use test_and_set_bit for a reason: if the bit + * has not been set before, we release this page + * immediately so we don't care about its value any more. + */ + if (!test_and_set_bit(PAGE_CLAIMED, &page->private)) { + spin_lock(&pool->lock); + list_del(&page->lru); + spin_unlock(&pool->lock); + free_z3fold_page(page); + atomic64_dec(&pool->pages_nr); } + return; } - if (bud == HEADLESS) { - spin_lock(&pool->lock); - list_del(&page->lru); - spin_unlock(&pool->lock); - free_z3fold_page(page); - atomic64_dec(&pool->pages_nr); + /* Non-headless case */ + z3fold_page_lock(zhdr); + bud = handle_to_buddy(handle); + + switch (bud) { + case FIRST: + zhdr->first_chunks = 0; + break; + case MIDDLE: + zhdr->middle_chunks = 0; + break; + case LAST: + zhdr->last_chunks = 0; + break; + default: + pr_err("%s: unknown bud %d\n", __func__, bud); + WARN_ON(1); + z3fold_page_unlock(zhdr); return; } @@ -755,7 +767,7 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle) atomic64_dec(&pool->pages_nr); return; } - if (test_bit(UNDER_RECLAIM, &page->private)) { + if (test_bit(PAGE_CLAIMED, &page->private)) { z3fold_page_unlock(zhdr); return; } @@ -833,20 +845,30 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries) } list_for_each_prev(pos, &pool->lru) { page = list_entry(pos, struct page, lru); + + /* this bit could have been set by free, in which case + * we pass over to the next page in the pool. + */ + if (test_and_set_bit(PAGE_CLAIMED, &page->private)) + continue; + + zhdr = page_address(page); if (test_bit(PAGE_HEADLESS, &page->private)) - /* candidate found */ break; - zhdr = page_address(page); - if (!z3fold_page_trylock(zhdr)) + if (!z3fold_page_trylock(zhdr)) { + zhdr = NULL; continue; /* can't evict at this point */ + } kref_get(&zhdr->refcount); list_del_init(&zhdr->buddy); zhdr->cpu = -1; - set_bit(UNDER_RECLAIM, &page->private); break; } + if (!zhdr) + break; + list_del_init(&page->lru); spin_unlock(&pool->lock); @@ -895,6 +917,7 @@ next: if (test_bit(PAGE_HEADLESS, &page->private)) { if (ret == 0) { free_z3fold_page(page); + atomic64_dec(&pool->pages_nr); return 0; } spin_lock(&pool->lock); @@ -902,7 +925,7 @@ next: spin_unlock(&pool->lock); } else { z3fold_page_lock(zhdr); - clear_bit(UNDER_RECLAIM, &page->private); + clear_bit(PAGE_CLAIMED, &page->private); if (kref_put(&zhdr->refcount, release_z3fold_page_locked)) { atomic64_dec(&pool->pages_nr); @@ -961,7 +984,7 @@ static void *z3fold_map(struct z3fold_pool *pool, unsigned long handle) set_bit(MIDDLE_CHUNK_MAPPED, &page->private); break; case LAST: - addr += PAGE_SIZE - (zhdr->last_chunks << CHUNK_SHIFT); + addr += PAGE_SIZE - (handle_to_chunks(handle) << CHUNK_SHIFT); break; default: pr_err("unknown buddy id %d\n", buddy); -- GitLab From a84c872c8b306e11c95c4912f4bed7f567e901b4 Mon Sep 17 00:00:00 2001 From: Yufen Yu Date: Fri, 16 Nov 2018 15:08:39 -0800 Subject: [PATCH 1751/2269] tmpfs: make lseek(SEEK_DATA/SEK_HOLE) return ENXIO with a negative offset [ Upstream commit 1a413646931cb14442065cfc17561e50f5b5bb44 ] Other filesystems such as ext4, f2fs and ubifs all return ENXIO when lseek (SEEK_DATA or SEEK_HOLE) requests a negative offset. man 2 lseek says : EINVAL whence is not valid. Or: the resulting file offset would be : negative, or beyond the end of a seekable device. : : ENXIO whence is SEEK_DATA or SEEK_HOLE, and the file offset is beyond : the end of the file. Make tmpfs return ENXIO under these circumstances as well. After this, tmpfs also passes xfstests's generic/448. [akpm@linux-foundation.org: rewrite changelog] Link: http://lkml.kernel.org/r/1540434176-14349-1-git-send-email-yuyufen@huawei.com Signed-off-by: Yufen Yu Reviewed-by: Andrew Morton Cc: Al Viro Cc: Hugh Dickins Cc: William Kucharski Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- mm/shmem.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/mm/shmem.c b/mm/shmem.c index ea786a504e1b9..fa08f56fd5e52 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2590,9 +2590,7 @@ static loff_t shmem_file_llseek(struct file *file, loff_t offset, int whence) inode_lock(inode); /* We're holding i_mutex so we can access i_size directly */ - if (offset < 0) - offset = -EINVAL; - else if (offset >= inode->i_size) + if (offset < 0 || offset >= inode->i_size) offset = -ENXIO; else { start = offset >> PAGE_SHIFT; -- GitLab From c6a4b3c3b81b818dc0138c813fd7656aeb78aa11 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 16 Nov 2018 15:08:53 -0800 Subject: [PATCH 1752/2269] mm, page_alloc: check for max order in hot path [ Upstream commit c63ae43ba53bc432b414fd73dd5f4b01fcb1ab43 ] Konstantin has noticed that kvmalloc might trigger the following warning: WARNING: CPU: 0 PID: 6676 at mm/vmstat.c:986 __fragmentation_index+0x54/0x60 [...] Call Trace: fragmentation_index+0x76/0x90 compaction_suitable+0x4f/0xf0 shrink_node+0x295/0x310 node_reclaim+0x205/0x250 get_page_from_freelist+0x649/0xad0 __alloc_pages_nodemask+0x12a/0x2a0 kmalloc_large_node+0x47/0x90 __kmalloc_node+0x22b/0x2e0 kvmalloc_node+0x3e/0x70 xt_alloc_table_info+0x3a/0x80 [x_tables] do_ip6t_set_ctl+0xcd/0x1c0 [ip6_tables] nf_setsockopt+0x44/0x60 SyS_setsockopt+0x6f/0xc0 do_syscall_64+0x67/0x120 entry_SYSCALL_64_after_hwframe+0x3d/0xa2 the problem is that we only check for an out of bound order in the slow path and the node reclaim might happen from the fast path already. This is fixable by making sure that kvmalloc doesn't ever use kmalloc for requests that are larger than KMALLOC_MAX_SIZE but this also shows that the code is rather fragile. A recent UBSAN report just underlines that by the following report UBSAN: Undefined behaviour in mm/page_alloc.c:3117:19 shift exponent 51 is too large for 32-bit type 'int' CPU: 0 PID: 6520 Comm: syz-executor1 Not tainted 4.19.0-rc2 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0xd2/0x148 lib/dump_stack.c:113 ubsan_epilogue+0x12/0x94 lib/ubsan.c:159 __ubsan_handle_shift_out_of_bounds+0x2b6/0x30b lib/ubsan.c:425 __zone_watermark_ok+0x2c7/0x400 mm/page_alloc.c:3117 zone_watermark_fast mm/page_alloc.c:3216 [inline] get_page_from_freelist+0xc49/0x44c0 mm/page_alloc.c:3300 __alloc_pages_nodemask+0x21e/0x640 mm/page_alloc.c:4370 alloc_pages_current+0xcc/0x210 mm/mempolicy.c:2093 alloc_pages include/linux/gfp.h:509 [inline] __get_free_pages+0x12/0x60 mm/page_alloc.c:4414 dma_mem_alloc+0x36/0x50 arch/x86/include/asm/floppy.h:156 raw_cmd_copyin drivers/block/floppy.c:3159 [inline] raw_cmd_ioctl drivers/block/floppy.c:3206 [inline] fd_locked_ioctl+0xa00/0x2c10 drivers/block/floppy.c:3544 fd_ioctl+0x40/0x60 drivers/block/floppy.c:3571 __blkdev_driver_ioctl block/ioctl.c:303 [inline] blkdev_ioctl+0xb3c/0x1a30 block/ioctl.c:601 block_ioctl+0x105/0x150 fs/block_dev.c:1883 vfs_ioctl fs/ioctl.c:46 [inline] do_vfs_ioctl+0x1c0/0x1150 fs/ioctl.c:687 ksys_ioctl+0x9e/0xb0 fs/ioctl.c:702 __do_sys_ioctl fs/ioctl.c:709 [inline] __se_sys_ioctl fs/ioctl.c:707 [inline] __x64_sys_ioctl+0x7e/0xc0 fs/ioctl.c:707 do_syscall_64+0xc4/0x510 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe Note that this is not a kvmalloc path. It is just that the fast path really depends on having sanitzed order as well. Therefore move the order check to the fast path. Link: http://lkml.kernel.org/r/20181113094305.GM15120@dhcp22.suse.cz Signed-off-by: Michal Hocko Reported-by: Konstantin Khlebnikov Reported-by: Kyungtae Kim Acked-by: Vlastimil Babka Cc: Balbir Singh Cc: Mel Gorman Cc: Pavel Tatashin Cc: Oscar Salvador Cc: Mike Rapoport Cc: Aaron Lu Cc: Joonsoo Kim Cc: Byoungyoung Lee Cc: "Dae R. Jeong" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- mm/page_alloc.c | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index a604b5da6755e..2074f424dabfa 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3867,17 +3867,6 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, unsigned int cpuset_mems_cookie; int reserve_flags; - /* - * In the slowpath, we sanity check order to avoid ever trying to - * reclaim >= MAX_ORDER areas which will never succeed. Callers may - * be using allocators in order of preference for an area that is - * too large. - */ - if (order >= MAX_ORDER) { - WARN_ON_ONCE(!(gfp_mask & __GFP_NOWARN)); - return NULL; - } - /* * We also sanity check to catch abuse of atomic reserves being used by * callers that are not in atomic context. @@ -4179,6 +4168,15 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, int preferred_nid, gfp_t alloc_mask; /* The gfp_t that was actually used for allocation */ struct alloc_context ac = { }; + /* + * There are several places where we assume that the order value is sane + * so bail out early if the request is out of bound. + */ + if (unlikely(order >= MAX_ORDER)) { + WARN_ON_ONCE(!(gfp_mask & __GFP_NOWARN)); + return NULL; + } + gfp_mask &= gfp_allowed_mask; alloc_mask = gfp_mask; if (!prepare_alloc_pages(gfp_mask, order, preferred_nid, nodemask, &ac, &alloc_mask, &alloc_flags)) -- GitLab From c1ede7043d74de0022872921b453d106341fcf93 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 27 Aug 2018 10:21:45 +0200 Subject: [PATCH 1753/2269] of: add helper to lookup compatible child node [ Upstream commit 36156f9241cb0f9e37d998052873ca7501ad4b36 ] Add of_get_compatible_child() helper that can be used to lookup compatible child nodes. Several drivers currently use of_find_compatible_node() to lookup child nodes while failing to notice that the of_find_ functions search the entire tree depth-first (from a given start node) and therefore can match unrelated nodes. The fact that these functions also drop a reference to the node they start searching from (e.g. the parent node) is typically also overlooked, something which can lead to use-after-free bugs. Signed-off-by: Johan Hovold Signed-off-by: Rob Herring Signed-off-by: Sasha Levin --- drivers/of/base.c | 25 +++++++++++++++++++++++++ include/linux/of.h | 8 ++++++++ 2 files changed, 33 insertions(+) diff --git a/drivers/of/base.c b/drivers/of/base.c index 63897531cd75e..ce8a6e0c9b6a9 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -737,6 +737,31 @@ struct device_node *of_get_next_available_child(const struct device_node *node, } EXPORT_SYMBOL(of_get_next_available_child); +/** + * of_get_compatible_child - Find compatible child node + * @parent: parent node + * @compatible: compatible string + * + * Lookup child node whose compatible property contains the given compatible + * string. + * + * Returns a node pointer with refcount incremented, use of_node_put() on it + * when done; or NULL if not found. + */ +struct device_node *of_get_compatible_child(const struct device_node *parent, + const char *compatible) +{ + struct device_node *child; + + for_each_child_of_node(parent, child) { + if (of_device_is_compatible(child, compatible)) + break; + } + + return child; +} +EXPORT_SYMBOL(of_get_compatible_child); + /** * of_get_child_by_name - Find the child node by name for a given parent * @node: parent node diff --git a/include/linux/of.h b/include/linux/of.h index b240ed69dc962..70b7dacf92387 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -288,6 +288,8 @@ extern struct device_node *of_get_next_child(const struct device_node *node, extern struct device_node *of_get_next_available_child( const struct device_node *node, struct device_node *prev); +extern struct device_node *of_get_compatible_child(const struct device_node *parent, + const char *compatible); extern struct device_node *of_get_child_by_name(const struct device_node *node, const char *name); @@ -625,6 +627,12 @@ static inline bool of_have_populated_dt(void) return false; } +static inline struct device_node *of_get_compatible_child(const struct device_node *parent, + const char *compatible) +{ + return NULL; +} + static inline struct device_node *of_get_child_by_name( const struct device_node *node, const char *name) -- GitLab From 2ae0ac647a773fc3db57770a202b1c62a66fe3db Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 27 Aug 2018 10:21:52 +0200 Subject: [PATCH 1754/2269] NFC: nfcmrvl_uart: fix OF child-node lookup [ Upstream commit 5bf59773aaf36dd62117dc83d50e1bbf9ef432da ] Use the new of_get_compatible_child() helper to lookup the nfc child node instead of using of_find_compatible_node(), which searches the entire tree from a given start node and thus can return an unrelated (i.e. non-child) node. This also addresses a potential use-after-free (e.g. after probe deferral) as the tree-wide helper drops a reference to its first argument (i.e. the parent node). Fixes: e097dc624f78 ("NFC: nfcmrvl: add UART driver") Fixes: d8e018c0b321 ("NFC: nfcmrvl: update device tree bindings for Marvell NFC") Cc: stable # 4.2 Cc: Vincent Cuissard Cc: Samuel Ortiz Signed-off-by: Johan Hovold Signed-off-by: Rob Herring Signed-off-by: Sasha Levin --- drivers/nfc/nfcmrvl/uart.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/nfc/nfcmrvl/uart.c b/drivers/nfc/nfcmrvl/uart.c index 91162f8e0366c..9a22056e8d9ee 100644 --- a/drivers/nfc/nfcmrvl/uart.c +++ b/drivers/nfc/nfcmrvl/uart.c @@ -73,10 +73,9 @@ static int nfcmrvl_uart_parse_dt(struct device_node *node, struct device_node *matched_node; int ret; - matched_node = of_find_compatible_node(node, NULL, "marvell,nfc-uart"); + matched_node = of_get_compatible_child(node, "marvell,nfc-uart"); if (!matched_node) { - matched_node = of_find_compatible_node(node, NULL, - "mrvl,nfc-uart"); + matched_node = of_get_compatible_child(node, "mrvl,nfc-uart"); if (!matched_node) return -ENODEV; } -- GitLab From 6f95f0734968025278b6d8b4d0c589b7afeb9c96 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 27 Aug 2018 10:21:50 +0200 Subject: [PATCH 1755/2269] net: bcmgenet: fix OF child-node lookup [ Upstream commit d397dbe606120a1ea1b11b0020c3f7a3852da5ac ] Use the new of_get_compatible_child() helper to lookup the mdio child node instead of using of_find_compatible_node(), which searches the entire tree from a given start node and thus can return an unrelated (i.e. non-child) node. This also addresses a potential use-after-free (e.g. after probe deferral) as the tree-wide helper drops a reference to its first argument (i.e. the node of the device being probed). Fixes: aa09677cba42 ("net: bcmgenet: add MDIO routines") Cc: stable # 3.15 Cc: David S. Miller Reviewed-by: Florian Fainelli Signed-off-by: Johan Hovold Signed-off-by: Rob Herring Signed-off-by: Sasha Levin --- drivers/net/ethernet/broadcom/genet/bcmmii.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c index abbd2894f870a..c421e2753c8ce 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmmii.c +++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c @@ -360,7 +360,7 @@ static struct device_node *bcmgenet_mii_of_find_mdio(struct bcmgenet_priv *priv) if (!compat) return NULL; - priv->mdio_dn = of_find_compatible_node(dn, NULL, compat); + priv->mdio_dn = of_get_compatible_child(dn, compat); kfree(compat); if (!priv->mdio_dn) { dev_err(kdev, "unable to find MDIO bus node\n"); -- GitLab From 279eb9ce4cbc20ae82f73fd3b2d05184f8f40d53 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 27 Aug 2018 10:21:46 +0200 Subject: [PATCH 1756/2269] drm/mediatek: fix OF sibling-node lookup [ Upstream commit ceff2f4dcd44abf35864d9a99f85ac619e89a01d ] Use the new of_get_compatible_child() helper to lookup the sibling instead of using of_find_compatible_node(), which searches the entire tree from a given start node and thus can return an unrelated (i.e. non-sibling) node. This also addresses a potential use-after-free (e.g. after probe deferral) as the tree-wide helper drops a reference to its first argument (i.e. the parent device node). While at it, also fix the related cec-node reference leak. Fixes: 8f83f26891e1 ("drm/mediatek: Add HDMI support") Cc: stable # 4.8 Cc: Junzhi Zhao Cc: Philipp Zabel Cc: CK Hu Cc: David Airlie Signed-off-by: Johan Hovold Signed-off-by: Rob Herring Signed-off-by: Sasha Levin --- drivers/gpu/drm/mediatek/mtk_hdmi.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/mediatek/mtk_hdmi.c b/drivers/gpu/drm/mediatek/mtk_hdmi.c index 690c67507cbce..aba27ea9cea5c 100644 --- a/drivers/gpu/drm/mediatek/mtk_hdmi.c +++ b/drivers/gpu/drm/mediatek/mtk_hdmi.c @@ -1446,8 +1446,7 @@ static int mtk_hdmi_dt_parse_pdata(struct mtk_hdmi *hdmi, } /* The CEC module handles HDMI hotplug detection */ - cec_np = of_find_compatible_node(np->parent, NULL, - "mediatek,mt8173-cec"); + cec_np = of_get_compatible_child(np->parent, "mediatek,mt8173-cec"); if (!cec_np) { dev_err(dev, "Failed to find CEC node\n"); return -EINVAL; @@ -1457,8 +1456,10 @@ static int mtk_hdmi_dt_parse_pdata(struct mtk_hdmi *hdmi, if (!cec_pdev) { dev_err(hdmi->dev, "Waiting for CEC device %pOF\n", cec_np); + of_node_put(cec_np); return -EPROBE_DEFER; } + of_node_put(cec_np); hdmi->cec_dev = &cec_pdev->dev; /* -- GitLab From 5f2b2c591fc8fd84c70fd591aa13d903e32c2df7 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 27 Aug 2018 10:21:53 +0200 Subject: [PATCH 1757/2269] power: supply: twl4030-charger: fix OF sibling-node lookup [ Upstream commit 9844fb2e351311210e6660a9a1c62d17424a6145 ] Use the new of_get_compatible_child() helper to lookup the usb sibling node instead of using of_find_compatible_node(), which searches the entire tree from a given start node and thus can return an unrelated (non-sibling) node. This also addresses a potential use-after-free (e.g. after probe deferral) as the tree-wide helper drops a reference to its first argument (i.e. the parent device node). While at it, also fix the related phy-node reference leak. Fixes: f5e4edb8c888 ("power: twl4030_charger: find associated phy by more reliable means.") Cc: stable # 4.2 Cc: NeilBrown Cc: Felipe Balbi Cc: Sebastian Reichel Reviewed-by: Sebastian Reichel Signed-off-by: Johan Hovold Signed-off-by: Rob Herring Signed-off-by: Sasha Levin --- drivers/power/supply/twl4030_charger.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/power/supply/twl4030_charger.c b/drivers/power/supply/twl4030_charger.c index a5915f498eea3..0cc12bfe7b020 100644 --- a/drivers/power/supply/twl4030_charger.c +++ b/drivers/power/supply/twl4030_charger.c @@ -996,12 +996,13 @@ static int twl4030_bci_probe(struct platform_device *pdev) if (bci->dev->of_node) { struct device_node *phynode; - phynode = of_find_compatible_node(bci->dev->of_node->parent, - NULL, "ti,twl4030-usb"); + phynode = of_get_compatible_child(bci->dev->of_node->parent, + "ti,twl4030-usb"); if (phynode) { bci->usb_nb.notifier_call = twl4030_bci_usb_ncb; bci->transceiver = devm_usb_get_phy_by_node( bci->dev, phynode, &bci->usb_nb); + of_node_put(phynode); if (IS_ERR(bci->transceiver)) { ret = PTR_ERR(bci->transceiver); if (ret == -EPROBE_DEFER) -- GitLab From 38f94eca31d47db6429edb1ba6a560db1f34d298 Mon Sep 17 00:00:00 2001 From: Greg Hackmann Date: Tue, 27 Nov 2018 11:15:20 -0800 Subject: [PATCH 1758/2269] arm64: remove no-op -p linker flag (commit 1a381d4a0a9a0f999a13faaba22bf6b3fc80dcb9 upstream) Linking the ARM64 defconfig kernel with LLVM lld fails with the error: ld.lld: error: unknown argument: -p Makefile:1015: recipe for target 'vmlinux' failed Without this flag, the ARM64 defconfig kernel successfully links with lld and boots on Dragonboard 410c. After digging through binutils source and changelogs, it turns out that -p is only relevant to ancient binutils installations targeting 32-bit ARM. binutils accepts -p for AArch64 too, but it's always been undocumented and silently ignored. A comment in ld/emultempl/aarch64elf.em explains that it's "Only here for backwards compatibility". Since this flag is a no-op on ARM64, we can safely drop it. Acked-by: Will Deacon Reviewed-by: Nick Desaulniers Signed-off-by: Greg Hackmann Signed-off-by: Catalin Marinas Signed-off-by: Nick Desaulniers Signed-off-by: Sasha Levin --- arch/arm64/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 7318165cfc90b..48f2b3657507c 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -10,7 +10,7 @@ # # Copyright (C) 1995-2001 by Russell King -LDFLAGS_vmlinux :=-p --no-undefined -X +LDFLAGS_vmlinux :=--no-undefined -X CPPFLAGS_vmlinux.lds = -DTEXT_OFFSET=$(TEXT_OFFSET) GZFLAGS :=-9 -- GitLab From cd4f18da5ecaf08fbbb61e74434468b63519ecf3 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 23 May 2018 18:41:36 +0100 Subject: [PATCH 1759/2269] xhci: Allow more than 32 quirks commit 36b6857932f380fcb55c31ac75857e3e81dd583a upstream. We now have 32 different quirks, and the field that holds them is full. Let's bump it up to the next stage so that we can handle some more... The type is now an unsigned long long, which is 64bit on most architectures. We take this opportunity to change the quirks from using (1 << x) to BIT_ULL(x). Tested-by: Domenico Andreoli Signed-off-by: Marc Zyngier Tested-by: Faiz Abbas Tested-by: Domenico Andreoli Acked-by: Mathias Nyman Cc: "Cherian, George" Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci.c | 6 ++-- drivers/usb/host/xhci.h | 64 +++++++++++++++++++++-------------------- 2 files changed, 36 insertions(+), 34 deletions(-) diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 64ddba3f79a95..faf0486821942 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -43,8 +43,8 @@ static int link_quirk; module_param(link_quirk, int, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(link_quirk, "Don't clear the chain bit on a link TRB"); -static unsigned int quirks; -module_param(quirks, uint, S_IRUGO); +static unsigned long long quirks; +module_param(quirks, ullong, S_IRUGO); MODULE_PARM_DESC(quirks, "Bit flags for quirks to be enabled as default"); static bool td_on_ring(struct xhci_td *td, struct xhci_ring *ring) @@ -4956,7 +4956,7 @@ int xhci_gen_setup(struct usb_hcd *hcd, xhci_get_quirks_t get_quirks) return retval; xhci_dbg(xhci, "Called HCD init\n"); - xhci_info(xhci, "hcc params 0x%08x hci version 0x%x quirks 0x%08x\n", + xhci_info(xhci, "hcc params 0x%08x hci version 0x%x quirks 0x%016llx\n", xhci->hcc_params, xhci->hci_version, xhci->quirks); return 0; diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index 29dc6160c9ebe..ef7f171d6e97a 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -1794,12 +1794,12 @@ struct xhci_hcd { #define XHCI_STATE_DYING (1 << 0) #define XHCI_STATE_HALTED (1 << 1) #define XHCI_STATE_REMOVING (1 << 2) - unsigned int quirks; -#define XHCI_LINK_TRB_QUIRK (1 << 0) -#define XHCI_RESET_EP_QUIRK (1 << 1) -#define XHCI_NEC_HOST (1 << 2) -#define XHCI_AMD_PLL_FIX (1 << 3) -#define XHCI_SPURIOUS_SUCCESS (1 << 4) + unsigned long long quirks; +#define XHCI_LINK_TRB_QUIRK BIT_ULL(0) +#define XHCI_RESET_EP_QUIRK BIT_ULL(1) +#define XHCI_NEC_HOST BIT_ULL(2) +#define XHCI_AMD_PLL_FIX BIT_ULL(3) +#define XHCI_SPURIOUS_SUCCESS BIT_ULL(4) /* * Certain Intel host controllers have a limit to the number of endpoint * contexts they can handle. Ideally, they would signal that they can't handle @@ -1809,33 +1809,35 @@ struct xhci_hcd { * commands, reset device commands, disable slot commands, and address device * commands. */ -#define XHCI_EP_LIMIT_QUIRK (1 << 5) -#define XHCI_BROKEN_MSI (1 << 6) -#define XHCI_RESET_ON_RESUME (1 << 7) -#define XHCI_SW_BW_CHECKING (1 << 8) -#define XHCI_AMD_0x96_HOST (1 << 9) -#define XHCI_TRUST_TX_LENGTH (1 << 10) -#define XHCI_LPM_SUPPORT (1 << 11) -#define XHCI_INTEL_HOST (1 << 12) -#define XHCI_SPURIOUS_REBOOT (1 << 13) -#define XHCI_COMP_MODE_QUIRK (1 << 14) -#define XHCI_AVOID_BEI (1 << 15) -#define XHCI_PLAT (1 << 16) -#define XHCI_SLOW_SUSPEND (1 << 17) -#define XHCI_SPURIOUS_WAKEUP (1 << 18) +#define XHCI_EP_LIMIT_QUIRK BIT_ULL(5) +#define XHCI_BROKEN_MSI BIT_ULL(6) +#define XHCI_RESET_ON_RESUME BIT_ULL(7) +#define XHCI_SW_BW_CHECKING BIT_ULL(8) +#define XHCI_AMD_0x96_HOST BIT_ULL(9) +#define XHCI_TRUST_TX_LENGTH BIT_ULL(10) +#define XHCI_LPM_SUPPORT BIT_ULL(11) +#define XHCI_INTEL_HOST BIT_ULL(12) +#define XHCI_SPURIOUS_REBOOT BIT_ULL(13) +#define XHCI_COMP_MODE_QUIRK BIT_ULL(14) +#define XHCI_AVOID_BEI BIT_ULL(15) +#define XHCI_PLAT BIT_ULL(16) +#define XHCI_SLOW_SUSPEND BIT_ULL(17) +#define XHCI_SPURIOUS_WAKEUP BIT_ULL(18) /* For controllers with a broken beyond repair streams implementation */ -#define XHCI_BROKEN_STREAMS (1 << 19) -#define XHCI_PME_STUCK_QUIRK (1 << 20) -#define XHCI_MTK_HOST (1 << 21) -#define XHCI_SSIC_PORT_UNUSED (1 << 22) -#define XHCI_NO_64BIT_SUPPORT (1 << 23) -#define XHCI_MISSING_CAS (1 << 24) +#define XHCI_BROKEN_STREAMS BIT_ULL(19) +#define XHCI_PME_STUCK_QUIRK BIT_ULL(20) +#define XHCI_MTK_HOST BIT_ULL(21) +#define XHCI_SSIC_PORT_UNUSED BIT_ULL(22) +#define XHCI_NO_64BIT_SUPPORT BIT_ULL(23) +#define XHCI_MISSING_CAS BIT_ULL(24) /* For controller with a broken Port Disable implementation */ -#define XHCI_BROKEN_PORT_PED (1 << 25) -#define XHCI_LIMIT_ENDPOINT_INTERVAL_7 (1 << 26) -#define XHCI_U2_DISABLE_WAKE (1 << 27) -#define XHCI_ASMEDIA_MODIFY_FLOWCONTROL (1 << 28) -#define XHCI_SUSPEND_DELAY (1 << 30) +#define XHCI_BROKEN_PORT_PED BIT_ULL(25) +#define XHCI_LIMIT_ENDPOINT_INTERVAL_7 BIT_ULL(26) +#define XHCI_U2_DISABLE_WAKE BIT_ULL(27) +#define XHCI_ASMEDIA_MODIFY_FLOWCONTROL BIT_ULL(28) +#define XHCI_HW_LPM_DISABLE BIT_ULL(29) +#define XHCI_SUSPEND_DELAY BIT_ULL(30) +#define XHCI_INTEL_USB_ROLE_SW BIT_ULL(31) unsigned int num_active_eps; unsigned int limit_active_eps; -- GitLab From 649594a678c2aa9bea0693a22753034c35ab42c6 Mon Sep 17 00:00:00 2001 From: "Cherian, George" Date: Fri, 9 Nov 2018 17:21:22 +0200 Subject: [PATCH 1760/2269] xhci: Add quirk to workaround the errata seen on Cavium Thunder-X2 Soc commit 11644a7659529730eaf2f166efaabe7c3dc7af8c upstream. Implement workaround for ThunderX2 Errata-129 (documented in CN99XX Known Issues" available at Cavium support site). As per ThunderX2errata-129, USB 2 device may come up as USB 1 if a connection to a USB 1 device is followed by another connection to a USB 2 device, the link will come up as USB 1 for the USB 2 device. Resolution: Reset the PHY after the USB 1 device is disconnected. The PHY reset sequence is done using private registers in XHCI register space. After the PHY is reset we check for the PLL lock status and retry the operation if it fails. From our tests, retrying 4 times is sufficient. Add a new quirk flag XHCI_RESET_PLL_ON_DISCONNECT to invoke the workaround in handle_xhci_port_status(). Cc: stable@vger.kernel.org Signed-off-by: George Cherian Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-pci.c | 5 +++++ drivers/usb/host/xhci-ring.c | 35 ++++++++++++++++++++++++++++++++++- drivers/usb/host/xhci.h | 1 + 3 files changed, 40 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index 9218f506f8e33..4b07b6859b4c7 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -236,6 +236,11 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) if (pdev->vendor == PCI_VENDOR_ID_TI && pdev->device == 0x8241) xhci->quirks |= XHCI_LIMIT_ENDPOINT_INTERVAL_7; + if ((pdev->vendor == PCI_VENDOR_ID_BROADCOM || + pdev->vendor == PCI_VENDOR_ID_CAVIUM) && + pdev->device == 0x9026) + xhci->quirks |= XHCI_RESET_PLL_ON_DISCONNECT; + if (xhci->quirks & XHCI_RESET_ON_RESUME) xhci_dbg_trace(xhci, trace_xhci_dbg_quirks, "QUIRK: Resetting on resume"); diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 2d64cb024d002..998f61d826896 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -1568,6 +1568,35 @@ static void handle_device_notification(struct xhci_hcd *xhci, usb_wakeup_notification(udev->parent, udev->portnum); } +/* + * Quirk hanlder for errata seen on Cavium ThunderX2 processor XHCI + * Controller. + * As per ThunderX2errata-129 USB 2 device may come up as USB 1 + * If a connection to a USB 1 device is followed by another connection + * to a USB 2 device. + * + * Reset the PHY after the USB device is disconnected if device speed + * is less than HCD_USB3. + * Retry the reset sequence max of 4 times checking the PLL lock status. + * + */ +static void xhci_cavium_reset_phy_quirk(struct xhci_hcd *xhci) +{ + struct usb_hcd *hcd = xhci_to_hcd(xhci); + u32 pll_lock_check; + u32 retry_count = 4; + + do { + /* Assert PHY reset */ + writel(0x6F, hcd->regs + 0x1048); + udelay(10); + /* De-assert the PHY reset */ + writel(0x7F, hcd->regs + 0x1048); + udelay(200); + pll_lock_check = readl(hcd->regs + 0x1070); + } while (!(pll_lock_check & 0x1) && --retry_count); +} + static void handle_port_status(struct xhci_hcd *xhci, union xhci_trb *event) { @@ -1725,9 +1754,13 @@ static void handle_port_status(struct xhci_hcd *xhci, goto cleanup; } - if (hcd->speed < HCD_USB3) + if (hcd->speed < HCD_USB3) { xhci_test_and_clear_bit(xhci, port_array, faked_port_index, PORT_PLC); + if ((xhci->quirks & XHCI_RESET_PLL_ON_DISCONNECT) && + (portsc & PORT_CSC) && !(portsc & PORT_CONNECT)) + xhci_cavium_reset_phy_quirk(xhci); + } cleanup: /* Update event ring dequeue pointer before dropping the lock */ diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index ef7f171d6e97a..74ba20556020c 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -1838,6 +1838,7 @@ struct xhci_hcd { #define XHCI_HW_LPM_DISABLE BIT_ULL(29) #define XHCI_SUSPEND_DELAY BIT_ULL(30) #define XHCI_INTEL_USB_ROLE_SW BIT_ULL(31) +#define XHCI_RESET_PLL_ON_DISCONNECT BIT_ULL(34) unsigned int num_active_eps; unsigned int limit_active_eps; -- GitLab From 0aa6b111cc44f9aef0be1715e2d8ac8c2e9023cb Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 27 Aug 2018 10:21:49 +0200 Subject: [PATCH 1761/2269] mtd: rawnand: atmel: fix OF child-node lookup commit 5d1e9c2212ea6b4dd735e4fc3dd6279a365d5d10 upstream. Use the new of_get_compatible_child() helper to lookup the nfc child node instead of using of_find_compatible_node(), which searches the entire tree from a given start node and thus can return an unrelated (i.e. non-child) node. This also addresses a potential use-after-free (e.g. after probe deferral) as the tree-wide helper drops a reference to its first argument (i.e. the node of the device being probed). While at it, also fix a related nfc-node reference leak. Fixes: f88fc122cc34 ("mtd: nand: Cleanup/rework the atmel_nand driver") Cc: stable # 4.11 Cc: Nicolas Ferre Cc: Josh Wu Cc: Boris Brezillon Signed-off-by: Johan Hovold Signed-off-by: Boris Brezillon Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/nand/atmel/nand-controller.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/mtd/nand/atmel/nand-controller.c b/drivers/mtd/nand/atmel/nand-controller.c index 32a2f947a4541..0b93f152d9937 100644 --- a/drivers/mtd/nand/atmel/nand-controller.c +++ b/drivers/mtd/nand/atmel/nand-controller.c @@ -2077,8 +2077,7 @@ atmel_hsmc_nand_controller_legacy_init(struct atmel_hsmc_nand_controller *nc) int ret; nand_np = dev->of_node; - nfc_np = of_find_compatible_node(dev->of_node, NULL, - "atmel,sama5d3-nfc"); + nfc_np = of_get_compatible_child(dev->of_node, "atmel,sama5d3-nfc"); if (!nfc_np) { dev_err(dev, "Could not find device node for sama5d3-nfc\n"); return -ENODEV; @@ -2492,15 +2491,19 @@ static int atmel_nand_controller_probe(struct platform_device *pdev) } if (caps->legacy_of_bindings) { + struct device_node *nfc_node; u32 ale_offs = 21; /* * If we are parsing legacy DT props and the DT contains a * valid NFC node, forward the request to the sama5 logic. */ - if (of_find_compatible_node(pdev->dev.of_node, NULL, - "atmel,sama5d3-nfc")) + nfc_node = of_get_compatible_child(pdev->dev.of_node, + "atmel,sama5d3-nfc"); + if (nfc_node) { caps = &atmel_sama5_nand_caps; + of_node_put(nfc_node); + } /* * Even if the compatible says we are dealing with an -- GitLab From da141471e77ba27b9c50f858ff866b4958d9e04c Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Mon, 28 May 2018 22:04:33 +0200 Subject: [PATCH 1762/2269] ubi: fastmap: Check each mapping only once commit 34653fd8c46e771585fce5975e4243f8fd401914 upstream. Maintain a bitmap to keep track of which LEB->PEB mapping was checked already. That way we have to read back VID headers only once. Fixes: a23cf10d9abb ("ubi: fastmap: Correctly handle interrupted erasures in EBA") Signed-off-by: Richard Weinberger Signed-off-by: Martin Kepplinger --- drivers/mtd/ubi/build.c | 1 + drivers/mtd/ubi/eba.c | 4 ++++ drivers/mtd/ubi/fastmap.c | 20 ++++++++++++++++++++ drivers/mtd/ubi/ubi.h | 11 +++++++++++ drivers/mtd/ubi/vmt.c | 1 + drivers/mtd/ubi/vtbl.c | 16 +++++++++++++++- 6 files changed, 52 insertions(+), 1 deletion(-) diff --git a/drivers/mtd/ubi/build.c b/drivers/mtd/ubi/build.c index 18a72da759a0f..6445c693d9359 100644 --- a/drivers/mtd/ubi/build.c +++ b/drivers/mtd/ubi/build.c @@ -526,6 +526,7 @@ void ubi_free_internal_volumes(struct ubi_device *ubi) for (i = ubi->vtbl_slots; i < ubi->vtbl_slots + UBI_INT_VOL_COUNT; i++) { ubi_eba_replace_table(ubi->volumes[i], NULL); + ubi_fastmap_destroy_checkmap(ubi->volumes[i]); kfree(ubi->volumes[i]); } } diff --git a/drivers/mtd/ubi/eba.c b/drivers/mtd/ubi/eba.c index d0884bd9d9553..c4d4b8f076302 100644 --- a/drivers/mtd/ubi/eba.c +++ b/drivers/mtd/ubi/eba.c @@ -517,6 +517,9 @@ static int check_mapping(struct ubi_device *ubi, struct ubi_volume *vol, int lnu if (!ubi->fast_attach) return 0; + if (!vol->checkmap || test_bit(lnum, vol->checkmap)) + return 0; + vidb = ubi_alloc_vid_buf(ubi, GFP_NOFS); if (!vidb) return -ENOMEM; @@ -551,6 +554,7 @@ static int check_mapping(struct ubi_device *ubi, struct ubi_volume *vol, int lnu goto out_free; } + set_bit(lnum, vol->checkmap); err = 0; out_free: diff --git a/drivers/mtd/ubi/fastmap.c b/drivers/mtd/ubi/fastmap.c index 5a832bc79b1be..63e8527f7b65d 100644 --- a/drivers/mtd/ubi/fastmap.c +++ b/drivers/mtd/ubi/fastmap.c @@ -1101,6 +1101,26 @@ free_fm_sb: goto out; } +int ubi_fastmap_init_checkmap(struct ubi_volume *vol, int leb_count) +{ + struct ubi_device *ubi = vol->ubi; + + if (!ubi->fast_attach) + return 0; + + vol->checkmap = kcalloc(BITS_TO_LONGS(leb_count), sizeof(unsigned long), + GFP_KERNEL); + if (!vol->checkmap) + return -ENOMEM; + + return 0; +} + +void ubi_fastmap_destroy_checkmap(struct ubi_volume *vol) +{ + kfree(vol->checkmap); +} + /** * ubi_write_fastmap - writes a fastmap. * @ubi: UBI device object diff --git a/drivers/mtd/ubi/ubi.h b/drivers/mtd/ubi/ubi.h index 5fe62653995ea..f5ba97c46160a 100644 --- a/drivers/mtd/ubi/ubi.h +++ b/drivers/mtd/ubi/ubi.h @@ -334,6 +334,9 @@ struct ubi_eba_leb_desc { * @changing_leb: %1 if the atomic LEB change ioctl command is in progress * @direct_writes: %1 if direct writes are enabled for this volume * + * @checkmap: bitmap to remember which PEB->LEB mappings got checked, + * protected by UBI LEB lock tree. + * * The @corrupted field indicates that the volume's contents is corrupted. * Since UBI protects only static volumes, this field is not relevant to * dynamic volumes - it is user's responsibility to assure their data @@ -377,6 +380,10 @@ struct ubi_volume { unsigned int updating:1; unsigned int changing_leb:1; unsigned int direct_writes:1; + +#ifdef CONFIG_MTD_UBI_FASTMAP + unsigned long *checkmap; +#endif }; /** @@ -965,8 +972,12 @@ size_t ubi_calc_fm_size(struct ubi_device *ubi); int ubi_update_fastmap(struct ubi_device *ubi); int ubi_scan_fastmap(struct ubi_device *ubi, struct ubi_attach_info *ai, struct ubi_attach_info *scan_ai); +int ubi_fastmap_init_checkmap(struct ubi_volume *vol, int leb_count); +void ubi_fastmap_destroy_checkmap(struct ubi_volume *vol); #else static inline int ubi_update_fastmap(struct ubi_device *ubi) { return 0; } +int static inline ubi_fastmap_init_checkmap(struct ubi_volume *vol, int leb_count) { return 0; } +static inline void ubi_fastmap_destroy_checkmap(struct ubi_volume *vol) {} #endif /* block.c */ diff --git a/drivers/mtd/ubi/vmt.c b/drivers/mtd/ubi/vmt.c index 3fd8d7ff7a02f..0be516780e929 100644 --- a/drivers/mtd/ubi/vmt.c +++ b/drivers/mtd/ubi/vmt.c @@ -139,6 +139,7 @@ static void vol_release(struct device *dev) struct ubi_volume *vol = container_of(dev, struct ubi_volume, dev); ubi_eba_replace_table(vol, NULL); + ubi_fastmap_destroy_checkmap(vol); kfree(vol); } diff --git a/drivers/mtd/ubi/vtbl.c b/drivers/mtd/ubi/vtbl.c index 263743e7b7413..94d7a865b135a 100644 --- a/drivers/mtd/ubi/vtbl.c +++ b/drivers/mtd/ubi/vtbl.c @@ -534,7 +534,7 @@ static int init_volumes(struct ubi_device *ubi, const struct ubi_attach_info *ai, const struct ubi_vtbl_record *vtbl) { - int i, reserved_pebs = 0; + int i, err, reserved_pebs = 0; struct ubi_ainf_volume *av; struct ubi_volume *vol; @@ -620,6 +620,16 @@ static int init_volumes(struct ubi_device *ubi, (long long)(vol->used_ebs - 1) * vol->usable_leb_size; vol->used_bytes += av->last_data_size; vol->last_eb_bytes = av->last_data_size; + + /* + * We use ubi->peb_count and not vol->reserved_pebs because + * we want to keep the code simple. Otherwise we'd have to + * resize/check the bitmap upon volume resize too. + * Allocating a few bytes more does not hurt. + */ + err = ubi_fastmap_init_checkmap(vol, ubi->peb_count); + if (err) + return err; } /* And add the layout volume */ @@ -645,6 +655,9 @@ static int init_volumes(struct ubi_device *ubi, reserved_pebs += vol->reserved_pebs; ubi->vol_count += 1; vol->ubi = ubi; + err = ubi_fastmap_init_checkmap(vol, UBI_LAYOUT_VOLUME_EBS); + if (err) + return err; if (reserved_pebs > ubi->avail_pebs) { ubi_err(ubi, "not enough PEBs, required %d, available %d", @@ -849,6 +862,7 @@ int ubi_read_volume_table(struct ubi_device *ubi, struct ubi_attach_info *ai) out_free: vfree(ubi->vtbl); for (i = 0; i < ubi->vtbl_slots + UBI_INT_VOL_COUNT; i++) { + ubi_fastmap_destroy_checkmap(ubi->volumes[i]); kfree(ubi->volumes[i]); ubi->volumes[i] = NULL; } -- GitLab From f299ecb085117412b313094d075e573994c704a0 Mon Sep 17 00:00:00 2001 From: Francis Therien Date: Mon, 26 Mar 2018 15:59:00 -0700 Subject: [PATCH 1763/2269] Input: xpad - add PDP device id 0x02a4 [ Upstream commit c6c848572f4da0e34ffe0a35364b4db871e13e42 ] Adds support for a PDP Xbox One controller with device ID (0x06ef:0x02a4). The Product string for this device is "PDP Wired Controller for Xbox One - Stealth Series | Phantom Black". Signed-off-by: Francis Therien Signed-off-by: Dmitry Torokhov Signed-off-by: Sasha Levin --- drivers/input/joystick/xpad.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index 53f775c41cd1b..d2a1857fdae51 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -231,6 +231,7 @@ static const struct xpad_device { { 0x0e6f, 0x021f, "Rock Candy Gamepad for Xbox 360", 0, XTYPE_XBOX360 }, { 0x0e6f, 0x0246, "Rock Candy Gamepad for Xbox One 2015", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x02ab, "PDP Controller for Xbox One", 0, XTYPE_XBOXONE }, + { 0x0e6f, 0x02a4, "PDP Wired Controller for Xbox One - Stealth Series", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x0301, "Logic3 Controller", 0, XTYPE_XBOX360 }, { 0x0e6f, 0x0346, "Rock Candy Gamepad for Xbox One 2016", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x0401, "Logic3 Controller", 0, XTYPE_XBOX360 }, @@ -480,7 +481,8 @@ static const u8 xboxone_hori_init[] = { /* * This packet is required for some of the PDP pads to start - * sending input reports. One of those pads is (0x0e6f:0x02ab). + * sending input reports. These pads include: (0x0e6f:0x02ab), + * (0x0e6f:0x02a4). */ static const u8 xboxone_pdp_init1[] = { 0x0a, 0x20, 0x00, 0x03, 0x00, 0x01, 0x14 @@ -488,7 +490,8 @@ static const u8 xboxone_pdp_init1[] = { /* * This packet is required for some of the PDP pads to start - * sending input reports. One of those pads is (0x0e6f:0x02ab). + * sending input reports. These pads include: (0x0e6f:0x02ab), + * (0x0e6f:0x02a4). */ static const u8 xboxone_pdp_init2[] = { 0x06, 0x20, 0x00, 0x02, 0x01, 0x00 @@ -526,6 +529,8 @@ static const struct xboxone_init_packet xboxone_init_packets[] = { XBOXONE_INIT_PKT(0x0000, 0x0000, xboxone_fw2015_init), XBOXONE_INIT_PKT(0x0e6f, 0x02ab, xboxone_pdp_init1), XBOXONE_INIT_PKT(0x0e6f, 0x02ab, xboxone_pdp_init2), + XBOXONE_INIT_PKT(0x0e6f, 0x02a4, xboxone_pdp_init1), + XBOXONE_INIT_PKT(0x0e6f, 0x02a4, xboxone_pdp_init2), XBOXONE_INIT_PKT(0x24c6, 0x541a, xboxone_rumblebegin_init), XBOXONE_INIT_PKT(0x24c6, 0x542a, xboxone_rumblebegin_init), XBOXONE_INIT_PKT(0x24c6, 0x543a, xboxone_rumblebegin_init), -- GitLab From 5d3bf6348692bb664da58a07510cd21c1439394e Mon Sep 17 00:00:00 2001 From: Leo Sperling Date: Wed, 3 Aug 2016 17:31:09 -0700 Subject: [PATCH 1764/2269] Input: xpad - fix some coding style issues [ Upstream commit 68c78d0155e37992268664e134996d2b140ddf38 ] Fix some coding style issues reported by checkpatch.pl. Mostly brackets in macros, spacing and comment style. Signed-off-by: Leo Sperling Signed-off-by: Dmitry Torokhov Signed-off-by: Sasha Levin --- drivers/input/joystick/xpad.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index d2a1857fdae51..7e812a8877bce 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -89,8 +89,10 @@ #define XPAD_PKT_LEN 64 -/* xbox d-pads should map to buttons, as is required for DDR pads - but we map them to axes when possible to simplify things */ +/* + * xbox d-pads should map to buttons, as is required for DDR pads + * but we map them to axes when possible to simplify things + */ #define MAP_DPAD_TO_BUTTONS (1 << 0) #define MAP_TRIGGERS_TO_BUTTONS (1 << 1) #define MAP_STICKS_TO_NULL (1 << 2) @@ -391,15 +393,15 @@ static const signed short xpad_abs_triggers[] = { * match against vendor id as well. Wired Xbox 360 devices have protocol 1, * wireless controllers have protocol 129. */ -#define XPAD_XBOX360_VENDOR_PROTOCOL(vend,pr) \ +#define XPAD_XBOX360_VENDOR_PROTOCOL(vend, pr) \ .match_flags = USB_DEVICE_ID_MATCH_VENDOR | USB_DEVICE_ID_MATCH_INT_INFO, \ .idVendor = (vend), \ .bInterfaceClass = USB_CLASS_VENDOR_SPEC, \ .bInterfaceSubClass = 93, \ .bInterfaceProtocol = (pr) #define XPAD_XBOX360_VENDOR(vend) \ - { XPAD_XBOX360_VENDOR_PROTOCOL(vend,1) }, \ - { XPAD_XBOX360_VENDOR_PROTOCOL(vend,129) } + { XPAD_XBOX360_VENDOR_PROTOCOL((vend), 1) }, \ + { XPAD_XBOX360_VENDOR_PROTOCOL((vend), 129) } /* The Xbox One controller uses subclass 71 and protocol 208. */ #define XPAD_XBOXONE_VENDOR_PROTOCOL(vend, pr) \ @@ -409,7 +411,7 @@ static const signed short xpad_abs_triggers[] = { .bInterfaceSubClass = 71, \ .bInterfaceProtocol = (pr) #define XPAD_XBOXONE_VENDOR(vend) \ - { XPAD_XBOXONE_VENDOR_PROTOCOL(vend, 208) } + { XPAD_XBOXONE_VENDOR_PROTOCOL((vend), 208) } static const struct usb_device_id xpad_table[] = { { USB_INTERFACE_INFO('X', 'B', 0) }, /* X-Box USB-IF not approved class */ @@ -1578,6 +1580,7 @@ static void xpad_close(struct input_dev *dev) static void xpad_set_up_abs(struct input_dev *input_dev, signed short abs) { struct usb_xpad *xpad = input_get_drvdata(input_dev); + set_bit(abs, input_dev->absbit); switch (abs) { -- GitLab From 8d6a81009de5a184da9da76de27961d4abd62092 Mon Sep 17 00:00:00 2001 From: Marcus Folkesson Date: Tue, 8 May 2018 15:17:07 -0700 Subject: [PATCH 1765/2269] Input: xpad - avoid using __set_bit() for capabilities [ Upstream commit a01308031c2647ed5f1c845104b73a8820a958a9 ] input_set_capability() and input_set_abs_param() will do it for you. Signed-off-by: Marcus Folkesson Signed-off-by: Dmitry Torokhov Signed-off-by: Sasha Levin --- drivers/input/joystick/xpad.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index 7e812a8877bce..69b44aebaf011 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -1581,8 +1581,6 @@ static void xpad_set_up_abs(struct input_dev *input_dev, signed short abs) { struct usb_xpad *xpad = input_get_drvdata(input_dev); - set_bit(abs, input_dev->absbit); - switch (abs) { case ABS_X: case ABS_Y: @@ -1601,6 +1599,9 @@ static void xpad_set_up_abs(struct input_dev *input_dev, signed short abs) case ABS_HAT0Y: /* the d-pad (only if dpad is mapped to axes */ input_set_abs_params(input_dev, abs, -1, 1, 0, 0); break; + default: + input_set_abs_params(input_dev, abs, 0, 0, 0, 0); + break; } } @@ -1641,10 +1642,7 @@ static int xpad_init_input(struct usb_xpad *xpad) input_dev->close = xpad_close; } - __set_bit(EV_KEY, input_dev->evbit); - if (!(xpad->mapping & MAP_STICKS_TO_NULL)) { - __set_bit(EV_ABS, input_dev->evbit); /* set up axes */ for (i = 0; xpad_abs[i] >= 0; i++) xpad_set_up_abs(input_dev, xpad_abs[i]); @@ -1652,21 +1650,22 @@ static int xpad_init_input(struct usb_xpad *xpad) /* set up standard buttons */ for (i = 0; xpad_common_btn[i] >= 0; i++) - __set_bit(xpad_common_btn[i], input_dev->keybit); + input_set_capability(input_dev, EV_KEY, xpad_common_btn[i]); /* set up model-specific ones */ if (xpad->xtype == XTYPE_XBOX360 || xpad->xtype == XTYPE_XBOX360W || xpad->xtype == XTYPE_XBOXONE) { for (i = 0; xpad360_btn[i] >= 0; i++) - __set_bit(xpad360_btn[i], input_dev->keybit); + input_set_capability(input_dev, EV_KEY, xpad360_btn[i]); } else { for (i = 0; xpad_btn[i] >= 0; i++) - __set_bit(xpad_btn[i], input_dev->keybit); + input_set_capability(input_dev, EV_KEY, xpad_btn[i]); } if (xpad->mapping & MAP_DPAD_TO_BUTTONS) { for (i = 0; xpad_btn_pad[i] >= 0; i++) - __set_bit(xpad_btn_pad[i], input_dev->keybit); + input_set_capability(input_dev, EV_KEY, + xpad_btn_pad[i]); } /* @@ -1683,7 +1682,8 @@ static int xpad_init_input(struct usb_xpad *xpad) if (xpad->mapping & MAP_TRIGGERS_TO_BUTTONS) { for (i = 0; xpad_btn_triggers[i] >= 0; i++) - __set_bit(xpad_btn_triggers[i], input_dev->keybit); + input_set_capability(input_dev, EV_KEY, + xpad_btn_triggers[i]); } else { for (i = 0; xpad_abs_triggers[i] >= 0; i++) xpad_set_up_abs(input_dev, xpad_abs_triggers[i]); -- GitLab From eb4b4647003688e3b3a4e20fef6ca33a029a1e58 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ramses=20Ram=C3=ADrez?= Date: Fri, 28 Sep 2018 16:59:26 -0700 Subject: [PATCH 1766/2269] Input: xpad - add support for Xbox1 PDP Camo series gamepad MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 9735082a7cbae572c2eabdc45acecc8c9fa0759b ] The "Xbox One PDP Wired Controller - Camo series" has a different product-id than the regular PDP controller and the PDP stealth series, but it uses the same initialization sequence. This patch adds the product-id of the camo series to the structures that handle the other PDP Xbox One controllers. Signed-off-by: Ramses Ramírez Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov Signed-off-by: Sasha Levin --- drivers/input/joystick/xpad.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index 69b44aebaf011..2e52015634f98 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -234,6 +234,7 @@ static const struct xpad_device { { 0x0e6f, 0x0246, "Rock Candy Gamepad for Xbox One 2015", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x02ab, "PDP Controller for Xbox One", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x02a4, "PDP Wired Controller for Xbox One - Stealth Series", 0, XTYPE_XBOXONE }, + { 0x0e6f, 0x02a6, "PDP Wired Controller for Xbox One - Camo Series", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x0301, "Logic3 Controller", 0, XTYPE_XBOX360 }, { 0x0e6f, 0x0346, "Rock Candy Gamepad for Xbox One 2016", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x0401, "Logic3 Controller", 0, XTYPE_XBOX360 }, @@ -533,6 +534,8 @@ static const struct xboxone_init_packet xboxone_init_packets[] = { XBOXONE_INIT_PKT(0x0e6f, 0x02ab, xboxone_pdp_init2), XBOXONE_INIT_PKT(0x0e6f, 0x02a4, xboxone_pdp_init1), XBOXONE_INIT_PKT(0x0e6f, 0x02a4, xboxone_pdp_init2), + XBOXONE_INIT_PKT(0x0e6f, 0x02a6, xboxone_pdp_init1), + XBOXONE_INIT_PKT(0x0e6f, 0x02a6, xboxone_pdp_init2), XBOXONE_INIT_PKT(0x24c6, 0x541a, xboxone_rumblebegin_init), XBOXONE_INIT_PKT(0x24c6, 0x542a, xboxone_rumblebegin_init), XBOXONE_INIT_PKT(0x24c6, 0x543a, xboxone_rumblebegin_init), -- GitLab From 1fe2316fc23aa6a23721bf97a4cdd8a88e8213f4 Mon Sep 17 00:00:00 2001 From: Matt Chen Date: Fri, 3 Aug 2018 14:29:20 +0800 Subject: [PATCH 1767/2269] iwlwifi: fix wrong WGDS_WIFI_DATA_SIZE commit 66e839030fd698586734e017fd55c4f2a89dba0b upstream. From coreboot/BIOS: Name ("WGDS", Package() { Revision, Package() { DomainType, // 0x7:WiFi ==> We miss this one. WgdsWiFiSarDeltaGroup1PowerMax1, // Group 1 FCC 2400 Max WgdsWiFiSarDeltaGroup1PowerChainA1, // Group 1 FCC 2400 A Offset WgdsWiFiSarDeltaGroup1PowerChainB1, // Group 1 FCC 2400 B Offset WgdsWiFiSarDeltaGroup1PowerMax2, // Group 1 FCC 5200 Max WgdsWiFiSarDeltaGroup1PowerChainA2, // Group 1 FCC 5200 A Offset WgdsWiFiSarDeltaGroup1PowerChainB2, // Group 1 FCC 5200 B Offset WgdsWiFiSarDeltaGroup2PowerMax1, // Group 2 EC Jap 2400 Max WgdsWiFiSarDeltaGroup2PowerChainA1, // Group 2 EC Jap 2400 A Offset WgdsWiFiSarDeltaGroup2PowerChainB1, // Group 2 EC Jap 2400 B Offset WgdsWiFiSarDeltaGroup2PowerMax2, // Group 2 EC Jap 5200 Max WgdsWiFiSarDeltaGroup2PowerChainA2, // Group 2 EC Jap 5200 A Offset WgdsWiFiSarDeltaGroup2PowerChainB2, // Group 2 EC Jap 5200 B Offset WgdsWiFiSarDeltaGroup3PowerMax1, // Group 3 ROW 2400 Max WgdsWiFiSarDeltaGroup3PowerChainA1, // Group 3 ROW 2400 A Offset WgdsWiFiSarDeltaGroup3PowerChainB1, // Group 3 ROW 2400 B Offset WgdsWiFiSarDeltaGroup3PowerMax2, // Group 3 ROW 5200 Max WgdsWiFiSarDeltaGroup3PowerChainA2, // Group 3 ROW 5200 A Offset WgdsWiFiSarDeltaGroup3PowerChainB2, // Group 3 ROW 5200 B Offset } }) When read the ACPI data to find out the WGDS, the DATA_SIZE is never matched. From the above format, it gives 19 numbers, but our driver is hardcode as 18. Fix it to pass then can parse the data into our wgds table. Then we will see: iwlwifi 0000:01:00.0: U iwl_mvm_sar_geo_init Sending GEO_TX_POWER_LIMIT iwlwifi 0000:01:00.0: U iwl_mvm_sar_geo_init SAR geographic profile[0] Band[0]: chain A = 68 chain B = 69 max_tx_power = 54 iwlwifi 0000:01:00.0: U iwl_mvm_sar_geo_init SAR geographic profile[0] Band[1]: chain A = 48 chain B = 49 max_tx_power = 70 iwlwifi 0000:01:00.0: U iwl_mvm_sar_geo_init SAR geographic profile[1] Band[0]: chain A = 51 chain B = 67 max_tx_power = 50 iwlwifi 0000:01:00.0: U iwl_mvm_sar_geo_init SAR geographic profile[1] Band[1]: chain A = 69 chain B = 70 max_tx_power = 68 iwlwifi 0000:01:00.0: U iwl_mvm_sar_geo_init SAR geographic profile[2] Band[0]: chain A = 49 chain B = 50 max_tx_power = 48 iwlwifi 0000:01:00.0: U iwl_mvm_sar_geo_init SAR geographic profile[2] Band[1]: chain A = 52 chain B = 53 max_tx_power = 51 Cc: stable@vger.kernel.org # 4.12+ Fixes: a6bff3cb19b7 ("iwlwifi: mvm: add GEO_TX_POWER_LIMIT cmd for geographic tx power table") Signed-off-by: Matt Chen Signed-off-by: Luca Coelho Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/intel/iwlwifi/mvm/fw.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c index 11319675f2d9e..cebf0ce76d27e 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c @@ -590,7 +590,7 @@ static int iwl_mvm_config_ltr(struct iwl_mvm *mvm) #define ACPI_WRDS_WIFI_DATA_SIZE (IWL_MVM_SAR_TABLE_SIZE + 2) #define ACPI_EWRD_WIFI_DATA_SIZE ((IWL_MVM_SAR_PROFILE_NUM - 1) * \ IWL_MVM_SAR_TABLE_SIZE + 3) -#define ACPI_WGDS_WIFI_DATA_SIZE 18 +#define ACPI_WGDS_WIFI_DATA_SIZE 19 #define ACPI_WGDS_NUM_BANDS 2 #define ACPI_WGDS_TABLE_SIZE 3 @@ -964,7 +964,7 @@ static int iwl_mvm_sar_geo_init(struct iwl_mvm *mvm) IWL_DEBUG_RADIO(mvm, "Sending GEO_TX_POWER_LIMIT\n"); BUILD_BUG_ON(IWL_NUM_GEO_PROFILES * ACPI_WGDS_NUM_BANDS * - ACPI_WGDS_TABLE_SIZE != ACPI_WGDS_WIFI_DATA_SIZE); + ACPI_WGDS_TABLE_SIZE + 1 != ACPI_WGDS_WIFI_DATA_SIZE); for (i = 0; i < IWL_NUM_GEO_PROFILES; i++) { struct iwl_per_chain_offset *chain = -- GitLab From 9d844b0e66929e3c3099fe428fe06543bc18b99a Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Mon, 17 Sep 2018 19:31:57 -0700 Subject: [PATCH 1768/2269] kbuild: allow to use GCC toolchain not in Clang search path commit ef8c4ed9db80261f397f0c0bf723684601ae3b52 upstream. When using a GCC cross toolchain which is not in a compiled in Clang search path, Clang reverts to the system assembler and linker. This leads to assembler or linker errors, depending on which tool is first used for a given architecture. It seems that Clang is not searching $PATH for a matching assembler or linker. Make sure that Clang picks up the correct assembler or linker by passing the cross compilers bin directory as search path. This allows to use Clang provided by distributions with GCC toolchains not in /usr/bin. Link: https://github.com/ClangBuiltLinux/linux/issues/78 Signed-off-by: Stefan Agner Reviewed-and-tested-by: Nick Desaulniers Signed-off-by: Masahiro Yamada [nc: Adjust context] Signed-off-by: Nathan Chancellor Signed-off-by: Greg Kroah-Hartman --- Makefile | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index 874d72a3e6a71..cb131e135c425 100644 --- a/Makefile +++ b/Makefile @@ -480,13 +480,15 @@ endif ifeq ($(cc-name),clang) ifneq ($(CROSS_COMPILE),) CLANG_TARGET := --target=$(notdir $(CROSS_COMPILE:%-=%)) -GCC_TOOLCHAIN := $(realpath $(dir $(shell which $(LD)))/..) +GCC_TOOLCHAIN_DIR := $(dir $(shell which $(LD))) +CLANG_PREFIX := --prefix=$(GCC_TOOLCHAIN_DIR) +GCC_TOOLCHAIN := $(realpath $(GCC_TOOLCHAIN_DIR)/..) endif ifneq ($(GCC_TOOLCHAIN),) CLANG_GCC_TC := --gcc-toolchain=$(GCC_TOOLCHAIN) endif -KBUILD_CFLAGS += $(CLANG_TARGET) $(CLANG_GCC_TC) -KBUILD_AFLAGS += $(CLANG_TARGET) $(CLANG_GCC_TC) +KBUILD_CFLAGS += $(CLANG_TARGET) $(CLANG_GCC_TC) $(CLANG_PREFIX) +KBUILD_AFLAGS += $(CLANG_TARGET) $(CLANG_GCC_TC) $(CLANG_PREFIX) KBUILD_CFLAGS += $(call cc-option, -no-integrated-as) KBUILD_AFLAGS += $(call cc-option, -no-integrated-as) endif -- GitLab From 7aaa554d09606185650cd1aa422887736b9e1056 Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Thu, 14 Dec 2017 14:01:45 +0100 Subject: [PATCH 1769/2269] PCI: endpoint: Populate func_no before calling pci_epc_add_epf() commit 0c47cd7a9b6c9c36c08113e594e9ad017fb17865 upstream. func_no is a member of struct pci_epf. Since struct pci_epf is used as an argument to pci_epc_add_epf() (to bind an endpoint function to a controller), struct pci_epf.func_no should be populated before calling pci_epc_add_epf(). Initialize the struct pci_epf.func_no member before calling pci_epc_add_epf(), to fix the endpoint function binding to an endpoint controller. Fixes: d74679911610 ("PCI: endpoint: Introduce configfs entry for configuring EP functions") Signed-off-by: Niklas Cassel [lorenzo.pieralisi@arm.com: rewrote the commit log] Signed-off-by: Lorenzo Pieralisi Suggested-by: Kishon Vijay Abraham I Acked-by: Kishon Vijay Abraham I Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- drivers/pci/endpoint/pci-ep-cfs.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/pci/endpoint/pci-ep-cfs.c b/drivers/pci/endpoint/pci-ep-cfs.c index 16cec66b1d0bf..8fdb9d07c50a9 100644 --- a/drivers/pci/endpoint/pci-ep-cfs.c +++ b/drivers/pci/endpoint/pci-ep-cfs.c @@ -97,16 +97,10 @@ static int pci_epc_epf_link(struct config_item *epc_item, { int ret; u32 func_no = 0; - struct pci_epc *epc; - struct pci_epf *epf; struct pci_epf_group *epf_group = to_pci_epf_group(epf_item); struct pci_epc_group *epc_group = to_pci_epc_group(epc_item); - - epc = epc_group->epc; - epf = epf_group->epf; - ret = pci_epc_add_epf(epc, epf); - if (ret) - goto err_add_epf; + struct pci_epc *epc = epc_group->epc; + struct pci_epf *epf = epf_group->epf; func_no = find_first_zero_bit(&epc_group->function_num_map, BITS_PER_LONG); @@ -116,6 +110,10 @@ static int pci_epc_epf_link(struct config_item *epc_item, set_bit(func_no, &epc_group->function_num_map); epf->func_no = func_no; + ret = pci_epc_add_epf(epc, epf); + if (ret) + goto err_add_epf; + ret = pci_epf_bind(epf); if (ret) goto err_epf_bind; -- GitLab From 9d9cd2bcfc5298109bd90c52f239011337147812 Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Wed, 13 Dec 2017 18:12:10 +0200 Subject: [PATCH 1770/2269] net/mlx4_core: Fix wrong calculation of free counters commit 0bb9fc4f5429ac970181c073aa32e521e20f7b73 upstream. The field res_free indicates the total number of counters which are available for allocation (reserved and unreserved). Fixed a bug where the reserved counters were subtracted from res_free before any allocation was performed. Before this fix, free counters which were not reserved could not be allocated. Fixes: 9de92c60beaa ("net/mlx4_core: Adjust counter grant policy in the resource tracker") Signed-off-by: Eran Ben Elisha Reviewed-by: Jack Morgenstein Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx4/resource_tracker.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index b26da0952a4dd..a5381b0917104 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -611,7 +611,6 @@ int mlx4_init_resource_tracker(struct mlx4_dev *dev) MLX4_MAX_PORTS; else res_alloc->guaranteed[t] = 0; - res_alloc->res_free -= res_alloc->guaranteed[t]; break; default: break; -- GitLab From 31a673709a853cf28ca4a1bb88e15bf4c0bf099f Mon Sep 17 00:00:00 2001 From: Mustafa Ismail Date: Mon, 7 May 2018 12:52:17 -0500 Subject: [PATCH 1771/2269] i40iw: Fix memory leak in error path of create QP commit 5a7189d529cd146cd5838af97b32fcac4122b471 upstream. If i40iw_allocate_dma_mem fails when creating a QP, the memory allocated for the QP structure using kzalloc is not freed because iwqp->allocated_buffer is used to free the memory and it is not setup until later. Fix this by setting iwqp->allocated_buffer before allocating the dma memory. Fixes: d37498417947 ("i40iw: add files for iwarp interface") Signed-off-by: Mustafa Ismail Signed-off-by: Shiraz Saleem Signed-off-by: Doug Ledford Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/i40iw/i40iw_verbs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index 39398dd074d66..c1021b4afb41f 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -631,6 +631,7 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd, return ERR_PTR(-ENOMEM); iwqp = (struct i40iw_qp *)mem; + iwqp->allocated_buffer = mem; qp = &iwqp->sc_qp; qp->back_qp = (void *)iwqp; qp->push_idx = I40IW_INVALID_PUSH_PAGE_INDEX; @@ -659,7 +660,6 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd, goto error; } - iwqp->allocated_buffer = mem; iwqp->iwdev = iwdev; iwqp->iwpd = iwpd; iwqp->ibqp.qp_num = qp_num; -- GitLab From 7eebc934f302922de7c7399db4752956c19f7147 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Fri, 13 Oct 2017 00:06:44 +0200 Subject: [PATCH 1772/2269] rtc: omap: fix error path when pinctrl_register fails commit 26e480f7bb7840fc0daa9c3af7c4501b2cf5902f upstream. If pinctrl_register() fails probe will return with an error without locking the RTC and disabling pm_runtime. Set ret and jump to err instead. Fixes: 97ea1906b3c2 ("rtc: omap: Support ext_wakeup configuration") Signed-off-by: Alexandre Belloni Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- drivers/rtc/rtc-omap.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/rtc/rtc-omap.c b/drivers/rtc/rtc-omap.c index ac6e6a6a194c7..ae6506a8b4f5c 100644 --- a/drivers/rtc/rtc-omap.c +++ b/drivers/rtc/rtc-omap.c @@ -823,7 +823,8 @@ static int omap_rtc_probe(struct platform_device *pdev) rtc->pctldev = pinctrl_register(&rtc_pinctrl_desc, &pdev->dev, rtc); if (IS_ERR(rtc->pctldev)) { dev_err(&pdev->dev, "Couldn't register pinctrl driver\n"); - return PTR_ERR(rtc->pctldev); + ret = PTR_ERR(rtc->pctldev); + goto err; } if (rtc->is_pmic_controller) { -- GitLab From dc32281ec0281ffe2ea57d3157eaae6eb74aef8a Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Tue, 13 Mar 2018 11:46:12 +0100 Subject: [PATCH 1773/2269] clk: samsung: exynos5250: Add missing clocks for FIMC LITE SYSMMU devices commit 5b23fceec1ff94305c5d1accde018cae27448005 upstream. FIMC LITE SYSMMU devices are defined in exynos5250.dtsi, but clocks for them are not instantiated by Exynos5250 clock provider driver. Add needed definitions for those clocks to fix IOMMU probe failure: ERROR: could not get clock /soc/sysmmu@13c40000:sysmmu(0) exynos-sysmmu 13c40000.sysmmu: Failed to get device clock(s)! exynos-sysmmu: probe of 13c40000.sysmmu failed with error -38 ERROR: could not get clock /soc/sysmmu@13c50000:sysmmu(0) exynos-sysmmu 13c50000.sysmmu: Failed to get device clock(s)! exynos-sysmmu: probe of 13c50000.sysmmu failed with error -38 Signed-off-by: Marek Szyprowski Fixes: bfed1074f213 ("clk: exynos5250: Add missing sysmmu clocks for DISP and ISP blocks") Acked-by: Chanwoo Choi Signed-off-by: Sylwester Nawrocki Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- drivers/clk/samsung/clk-exynos5250.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/clk/samsung/clk-exynos5250.c b/drivers/clk/samsung/clk-exynos5250.c index 6a0cb8a515e89..b609219c802b6 100644 --- a/drivers/clk/samsung/clk-exynos5250.c +++ b/drivers/clk/samsung/clk-exynos5250.c @@ -560,6 +560,8 @@ static const struct samsung_gate_clock exynos5250_gate_clks[] __initconst = { 0), GATE(CLK_GSCL3, "gscl3", "mout_aclk266_gscl_sub", GATE_IP_GSCL, 3, 0, 0), + GATE(CLK_CAMIF_TOP, "camif_top", "mout_aclk266_gscl_sub", + GATE_IP_GSCL, 4, 0, 0), GATE(CLK_GSCL_WA, "gscl_wa", "div_gscl_wa", GATE_IP_GSCL, 5, 0, 0), GATE(CLK_GSCL_WB, "gscl_wb", "div_gscl_wb", GATE_IP_GSCL, 6, 0, 0), GATE(CLK_SMMU_GSCL0, "smmu_gscl0", "mout_aclk266_gscl_sub", @@ -570,6 +572,10 @@ static const struct samsung_gate_clock exynos5250_gate_clks[] __initconst = { GATE_IP_GSCL, 9, 0, 0), GATE(CLK_SMMU_GSCL3, "smmu_gscl3", "mout_aclk266_gscl_sub", GATE_IP_GSCL, 10, 0, 0), + GATE(CLK_SMMU_FIMC_LITE0, "smmu_fimc_lite0", "mout_aclk266_gscl_sub", + GATE_IP_GSCL, 11, 0, 0), + GATE(CLK_SMMU_FIMC_LITE1, "smmu_fimc_lite1", "mout_aclk266_gscl_sub", + GATE_IP_GSCL, 12, 0, 0), GATE(CLK_FIMD1, "fimd1", "mout_aclk200_disp1_sub", GATE_IP_DISP1, 0, 0, 0), -- GitLab From a46ca4c8f1c64723ce0a9f66638a1343a71bbe3f Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sat, 21 Apr 2018 20:26:41 +0200 Subject: [PATCH 1774/2269] ARM: dts: exynos: Fix invalid node referenced by i2c20 alias in Peach Pit and Pi commit 70c3250ac1374688d7963e562fe58b23f70bcba9 upstream. After moving all nodes under "soc" node in commit 5d99cc59a3c6 ("ARM: dts: exynos: Move Exynos5250 and Exynos5420 nodes under soc"), the i2c20 alias in Peach Pit and Peach Pi stopped pointing to proper node: arch/arm/boot/dts/exynos5420-peach-pit.dtb: Warning (alias_paths): /aliases:i2c20: aliases property is not a valid node (/spi@12d40000/cros-ec@0/i2c-tunnel) arch/arm/boot/dts/exynos5800-peach-pi.dtb: Warning (alias_paths): /aliases:i2c20: aliases property is not a valid node (/spi@12d40000/cros-ec@0/i2c-tunnel) Fixes: 5d99cc59a3c6 ("ARM: dts: exynos: Move Exynos5250 and Exynos5420 nodes under soc") Signed-off-by: Krzysztof Kozlowski Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/exynos5420-peach-pit.dts | 4 ++-- arch/arm/boot/dts/exynos5800-peach-pi.dts | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm/boot/dts/exynos5420-peach-pit.dts b/arch/arm/boot/dts/exynos5420-peach-pit.dts index 683a4cfb4a23d..c91eff8475a87 100644 --- a/arch/arm/boot/dts/exynos5420-peach-pit.dts +++ b/arch/arm/boot/dts/exynos5420-peach-pit.dts @@ -31,7 +31,7 @@ aliases { /* Assign 20 so we don't get confused w/ builtin ones */ - i2c20 = "/spi@12d40000/cros-ec@0/i2c-tunnel"; + i2c20 = &i2c_tunnel; }; backlight: backlight { @@ -952,7 +952,7 @@ samsung,spi-feedback-delay = <1>; }; - i2c-tunnel { + i2c_tunnel: i2c-tunnel { compatible = "google,cros-ec-i2c-tunnel"; #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm/boot/dts/exynos5800-peach-pi.dts b/arch/arm/boot/dts/exynos5800-peach-pi.dts index b2b95ff205e81..daad5d425cf5c 100644 --- a/arch/arm/boot/dts/exynos5800-peach-pi.dts +++ b/arch/arm/boot/dts/exynos5800-peach-pi.dts @@ -29,7 +29,7 @@ aliases { /* Assign 20 so we don't get confused w/ builtin ones */ - i2c20 = "/spi@12d40000/cros-ec@0/i2c-tunnel"; + i2c20 = &i2c_tunnel; }; backlight: backlight { @@ -921,7 +921,7 @@ samsung,spi-feedback-delay = <1>; }; - i2c-tunnel { + i2c_tunnel: i2c-tunnel { compatible = "google,cros-ec-i2c-tunnel"; #address-cells = <1>; #size-cells = <0>; -- GitLab From c8692d9f5ceff240792df5b42ca1639ace77204b Mon Sep 17 00:00:00 2001 From: Jeffy Chen Date: Fri, 20 Oct 2017 20:01:01 +0800 Subject: [PATCH 1775/2269] driver core: Move device_links_purge() after bus_remove_device() commit 2ec16150179888b81717d1d3ce84e634f4736af2 upstream. The current ordering of code in device_del() triggers a WARN_ON() in device_links_purge(), because of an unexpected link status. The device_links_unbind_consumers() call in device_release_driver() has to take place before device_links_purge() for the status of all links to be correct, so move the device_links_purge() call in device_del() after the invocation of bus_remove_device() which calls device_release_driver(). Fixes: 9ed9895370ae (driver core: Functional dependencies tracking support) Signed-off-by: Jeffy Chen Reviewed-by: Rafael J. Wysocki Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- drivers/base/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/core.c b/drivers/base/core.c index eb066cc827ef4..3f463a61f8cfb 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -1973,7 +1973,6 @@ void device_del(struct device *dev) blocking_notifier_call_chain(&dev->bus->p->bus_notifier, BUS_NOTIFY_DEL_DEVICE, dev); - device_links_purge(dev); dpm_sysfs_remove(dev); if (parent) klist_del(&dev->p->knode_parent); @@ -2001,6 +2000,7 @@ void device_del(struct device *dev) device_pm_remove(dev); driver_deferred_probe_del(dev); device_remove_properties(dev); + device_links_purge(dev); /* Notify the platform of the removal, in case they * need to do anything... -- GitLab From a956132ac00bd5a4b2909707f00f8d5366931648 Mon Sep 17 00:00:00 2001 From: Sebastien Boisvert Date: Fri, 26 Oct 2018 15:02:23 -0700 Subject: [PATCH 1776/2269] include/linux/pfn_t.h: force '~' to be parsed as an unary operator commit 4d54954a197175c0dcb3c82af0c0740d0c5f827a upstream. Tracing the event "fs_dax:dax_pmd_insert_mapping" with perf produces this warning: [fs_dax:dax_pmd_insert_mapping] unknown op '~' It is printed in process_op (tools/lib/traceevent/event-parse.c) because '~' is parsed as a binary operator. perf reads the format of fs_dax:dax_pmd_insert_mapping ("print fmt") from /sys/kernel/debug/tracing/events/fs_dax/dax_pmd_insert_mapping/format . The format contains: ~(((u64) ~(~(((1UL) << 12)-1))) ^ \ interpreted as a binary operator by process_op(). This part is generated in the declaration of the event class dax_pmd_insert_mapping_class in include/trace/events/fs_dax.h : __print_flags_u64(__entry->pfn_val & PFN_FLAGS_MASK, "|", PFN_FLAGS_TRACE), This patch adds a pair of parentheses in the declaration of PFN_FLAGS_MASK to make sure that '~' is parsed as a unary operator by perf. The part of the format that was problematic is now: ~(((u64) (~(~(((1UL) << 12)-1)))) Now, all the '~' are parsed as unary operators. Link: http://lkml.kernel.org/r/20181021145939.8760-1-sebhtml@videotron.qc.ca Signed-off-by: Sebastien Boisvert Acked-by: Dan Williams Cc: "Steven Rostedt (VMware)" Cc: Arnaldo Carvalho de Melo Cc: "Tzvetomir Stoyanov (VMware)" Cc: Namhyung Kim Cc: Ross Zwisler Cc: Elenie Godzaridis Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- include/linux/pfn_t.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/pfn_t.h b/include/linux/pfn_t.h index 43b1d7648e826..a8aef18c6244d 100644 --- a/include/linux/pfn_t.h +++ b/include/linux/pfn_t.h @@ -10,7 +10,7 @@ * PFN_DEV - pfn is not covered by system memmap by default * PFN_MAP - pfn has a dynamic page mapping established by a device driver */ -#define PFN_FLAGS_MASK (((u64) ~PAGE_MASK) << (BITS_PER_LONG_LONG - PAGE_SHIFT)) +#define PFN_FLAGS_MASK (((u64) (~PAGE_MASK)) << (BITS_PER_LONG_LONG - PAGE_SHIFT)) #define PFN_SG_CHAIN (1ULL << (BITS_PER_LONG_LONG - 1)) #define PFN_SG_LAST (1ULL << (BITS_PER_LONG_LONG - 2)) #define PFN_DEV (1ULL << (BITS_PER_LONG_LONG - 3)) -- GitLab From 5e063b64163be65e2f04588808760e790a889e3c Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 4 Oct 2018 11:06:13 -0700 Subject: [PATCH 1777/2269] tty: wipe buffer. commit c9a8e5fce009e3c601a43c49ea9dbcb25d1ffac5 upstream. After we are done with the tty buffer, zero it out. Reported-by: aszlig Tested-by: Milan Broz Tested-by: Daniel Zatovic Tested-by: aszlig Cc: Willy Tarreau Signed-off-by: Greg Kroah-Hartman --- drivers/tty/tty_buffer.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/tty/tty_buffer.c b/drivers/tty/tty_buffer.c index 677fa99b77478..217114227f8d3 100644 --- a/drivers/tty/tty_buffer.c +++ b/drivers/tty/tty_buffer.c @@ -467,11 +467,15 @@ receive_buf(struct tty_port *port, struct tty_buffer *head, int count) { unsigned char *p = char_buf_ptr(head, head->read); char *f = NULL; + int n; if (~head->flags & TTYB_NORMAL) f = flag_buf_ptr(head, head->read); - return port->client_ops->receive_buf(port, p, f, count); + n = port->client_ops->receive_buf(port, p, f, count); + if (n > 0) + memset(p, 0, n); + return n; } /** -- GitLab From b008f3b25588868a89e9e4562b4dc641d1032553 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 4 Oct 2018 11:06:14 -0700 Subject: [PATCH 1778/2269] tty: wipe buffer if not echoing data commit b97b3d9fb57860a60592859e332de7759fd54c2e upstream. If we are not echoing the data to userspace or the console is in icanon mode, then perhaps it is a "secret" so we should wipe it once we are done with it. This mirrors the logic that the audit code has. Reported-by: aszlig Tested-by: Milan Broz Tested-by: Daniel Zatovic Tested-by: aszlig Cc: Willy Tarreau Signed-off-by: Greg Kroah-Hartman --- drivers/tty/n_tty.c | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c index 0475f9685a41d..904fc9c37fdea 100644 --- a/drivers/tty/n_tty.c +++ b/drivers/tty/n_tty.c @@ -154,17 +154,28 @@ static inline unsigned char *echo_buf_addr(struct n_tty_data *ldata, size_t i) return &ldata->echo_buf[i & (N_TTY_BUF_SIZE - 1)]; } +/* If we are not echoing the data, perhaps this is a secret so erase it */ +static void zero_buffer(struct tty_struct *tty, u8 *buffer, int size) +{ + bool icanon = !!L_ICANON(tty); + bool no_echo = !L_ECHO(tty); + + if (icanon && no_echo) + memset(buffer, 0x00, size); +} + static int tty_copy_to_user(struct tty_struct *tty, void __user *to, size_t tail, size_t n) { struct n_tty_data *ldata = tty->disc_data; size_t size = N_TTY_BUF_SIZE - tail; - const void *from = read_buf_addr(ldata, tail); + void *from = read_buf_addr(ldata, tail); int uncopied; if (n > size) { tty_audit_add_data(tty, from, size); uncopied = copy_to_user(to, from, size); + zero_buffer(tty, from, size - uncopied); if (uncopied) return uncopied; to += size; @@ -173,7 +184,9 @@ static int tty_copy_to_user(struct tty_struct *tty, void __user *to, } tty_audit_add_data(tty, from, n); - return copy_to_user(to, from, n); + uncopied = copy_to_user(to, from, n); + zero_buffer(tty, from, n - uncopied); + return uncopied; } /** @@ -1962,11 +1975,12 @@ static int copy_from_read_buf(struct tty_struct *tty, n = min(head - ldata->read_tail, N_TTY_BUF_SIZE - tail); n = min(*nr, n); if (n) { - const unsigned char *from = read_buf_addr(ldata, tail); + unsigned char *from = read_buf_addr(ldata, tail); retval = copy_to_user(*b, from, n); n -= retval; is_eof = n == 1 && *from == EOF_CHAR(tty); tty_audit_add_data(tty, from, n); + zero_buffer(tty, from, n); smp_store_release(&ldata->read_tail, ldata->read_tail + n); /* Turn single EOF into zero-length read */ if (L_EXTPROC(tty) && ldata->icanon && is_eof && -- GitLab From 2ecab0857ab03cd74bfeeb8946323cf737b12280 Mon Sep 17 00:00:00 2001 From: Aaron Ma Date: Fri, 9 Nov 2018 17:21:20 +0200 Subject: [PATCH 1779/2269] usb: xhci: fix uninitialized completion when USB3 port got wrong status commit 958c0bd86075d4ef1c936998deefe1947e539240 upstream. Realtek USB3.0 Card Reader [0bda:0328] reports wrong port status on Cannon lake PCH USB3.1 xHCI [8086:a36d] after resume from S3, after clear port reset it works fine. Since this device is registered on USB3 roothub at boot, when port status reports not superspeed, xhci_get_port_status will call an uninitialized completion in bus_state[0]. Kernel will hang because of NULL pointer. Restrict the USB2 resume status check in USB2 roothub to fix hang issue. Cc: stable@vger.kernel.org Signed-off-by: Aaron Ma Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-hub.c | 2 +- drivers/usb/host/xhci-ring.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c index a2e350d28c05a..6b2f6c41e2a9e 100644 --- a/drivers/usb/host/xhci-hub.c +++ b/drivers/usb/host/xhci-hub.c @@ -895,7 +895,7 @@ static u32 xhci_get_port_status(struct usb_hcd *hcd, status |= USB_PORT_STAT_SUSPEND; } if ((raw_port_status & PORT_PLS_MASK) == XDEV_RESUME && - !DEV_SUPERSPEED_ANY(raw_port_status)) { + !DEV_SUPERSPEED_ANY(raw_port_status) && hcd->speed < HCD_USB3) { if ((raw_port_status & PORT_RESET) || !(raw_port_status & PORT_PE)) return 0xffffffff; diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 998f61d826896..aa230706b8757 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -1746,7 +1746,7 @@ static void handle_port_status(struct xhci_hcd *xhci, * RExit to a disconnect state). If so, let the the driver know it's * out of the RExit state. */ - if (!DEV_SUPERSPEED_ANY(portsc) && + if (!DEV_SUPERSPEED_ANY(portsc) && hcd->speed < HCD_USB3 && test_and_clear_bit(faked_port_index, &bus_state->rexit_ports)) { complete(&bus_state->rexit_done[faked_port_index]); -- GitLab From 7bcfd8f985f2c7bf7be6a08333dfaf31ed58ccd4 Mon Sep 17 00:00:00 2001 From: Salvatore Mesoraca Date: Thu, 23 Aug 2018 17:00:35 -0700 Subject: [PATCH 1780/2269] namei: allow restricted O_CREAT of FIFOs and regular files commit 30aba6656f61ed44cba445a3c0d38b296fa9e8f5 upstream. Disallows open of FIFOs or regular files not owned by the user in world writable sticky directories, unless the owner is the same as that of the directory or the file is opened without the O_CREAT flag. The purpose is to make data spoofing attacks harder. This protection can be turned on and off separately for FIFOs and regular files via sysctl, just like the symlinks/hardlinks protection. This patch is based on Openwall's "HARDEN_FIFO" feature by Solar Designer. This is a brief list of old vulnerabilities that could have been prevented by this feature, some of them even allow for privilege escalation: CVE-2000-1134 CVE-2007-3852 CVE-2008-0525 CVE-2009-0416 CVE-2011-4834 CVE-2015-1838 CVE-2015-7442 CVE-2016-7489 This list is not meant to be complete. It's difficult to track down all vulnerabilities of this kind because they were often reported without any mention of this particular attack vector. In fact, before hardlinks/symlinks restrictions, fifos/regular files weren't the favorite vehicle to exploit them. [s.mesoraca16@gmail.com: fix bug reported by Dan Carpenter] Link: https://lkml.kernel.org/r/20180426081456.GA7060@mwanda Link: http://lkml.kernel.org/r/1524829819-11275-1-git-send-email-s.mesoraca16@gmail.com [keescook@chromium.org: drop pr_warn_ratelimited() in favor of audit changes in the future] [keescook@chromium.org: adjust commit subjet] Link: http://lkml.kernel.org/r/20180416175918.GA13494@beast Signed-off-by: Salvatore Mesoraca Signed-off-by: Kees Cook Suggested-by: Solar Designer Suggested-by: Kees Cook Cc: Al Viro Cc: Dan Carpenter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Cc: Loic Signed-off-by: Greg Kroah-Hartman --- Documentation/sysctl/fs.txt | 36 +++++++++++++++++++++++++ fs/namei.c | 53 ++++++++++++++++++++++++++++++++++--- include/linux/fs.h | 2 ++ kernel/sysctl.c | 18 +++++++++++++ 4 files changed, 106 insertions(+), 3 deletions(-) diff --git a/Documentation/sysctl/fs.txt b/Documentation/sysctl/fs.txt index 35e17f748ca78..af5859b2d0f91 100644 --- a/Documentation/sysctl/fs.txt +++ b/Documentation/sysctl/fs.txt @@ -34,7 +34,9 @@ Currently, these files are in /proc/sys/fs: - overflowgid - pipe-user-pages-hard - pipe-user-pages-soft +- protected_fifos - protected_hardlinks +- protected_regular - protected_symlinks - suid_dumpable - super-max @@ -182,6 +184,24 @@ applied. ============================================================== +protected_fifos: + +The intent of this protection is to avoid unintentional writes to +an attacker-controlled FIFO, where a program expected to create a regular +file. + +When set to "0", writing to FIFOs is unrestricted. + +When set to "1" don't allow O_CREAT open on FIFOs that we don't own +in world writable sticky directories, unless they are owned by the +owner of the directory. + +When set to "2" it also applies to group writable sticky directories. + +This protection is based on the restrictions in Openwall. + +============================================================== + protected_hardlinks: A long-standing class of security issues is the hardlink-based @@ -202,6 +222,22 @@ This protection is based on the restrictions in Openwall and grsecurity. ============================================================== +protected_regular: + +This protection is similar to protected_fifos, but it +avoids writes to an attacker-controlled regular file, where a program +expected to create one. + +When set to "0", writing to regular files is unrestricted. + +When set to "1" don't allow O_CREAT open on regular files that we +don't own in world writable sticky directories, unless they are +owned by the owner of the directory. + +When set to "2" it also applies to group writable sticky directories. + +============================================================== + protected_symlinks: A long-standing class of security issues is the symlink-based diff --git a/fs/namei.c b/fs/namei.c index 0b46b858cd42c..d1e467b7b9de8 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -904,6 +904,8 @@ static inline void put_link(struct nameidata *nd) int sysctl_protected_symlinks __read_mostly = 0; int sysctl_protected_hardlinks __read_mostly = 0; +int sysctl_protected_fifos __read_mostly; +int sysctl_protected_regular __read_mostly; /** * may_follow_link - Check symlink following for unsafe situations @@ -1017,6 +1019,45 @@ static int may_linkat(struct path *link) return -EPERM; } +/** + * may_create_in_sticky - Check whether an O_CREAT open in a sticky directory + * should be allowed, or not, on files that already + * exist. + * @dir: the sticky parent directory + * @inode: the inode of the file to open + * + * Block an O_CREAT open of a FIFO (or a regular file) when: + * - sysctl_protected_fifos (or sysctl_protected_regular) is enabled + * - the file already exists + * - we are in a sticky directory + * - we don't own the file + * - the owner of the directory doesn't own the file + * - the directory is world writable + * If the sysctl_protected_fifos (or sysctl_protected_regular) is set to 2 + * the directory doesn't have to be world writable: being group writable will + * be enough. + * + * Returns 0 if the open is allowed, -ve on error. + */ +static int may_create_in_sticky(struct dentry * const dir, + struct inode * const inode) +{ + if ((!sysctl_protected_fifos && S_ISFIFO(inode->i_mode)) || + (!sysctl_protected_regular && S_ISREG(inode->i_mode)) || + likely(!(dir->d_inode->i_mode & S_ISVTX)) || + uid_eq(inode->i_uid, dir->d_inode->i_uid) || + uid_eq(current_fsuid(), inode->i_uid)) + return 0; + + if (likely(dir->d_inode->i_mode & 0002) || + (dir->d_inode->i_mode & 0020 && + ((sysctl_protected_fifos >= 2 && S_ISFIFO(inode->i_mode)) || + (sysctl_protected_regular >= 2 && S_ISREG(inode->i_mode))))) { + return -EACCES; + } + return 0; +} + static __always_inline const char *get_link(struct nameidata *nd) { @@ -3355,9 +3396,15 @@ finish_open: if (error) return error; audit_inode(nd->name, nd->path.dentry, 0); - error = -EISDIR; - if ((open_flag & O_CREAT) && d_is_dir(nd->path.dentry)) - goto out; + if (open_flag & O_CREAT) { + error = -EISDIR; + if (d_is_dir(nd->path.dentry)) + goto out; + error = may_create_in_sticky(dir, + d_backing_inode(nd->path.dentry)); + if (unlikely(error)) + goto out; + } error = -ENOTDIR; if ((nd->flags & LOOKUP_DIRECTORY) && !d_can_lookup(nd->path.dentry)) goto out; diff --git a/include/linux/fs.h b/include/linux/fs.h index 7374639f0aa00..f6a577edec67f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -72,6 +72,8 @@ extern struct inodes_stat_t inodes_stat; extern int leases_enable, lease_break_time; extern int sysctl_protected_symlinks; extern int sysctl_protected_hardlinks; +extern int sysctl_protected_fifos; +extern int sysctl_protected_regular; typedef __kernel_rwf_t rwf_t; diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 069550540a392..d330b1ce3b940 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1793,6 +1793,24 @@ static struct ctl_table fs_table[] = { .extra1 = &zero, .extra2 = &one, }, + { + .procname = "protected_fifos", + .data = &sysctl_protected_fifos, + .maxlen = sizeof(int), + .mode = 0600, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &two, + }, + { + .procname = "protected_regular", + .data = &sysctl_protected_regular, + .maxlen = sizeof(int), + .mode = 0600, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &two, + }, { .procname = "suid_dumpable", .data = &suid_dumpable, -- GitLab From a4977f3e8a3a668878543828ceaf40d69b5bfc32 Mon Sep 17 00:00:00 2001 From: Phil Elwell Date: Thu, 19 Apr 2018 17:59:38 +0100 Subject: [PATCH 1781/2269] lan78xx: Read MAC address from DT if present commit 760db29bdc97b73ff60b091315ad787b1deb5cf5 upstream. There is a standard mechanism for locating and using a MAC address from the Device Tree. Use this facility in the lan78xx driver to support applications without programmed EEPROM or OTP. At the same time, regularise the handling of the different address sources. Signed-off-by: Phil Elwell Signed-off-by: David S. Miller Tested-by: Paolo Pisati Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/lan78xx.c | 42 +++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 22 deletions(-) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index 50e2e10a9050d..e069b310d6a6b 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -37,6 +37,7 @@ #include #include #include +#include #include "lan78xx.h" #define DRIVER_AUTHOR "WOOJUNG HUH " @@ -1645,34 +1646,31 @@ static void lan78xx_init_mac_address(struct lan78xx_net *dev) addr[5] = (addr_hi >> 8) & 0xFF; if (!is_valid_ether_addr(addr)) { - /* reading mac address from EEPROM or OTP */ - if ((lan78xx_read_eeprom(dev, EEPROM_MAC_OFFSET, ETH_ALEN, - addr) == 0) || - (lan78xx_read_otp(dev, EEPROM_MAC_OFFSET, ETH_ALEN, - addr) == 0)) { - if (is_valid_ether_addr(addr)) { - /* eeprom values are valid so use them */ - netif_dbg(dev, ifup, dev->net, - "MAC address read from EEPROM"); - } else { - /* generate random MAC */ - random_ether_addr(addr); - netif_dbg(dev, ifup, dev->net, - "MAC address set to random addr"); - } - - addr_lo = addr[0] | (addr[1] << 8) | - (addr[2] << 16) | (addr[3] << 24); - addr_hi = addr[4] | (addr[5] << 8); - - ret = lan78xx_write_reg(dev, RX_ADDRL, addr_lo); - ret = lan78xx_write_reg(dev, RX_ADDRH, addr_hi); + if (!eth_platform_get_mac_address(&dev->udev->dev, addr)) { + /* valid address present in Device Tree */ + netif_dbg(dev, ifup, dev->net, + "MAC address read from Device Tree"); + } else if (((lan78xx_read_eeprom(dev, EEPROM_MAC_OFFSET, + ETH_ALEN, addr) == 0) || + (lan78xx_read_otp(dev, EEPROM_MAC_OFFSET, + ETH_ALEN, addr) == 0)) && + is_valid_ether_addr(addr)) { + /* eeprom values are valid so use them */ + netif_dbg(dev, ifup, dev->net, + "MAC address read from EEPROM"); } else { /* generate random MAC */ random_ether_addr(addr); netif_dbg(dev, ifup, dev->net, "MAC address set to random addr"); } + + addr_lo = addr[0] | (addr[1] << 8) | + (addr[2] << 16) | (addr[3] << 24); + addr_hi = addr[4] | (addr[5] << 8); + + ret = lan78xx_write_reg(dev, RX_ADDRL, addr_lo); + ret = lan78xx_write_reg(dev, RX_ADDRH, addr_hi); } ret = lan78xx_write_reg(dev, MAF_LO(0), addr_lo); -- GitLab From aef9f7db7e8cf0cd5357e5b449669af1e7cdccad Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Thu, 16 Aug 2018 09:02:31 +0100 Subject: [PATCH 1782/2269] s390/mm: Check for valid vma before zapping in gmap_discard commit 1843abd03250115af6cec0892683e70cf2297c25 upstream. Userspace could have munmapped the area before doing unmapping from the gmap. This would leave us with a valid vmaddr, but an invalid vma from which we would try to zap memory. Let's check before using the vma. Fixes: 1e133ab296f3 ("s390/mm: split arch/s390/mm/pgtable.c") Signed-off-by: Janosch Frank Reviewed-by: David Hildenbrand Reported-by: Dan Carpenter Message-Id: <20180816082432.78828-1-frankja@linux.ibm.com> Signed-off-by: Janosch Frank Signed-off-by: Greg Kroah-Hartman --- arch/s390/mm/gmap.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index 2f66290c9b927..ec9292917d3f2 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -689,6 +689,8 @@ void gmap_discard(struct gmap *gmap, unsigned long from, unsigned long to) vmaddr |= gaddr & ~PMD_MASK; /* Find vma in the parent mm */ vma = find_vma(gmap->mm, vmaddr); + if (!vma) + continue; size = min(to - gaddr, PMD_SIZE - (gaddr & ~PMD_MASK)); zap_page_range(vma, vmaddr, size); } -- GitLab From 077506972ba23772b752e08b1ab7052cf5f04511 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 9 Jul 2018 13:47:30 -0700 Subject: [PATCH 1783/2269] rcu: Make need_resched() respond to urgent RCU-QS needs commit 92aa39e9dc77481b90cbef25e547d66cab901496 upstream. The per-CPU rcu_dynticks.rcu_urgent_qs variable communicates an urgent need for an RCU quiescent state from the force-quiescent-state processing within the grace-period kthread to context switches and to cond_resched(). Unfortunately, such urgent needs are not communicated to need_resched(), which is sometimes used to decide when to invoke cond_resched(), for but one example, within the KVM vcpu_run() function. As of v4.15, this can result in synchronize_sched() being delayed by up to ten seconds, which can be problematic, to say nothing of annoying. This commit therefore checks rcu_dynticks.rcu_urgent_qs from within rcu_check_callbacks(), which is invoked from the scheduling-clock interrupt handler. If the current task is not an idle task and is not executing in usermode, a context switch is forced, and either way, the rcu_dynticks.rcu_urgent_qs variable is set to false. If the current task is an idle task, then RCU's dyntick-idle code will detect the quiescent state, so no further action is required. Similarly, if the task is executing in usermode, other code in rcu_check_callbacks() and its called functions will report the corresponding quiescent state. Reported-by: Marius Hillenbrand Reported-by: David Woodhouse Suggested-by: Peter Zijlstra Signed-off-by: Paul E. McKenney [ paulmck: Backported to make patch apply cleanly on older versions. ] Tested-by: Marius Hillenbrand Cc: # 4.12.x - 4.19.x Signed-off-by: Greg Kroah-Hartman --- kernel/rcu/tree.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 3e3650e94ae6b..710ce1d6b9829 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -2772,6 +2772,15 @@ void rcu_check_callbacks(int user) rcu_bh_qs(); } rcu_preempt_check_callbacks(); + /* The load-acquire pairs with the store-release setting to true. */ + if (smp_load_acquire(this_cpu_ptr(&rcu_dynticks.rcu_urgent_qs))) { + /* Idle and userspace execution already are quiescent states. */ + if (!rcu_is_cpu_rrupt_from_idle() && !user) { + set_tsk_need_resched(current); + set_preempt_need_resched(); + } + __this_cpu_write(rcu_dynticks.rcu_urgent_qs, false); + } if (rcu_pending()) invoke_rcu_core(); if (user) -- GitLab From 57ccd1fc8ebbd656594354ab0213754fcda03bde Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Fri, 20 Apr 2018 14:54:13 -0400 Subject: [PATCH 1784/2269] net: ieee802154: 6lowpan: fix frag reassembly commit f18fa5de5ba7f1d6650951502bb96a6e4715a948 upstream. This patch initialize stack variables which are used in frag_lowpan_compare_key to zero. In my case there are padding bytes in the structures ieee802154_addr as well in frag_lowpan_compare_key. Otherwise the key variable contains random bytes. The result is that a compare of two keys by memcmp works incorrect. Fixes: 648700f76b03 ("inet: frags: use rhashtables for reassembly units") Signed-off-by: Alexander Aring Reported-by: Stefan Schmidt Signed-off-by: Stefan Schmidt Signed-off-by: Greg Kroah-Hartman --- net/ieee802154/6lowpan/6lowpan_i.h | 4 ++-- net/ieee802154/6lowpan/reassembly.c | 14 +++++++------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/net/ieee802154/6lowpan/6lowpan_i.h b/net/ieee802154/6lowpan/6lowpan_i.h index b8d95cb71c25d..44a7e16bf3b5e 100644 --- a/net/ieee802154/6lowpan/6lowpan_i.h +++ b/net/ieee802154/6lowpan/6lowpan_i.h @@ -20,8 +20,8 @@ typedef unsigned __bitwise lowpan_rx_result; struct frag_lowpan_compare_key { u16 tag; u16 d_size; - const struct ieee802154_addr src; - const struct ieee802154_addr dst; + struct ieee802154_addr src; + struct ieee802154_addr dst; }; /* Equivalent of ipv4 struct ipq diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c index 1790b65944b3e..2cc224106b692 100644 --- a/net/ieee802154/6lowpan/reassembly.c +++ b/net/ieee802154/6lowpan/reassembly.c @@ -75,14 +75,14 @@ fq_find(struct net *net, const struct lowpan_802154_cb *cb, { struct netns_ieee802154_lowpan *ieee802154_lowpan = net_ieee802154_lowpan(net); - struct frag_lowpan_compare_key key = { - .tag = cb->d_tag, - .d_size = cb->d_size, - .src = *src, - .dst = *dst, - }; + struct frag_lowpan_compare_key key = {}; struct inet_frag_queue *q; + key.tag = cb->d_tag; + key.d_size = cb->d_size; + key.src = *src; + key.dst = *dst; + q = inet_frag_find(&ieee802154_lowpan->frags, &key); if (!q) return NULL; @@ -372,7 +372,7 @@ int lowpan_frag_rcv(struct sk_buff *skb, u8 frag_type) struct lowpan_frag_queue *fq; struct net *net = dev_net(skb->dev); struct lowpan_802154_cb *cb = lowpan_802154_cb(skb); - struct ieee802154_hdr hdr; + struct ieee802154_hdr hdr = {}; int err; if (ieee802154_hdr_peek_addrs(skb, &hdr) < 0) -- GitLab From de72a0f9ad28895b57b9403ae29aa5a6a579f31d Mon Sep 17 00:00:00 2001 From: Mimi Zohar Date: Sat, 17 Jun 2017 23:56:23 -0400 Subject: [PATCH 1785/2269] ima: always measure and audit files in policy commit f3cc6b25dcc5616f0d5c720009b2ac66f97df2ff upstream. All files matching a "measure" rule must be included in the IMA measurement list, even when the file hash cannot be calculated. Similarly, all files matching an "audit" rule must be audited, even when the file hash can not be calculated. The file data hash field contained in the IMA measurement list template data will contain 0's instead of the actual file hash digest. Note: In general, adding, deleting or in anyway changing which files are included in the IMA measurement list is not a good idea, as it might result in not being able to unseal trusted keys sealed to a specific TPM PCR value. This patch not only adds file measurements that were not previously measured, but specifies that the file hash value for these files will be 0's. As the IMA measurement list ordering is not consistent from one boot to the next, it is unlikely that anyone is sealing keys based on the IMA measurement list. Remote attestation servers should be able to process these new measurement records, but might complain about these unknown records. Signed-off-by: Mimi Zohar Reviewed-by: Dmitry Kasatkin Cc: Aditya Kali Signed-off-by: Greg Kroah-Hartman --- security/integrity/ima/ima_api.c | 67 ++++++++++++++++++----------- security/integrity/ima/ima_crypto.c | 10 +++++ security/integrity/ima/ima_main.c | 9 ++-- 3 files changed, 56 insertions(+), 30 deletions(-) diff --git a/security/integrity/ima/ima_api.c b/security/integrity/ima/ima_api.c index c2edba8de35e4..c7e8db0ea4c0e 100644 --- a/security/integrity/ima/ima_api.c +++ b/security/integrity/ima/ima_api.c @@ -199,42 +199,59 @@ int ima_collect_measurement(struct integrity_iint_cache *iint, struct inode *inode = file_inode(file); const char *filename = file->f_path.dentry->d_name.name; int result = 0; + int length; + void *tmpbuf; + u64 i_version; struct { struct ima_digest_data hdr; char digest[IMA_MAX_DIGEST_SIZE]; } hash; - if (!(iint->flags & IMA_COLLECTED)) { - u64 i_version = file_inode(file)->i_version; + if (iint->flags & IMA_COLLECTED) + goto out; - if (file->f_flags & O_DIRECT) { - audit_cause = "failed(directio)"; - result = -EACCES; - goto out; - } + /* + * Dectecting file change is based on i_version. On filesystems + * which do not support i_version, support is limited to an initial + * measurement/appraisal/audit. + */ + i_version = file_inode(file)->i_version; + hash.hdr.algo = algo; - hash.hdr.algo = algo; - - result = (!buf) ? ima_calc_file_hash(file, &hash.hdr) : - ima_calc_buffer_hash(buf, size, &hash.hdr); - if (!result) { - int length = sizeof(hash.hdr) + hash.hdr.length; - void *tmpbuf = krealloc(iint->ima_hash, length, - GFP_NOFS); - if (tmpbuf) { - iint->ima_hash = tmpbuf; - memcpy(iint->ima_hash, &hash, length); - iint->version = i_version; - iint->flags |= IMA_COLLECTED; - } else - result = -ENOMEM; - } + /* Initialize hash digest to 0's in case of failure */ + memset(&hash.digest, 0, sizeof(hash.digest)); + + if (buf) + result = ima_calc_buffer_hash(buf, size, &hash.hdr); + else + result = ima_calc_file_hash(file, &hash.hdr); + + if (result && result != -EBADF && result != -EINVAL) + goto out; + + length = sizeof(hash.hdr) + hash.hdr.length; + tmpbuf = krealloc(iint->ima_hash, length, GFP_NOFS); + if (!tmpbuf) { + result = -ENOMEM; + goto out; } + + iint->ima_hash = tmpbuf; + memcpy(iint->ima_hash, &hash, length); + iint->version = i_version; + + /* Possibly temporary failure due to type of read (eg. O_DIRECT) */ + if (!result) + iint->flags |= IMA_COLLECTED; out: - if (result) + if (result) { + if (file->f_flags & O_DIRECT) + audit_cause = "failed(directio)"; + integrity_audit_msg(AUDIT_INTEGRITY_DATA, inode, filename, "collect_data", audit_cause, result, 0); + } return result; } @@ -278,7 +295,7 @@ void ima_store_measurement(struct integrity_iint_cache *iint, } result = ima_store_template(entry, violation, inode, filename, pcr); - if (!result || result == -EEXIST) { + if ((!result || result == -EEXIST) && !(file->f_flags & O_DIRECT)) { iint->flags |= IMA_MEASURED; iint->measured_pcrs |= (0x1 << pcr); } diff --git a/security/integrity/ima/ima_crypto.c b/security/integrity/ima/ima_crypto.c index 90453aa1c8132..cb041af9eddb2 100644 --- a/security/integrity/ima/ima_crypto.c +++ b/security/integrity/ima/ima_crypto.c @@ -443,6 +443,16 @@ int ima_calc_file_hash(struct file *file, struct ima_digest_data *hash) loff_t i_size; int rc; + /* + * For consistency, fail file's opened with the O_DIRECT flag on + * filesystems mounted with/without DAX option. + */ + if (file->f_flags & O_DIRECT) { + hash->length = hash_digest_size[ima_hash_algo]; + hash->algo = ima_hash_algo; + return -EINVAL; + } + i_size = i_size_read(file_inode(file)); if (ima_ahash_minsize && i_size >= ima_ahash_minsize) { diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c index f8553179bdd78..06c4cf125f2a9 100644 --- a/security/integrity/ima/ima_main.c +++ b/security/integrity/ima/ima_main.c @@ -242,11 +242,8 @@ static int process_measurement(struct file *file, char *buf, loff_t size, hash_algo = ima_get_hash_algo(xattr_value, xattr_len); rc = ima_collect_measurement(iint, file, buf, size, hash_algo); - if (rc != 0) { - if (file->f_flags & O_DIRECT) - rc = (iint->flags & IMA_PERMIT_DIRECTIO) ? 0 : -EACCES; + if (rc != 0 && rc != -EBADF && rc != -EINVAL) goto out_digsig; - } if (!pathbuf) /* ima_rdwr_violation possibly pre-fetched */ pathname = ima_d_path(&file->f_path, &pathbuf, filename); @@ -254,12 +251,14 @@ static int process_measurement(struct file *file, char *buf, loff_t size, if (action & IMA_MEASURE) ima_store_measurement(iint, file, pathname, xattr_value, xattr_len, pcr); - if (action & IMA_APPRAISE_SUBMASK) + if (rc == 0 && (action & IMA_APPRAISE_SUBMASK)) rc = ima_appraise_measurement(func, iint, file, pathname, xattr_value, xattr_len, opened); if (action & IMA_AUDIT) ima_audit_measurement(iint, pathname); + if ((file->f_flags & O_DIRECT) && (iint->flags & IMA_PERMIT_DIRECTIO)) + rc = 0; out_digsig: if ((mask & MAY_WRITE) && (iint->flags & IMA_DIGSIG) && !(iint->flags & IMA_NEW_FILE)) -- GitLab From e099863340d86741c0957217971c29becf24b881 Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Tue, 7 Nov 2017 07:17:42 -0800 Subject: [PATCH 1786/2269] EVM: Add support for portable signature format commit 50b977481fce90aa5fbda55e330b9d722733e358 upstream. The EVM signature includes the inode number and (optionally) the filesystem UUID, making it impractical to ship EVM signatures in packages. This patch adds a new portable format intended to allow distributions to include EVM signatures. It is identical to the existing format but hardcodes the inode and generation numbers to 0 and does not include the filesystem UUID even if the kernel is configured to do so. Removing the inode means that the metadata and signature from one file could be copied to another file without invalidating it. This is avoided by ensuring that an IMA xattr is present during EVM validation. Portable signatures are intended to be immutable - ie, they will never be transformed into HMACs. Based on earlier work by Dmitry Kasatkin and Mikhail Kurinnoi. Signed-off-by: Matthew Garrett Cc: Dmitry Kasatkin Cc: Mikhail Kurinnoi Signed-off-by: Mimi Zohar Cc: Aditya Kali Signed-off-by: Greg Kroah-Hartman --- include/linux/integrity.h | 1 + security/integrity/evm/evm.h | 2 +- security/integrity/evm/evm_crypto.c | 75 +++++++++++++++++++++++---- security/integrity/evm/evm_main.c | 29 +++++++---- security/integrity/ima/ima_appraise.c | 4 +- security/integrity/integrity.h | 2 + 6 files changed, 92 insertions(+), 21 deletions(-) diff --git a/include/linux/integrity.h b/include/linux/integrity.h index c2d6082a1a4cf..858d3f4a22415 100644 --- a/include/linux/integrity.h +++ b/include/linux/integrity.h @@ -14,6 +14,7 @@ enum integrity_status { INTEGRITY_PASS = 0, + INTEGRITY_PASS_IMMUTABLE, INTEGRITY_FAIL, INTEGRITY_NOLABEL, INTEGRITY_NOXATTRS, diff --git a/security/integrity/evm/evm.h b/security/integrity/evm/evm.h index f5f12727771a0..2ff02459fcfd6 100644 --- a/security/integrity/evm/evm.h +++ b/security/integrity/evm/evm.h @@ -48,7 +48,7 @@ int evm_calc_hmac(struct dentry *dentry, const char *req_xattr_name, size_t req_xattr_value_len, char *digest); int evm_calc_hash(struct dentry *dentry, const char *req_xattr_name, const char *req_xattr_value, - size_t req_xattr_value_len, char *digest); + size_t req_xattr_value_len, char type, char *digest); int evm_init_hmac(struct inode *inode, const struct xattr *xattr, char *hmac_val); int evm_init_secfs(void); diff --git a/security/integrity/evm/evm_crypto.c b/security/integrity/evm/evm_crypto.c index ee9c3de5065ab..f1f030ae363ba 100644 --- a/security/integrity/evm/evm_crypto.c +++ b/security/integrity/evm/evm_crypto.c @@ -139,7 +139,7 @@ out: * protection.) */ static void hmac_add_misc(struct shash_desc *desc, struct inode *inode, - char *digest) + char type, char *digest) { struct h_misc { unsigned long ino; @@ -150,8 +150,13 @@ static void hmac_add_misc(struct shash_desc *desc, struct inode *inode, } hmac_misc; memset(&hmac_misc, 0, sizeof(hmac_misc)); - hmac_misc.ino = inode->i_ino; - hmac_misc.generation = inode->i_generation; + /* Don't include the inode or generation number in portable + * signatures + */ + if (type != EVM_XATTR_PORTABLE_DIGSIG) { + hmac_misc.ino = inode->i_ino; + hmac_misc.generation = inode->i_generation; + } /* The hmac uid and gid must be encoded in the initial user * namespace (not the filesystems user namespace) as encoding * them in the filesystems user namespace allows an attack @@ -164,7 +169,8 @@ static void hmac_add_misc(struct shash_desc *desc, struct inode *inode, hmac_misc.gid = from_kgid(&init_user_ns, inode->i_gid); hmac_misc.mode = inode->i_mode; crypto_shash_update(desc, (const u8 *)&hmac_misc, sizeof(hmac_misc)); - if (evm_hmac_attrs & EVM_ATTR_FSUUID) + if ((evm_hmac_attrs & EVM_ATTR_FSUUID) && + type != EVM_XATTR_PORTABLE_DIGSIG) crypto_shash_update(desc, &inode->i_sb->s_uuid.b[0], sizeof(inode->i_sb->s_uuid)); crypto_shash_final(desc, digest); @@ -190,6 +196,7 @@ static int evm_calc_hmac_or_hash(struct dentry *dentry, char *xattr_value = NULL; int error; int size; + bool ima_present = false; if (!(inode->i_opflags & IOP_XATTR)) return -EOPNOTSUPP; @@ -200,11 +207,18 @@ static int evm_calc_hmac_or_hash(struct dentry *dentry, error = -ENODATA; for (xattrname = evm_config_xattrnames; *xattrname != NULL; xattrname++) { + bool is_ima = false; + + if (strcmp(*xattrname, XATTR_NAME_IMA) == 0) + is_ima = true; + if ((req_xattr_name && req_xattr_value) && !strcmp(*xattrname, req_xattr_name)) { error = 0; crypto_shash_update(desc, (const u8 *)req_xattr_value, req_xattr_value_len); + if (is_ima) + ima_present = true; continue; } size = vfs_getxattr_alloc(dentry, *xattrname, @@ -219,9 +233,14 @@ static int evm_calc_hmac_or_hash(struct dentry *dentry, error = 0; xattr_size = size; crypto_shash_update(desc, (const u8 *)xattr_value, xattr_size); + if (is_ima) + ima_present = true; } - hmac_add_misc(desc, inode, digest); + hmac_add_misc(desc, inode, type, digest); + /* Portable EVM signatures must include an IMA hash */ + if (type == EVM_XATTR_PORTABLE_DIGSIG && !ima_present) + return -EPERM; out: kfree(xattr_value); kfree(desc); @@ -233,17 +252,45 @@ int evm_calc_hmac(struct dentry *dentry, const char *req_xattr_name, char *digest) { return evm_calc_hmac_or_hash(dentry, req_xattr_name, req_xattr_value, - req_xattr_value_len, EVM_XATTR_HMAC, digest); + req_xattr_value_len, EVM_XATTR_HMAC, digest); } int evm_calc_hash(struct dentry *dentry, const char *req_xattr_name, const char *req_xattr_value, size_t req_xattr_value_len, - char *digest) + char type, char *digest) { return evm_calc_hmac_or_hash(dentry, req_xattr_name, req_xattr_value, - req_xattr_value_len, IMA_XATTR_DIGEST, digest); + req_xattr_value_len, type, digest); +} + +static int evm_is_immutable(struct dentry *dentry, struct inode *inode) +{ + const struct evm_ima_xattr_data *xattr_data = NULL; + struct integrity_iint_cache *iint; + int rc = 0; + + iint = integrity_iint_find(inode); + if (iint && (iint->flags & EVM_IMMUTABLE_DIGSIG)) + return 1; + + /* Do this the hard way */ + rc = vfs_getxattr_alloc(dentry, XATTR_NAME_EVM, (char **)&xattr_data, 0, + GFP_NOFS); + if (rc <= 0) { + if (rc == -ENODATA) + return 0; + return rc; + } + if (xattr_data->type == EVM_XATTR_PORTABLE_DIGSIG) + rc = 1; + else + rc = 0; + + kfree(xattr_data); + return rc; } + /* * Calculate the hmac and update security.evm xattr * @@ -256,6 +303,16 @@ int evm_update_evmxattr(struct dentry *dentry, const char *xattr_name, struct evm_ima_xattr_data xattr_data; int rc = 0; + /* + * Don't permit any transformation of the EVM xattr if the signature + * is of an immutable type + */ + rc = evm_is_immutable(dentry, inode); + if (rc < 0) + return rc; + if (rc) + return -EPERM; + rc = evm_calc_hmac(dentry, xattr_name, xattr_value, xattr_value_len, xattr_data.digest); if (rc == 0) { @@ -281,7 +338,7 @@ int evm_init_hmac(struct inode *inode, const struct xattr *lsm_xattr, } crypto_shash_update(desc, lsm_xattr->value, lsm_xattr->value_len); - hmac_add_misc(desc, inode, hmac_val); + hmac_add_misc(desc, inode, EVM_XATTR_HMAC, hmac_val); kfree(desc); return 0; } diff --git a/security/integrity/evm/evm_main.c b/security/integrity/evm/evm_main.c index 063d38aef64e7..1d1a7053144b4 100644 --- a/security/integrity/evm/evm_main.c +++ b/security/integrity/evm/evm_main.c @@ -31,7 +31,7 @@ int evm_initialized; static char *integrity_status_msg[] = { - "pass", "fail", "no_label", "no_xattrs", "unknown" + "pass", "pass_immutable", "fail", "no_label", "no_xattrs", "unknown" }; char *evm_hmac = "hmac(sha1)"; char *evm_hash = "sha1"; @@ -120,7 +120,8 @@ static enum integrity_status evm_verify_hmac(struct dentry *dentry, enum integrity_status evm_status = INTEGRITY_PASS; int rc, xattr_len; - if (iint && iint->evm_status == INTEGRITY_PASS) + if (iint && (iint->evm_status == INTEGRITY_PASS || + iint->evm_status == INTEGRITY_PASS_IMMUTABLE)) return iint->evm_status; /* if status is not PASS, try to check again - against -ENOMEM */ @@ -161,22 +162,26 @@ static enum integrity_status evm_verify_hmac(struct dentry *dentry, rc = -EINVAL; break; case EVM_IMA_XATTR_DIGSIG: + case EVM_XATTR_PORTABLE_DIGSIG: rc = evm_calc_hash(dentry, xattr_name, xattr_value, - xattr_value_len, calc.digest); + xattr_value_len, xattr_data->type, + calc.digest); if (rc) break; rc = integrity_digsig_verify(INTEGRITY_KEYRING_EVM, (const char *)xattr_data, xattr_len, calc.digest, sizeof(calc.digest)); if (!rc) { - /* Replace RSA with HMAC if not mounted readonly and - * not immutable - */ - if (!IS_RDONLY(d_backing_inode(dentry)) && - !IS_IMMUTABLE(d_backing_inode(dentry))) + if (xattr_data->type == EVM_XATTR_PORTABLE_DIGSIG) { + if (iint) + iint->flags |= EVM_IMMUTABLE_DIGSIG; + evm_status = INTEGRITY_PASS_IMMUTABLE; + } else if (!IS_RDONLY(d_backing_inode(dentry)) && + !IS_IMMUTABLE(d_backing_inode(dentry))) { evm_update_evmxattr(dentry, xattr_name, xattr_value, xattr_value_len); + } } break; default: @@ -277,7 +282,7 @@ static enum integrity_status evm_verify_current_integrity(struct dentry *dentry) * affect security.evm. An interesting side affect of writing posix xattr * acls is their modifying of the i_mode, which is included in security.evm. * For posix xattr acls only, permit security.evm, even if it currently - * doesn't exist, to be updated. + * doesn't exist, to be updated unless the EVM signature is immutable. */ static int evm_protect_xattr(struct dentry *dentry, const char *xattr_name, const void *xattr_value, size_t xattr_value_len) @@ -345,7 +350,8 @@ int evm_inode_setxattr(struct dentry *dentry, const char *xattr_name, if (strcmp(xattr_name, XATTR_NAME_EVM) == 0) { if (!xattr_value_len) return -EINVAL; - if (xattr_data->type != EVM_IMA_XATTR_DIGSIG) + if (xattr_data->type != EVM_IMA_XATTR_DIGSIG && + xattr_data->type != EVM_XATTR_PORTABLE_DIGSIG) return -EPERM; } return evm_protect_xattr(dentry, xattr_name, xattr_value, @@ -422,6 +428,9 @@ void evm_inode_post_removexattr(struct dentry *dentry, const char *xattr_name) /** * evm_inode_setattr - prevent updating an invalid EVM extended attribute * @dentry: pointer to the affected dentry + * + * Permit update of file attributes when files have a valid EVM signature, + * except in the case of them having an immutable portable signature. */ int evm_inode_setattr(struct dentry *dentry, struct iattr *attr) { diff --git a/security/integrity/ima/ima_appraise.c b/security/integrity/ima/ima_appraise.c index 348db9b786812..e91d28cc05cc3 100644 --- a/security/integrity/ima/ima_appraise.c +++ b/security/integrity/ima/ima_appraise.c @@ -230,7 +230,9 @@ int ima_appraise_measurement(enum ima_hooks func, } status = evm_verifyxattr(dentry, XATTR_NAME_IMA, xattr_value, rc, iint); - if ((status != INTEGRITY_PASS) && (status != INTEGRITY_UNKNOWN)) { + if ((status != INTEGRITY_PASS) && + (status != INTEGRITY_PASS_IMMUTABLE) && + (status != INTEGRITY_UNKNOWN)) { if ((status == INTEGRITY_NOLABEL) || (status == INTEGRITY_NOXATTRS)) cause = "missing-HMAC"; diff --git a/security/integrity/integrity.h b/security/integrity/integrity.h index a53e7e4ab06c7..cbc7de33fac75 100644 --- a/security/integrity/integrity.h +++ b/security/integrity/integrity.h @@ -33,6 +33,7 @@ #define IMA_DIGSIG_REQUIRED 0x02000000 #define IMA_PERMIT_DIRECTIO 0x04000000 #define IMA_NEW_FILE 0x08000000 +#define EVM_IMMUTABLE_DIGSIG 0x10000000 #define IMA_DO_MASK (IMA_MEASURE | IMA_APPRAISE | IMA_AUDIT | \ IMA_APPRAISE_SUBMASK) @@ -58,6 +59,7 @@ enum evm_ima_xattr_type { EVM_XATTR_HMAC, EVM_IMA_XATTR_DIGSIG, IMA_XATTR_DIGEST_NG, + EVM_XATTR_PORTABLE_DIGSIG, IMA_XATTR_LAST }; -- GitLab From 281c07f30f71b4982cb535b633e5286bf0c7d056 Mon Sep 17 00:00:00 2001 From: Dmitry Kasatkin Date: Tue, 5 Dec 2017 21:06:34 +0200 Subject: [PATCH 1787/2269] ima: re-introduce own integrity cache lock commit 0d73a55208e94fc9fb6deaeea61438cd3280d4c0 upstream. Before IMA appraisal was introduced, IMA was using own integrity cache lock along with i_mutex. process_measurement and ima_file_free took the iint->mutex first and then the i_mutex, while setxattr, chmod and chown took the locks in reverse order. To resolve the potential deadlock, i_mutex was moved to protect entire IMA functionality and the redundant iint->mutex was eliminated. Solution was based on the assumption that filesystem code does not take i_mutex further. But when file is opened with O_DIRECT flag, direct-io implementation takes i_mutex and produces deadlock. Furthermore, certain other filesystem operations, such as llseek, also take i_mutex. More recently some filesystems have replaced their filesystem specific lock with the global i_rwsem to read a file. As a result, when IMA attempts to calculate the file hash, reading the file attempts to take the i_rwsem again. To resolve O_DIRECT related deadlock problem, this patch re-introduces iint->mutex. But to eliminate the original chmod() related deadlock problem, this patch eliminates the requirement for chmod hooks to take the iint->mutex by introducing additional atomic iint->attr_flags to indicate calling of the hooks. The allowed locking order is to take the iint->mutex first and then the i_rwsem. Original flags were cleared in chmod(), setxattr() or removwxattr() hooks and tested when file was closed or opened again. New atomic flags are set or cleared in those hooks and tested to clear iint->flags on close or on open. Atomic flags are following: * IMA_CHANGE_ATTR - indicates that chATTR() was called (chmod, chown, chgrp) and file attributes have changed. On file open, it causes IMA to clear iint->flags to re-evaluate policy and perform IMA functions again. * IMA_CHANGE_XATTR - indicates that setxattr or removexattr was called and extended attributes have changed. On file open, it causes IMA to clear iint->flags IMA_DONE_MASK to re-appraise. * IMA_UPDATE_XATTR - indicates that security.ima needs to be updated. It is cleared if file policy changes and no update is needed. * IMA_DIGSIG - indicates that file security.ima has signature and file security.ima must not update to file has on file close. * IMA_MUST_MEASURE - indicates the file is in the measurement policy. Fixes: Commit 6552321831dc ("xfs: remove i_iolock and use i_rwsem in the VFS inode instead") Signed-off-by: Dmitry Kasatkin Signed-off-by: Mimi Zohar Cc: Aditya Kali Signed-off-by: Greg Kroah-Hartman --- security/integrity/iint.c | 2 + security/integrity/ima/ima_appraise.c | 27 ++++++----- security/integrity/ima/ima_main.c | 70 ++++++++++++++++++--------- security/integrity/integrity.h | 18 +++++-- 4 files changed, 77 insertions(+), 40 deletions(-) diff --git a/security/integrity/iint.c b/security/integrity/iint.c index 6fc888ca468e4..2db461c38795b 100644 --- a/security/integrity/iint.c +++ b/security/integrity/iint.c @@ -155,12 +155,14 @@ static void init_once(void *foo) memset(iint, 0, sizeof(*iint)); iint->version = 0; iint->flags = 0UL; + iint->atomic_flags = 0; iint->ima_file_status = INTEGRITY_UNKNOWN; iint->ima_mmap_status = INTEGRITY_UNKNOWN; iint->ima_bprm_status = INTEGRITY_UNKNOWN; iint->ima_read_status = INTEGRITY_UNKNOWN; iint->evm_status = INTEGRITY_UNKNOWN; iint->measured_pcrs = 0; + mutex_init(&iint->mutex); } static int __init integrity_iintcache_init(void) diff --git a/security/integrity/ima/ima_appraise.c b/security/integrity/ima/ima_appraise.c index e91d28cc05cc3..84eb6cc956bfb 100644 --- a/security/integrity/ima/ima_appraise.c +++ b/security/integrity/ima/ima_appraise.c @@ -251,6 +251,7 @@ int ima_appraise_measurement(enum ima_hooks func, status = INTEGRITY_FAIL; break; } + clear_bit(IMA_DIGSIG, &iint->atomic_flags); if (xattr_len - sizeof(xattr_value->type) - hash_start >= iint->ima_hash->length) /* xattr length may be longer. md5 hash in previous @@ -269,7 +270,7 @@ int ima_appraise_measurement(enum ima_hooks func, status = INTEGRITY_PASS; break; case EVM_IMA_XATTR_DIGSIG: - iint->flags |= IMA_DIGSIG; + set_bit(IMA_DIGSIG, &iint->atomic_flags); rc = integrity_digsig_verify(INTEGRITY_KEYRING_IMA, (const char *)xattr_value, rc, iint->ima_hash->digest, @@ -320,7 +321,7 @@ void ima_update_xattr(struct integrity_iint_cache *iint, struct file *file) int rc = 0; /* do not collect and update hash for digital signatures */ - if (iint->flags & IMA_DIGSIG) + if (test_bit(IMA_DIGSIG, &iint->atomic_flags)) return; if (iint->ima_file_status != INTEGRITY_PASS) @@ -330,7 +331,9 @@ void ima_update_xattr(struct integrity_iint_cache *iint, struct file *file) if (rc < 0) return; + inode_lock(file_inode(file)); ima_fix_xattr(dentry, iint); + inode_unlock(file_inode(file)); } /** @@ -353,16 +356,14 @@ void ima_inode_post_setattr(struct dentry *dentry) return; must_appraise = ima_must_appraise(inode, MAY_ACCESS, POST_SETATTR); + if (!must_appraise) + __vfs_removexattr(dentry, XATTR_NAME_IMA); iint = integrity_iint_find(inode); if (iint) { - iint->flags &= ~(IMA_APPRAISE | IMA_APPRAISED | - IMA_APPRAISE_SUBMASK | IMA_APPRAISED_SUBMASK | - IMA_ACTION_RULE_FLAGS); - if (must_appraise) - iint->flags |= IMA_APPRAISE; + set_bit(IMA_CHANGE_ATTR, &iint->atomic_flags); + if (!must_appraise) + clear_bit(IMA_UPDATE_XATTR, &iint->atomic_flags); } - if (!must_appraise) - __vfs_removexattr(dentry, XATTR_NAME_IMA); } /* @@ -391,12 +392,12 @@ static void ima_reset_appraise_flags(struct inode *inode, int digsig) iint = integrity_iint_find(inode); if (!iint) return; - - iint->flags &= ~IMA_DONE_MASK; iint->measured_pcrs = 0; + set_bit(IMA_CHANGE_XATTR, &iint->atomic_flags); if (digsig) - iint->flags |= IMA_DIGSIG; - return; + set_bit(IMA_DIGSIG, &iint->atomic_flags); + else + clear_bit(IMA_DIGSIG, &iint->atomic_flags); } int ima_inode_setxattr(struct dentry *dentry, const char *xattr_name, diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c index 06c4cf125f2a9..92d0ca7309f9f 100644 --- a/security/integrity/ima/ima_main.c +++ b/security/integrity/ima/ima_main.c @@ -99,10 +99,13 @@ static void ima_rdwr_violation_check(struct file *file, if (!iint) iint = integrity_iint_find(inode); /* IMA_MEASURE is set from reader side */ - if (iint && (iint->flags & IMA_MEASURE)) + if (iint && test_bit(IMA_MUST_MEASURE, + &iint->atomic_flags)) send_tomtou = true; } } else { + if (must_measure) + set_bit(IMA_MUST_MEASURE, &iint->atomic_flags); if ((atomic_read(&inode->i_writecount) > 0) && must_measure) send_writers = true; } @@ -124,21 +127,24 @@ static void ima_check_last_writer(struct integrity_iint_cache *iint, struct inode *inode, struct file *file) { fmode_t mode = file->f_mode; + bool update; if (!(mode & FMODE_WRITE)) return; - inode_lock(inode); + mutex_lock(&iint->mutex); if (atomic_read(&inode->i_writecount) == 1) { + update = test_and_clear_bit(IMA_UPDATE_XATTR, + &iint->atomic_flags); if ((iint->version != inode->i_version) || (iint->flags & IMA_NEW_FILE)) { iint->flags &= ~(IMA_DONE_MASK | IMA_NEW_FILE); iint->measured_pcrs = 0; - if (iint->flags & IMA_APPRAISE) + if (update) ima_update_xattr(iint, file); } } - inode_unlock(inode); + mutex_unlock(&iint->mutex); } /** @@ -171,7 +177,7 @@ static int process_measurement(struct file *file, char *buf, loff_t size, char *pathbuf = NULL; char filename[NAME_MAX]; const char *pathname = NULL; - int rc = -ENOMEM, action, must_appraise; + int rc = 0, action, must_appraise = 0; int pcr = CONFIG_IMA_MEASURE_PCR_IDX; struct evm_ima_xattr_data *xattr_value = NULL; int xattr_len = 0; @@ -202,17 +208,31 @@ static int process_measurement(struct file *file, char *buf, loff_t size, if (action) { iint = integrity_inode_get(inode); if (!iint) - goto out; + rc = -ENOMEM; } - if (violation_check) { + if (!rc && violation_check) ima_rdwr_violation_check(file, iint, action & IMA_MEASURE, &pathbuf, &pathname); - if (!action) { - rc = 0; - goto out_free; - } - } + + inode_unlock(inode); + + if (rc) + goto out; + if (!action) + goto out; + + mutex_lock(&iint->mutex); + + if (test_and_clear_bit(IMA_CHANGE_ATTR, &iint->atomic_flags)) + /* reset appraisal flags if ima_inode_post_setattr was called */ + iint->flags &= ~(IMA_APPRAISE | IMA_APPRAISED | + IMA_APPRAISE_SUBMASK | IMA_APPRAISED_SUBMASK | + IMA_ACTION_FLAGS); + + if (test_and_clear_bit(IMA_CHANGE_XATTR, &iint->atomic_flags)) + /* reset all flags if ima_inode_setxattr was called */ + iint->flags &= ~IMA_DONE_MASK; /* Determine if already appraised/measured based on bitmask * (IMA_MEASURE, IMA_MEASURED, IMA_XXXX_APPRAISE, IMA_XXXX_APPRAISED, @@ -230,7 +250,7 @@ static int process_measurement(struct file *file, char *buf, loff_t size, if (!action) { if (must_appraise) rc = ima_get_cache_status(iint, func); - goto out_digsig; + goto out_locked; } template_desc = ima_template_desc_current(); @@ -243,7 +263,7 @@ static int process_measurement(struct file *file, char *buf, loff_t size, rc = ima_collect_measurement(iint, file, buf, size, hash_algo); if (rc != 0 && rc != -EBADF && rc != -EINVAL) - goto out_digsig; + goto out_locked; if (!pathbuf) /* ima_rdwr_violation possibly pre-fetched */ pathname = ima_d_path(&file->f_path, &pathbuf, filename); @@ -251,26 +271,32 @@ static int process_measurement(struct file *file, char *buf, loff_t size, if (action & IMA_MEASURE) ima_store_measurement(iint, file, pathname, xattr_value, xattr_len, pcr); - if (rc == 0 && (action & IMA_APPRAISE_SUBMASK)) + if (rc == 0 && (action & IMA_APPRAISE_SUBMASK)) { + inode_lock(inode); rc = ima_appraise_measurement(func, iint, file, pathname, xattr_value, xattr_len, opened); + inode_unlock(inode); + } if (action & IMA_AUDIT) ima_audit_measurement(iint, pathname); if ((file->f_flags & O_DIRECT) && (iint->flags & IMA_PERMIT_DIRECTIO)) rc = 0; -out_digsig: - if ((mask & MAY_WRITE) && (iint->flags & IMA_DIGSIG) && +out_locked: + if ((mask & MAY_WRITE) && test_bit(IMA_DIGSIG, &iint->atomic_flags) && !(iint->flags & IMA_NEW_FILE)) rc = -EACCES; + mutex_unlock(&iint->mutex); kfree(xattr_value); -out_free: +out: if (pathbuf) __putname(pathbuf); -out: - inode_unlock(inode); - if ((rc && must_appraise) && (ima_appraise & IMA_APPRAISE_ENFORCE)) - return -EACCES; + if (must_appraise) { + if (rc && (ima_appraise & IMA_APPRAISE_ENFORCE)) + return -EACCES; + if (file->f_mode & FMODE_WRITE) + set_bit(IMA_UPDATE_XATTR, &iint->atomic_flags); + } return 0; } diff --git a/security/integrity/integrity.h b/security/integrity/integrity.h index cbc7de33fac75..f43ac351c1729 100644 --- a/security/integrity/integrity.h +++ b/security/integrity/integrity.h @@ -29,11 +29,10 @@ /* iint cache flags */ #define IMA_ACTION_FLAGS 0xff000000 #define IMA_ACTION_RULE_FLAGS 0x06000000 -#define IMA_DIGSIG 0x01000000 -#define IMA_DIGSIG_REQUIRED 0x02000000 -#define IMA_PERMIT_DIRECTIO 0x04000000 -#define IMA_NEW_FILE 0x08000000 -#define EVM_IMMUTABLE_DIGSIG 0x10000000 +#define IMA_DIGSIG_REQUIRED 0x01000000 +#define IMA_PERMIT_DIRECTIO 0x02000000 +#define IMA_NEW_FILE 0x04000000 +#define EVM_IMMUTABLE_DIGSIG 0x08000000 #define IMA_DO_MASK (IMA_MEASURE | IMA_APPRAISE | IMA_AUDIT | \ IMA_APPRAISE_SUBMASK) @@ -54,6 +53,13 @@ #define IMA_APPRAISED_SUBMASK (IMA_FILE_APPRAISED | IMA_MMAP_APPRAISED | \ IMA_BPRM_APPRAISED | IMA_READ_APPRAISED) +/* iint cache atomic_flags */ +#define IMA_CHANGE_XATTR 0 +#define IMA_UPDATE_XATTR 1 +#define IMA_CHANGE_ATTR 2 +#define IMA_DIGSIG 3 +#define IMA_MUST_MEASURE 4 + enum evm_ima_xattr_type { IMA_XATTR_DIGEST = 0x01, EVM_XATTR_HMAC, @@ -102,10 +108,12 @@ struct signature_v2_hdr { /* integrity data associated with an inode */ struct integrity_iint_cache { struct rb_node rb_node; /* rooted in integrity_iint_tree */ + struct mutex mutex; /* protects: version, flags, digest */ struct inode *inode; /* back pointer to inode in question */ u64 version; /* track inode changes */ unsigned long flags; unsigned long measured_pcrs; + unsigned long atomic_flags; enum integrity_status ima_file_status:4; enum integrity_status ima_mmap_status:4; enum integrity_status ima_bprm_status:4; -- GitLab From d467320fdaf328fd6f98fc686b3a3fdd226dbaef Mon Sep 17 00:00:00 2001 From: Mimi Zohar Date: Tue, 23 Jan 2018 10:00:41 -0500 Subject: [PATCH 1788/2269] ima: re-initialize iint->atomic_flags commit e2598077dc6a26c9644393e5c21f22a90dbdccdb upstream. Intermittently security.ima is not being written for new files. This patch re-initializes the new slab iint->atomic_flags field before freeing it. Fixes: commit 0d73a55208e9 ("ima: re-introduce own integrity cache lock") Signed-off-by: Mimi Zohar Signed-off-by: James Morris Cc: Aditya Kali Signed-off-by: Greg Kroah-Hartman --- security/integrity/iint.c | 1 + 1 file changed, 1 insertion(+) diff --git a/security/integrity/iint.c b/security/integrity/iint.c index 2db461c38795b..f4a40fb84b1eb 100644 --- a/security/integrity/iint.c +++ b/security/integrity/iint.c @@ -74,6 +74,7 @@ static void iint_free(struct integrity_iint_cache *iint) iint->ima_hash = NULL; iint->version = 0; iint->flags = 0UL; + iint->atomic_flags = 0UL; iint->ima_file_status = INTEGRITY_UNKNOWN; iint->ima_mmap_status = INTEGRITY_UNKNOWN; iint->ima_bprm_status = INTEGRITY_UNKNOWN; -- GitLab From 5ff1ad556aad473952c1caca6092aac4517ac1ae Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 1 Dec 2018 09:43:00 +0100 Subject: [PATCH 1789/2269] Linux 4.14.85 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index cb131e135c425..58a2482640906 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 84 +SUBLEVEL = 85 EXTRAVERSION = NAME = Petit Gorille -- GitLab From e12b67d81b08a79994ee9b1484a7e0c4601bfdad Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Fri, 30 Nov 2018 14:10:13 -0800 Subject: [PATCH 1790/2269] mm/huge_memory: rename freeze_page() to unmap_page() commit 906f9cdfc2a0800f13683f9e4ebdfd08c12ee81b upstream. The term "freeze" is used in several ways in the kernel, and in mm it has the particular meaning of forcing page refcount temporarily to 0. freeze_page() is just too confusing a name for a function that unmaps a page: rename it unmap_page(), and rename unfreeze_page() remap_page(). Went to change the mention of freeze_page() added later in mm/rmap.c, but found it to be incorrect: ordinary page reclaim reaches there too; but the substance of the comment still seems correct, so edit it down. Link: http://lkml.kernel.org/r/alpine.LSU.2.11.1811261514080.2275@eggly.anvils Fixes: e9b61f19858a5 ("thp: reintroduce split_huge_page()") Signed-off-by: Hugh Dickins Acked-by: Kirill A. Shutemov Cc: Jerome Glisse Cc: Konstantin Khlebnikov Cc: Matthew Wilcox Cc: [4.8+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- mm/huge_memory.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index adacfe66cf3d9..09d7e9c4de89f 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2280,7 +2280,7 @@ void vma_adjust_trans_huge(struct vm_area_struct *vma, } } -static void freeze_page(struct page *page) +static void unmap_page(struct page *page) { enum ttu_flags ttu_flags = TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS | TTU_RMAP_LOCKED | TTU_SPLIT_HUGE_PMD; @@ -2295,7 +2295,7 @@ static void freeze_page(struct page *page) VM_BUG_ON_PAGE(!unmap_success, page); } -static void unfreeze_page(struct page *page) +static void remap_page(struct page *page) { int i; if (PageTransHuge(page)) { @@ -2412,7 +2412,7 @@ static void __split_huge_page(struct page *page, struct list_head *list, spin_unlock_irqrestore(zone_lru_lock(page_zone(head)), flags); - unfreeze_page(head); + remap_page(head); for (i = 0; i < HPAGE_PMD_NR; i++) { struct page *subpage = head + i; @@ -2593,7 +2593,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) } /* - * Racy check if we can split the page, before freeze_page() will + * Racy check if we can split the page, before unmap_page() will * split PMDs */ if (!can_split_huge_page(head, &extra_pins)) { @@ -2602,7 +2602,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) } mlocked = PageMlocked(page); - freeze_page(head); + unmap_page(head); VM_BUG_ON_PAGE(compound_mapcount(head), head); /* Make sure the page is not on per-CPU pagevec as it takes pin */ @@ -2659,7 +2659,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) fail: if (mapping) spin_unlock(&mapping->tree_lock); spin_unlock_irqrestore(zone_lru_lock(page_zone(head)), flags); - unfreeze_page(head); + remap_page(head); ret = -EBUSY; } -- GitLab From 30241d721f90ea1cd783b79049024379d4a5b4f0 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Thu, 5 Apr 2018 16:23:28 -0700 Subject: [PATCH 1791/2269] mm/huge_memory.c: reorder operations in __split_huge_page_tail() commit 605ca5ede7643a01f4c4a15913f9714ac297f8a6 upstream. THP split makes non-atomic change of tail page flags. This is almost ok because tail pages are locked and isolated but this breaks recent changes in page locking: non-atomic operation could clear bit PG_waiters. As a result concurrent sequence get_page_unless_zero() -> lock_page() might block forever. Especially if this page was truncated later. Fix is trivial: clone flags before unfreezing page reference counter. This race exists since commit 62906027091f ("mm: add PageWaiters indicating tasks are waiting for a page bit") while unsave unfreeze itself was added in commit 8df651c7059e ("thp: cleanup split_huge_page()"). clear_compound_head() also must be called before unfreezing page reference because after successful get_page_unless_zero() might follow put_page() which needs correct compound_head(). And replace page_ref_inc()/page_ref_add() with page_ref_unfreeze() which is made especially for that and has semantic of smp_store_release(). Link: http://lkml.kernel.org/r/151844393341.210639.13162088407980624477.stgit@buzz Signed-off-by: Konstantin Khlebnikov Acked-by: Kirill A. Shutemov Cc: Michal Hocko Cc: Nicholas Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- mm/huge_memory.c | 36 +++++++++++++++--------------------- 1 file changed, 15 insertions(+), 21 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 09d7e9c4de89f..8969f5cf31742 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2312,26 +2312,13 @@ static void __split_huge_page_tail(struct page *head, int tail, struct page *page_tail = head + tail; VM_BUG_ON_PAGE(atomic_read(&page_tail->_mapcount) != -1, page_tail); - VM_BUG_ON_PAGE(page_ref_count(page_tail) != 0, page_tail); /* - * tail_page->_refcount is zero and not changing from under us. But - * get_page_unless_zero() may be running from under us on the - * tail_page. If we used atomic_set() below instead of atomic_inc() or - * atomic_add(), we would then run atomic_set() concurrently with - * get_page_unless_zero(), and atomic_set() is implemented in C not - * using locked ops. spin_unlock on x86 sometime uses locked ops - * because of PPro errata 66, 92, so unless somebody can guarantee - * atomic_set() here would be safe on all archs (and not only on x86), - * it's safer to use atomic_inc()/atomic_add(). + * Clone page flags before unfreezing refcount. + * + * After successful get_page_unless_zero() might follow flags change, + * for exmaple lock_page() which set PG_waiters. */ - if (PageAnon(head) && !PageSwapCache(head)) { - page_ref_inc(page_tail); - } else { - /* Additional pin to radix tree */ - page_ref_add(page_tail, 2); - } - page_tail->flags &= ~PAGE_FLAGS_CHECK_AT_PREP; page_tail->flags |= (head->flags & ((1L << PG_referenced) | @@ -2344,14 +2331,21 @@ static void __split_huge_page_tail(struct page *head, int tail, (1L << PG_unevictable) | (1L << PG_dirty))); - /* - * After clearing PageTail the gup refcount can be released. - * Page flags also must be visible before we make the page non-compound. - */ + /* Page flags must be visible before we make the page non-compound. */ smp_wmb(); + /* + * Clear PageTail before unfreezing page refcount. + * + * After successful get_page_unless_zero() might follow put_page() + * which needs correct compound_head(). + */ clear_compound_head(page_tail); + /* Finally unfreeze refcount. Additional reference from page cache. */ + page_ref_unfreeze(page_tail, 1 + (!PageAnon(head) || + PageSwapCache(head))); + if (page_is_young(head)) set_page_young(page_tail); if (page_is_idle(head)) -- GitLab From 16d07443b27746fc819600fbe79742335a471d47 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Fri, 30 Nov 2018 14:10:16 -0800 Subject: [PATCH 1792/2269] mm/huge_memory: splitting set mapping+index before unfreeze commit 173d9d9fd3ddae84c110fea8aedf1f26af6be9ec upstream. Huge tmpfs stress testing has occasionally hit shmem_undo_range()'s VM_BUG_ON_PAGE(page_to_pgoff(page) != index, page). Move the setting of mapping and index up before the page_ref_unfreeze() in __split_huge_page_tail() to fix this: so that a page cache lookup cannot get a reference while the tail's mapping and index are unstable. In fact, might as well move them up before the smp_wmb(): I don't see an actual need for that, but if I'm missing something, this way round is safer than the other, and no less efficient. You might argue that VM_BUG_ON_PAGE(page_to_pgoff(page) != index, page) is misplaced, and should be left until after the trylock_page(); but left as is has not crashed since, and gives more stringent assurance. Link: http://lkml.kernel.org/r/alpine.LSU.2.11.1811261516380.2275@eggly.anvils Fixes: e9b61f19858a5 ("thp: reintroduce split_huge_page()") Requires: 605ca5ede764 ("mm/huge_memory.c: reorder operations in __split_huge_page_tail()") Signed-off-by: Hugh Dickins Acked-by: Kirill A. Shutemov Cc: Konstantin Khlebnikov Cc: Jerome Glisse Cc: Matthew Wilcox Cc: [4.8+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- mm/huge_memory.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 8969f5cf31742..69ffeb439b0fe 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2331,6 +2331,12 @@ static void __split_huge_page_tail(struct page *head, int tail, (1L << PG_unevictable) | (1L << PG_dirty))); + /* ->mapping in first tail page is compound_mapcount */ + VM_BUG_ON_PAGE(tail > 2 && page_tail->mapping != TAIL_MAPPING, + page_tail); + page_tail->mapping = head->mapping; + page_tail->index = head->index + tail; + /* Page flags must be visible before we make the page non-compound. */ smp_wmb(); @@ -2351,12 +2357,6 @@ static void __split_huge_page_tail(struct page *head, int tail, if (page_is_idle(head)) set_page_idle(page_tail); - /* ->mapping in first tail page is compound_mapcount */ - VM_BUG_ON_PAGE(tail > 2 && page_tail->mapping != TAIL_MAPPING, - page_tail); - page_tail->mapping = head->mapping; - - page_tail->index = head->index + tail; page_cpupid_xchg_last(page_tail, page_cpupid_last(head)); lru_add_page_tail(head, page_tail, lruvec, list); } -- GitLab From 6f75a09833839caef7450275cc227dcef1679493 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Fri, 30 Nov 2018 14:10:21 -0800 Subject: [PATCH 1793/2269] mm/huge_memory: fix lockdep complaint on 32-bit i_size_read() commit 006d3ff27e884f80bd7d306b041afc415f63598f upstream. Huge tmpfs testing, on 32-bit kernel with lockdep enabled, showed that __split_huge_page() was using i_size_read() while holding the irq-safe lru_lock and page tree lock, but the 32-bit i_size_read() uses an irq-unsafe seqlock which should not be nested inside them. Instead, read the i_size earlier in split_huge_page_to_list(), and pass the end offset down to __split_huge_page(): all while holding head page lock, which is enough to prevent truncation of that extent before the page tree lock has been taken. Link: http://lkml.kernel.org/r/alpine.LSU.2.11.1811261520070.2275@eggly.anvils Fixes: baa355fd33142 ("thp: file pages support for split_huge_page()") Signed-off-by: Hugh Dickins Acked-by: Kirill A. Shutemov Cc: Jerome Glisse Cc: Konstantin Khlebnikov Cc: Matthew Wilcox Cc: [4.8+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- mm/huge_memory.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 69ffeb439b0fe..930f2aa3bb4d3 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2362,12 +2362,11 @@ static void __split_huge_page_tail(struct page *head, int tail, } static void __split_huge_page(struct page *page, struct list_head *list, - unsigned long flags) + pgoff_t end, unsigned long flags) { struct page *head = compound_head(page); struct zone *zone = page_zone(head); struct lruvec *lruvec; - pgoff_t end = -1; int i; lruvec = mem_cgroup_page_lruvec(head, zone->zone_pgdat); @@ -2375,9 +2374,6 @@ static void __split_huge_page(struct page *page, struct list_head *list, /* complete memcg works before add pages to LRU */ mem_cgroup_split_huge_fixup(head); - if (!PageAnon(page)) - end = DIV_ROUND_UP(i_size_read(head->mapping->host), PAGE_SIZE); - for (i = HPAGE_PMD_NR - 1; i >= 1; i--) { __split_huge_page_tail(head, i, lruvec, list); /* Some pages can be beyond i_size: drop them from page cache */ @@ -2549,6 +2545,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) int count, mapcount, extra_pins, ret; bool mlocked; unsigned long flags; + pgoff_t end; VM_BUG_ON_PAGE(is_huge_zero_page(page), page); VM_BUG_ON_PAGE(!PageLocked(page), page); @@ -2571,6 +2568,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) ret = -EBUSY; goto out; } + end = -1; mapping = NULL; anon_vma_lock_write(anon_vma); } else { @@ -2584,6 +2582,15 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) anon_vma = NULL; i_mmap_lock_read(mapping); + + /* + *__split_huge_page() may need to trim off pages beyond EOF: + * but on 32-bit, i_size_read() takes an irq-unsafe seqlock, + * which cannot be nested inside the page tree lock. So note + * end now: i_size itself may be changed at any moment, but + * head page lock is good enough to serialize the trimming. + */ + end = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE); } /* @@ -2633,7 +2640,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) if (mapping) __dec_node_page_state(page, NR_SHMEM_THPS); spin_unlock(&pgdata->split_queue_lock); - __split_huge_page(page, list, flags); + __split_huge_page(page, list, end, flags); if (PageSwapCache(head)) { swp_entry_t entry = { .val = page_private(head) }; -- GitLab From 81d2848c99cc4951eb3f7c3ce4313e830b07cc2b Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Fri, 30 Nov 2018 14:10:25 -0800 Subject: [PATCH 1794/2269] mm/khugepaged: collapse_shmem() stop if punched or truncated commit 701270fa193aadf00bdcf607738f64997275d4c7 upstream. Huge tmpfs testing showed that although collapse_shmem() recognizes a concurrently truncated or hole-punched page correctly, its handling of holes was liable to refill an emptied extent. Add check to stop that. Link: http://lkml.kernel.org/r/alpine.LSU.2.11.1811261522040.2275@eggly.anvils Fixes: f3f0e1d2150b2 ("khugepaged: add support of collapse for tmpfs/shmem pages") Signed-off-by: Hugh Dickins Reviewed-by: Matthew Wilcox Cc: Kirill A. Shutemov Cc: Jerome Glisse Cc: Konstantin Khlebnikov Cc: [4.8+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- mm/khugepaged.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 0a5bb3e8a8a3c..d4a06afbeda42 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -1352,6 +1352,16 @@ static void collapse_shmem(struct mm_struct *mm, radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) { int n = min(iter.index, end) - index; + /* + * Stop if extent has been hole-punched, and is now completely + * empty (the more obvious i_size_read() check would take an + * irq-unsafe seqlock on 32-bit). + */ + if (n >= HPAGE_PMD_NR) { + result = SCAN_TRUNCATED; + goto tree_locked; + } + /* * Handle holes in the radix tree: charge it from shmem and * insert relevant subpage of new_page into the radix-tree. @@ -1463,6 +1473,11 @@ out_unlock: if (result == SCAN_SUCCEED && index < end) { int n = end - index; + /* Stop if extent has been truncated, and is now empty */ + if (n >= HPAGE_PMD_NR) { + result = SCAN_TRUNCATED; + goto tree_locked; + } if (!shmem_charge(mapping->host, n)) { result = SCAN_FAIL; goto tree_locked; -- GitLab From 98f1ae169c2e7264b198c3df08612f1bcd0fcfac Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Fri, 30 Nov 2018 14:10:29 -0800 Subject: [PATCH 1795/2269] mm/khugepaged: fix crashes due to misaccounted holes commit aaa52e340073b7f4593b3c4ddafcafa70cf838b5 upstream. Huge tmpfs testing on a shortish file mapped into a pmd-rounded extent hit shmem_evict_inode()'s WARN_ON(inode->i_blocks) followed by clear_inode()'s BUG_ON(inode->i_data.nrpages) when the file was later closed and unlinked. khugepaged's collapse_shmem() was forgetting to update mapping->nrpages on the rollback path, after it had added but then needs to undo some holes. There is indeed an irritating asymmetry between shmem_charge(), whose callers want it to increment nrpages after successfully accounting blocks, and shmem_uncharge(), when __delete_from_page_cache() already decremented nrpages itself: oh well, just add a comment on that to them both. And shmem_recalc_inode() is supposed to be called when the accounting is expected to be in balance (so it can deduce from imbalance that reclaim discarded some pages): so change shmem_charge() to update nrpages earlier (though it's rare for the difference to matter at all). Link: http://lkml.kernel.org/r/alpine.LSU.2.11.1811261523450.2275@eggly.anvils Fixes: 800d8c63b2e98 ("shmem: add huge pages support") Fixes: f3f0e1d2150b2 ("khugepaged: add support of collapse for tmpfs/shmem pages") Signed-off-by: Hugh Dickins Acked-by: Kirill A. Shutemov Cc: Jerome Glisse Cc: Konstantin Khlebnikov Cc: Matthew Wilcox Cc: [4.8+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- mm/khugepaged.c | 4 +++- mm/shmem.c | 6 +++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/mm/khugepaged.c b/mm/khugepaged.c index d4a06afbeda42..be7b0863e33c4 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -1539,8 +1539,10 @@ tree_unlocked: *hpage = NULL; } else { /* Something went wrong: rollback changes to the radix-tree */ - shmem_uncharge(mapping->host, nr_none); spin_lock_irq(&mapping->tree_lock); + mapping->nrpages -= nr_none; + shmem_uncharge(mapping->host, nr_none); + radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) { if (iter.index >= end) diff --git a/mm/shmem.c b/mm/shmem.c index fa08f56fd5e52..177fef62cbbd8 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -296,12 +296,14 @@ bool shmem_charge(struct inode *inode, long pages) if (!shmem_inode_acct_block(inode, pages)) return false; + /* nrpages adjustment first, then shmem_recalc_inode() when balanced */ + inode->i_mapping->nrpages += pages; + spin_lock_irqsave(&info->lock, flags); info->alloced += pages; inode->i_blocks += pages * BLOCKS_PER_PAGE; shmem_recalc_inode(inode); spin_unlock_irqrestore(&info->lock, flags); - inode->i_mapping->nrpages += pages; return true; } @@ -311,6 +313,8 @@ void shmem_uncharge(struct inode *inode, long pages) struct shmem_inode_info *info = SHMEM_I(inode); unsigned long flags; + /* nrpages adjustment done by __delete_from_page_cache() or caller */ + spin_lock_irqsave(&info->lock, flags); info->alloced -= pages; inode->i_blocks -= pages * BLOCKS_PER_PAGE; -- GitLab From 5021918a515cd86fb8715ec0f24ae66f15f5aba5 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Fri, 30 Nov 2018 14:10:35 -0800 Subject: [PATCH 1796/2269] mm/khugepaged: collapse_shmem() remember to clear holes commit 2af8ff291848cc4b1cce24b6c943394eb2c761e8 upstream. Huge tmpfs testing reminds us that there is no __GFP_ZERO in the gfp flags khugepaged uses to allocate a huge page - in all common cases it would just be a waste of effort - so collapse_shmem() must remember to clear out any holes that it instantiates. The obvious place to do so, where they are put into the page cache tree, is not a good choice: because interrupts are disabled there. Leave it until further down, once success is assured, where the other pages are copied (before setting PageUptodate). Link: http://lkml.kernel.org/r/alpine.LSU.2.11.1811261525080.2275@eggly.anvils Fixes: f3f0e1d2150b2 ("khugepaged: add support of collapse for tmpfs/shmem pages") Signed-off-by: Hugh Dickins Acked-by: Kirill A. Shutemov Cc: Jerome Glisse Cc: Konstantin Khlebnikov Cc: Matthew Wilcox Cc: [4.8+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- mm/khugepaged.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/mm/khugepaged.c b/mm/khugepaged.c index be7b0863e33c4..f535dbe3f9f52 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -1502,7 +1502,12 @@ tree_unlocked: * Replacing old pages with new one has succeed, now we need to * copy the content and free old pages. */ + index = start; list_for_each_entry_safe(page, tmp, &pagelist, lru) { + while (index < page->index) { + clear_highpage(new_page + (index % HPAGE_PMD_NR)); + index++; + } copy_highpage(new_page + (page->index % HPAGE_PMD_NR), page); list_del(&page->lru); @@ -1512,6 +1517,11 @@ tree_unlocked: ClearPageActive(page); ClearPageUnevictable(page); put_page(page); + index++; + } + while (index < end) { + clear_highpage(new_page + (index % HPAGE_PMD_NR)); + index++; } local_irq_save(flags); -- GitLab From b447a6adf423cd80862a5944e4e9d9c91afff05a Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Fri, 30 Nov 2018 14:10:39 -0800 Subject: [PATCH 1797/2269] mm/khugepaged: minor reorderings in collapse_shmem() commit 042a30824871fa3149b0127009074b75cc25863c upstream. Several cleanups in collapse_shmem(): most of which probably do not really matter, beyond doing things in a more familiar and reassuring order. Simplify the failure gotos in the main loop, and on success update stats while interrupts still disabled from the last iteration. Link: http://lkml.kernel.org/r/alpine.LSU.2.11.1811261526400.2275@eggly.anvils Fixes: f3f0e1d2150b2 ("khugepaged: add support of collapse for tmpfs/shmem pages") Signed-off-by: Hugh Dickins Acked-by: Kirill A. Shutemov Cc: Jerome Glisse Cc: Konstantin Khlebnikov Cc: Matthew Wilcox Cc: [4.8+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- mm/khugepaged.c | 73 ++++++++++++++++++++----------------------------- 1 file changed, 30 insertions(+), 43 deletions(-) diff --git a/mm/khugepaged.c b/mm/khugepaged.c index f535dbe3f9f52..0eac4344477a9 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -1333,13 +1333,12 @@ static void collapse_shmem(struct mm_struct *mm, goto out; } + __SetPageLocked(new_page); + __SetPageSwapBacked(new_page); new_page->index = start; new_page->mapping = mapping; - __SetPageSwapBacked(new_page); - __SetPageLocked(new_page); BUG_ON(!page_ref_freeze(new_page, 1)); - /* * At this point the new_page is 'frozen' (page_count() is zero), locked * and not up-to-date. It's safe to insert it into radix tree, because @@ -1368,13 +1367,13 @@ static void collapse_shmem(struct mm_struct *mm, */ if (n && !shmem_charge(mapping->host, n)) { result = SCAN_FAIL; - break; + goto tree_locked; } - nr_none += n; for (; index < min(iter.index, end); index++) { radix_tree_insert(&mapping->page_tree, index, new_page + (index % HPAGE_PMD_NR)); } + nr_none += n; /* We are done. */ if (index >= end) @@ -1390,12 +1389,12 @@ static void collapse_shmem(struct mm_struct *mm, result = SCAN_FAIL; goto tree_unlocked; } - spin_lock_irq(&mapping->tree_lock); } else if (trylock_page(page)) { get_page(page); + spin_unlock_irq(&mapping->tree_lock); } else { result = SCAN_PAGE_LOCK; - break; + goto tree_locked; } /* @@ -1410,11 +1409,10 @@ static void collapse_shmem(struct mm_struct *mm, result = SCAN_TRUNCATED; goto out_unlock; } - spin_unlock_irq(&mapping->tree_lock); if (isolate_lru_page(page)) { result = SCAN_DEL_PAGE_LRU; - goto out_isolate_failed; + goto out_unlock; } if (page_mapped(page)) @@ -1436,7 +1434,9 @@ static void collapse_shmem(struct mm_struct *mm, */ if (!page_ref_freeze(page, 3)) { result = SCAN_PAGE_COUNT; - goto out_lru; + spin_unlock_irq(&mapping->tree_lock); + putback_lru_page(page); + goto out_unlock; } /* @@ -1452,17 +1452,10 @@ static void collapse_shmem(struct mm_struct *mm, slot = radix_tree_iter_resume(slot, &iter); index++; continue; -out_lru: - spin_unlock_irq(&mapping->tree_lock); - putback_lru_page(page); -out_isolate_failed: - unlock_page(page); - put_page(page); - goto tree_unlocked; out_unlock: unlock_page(page); put_page(page); - break; + goto tree_unlocked; } /* @@ -1470,7 +1463,7 @@ out_unlock: * This code only triggers if there's nothing in radix tree * beyond 'end'. */ - if (result == SCAN_SUCCEED && index < end) { + if (index < end) { int n = end - index; /* Stop if extent has been truncated, and is now empty */ @@ -1482,7 +1475,6 @@ out_unlock: result = SCAN_FAIL; goto tree_locked; } - for (; index < end; index++) { radix_tree_insert(&mapping->page_tree, index, new_page + (index % HPAGE_PMD_NR)); @@ -1490,14 +1482,19 @@ out_unlock: nr_none += n; } + __inc_node_page_state(new_page, NR_SHMEM_THPS); + if (nr_none) { + struct zone *zone = page_zone(new_page); + + __mod_node_page_state(zone->zone_pgdat, NR_FILE_PAGES, nr_none); + __mod_node_page_state(zone->zone_pgdat, NR_SHMEM, nr_none); + } + tree_locked: spin_unlock_irq(&mapping->tree_lock); tree_unlocked: if (result == SCAN_SUCCEED) { - unsigned long flags; - struct zone *zone = page_zone(new_page); - /* * Replacing old pages with new one has succeed, now we need to * copy the content and free old pages. @@ -1511,11 +1508,11 @@ tree_unlocked: copy_highpage(new_page + (page->index % HPAGE_PMD_NR), page); list_del(&page->lru); - unlock_page(page); - page_ref_unfreeze(page, 1); page->mapping = NULL; + page_ref_unfreeze(page, 1); ClearPageActive(page); ClearPageUnevictable(page); + unlock_page(page); put_page(page); index++; } @@ -1524,28 +1521,17 @@ tree_unlocked: index++; } - local_irq_save(flags); - __inc_node_page_state(new_page, NR_SHMEM_THPS); - if (nr_none) { - __mod_node_page_state(zone->zone_pgdat, NR_FILE_PAGES, nr_none); - __mod_node_page_state(zone->zone_pgdat, NR_SHMEM, nr_none); - } - local_irq_restore(flags); - - /* - * Remove pte page tables, so we can re-faulti - * the page as huge. - */ - retract_page_tables(mapping, start); - /* Everything is ready, let's unfreeze the new_page */ - set_page_dirty(new_page); SetPageUptodate(new_page); page_ref_unfreeze(new_page, HPAGE_PMD_NR); + set_page_dirty(new_page); mem_cgroup_commit_charge(new_page, memcg, false, true); lru_cache_add_anon(new_page); - unlock_page(new_page); + /* + * Remove pte page tables, so we can re-fault the page as huge. + */ + retract_page_tables(mapping, start); *hpage = NULL; } else { /* Something went wrong: rollback changes to the radix-tree */ @@ -1578,8 +1564,8 @@ tree_unlocked: slot, page); slot = radix_tree_iter_resume(slot, &iter); spin_unlock_irq(&mapping->tree_lock); - putback_lru_page(page); unlock_page(page); + putback_lru_page(page); spin_lock_irq(&mapping->tree_lock); } VM_BUG_ON(nr_none); @@ -1588,9 +1574,10 @@ tree_unlocked: /* Unfreeze new_page, caller would take care about freeing it */ page_ref_unfreeze(new_page, 1); mem_cgroup_cancel_charge(new_page, memcg, true); - unlock_page(new_page); new_page->mapping = NULL; } + + unlock_page(new_page); out: VM_BUG_ON(!list_empty(&pagelist)); /* TODO: tracepoints */ -- GitLab From 84c55f8d40102855f3b86ffdfad6b5e4da517858 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Fri, 30 Nov 2018 14:10:43 -0800 Subject: [PATCH 1798/2269] mm/khugepaged: collapse_shmem() without freezing new_page commit 87c460a0bded56195b5eb497d44709777ef7b415 upstream. khugepaged's collapse_shmem() does almost all of its work, to assemble the huge new_page from 512 scattered old pages, with the new_page's refcount frozen to 0 (and refcounts of all old pages so far also frozen to 0). Including shmem_getpage() to read in any which were out on swap, memory reclaim if necessary to allocate their intermediate pages, and copying over all the data from old to new. Imagine the frozen refcount as a spinlock held, but without any lock debugging to highlight the abuse: it's not good, and under serious load heads into lockups - speculative getters of the page are not expecting to spin while khugepaged is rescheduled. One can get a little further under load by hacking around elsewhere; but fortunately, freezing the new_page turns out to have been entirely unnecessary, with no hacks needed elsewhere. The huge new_page lock is already held throughout, and guards all its subpages as they are brought one by one into the page cache tree; and anything reading the data in that page, without the lock, before it has been marked PageUptodate, would already be in the wrong. So simply eliminate the freezing of the new_page. Each of the old pages remains frozen with refcount 0 after it has been replaced by a new_page subpage in the page cache tree, until they are all unfrozen on success or failure: just as before. They could be unfrozen sooner, but cause no problem once no longer visible to find_get_entry(), filemap_map_pages() and other speculative lookups. Link: http://lkml.kernel.org/r/alpine.LSU.2.11.1811261527570.2275@eggly.anvils Fixes: f3f0e1d2150b2 ("khugepaged: add support of collapse for tmpfs/shmem pages") Signed-off-by: Hugh Dickins Acked-by: Kirill A. Shutemov Cc: Jerome Glisse Cc: Konstantin Khlebnikov Cc: Matthew Wilcox Cc: [4.8+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- mm/khugepaged.c | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 0eac4344477a9..45bf0e5775f70 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -1288,7 +1288,7 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff) * collapse_shmem - collapse small tmpfs/shmem pages into huge one. * * Basic scheme is simple, details are more complex: - * - allocate and freeze a new huge page; + * - allocate and lock a new huge page; * - scan over radix tree replacing old pages the new one * + swap in pages if necessary; * + fill in gaps; @@ -1296,11 +1296,11 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff) * - if replacing succeed: * + copy data over; * + free old pages; - * + unfreeze huge page; + * + unlock huge page; * - if replacing failed; * + put all pages back and unfreeze them; * + restore gaps in the radix-tree; - * + free huge page; + * + unlock and free huge page; */ static void collapse_shmem(struct mm_struct *mm, struct address_space *mapping, pgoff_t start, @@ -1337,13 +1337,11 @@ static void collapse_shmem(struct mm_struct *mm, __SetPageSwapBacked(new_page); new_page->index = start; new_page->mapping = mapping; - BUG_ON(!page_ref_freeze(new_page, 1)); /* - * At this point the new_page is 'frozen' (page_count() is zero), locked - * and not up-to-date. It's safe to insert it into radix tree, because - * nobody would be able to map it or use it in other way until we - * unfreeze it. + * At this point the new_page is locked and not up-to-date. + * It's safe to insert it into the page cache, because nobody would + * be able to map it or use it in another way until we unlock it. */ index = start; @@ -1521,9 +1519,8 @@ tree_unlocked: index++; } - /* Everything is ready, let's unfreeze the new_page */ SetPageUptodate(new_page); - page_ref_unfreeze(new_page, HPAGE_PMD_NR); + page_ref_add(new_page, HPAGE_PMD_NR - 1); set_page_dirty(new_page); mem_cgroup_commit_charge(new_page, memcg, false, true); lru_cache_add_anon(new_page); @@ -1571,8 +1568,6 @@ tree_unlocked: VM_BUG_ON(nr_none); spin_unlock_irq(&mapping->tree_lock); - /* Unfreeze new_page, caller would take care about freeing it */ - page_ref_unfreeze(new_page, 1); mem_cgroup_cancel_charge(new_page, memcg, true); new_page->mapping = NULL; } -- GitLab From 8f85b74fb1c0a7fa267f828a90b582e27fdf56e7 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Fri, 30 Nov 2018 14:10:47 -0800 Subject: [PATCH 1799/2269] mm/khugepaged: collapse_shmem() do not crash on Compound commit 06a5e1268a5fb9c2b346a3da6b97e85f2eba0f07 upstream. collapse_shmem()'s VM_BUG_ON_PAGE(PageTransCompound) was unsafe: before it holds page lock of the first page, racing truncation then extension might conceivably have inserted a hugepage there already. Fail with the SCAN_PAGE_COMPOUND result, instead of crashing (CONFIG_DEBUG_VM=y) or otherwise mishandling the unexpected hugepage - though later we might code up a more constructive way of handling it, with SCAN_SUCCESS. Link: http://lkml.kernel.org/r/alpine.LSU.2.11.1811261529310.2275@eggly.anvils Fixes: f3f0e1d2150b2 ("khugepaged: add support of collapse for tmpfs/shmem pages") Signed-off-by: Hugh Dickins Cc: Kirill A. Shutemov Cc: Jerome Glisse Cc: Konstantin Khlebnikov Cc: Matthew Wilcox Cc: [4.8+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- mm/khugepaged.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 45bf0e5775f70..d27a73737f1ad 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -1401,7 +1401,15 @@ static void collapse_shmem(struct mm_struct *mm, */ VM_BUG_ON_PAGE(!PageLocked(page), page); VM_BUG_ON_PAGE(!PageUptodate(page), page); - VM_BUG_ON_PAGE(PageTransCompound(page), page); + + /* + * If file was truncated then extended, or hole-punched, before + * we locked the first page, then a THP might be there already. + */ + if (PageTransCompound(page)) { + result = SCAN_PAGE_COMPOUND; + goto out_unlock; + } if (page_mapping(page) != mapping) { result = SCAN_TRUNCATED; -- GitLab From 30cdc0c3bac950bebd3ba59f5ff980cdd3710e0f Mon Sep 17 00:00:00 2001 From: Matthias Schwarzott Date: Mon, 30 Oct 2017 06:07:29 -0400 Subject: [PATCH 1800/2269] media: em28xx: Fix use-after-free when disconnecting [ Upstream commit 910b0797fa9e8af09c44a3fa36cb310ba7a7218d ] Fix bug by moving the i2c_unregister_device calls after deregistration of dvb frontend. The new style i2c drivers already destroys the frontend object at i2c_unregister_device time. When the dvb frontend is unregistered afterwards it leads to this oops: [ 6058.866459] BUG: unable to handle kernel NULL pointer dereference at 00000000000001f8 [ 6058.866578] IP: dvb_frontend_stop+0x30/0xd0 [dvb_core] [ 6058.866644] PGD 0 [ 6058.866646] P4D 0 [ 6058.866726] Oops: 0000 [#1] SMP [ 6058.866768] Modules linked in: rc_pinnacle_pctv_hd(O) em28xx_rc(O) si2157(O) si2168(O) em28xx_dvb(O) em28xx(O) si2165(O) a8293(O) tda10071(O) tea5767(O) tuner(O) cx23885(O) tda18271(O) videobuf2_dvb(O) videobuf2_dma_sg(O) m88ds3103(O) tveeprom(O) cx2341x(O) v4l2_common(O) dvb_core(O) rc_core(O) videobuf2_memops(O) videobuf2_v4l2(O) videobuf2_core(O) videodev(O) media(O) bluetooth ecdh_generic ums_realtek uas rtl8192cu rtl_usb rtl8192c_common rtlwifi usb_storage snd_hda_codec_realtek snd_hda_codec_hdmi snd_hda_codec_generic i2c_mux snd_hda_intel snd_hda_codec snd_hwdep x86_pkg_temp_thermal snd_hda_core kvm_intel kvm irqbypass [last unloaded: videobuf2_memops] [ 6058.867497] CPU: 2 PID: 7349 Comm: kworker/2:0 Tainted: G W O 4.13.9-gentoo #1 [ 6058.867595] Hardware name: MEDION E2050 2391/H81H3-EM2, BIOS H81EM2W08.308 08/25/2014 [ 6058.867692] Workqueue: usb_hub_wq hub_event [ 6058.867746] task: ffff88011a15e040 task.stack: ffffc90003074000 [ 6058.867825] RIP: 0010:dvb_frontend_stop+0x30/0xd0 [dvb_core] [ 6058.867896] RSP: 0018:ffffc90003077b58 EFLAGS: 00010293 [ 6058.867964] RAX: 0000000000000000 RBX: 0000000000000000 RCX: 000000010040001f [ 6058.868056] RDX: ffff88011a15e040 RSI: ffffea000464e400 RDI: ffff88001cbe3028 [ 6058.868150] RBP: ffffc90003077b68 R08: ffff880119390380 R09: 000000010040001f [ 6058.868241] R10: ffffc90003077b18 R11: 000000000001e200 R12: ffff88001cbe3028 [ 6058.868330] R13: ffff88001cbe68d0 R14: ffff8800cf734000 R15: ffff8800cf734098 [ 6058.868419] FS: 0000000000000000(0000) GS:ffff88011fb00000(0000) knlGS:0000000000000000 [ 6058.868511] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 6058.868578] CR2: 00000000000001f8 CR3: 00000001113c5000 CR4: 00000000001406e0 [ 6058.868662] Call Trace: [ 6058.868705] dvb_unregister_frontend+0x2a/0x80 [dvb_core] [ 6058.868774] em28xx_dvb_fini+0x132/0x220 [em28xx_dvb] [ 6058.868840] em28xx_close_extension+0x34/0x90 [em28xx] [ 6058.868902] em28xx_usb_disconnect+0x4e/0x70 [em28xx] [ 6058.868968] usb_unbind_interface+0x6d/0x260 [ 6058.869025] device_release_driver_internal+0x150/0x210 [ 6058.869094] device_release_driver+0xd/0x10 [ 6058.869150] bus_remove_device+0xe4/0x160 [ 6058.869204] device_del+0x1ce/0x2f0 [ 6058.869253] usb_disable_device+0x99/0x270 [ 6058.869306] usb_disconnect+0x8d/0x260 [ 6058.869359] hub_event+0x93d/0x1520 [ 6058.869408] ? dequeue_task_fair+0xae5/0xd20 [ 6058.869467] process_one_work+0x1d9/0x3e0 [ 6058.869522] worker_thread+0x43/0x3e0 [ 6058.869576] kthread+0x104/0x140 [ 6058.869602] ? trace_event_raw_event_workqueue_work+0x80/0x80 [ 6058.869640] ? kthread_create_on_node+0x40/0x40 [ 6058.869673] ret_from_fork+0x22/0x30 [ 6058.869698] Code: 54 49 89 fc 53 48 8b 9f 18 03 00 00 0f 1f 44 00 00 41 83 bc 24 04 05 00 00 02 74 0c 41 c7 84 24 04 05 00 00 01 00 00 00 0f ae f0 <48> 8b bb f8 01 00 00 48 85 ff 74 5c e8 df 40 f0 e0 48 8b 93 f8 [ 6058.869850] RIP: dvb_frontend_stop+0x30/0xd0 [dvb_core] RSP: ffffc90003077b58 [ 6058.869894] CR2: 00000000000001f8 [ 6058.875880] ---[ end trace 717eecf7193b3fc6 ]--- Signed-off-by: Matthias Schwarzott Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/usb/em28xx/em28xx-dvb.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/media/usb/em28xx/em28xx-dvb.c b/drivers/media/usb/em28xx/em28xx-dvb.c index 4a7db623fe291..29cdaaf1ed90b 100644 --- a/drivers/media/usb/em28xx/em28xx-dvb.c +++ b/drivers/media/usb/em28xx/em28xx-dvb.c @@ -2105,6 +2105,8 @@ static int em28xx_dvb_fini(struct em28xx *dev) } } + em28xx_unregister_dvb(dvb); + /* remove I2C SEC */ client = dvb->i2c_client_sec; if (client) { @@ -2126,7 +2128,6 @@ static int em28xx_dvb_fini(struct em28xx *dev) i2c_unregister_device(client); } - em28xx_unregister_dvb(dvb); kfree(dvb); dev->dvb = NULL; kref_put(&dev->ref, em28xx_free_device); -- GitLab From ba9606d0121b644019d0699b5973926cc1a91c10 Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Tue, 12 Jun 2018 09:33:16 +0200 Subject: [PATCH 1801/2269] ubi: Initialize Fastmap checkmapping correctly commit 25677478474a91fa1b46f19a4a591a9848bca6fb upstream We cannot do it last, otherwithse it will be skipped for dynamic volumes. Reported-by: Lachmann, Juergen Fixes: 34653fd8c46e ("ubi: fastmap: Check each mapping only once") Signed-off-by: Richard Weinberger Signed-off-by: Sudip Mukherjee Signed-off-by: Sasha Levin --- drivers/mtd/ubi/vtbl.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/mtd/ubi/vtbl.c b/drivers/mtd/ubi/vtbl.c index 94d7a865b135a..7504f430c011b 100644 --- a/drivers/mtd/ubi/vtbl.c +++ b/drivers/mtd/ubi/vtbl.c @@ -578,6 +578,16 @@ static int init_volumes(struct ubi_device *ubi, vol->ubi = ubi; reserved_pebs += vol->reserved_pebs; + /* + * We use ubi->peb_count and not vol->reserved_pebs because + * we want to keep the code simple. Otherwise we'd have to + * resize/check the bitmap upon volume resize too. + * Allocating a few bytes more does not hurt. + */ + err = ubi_fastmap_init_checkmap(vol, ubi->peb_count); + if (err) + return err; + /* * In case of dynamic volume UBI knows nothing about how many * data is stored there. So assume the whole volume is used. @@ -620,16 +630,6 @@ static int init_volumes(struct ubi_device *ubi, (long long)(vol->used_ebs - 1) * vol->usable_leb_size; vol->used_bytes += av->last_data_size; vol->last_eb_bytes = av->last_data_size; - - /* - * We use ubi->peb_count and not vol->reserved_pebs because - * we want to keep the code simple. Otherwise we'd have to - * resize/check the bitmap upon volume resize too. - * Allocating a few bytes more does not hurt. - */ - err = ubi_fastmap_init_checkmap(vol, ubi->peb_count); - if (err) - return err; } /* And add the layout volume */ -- GitLab From 2fd0d0f9bb59ec5b628622e30f22158cb564c4ea Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 26 Jul 2018 15:17:46 +0200 Subject: [PATCH 1802/2269] libceph: store ceph_auth_handshake pointer in ceph_connection commit 262614c4294d33b1f19e0d18c0091d9c329b544a upstream. We already copy authorizer_reply_buf and authorizer_reply_buf_len into ceph_connection. Factoring out __prepare_write_connect() requires two more: authorizer_buf and authorizer_buf_len. Store the pointer to the handshake in con->auth rather than piling on. Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil Signed-off-by: Ben Hutchings Signed-off-by: Sasha Levin --- include/linux/ceph/messenger.h | 3 +- net/ceph/messenger.c | 54 ++++++++++++++++------------------ 2 files changed, 27 insertions(+), 30 deletions(-) diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index ead9d85f1c111..9056077c023f2 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -203,9 +203,8 @@ struct ceph_connection { attempt for this connection, client */ u32 peer_global_seq; /* peer's global seq for this connection */ + struct ceph_auth_handshake *auth; int auth_retry; /* true if we need a newer authorizer */ - void *auth_reply_buf; /* where to put the authorizer reply */ - int auth_reply_buf_len; struct mutex mutex; diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 5281da82371a6..3a82e6d2864b6 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -1411,24 +1411,26 @@ static void prepare_write_keepalive(struct ceph_connection *con) * Connection negotiation. */ -static struct ceph_auth_handshake *get_connect_authorizer(struct ceph_connection *con, - int *auth_proto) +static int get_connect_authorizer(struct ceph_connection *con) { struct ceph_auth_handshake *auth; + int auth_proto; if (!con->ops->get_authorizer) { + con->auth = NULL; con->out_connect.authorizer_protocol = CEPH_AUTH_UNKNOWN; con->out_connect.authorizer_len = 0; - return NULL; + return 0; } - auth = con->ops->get_authorizer(con, auth_proto, con->auth_retry); + auth = con->ops->get_authorizer(con, &auth_proto, con->auth_retry); if (IS_ERR(auth)) - return auth; + return PTR_ERR(auth); - con->auth_reply_buf = auth->authorizer_reply_buf; - con->auth_reply_buf_len = auth->authorizer_reply_buf_len; - return auth; + con->auth = auth; + con->out_connect.authorizer_protocol = cpu_to_le32(auth_proto); + con->out_connect.authorizer_len = cpu_to_le32(auth->authorizer_buf_len); + return 0; } /* @@ -1448,8 +1450,7 @@ static int prepare_write_connect(struct ceph_connection *con) { unsigned int global_seq = get_global_seq(con->msgr, 0); int proto; - int auth_proto; - struct ceph_auth_handshake *auth; + int ret; switch (con->peer_name.type) { case CEPH_ENTITY_TYPE_MON: @@ -1476,20 +1477,15 @@ static int prepare_write_connect(struct ceph_connection *con) con->out_connect.protocol_version = cpu_to_le32(proto); con->out_connect.flags = 0; - auth_proto = CEPH_AUTH_UNKNOWN; - auth = get_connect_authorizer(con, &auth_proto); - if (IS_ERR(auth)) - return PTR_ERR(auth); - - con->out_connect.authorizer_protocol = cpu_to_le32(auth_proto); - con->out_connect.authorizer_len = auth ? - cpu_to_le32(auth->authorizer_buf_len) : 0; + ret = get_connect_authorizer(con); + if (ret) + return ret; con_out_kvec_add(con, sizeof (con->out_connect), &con->out_connect); - if (auth && auth->authorizer_buf_len) - con_out_kvec_add(con, auth->authorizer_buf_len, - auth->authorizer_buf); + if (con->auth) + con_out_kvec_add(con, con->auth->authorizer_buf_len, + con->auth->authorizer_buf); con->out_more = 0; con_flag_set(con, CON_FLAG_WRITE_PENDING); @@ -1753,11 +1749,14 @@ static int read_partial_connect(struct ceph_connection *con) if (ret <= 0) goto out; - size = le32_to_cpu(con->in_reply.authorizer_len); - end += size; - ret = read_partial(con, end, size, con->auth_reply_buf); - if (ret <= 0) - goto out; + if (con->auth) { + size = le32_to_cpu(con->in_reply.authorizer_len); + end += size; + ret = read_partial(con, end, size, + con->auth->authorizer_reply_buf); + if (ret <= 0) + goto out; + } dout("read_partial_connect %p tag %d, con_seq = %u, g_seq = %u\n", con, (int)con->in_reply.tag, @@ -1765,7 +1764,6 @@ static int read_partial_connect(struct ceph_connection *con) le32_to_cpu(con->in_reply.global_seq)); out: return ret; - } /* @@ -2048,7 +2046,7 @@ static int process_connect(struct ceph_connection *con) dout("process_connect on %p tag %d\n", con, (int)con->in_tag); - if (con->auth_reply_buf) { + if (con->auth) { /* * Any connection that defines ->get_authorizer() * should also define ->verify_authorizer_reply(). -- GitLab From 66abd96062b627f0ee20a684ebba48cec80233d7 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 26 Jul 2018 17:43:47 +0200 Subject: [PATCH 1803/2269] libceph: factor out __prepare_write_connect() commit c0f56b483aa09c99bfe97409a43ad786f33b8a5a upstream. Will be used for sending ceph_msg_connect with an updated authorizer, after the server challenges the initial authorizer. Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil Signed-off-by: Ben Hutchings Signed-off-by: Sasha Levin --- net/ceph/messenger.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 3a82e6d2864b6..0b121327d32f3 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -1446,6 +1446,17 @@ static void prepare_write_banner(struct ceph_connection *con) con_flag_set(con, CON_FLAG_WRITE_PENDING); } +static void __prepare_write_connect(struct ceph_connection *con) +{ + con_out_kvec_add(con, sizeof(con->out_connect), &con->out_connect); + if (con->auth) + con_out_kvec_add(con, con->auth->authorizer_buf_len, + con->auth->authorizer_buf); + + con->out_more = 0; + con_flag_set(con, CON_FLAG_WRITE_PENDING); +} + static int prepare_write_connect(struct ceph_connection *con) { unsigned int global_seq = get_global_seq(con->msgr, 0); @@ -1481,15 +1492,7 @@ static int prepare_write_connect(struct ceph_connection *con) if (ret) return ret; - con_out_kvec_add(con, sizeof (con->out_connect), - &con->out_connect); - if (con->auth) - con_out_kvec_add(con, con->auth->authorizer_buf_len, - con->auth->authorizer_buf); - - con->out_more = 0; - con_flag_set(con, CON_FLAG_WRITE_PENDING); - + __prepare_write_connect(con); return 0; } -- GitLab From 0858417b5c2e7bcc0b9e52c4b76e2af3d69e138b Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 26 Jul 2018 18:05:43 +0200 Subject: [PATCH 1804/2269] libceph: factor out __ceph_x_decrypt() commit c571fe24d243bfe7017f0e67fe800b3cc2a1d1f7 upstream. Will be used for decrypting the server challenge which is only preceded by ceph_x_encrypt_header. Drop struct_v check to allow for extending ceph_x_encrypt_header in the future. Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil Signed-off-by: Ben Hutchings Signed-off-by: Sasha Levin --- net/ceph/auth_x.c | 33 ++++++++++++++++++++++++--------- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c index 2f4a1baf5f528..9cac05239346a 100644 --- a/net/ceph/auth_x.c +++ b/net/ceph/auth_x.c @@ -70,25 +70,40 @@ static int ceph_x_encrypt(struct ceph_crypto_key *secret, void *buf, return sizeof(u32) + ciphertext_len; } +static int __ceph_x_decrypt(struct ceph_crypto_key *secret, void *p, + int ciphertext_len) +{ + struct ceph_x_encrypt_header *hdr = p; + int plaintext_len; + int ret; + + ret = ceph_crypt(secret, false, p, ciphertext_len, ciphertext_len, + &plaintext_len); + if (ret) + return ret; + + if (le64_to_cpu(hdr->magic) != CEPHX_ENC_MAGIC) { + pr_err("%s bad magic\n", __func__); + return -EINVAL; + } + + return plaintext_len - sizeof(*hdr); +} + static int ceph_x_decrypt(struct ceph_crypto_key *secret, void **p, void *end) { - struct ceph_x_encrypt_header *hdr = *p + sizeof(u32); - int ciphertext_len, plaintext_len; + int ciphertext_len; int ret; ceph_decode_32_safe(p, end, ciphertext_len, e_inval); ceph_decode_need(p, end, ciphertext_len, e_inval); - ret = ceph_crypt(secret, false, *p, end - *p, ciphertext_len, - &plaintext_len); - if (ret) + ret = __ceph_x_decrypt(secret, *p, ciphertext_len); + if (ret < 0) return ret; - if (hdr->struct_v != 1 || le64_to_cpu(hdr->magic) != CEPHX_ENC_MAGIC) - return -EPERM; - *p += ciphertext_len; - return plaintext_len - sizeof(struct ceph_x_encrypt_header); + return ret; e_inval: return -EINVAL; -- GitLab From a55056e1523990e1ef9d70531ffaf27fd1ddff55 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 27 Jul 2018 16:37:54 +0200 Subject: [PATCH 1805/2269] libceph: factor out encrypt_authorizer() commit 149cac4a50b0b4081b38b2f38de6ef71c27eaa85 upstream. Will be used for encrypting both the initial and updated authorizers. Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil Signed-off-by: Ben Hutchings Signed-off-by: Sasha Levin --- net/ceph/auth_x.c | 49 ++++++++++++++++++++++++++++++++++------------- 1 file changed, 36 insertions(+), 13 deletions(-) diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c index 9cac05239346a..722791f45b2ac 100644 --- a/net/ceph/auth_x.c +++ b/net/ceph/auth_x.c @@ -290,6 +290,38 @@ bad: return -EINVAL; } +/* + * Encode and encrypt the second part (ceph_x_authorize_b) of the + * authorizer. The first part (ceph_x_authorize_a) should already be + * encoded. + */ +static int encrypt_authorizer(struct ceph_x_authorizer *au) +{ + struct ceph_x_authorize_a *msg_a; + struct ceph_x_authorize_b *msg_b; + void *p, *end; + int ret; + + msg_a = au->buf->vec.iov_base; + WARN_ON(msg_a->ticket_blob.secret_id != cpu_to_le64(au->secret_id)); + p = (void *)(msg_a + 1) + le32_to_cpu(msg_a->ticket_blob.blob_len); + end = au->buf->vec.iov_base + au->buf->vec.iov_len; + + msg_b = p + ceph_x_encrypt_offset(); + msg_b->struct_v = 1; + msg_b->nonce = cpu_to_le64(au->nonce); + + ret = ceph_x_encrypt(&au->session_key, p, end - p, sizeof(*msg_b)); + if (ret < 0) + return ret; + + p += ret; + WARN_ON(p > end); + au->buf->vec.iov_len = p - au->buf->vec.iov_base; + + return 0; +} + static void ceph_x_authorizer_cleanup(struct ceph_x_authorizer *au) { ceph_crypto_key_destroy(&au->session_key); @@ -306,7 +338,6 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac, int maxlen; struct ceph_x_authorize_a *msg_a; struct ceph_x_authorize_b *msg_b; - void *p, *end; int ret; int ticket_blob_len = (th->ticket_blob ? th->ticket_blob->vec.iov_len : 0); @@ -350,21 +381,13 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac, dout(" th %p secret_id %lld %lld\n", th, th->secret_id, le64_to_cpu(msg_a->ticket_blob.secret_id)); - p = msg_a + 1; - p += ticket_blob_len; - end = au->buf->vec.iov_base + au->buf->vec.iov_len; - - msg_b = p + ceph_x_encrypt_offset(); - msg_b->struct_v = 1; get_random_bytes(&au->nonce, sizeof(au->nonce)); - msg_b->nonce = cpu_to_le64(au->nonce); - ret = ceph_x_encrypt(&au->session_key, p, end - p, sizeof(*msg_b)); - if (ret < 0) + ret = encrypt_authorizer(au); + if (ret) { + pr_err("failed to encrypt authorizer: %d", ret); goto out_au; + } - p += ret; - WARN_ON(p > end); - au->buf->vec.iov_len = p - au->buf->vec.iov_base; dout(" built authorizer nonce %llx len %d\n", au->nonce, (int)au->buf->vec.iov_len); return 0; -- GitLab From 3fd73c8a71f299e30359a63add1f33e3fd834831 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 27 Jul 2018 19:18:34 +0200 Subject: [PATCH 1806/2269] libceph: add authorizer challenge commit 6daca13d2e72bedaaacfc08f873114c9307d5aea upstream. When a client authenticates with a service, an authorizer is sent with a nonce to the service (ceph_x_authorize_[ab]) and the service responds with a mutation of that nonce (ceph_x_authorize_reply). This lets the client verify the service is who it says it is but it doesn't protect against a replay: someone can trivially capture the exchange and reuse the same authorizer to authenticate themselves. Allow the service to reject an initial authorizer with a random challenge (ceph_x_authorize_challenge). The client then has to respond with an updated authorizer proving they are able to decrypt the service's challenge and that the new authorizer was produced for this specific connection instance. The accepting side requires this challenge and response unconditionally if the client side advertises they have CEPHX_V2 feature bit. This addresses CVE-2018-1128. Link: http://tracker.ceph.com/issues/24836 Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil Signed-off-by: Ben Hutchings Signed-off-by: Sasha Levin --- fs/ceph/mds_client.c | 11 ++++++ include/linux/ceph/auth.h | 8 ++++ include/linux/ceph/messenger.h | 3 ++ include/linux/ceph/msgr.h | 2 +- net/ceph/auth.c | 16 ++++++++ net/ceph/auth_x.c | 72 +++++++++++++++++++++++++++++++--- net/ceph/auth_x_protocol.h | 7 ++++ net/ceph/messenger.c | 17 +++++++- net/ceph/osd_client.c | 11 ++++++ 9 files changed, 140 insertions(+), 7 deletions(-) diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index bf378ddca4dba..a48984dd64262 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -4079,6 +4079,16 @@ static struct ceph_auth_handshake *get_authorizer(struct ceph_connection *con, return auth; } +static int add_authorizer_challenge(struct ceph_connection *con, + void *challenge_buf, int challenge_buf_len) +{ + struct ceph_mds_session *s = con->private; + struct ceph_mds_client *mdsc = s->s_mdsc; + struct ceph_auth_client *ac = mdsc->fsc->client->monc.auth; + + return ceph_auth_add_authorizer_challenge(ac, s->s_auth.authorizer, + challenge_buf, challenge_buf_len); +} static int verify_authorizer_reply(struct ceph_connection *con) { @@ -4142,6 +4152,7 @@ static const struct ceph_connection_operations mds_con_ops = { .put = con_put, .dispatch = dispatch, .get_authorizer = get_authorizer, + .add_authorizer_challenge = add_authorizer_challenge, .verify_authorizer_reply = verify_authorizer_reply, .invalidate_authorizer = invalidate_authorizer, .peer_reset = peer_reset, diff --git a/include/linux/ceph/auth.h b/include/linux/ceph/auth.h index e931da8424a44..6728c2ee0205d 100644 --- a/include/linux/ceph/auth.h +++ b/include/linux/ceph/auth.h @@ -64,6 +64,10 @@ struct ceph_auth_client_ops { /* ensure that an existing authorizer is up to date */ int (*update_authorizer)(struct ceph_auth_client *ac, int peer_type, struct ceph_auth_handshake *auth); + int (*add_authorizer_challenge)(struct ceph_auth_client *ac, + struct ceph_authorizer *a, + void *challenge_buf, + int challenge_buf_len); int (*verify_authorizer_reply)(struct ceph_auth_client *ac, struct ceph_authorizer *a); void (*invalidate_authorizer)(struct ceph_auth_client *ac, @@ -118,6 +122,10 @@ void ceph_auth_destroy_authorizer(struct ceph_authorizer *a); extern int ceph_auth_update_authorizer(struct ceph_auth_client *ac, int peer_type, struct ceph_auth_handshake *a); +int ceph_auth_add_authorizer_challenge(struct ceph_auth_client *ac, + struct ceph_authorizer *a, + void *challenge_buf, + int challenge_buf_len); extern int ceph_auth_verify_authorizer_reply(struct ceph_auth_client *ac, struct ceph_authorizer *a); extern void ceph_auth_invalidate_authorizer(struct ceph_auth_client *ac, diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index 9056077c023f2..18fbe910ed552 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -31,6 +31,9 @@ struct ceph_connection_operations { struct ceph_auth_handshake *(*get_authorizer) ( struct ceph_connection *con, int *proto, int force_new); + int (*add_authorizer_challenge)(struct ceph_connection *con, + void *challenge_buf, + int challenge_buf_len); int (*verify_authorizer_reply) (struct ceph_connection *con); int (*invalidate_authorizer)(struct ceph_connection *con); diff --git a/include/linux/ceph/msgr.h b/include/linux/ceph/msgr.h index 73ae2a9265485..9e50aede46c83 100644 --- a/include/linux/ceph/msgr.h +++ b/include/linux/ceph/msgr.h @@ -91,7 +91,7 @@ struct ceph_entity_inst { #define CEPH_MSGR_TAG_SEQ 13 /* 64-bit int follows with seen seq number */ #define CEPH_MSGR_TAG_KEEPALIVE2 14 /* keepalive2 byte + ceph_timespec */ #define CEPH_MSGR_TAG_KEEPALIVE2_ACK 15 /* keepalive2 reply */ - +#define CEPH_MSGR_TAG_CHALLENGE_AUTHORIZER 16 /* cephx v2 doing server challenge */ /* * connection negotiation diff --git a/net/ceph/auth.c b/net/ceph/auth.c index dbde2b3c3c15d..fbeee068ea149 100644 --- a/net/ceph/auth.c +++ b/net/ceph/auth.c @@ -315,6 +315,22 @@ int ceph_auth_update_authorizer(struct ceph_auth_client *ac, } EXPORT_SYMBOL(ceph_auth_update_authorizer); +int ceph_auth_add_authorizer_challenge(struct ceph_auth_client *ac, + struct ceph_authorizer *a, + void *challenge_buf, + int challenge_buf_len) +{ + int ret = 0; + + mutex_lock(&ac->mutex); + if (ac->ops && ac->ops->add_authorizer_challenge) + ret = ac->ops->add_authorizer_challenge(ac, a, challenge_buf, + challenge_buf_len); + mutex_unlock(&ac->mutex); + return ret; +} +EXPORT_SYMBOL(ceph_auth_add_authorizer_challenge); + int ceph_auth_verify_authorizer_reply(struct ceph_auth_client *ac, struct ceph_authorizer *a) { diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c index 722791f45b2ac..ce28bb07d8fd7 100644 --- a/net/ceph/auth_x.c +++ b/net/ceph/auth_x.c @@ -295,7 +295,8 @@ bad: * authorizer. The first part (ceph_x_authorize_a) should already be * encoded. */ -static int encrypt_authorizer(struct ceph_x_authorizer *au) +static int encrypt_authorizer(struct ceph_x_authorizer *au, + u64 *server_challenge) { struct ceph_x_authorize_a *msg_a; struct ceph_x_authorize_b *msg_b; @@ -308,16 +309,28 @@ static int encrypt_authorizer(struct ceph_x_authorizer *au) end = au->buf->vec.iov_base + au->buf->vec.iov_len; msg_b = p + ceph_x_encrypt_offset(); - msg_b->struct_v = 1; + msg_b->struct_v = 2; msg_b->nonce = cpu_to_le64(au->nonce); + if (server_challenge) { + msg_b->have_challenge = 1; + msg_b->server_challenge_plus_one = + cpu_to_le64(*server_challenge + 1); + } else { + msg_b->have_challenge = 0; + msg_b->server_challenge_plus_one = 0; + } ret = ceph_x_encrypt(&au->session_key, p, end - p, sizeof(*msg_b)); if (ret < 0) return ret; p += ret; - WARN_ON(p > end); - au->buf->vec.iov_len = p - au->buf->vec.iov_base; + if (server_challenge) { + WARN_ON(p != end); + } else { + WARN_ON(p > end); + au->buf->vec.iov_len = p - au->buf->vec.iov_base; + } return 0; } @@ -382,7 +395,7 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac, le64_to_cpu(msg_a->ticket_blob.secret_id)); get_random_bytes(&au->nonce, sizeof(au->nonce)); - ret = encrypt_authorizer(au); + ret = encrypt_authorizer(au, NULL); if (ret) { pr_err("failed to encrypt authorizer: %d", ret); goto out_au; @@ -664,6 +677,54 @@ static int ceph_x_update_authorizer( return 0; } +static int decrypt_authorize_challenge(struct ceph_x_authorizer *au, + void *challenge_buf, + int challenge_buf_len, + u64 *server_challenge) +{ + struct ceph_x_authorize_challenge *ch = + challenge_buf + sizeof(struct ceph_x_encrypt_header); + int ret; + + /* no leading len */ + ret = __ceph_x_decrypt(&au->session_key, challenge_buf, + challenge_buf_len); + if (ret < 0) + return ret; + if (ret < sizeof(*ch)) { + pr_err("bad size %d for ceph_x_authorize_challenge\n", ret); + return -EINVAL; + } + + *server_challenge = le64_to_cpu(ch->server_challenge); + return 0; +} + +static int ceph_x_add_authorizer_challenge(struct ceph_auth_client *ac, + struct ceph_authorizer *a, + void *challenge_buf, + int challenge_buf_len) +{ + struct ceph_x_authorizer *au = (void *)a; + u64 server_challenge; + int ret; + + ret = decrypt_authorize_challenge(au, challenge_buf, challenge_buf_len, + &server_challenge); + if (ret) { + pr_err("failed to decrypt authorize challenge: %d", ret); + return ret; + } + + ret = encrypt_authorizer(au, &server_challenge); + if (ret) { + pr_err("failed to encrypt authorizer w/ challenge: %d", ret); + return ret; + } + + return 0; +} + static int ceph_x_verify_authorizer_reply(struct ceph_auth_client *ac, struct ceph_authorizer *a) { @@ -816,6 +877,7 @@ static const struct ceph_auth_client_ops ceph_x_ops = { .handle_reply = ceph_x_handle_reply, .create_authorizer = ceph_x_create_authorizer, .update_authorizer = ceph_x_update_authorizer, + .add_authorizer_challenge = ceph_x_add_authorizer_challenge, .verify_authorizer_reply = ceph_x_verify_authorizer_reply, .invalidate_authorizer = ceph_x_invalidate_authorizer, .reset = ceph_x_reset, diff --git a/net/ceph/auth_x_protocol.h b/net/ceph/auth_x_protocol.h index 32c13d763b9a4..24b0b74564d0c 100644 --- a/net/ceph/auth_x_protocol.h +++ b/net/ceph/auth_x_protocol.h @@ -70,6 +70,13 @@ struct ceph_x_authorize_a { struct ceph_x_authorize_b { __u8 struct_v; __le64 nonce; + __u8 have_challenge; + __le64 server_challenge_plus_one; +} __attribute__ ((packed)); + +struct ceph_x_authorize_challenge { + __u8 struct_v; + __le64 server_challenge; } __attribute__ ((packed)); struct ceph_x_authorize_reply { diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 0b121327d32f3..ad33baa2008de 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -2052,9 +2052,24 @@ static int process_connect(struct ceph_connection *con) if (con->auth) { /* * Any connection that defines ->get_authorizer() - * should also define ->verify_authorizer_reply(). + * should also define ->add_authorizer_challenge() and + * ->verify_authorizer_reply(). + * * See get_connect_authorizer(). */ + if (con->in_reply.tag == CEPH_MSGR_TAG_CHALLENGE_AUTHORIZER) { + ret = con->ops->add_authorizer_challenge( + con, con->auth->authorizer_reply_buf, + le32_to_cpu(con->in_reply.authorizer_len)); + if (ret < 0) + return ret; + + con_out_kvec_reset(con); + __prepare_write_connect(con); + prepare_read_connect(con); + return 0; + } + ret = con->ops->verify_authorizer_reply(con); if (ret < 0) { con->error_msg = "bad authorize reply"; diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 2814dba5902d7..53ea2d48896cb 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -5292,6 +5292,16 @@ static struct ceph_auth_handshake *get_authorizer(struct ceph_connection *con, return auth; } +static int add_authorizer_challenge(struct ceph_connection *con, + void *challenge_buf, int challenge_buf_len) +{ + struct ceph_osd *o = con->private; + struct ceph_osd_client *osdc = o->o_osdc; + struct ceph_auth_client *ac = osdc->client->monc.auth; + + return ceph_auth_add_authorizer_challenge(ac, o->o_auth.authorizer, + challenge_buf, challenge_buf_len); +} static int verify_authorizer_reply(struct ceph_connection *con) { @@ -5341,6 +5351,7 @@ static const struct ceph_connection_operations osd_con_ops = { .put = put_osd_con, .dispatch = dispatch, .get_authorizer = get_authorizer, + .add_authorizer_challenge = add_authorizer_challenge, .verify_authorizer_reply = verify_authorizer_reply, .invalidate_authorizer = invalidate_authorizer, .alloc_msg = alloc_msg, -- GitLab From b16d0c5d32468a0624505a7b6b211e20488295e9 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 27 Jul 2018 19:25:32 +0200 Subject: [PATCH 1807/2269] libceph: implement CEPHX_V2 calculation mode commit cc255c76c70f7a87d97939621eae04b600d9f4a1 upstream. Derive the signature from the entire buffer (both AES cipher blocks) instead of using just the first half of the first block, leaving out data_crc entirely. This addresses CVE-2018-1129. Link: http://tracker.ceph.com/issues/24837 Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil Signed-off-by: Ben Hutchings Signed-off-by: Sasha Levin --- include/linux/ceph/ceph_features.h | 7 +-- net/ceph/auth_x.c | 73 +++++++++++++++++++++++------- 2 files changed, 60 insertions(+), 20 deletions(-) diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h index 59042d5ac5202..70f42eef813b7 100644 --- a/include/linux/ceph/ceph_features.h +++ b/include/linux/ceph/ceph_features.h @@ -165,9 +165,9 @@ DEFINE_CEPH_FEATURE(58, 1, FS_FILE_LAYOUT_V2) // overlap DEFINE_CEPH_FEATURE(59, 1, FS_BTIME) DEFINE_CEPH_FEATURE(59, 1, FS_CHANGE_ATTR) // overlap DEFINE_CEPH_FEATURE(59, 1, MSG_ADDR2) // overlap -DEFINE_CEPH_FEATURE(60, 1, BLKIN_TRACING) // *do not share this bit* +DEFINE_CEPH_FEATURE(60, 1, OSD_RECOVERY_DELETES) // *do not share this bit* +DEFINE_CEPH_FEATURE(61, 1, CEPHX_V2) // *do not share this bit* -DEFINE_CEPH_FEATURE(61, 1, RESERVED2) // unused, but slow down! DEFINE_CEPH_FEATURE(62, 1, RESERVED) // do not use; used as a sentinal DEFINE_CEPH_FEATURE_DEPRECATED(63, 1, RESERVED_BROKEN, LUMINOUS) // client-facing @@ -209,7 +209,8 @@ DEFINE_CEPH_FEATURE_DEPRECATED(63, 1, RESERVED_BROKEN, LUMINOUS) // client-facin CEPH_FEATURE_SERVER_JEWEL | \ CEPH_FEATURE_MON_STATEFUL_SUB | \ CEPH_FEATURE_CRUSH_TUNABLES5 | \ - CEPH_FEATURE_NEW_OSDOPREPLY_ENCODING) + CEPH_FEATURE_NEW_OSDOPREPLY_ENCODING | \ + CEPH_FEATURE_CEPHX_V2) #define CEPH_FEATURES_REQUIRED_DEFAULT \ (CEPH_FEATURE_NOSRCADDR | \ diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c index ce28bb07d8fd7..10eb759bbcb49 100644 --- a/net/ceph/auth_x.c +++ b/net/ceph/auth_x.c @@ -9,6 +9,7 @@ #include #include +#include #include #include @@ -803,26 +804,64 @@ static int calc_signature(struct ceph_x_authorizer *au, struct ceph_msg *msg, __le64 *psig) { void *enc_buf = au->enc_buf; - struct { - __le32 len; - __le32 header_crc; - __le32 front_crc; - __le32 middle_crc; - __le32 data_crc; - } __packed *sigblock = enc_buf + ceph_x_encrypt_offset(); int ret; - sigblock->len = cpu_to_le32(4*sizeof(u32)); - sigblock->header_crc = msg->hdr.crc; - sigblock->front_crc = msg->footer.front_crc; - sigblock->middle_crc = msg->footer.middle_crc; - sigblock->data_crc = msg->footer.data_crc; - ret = ceph_x_encrypt(&au->session_key, enc_buf, CEPHX_AU_ENC_BUF_LEN, - sizeof(*sigblock)); - if (ret < 0) - return ret; + if (!CEPH_HAVE_FEATURE(msg->con->peer_features, CEPHX_V2)) { + struct { + __le32 len; + __le32 header_crc; + __le32 front_crc; + __le32 middle_crc; + __le32 data_crc; + } __packed *sigblock = enc_buf + ceph_x_encrypt_offset(); + + sigblock->len = cpu_to_le32(4*sizeof(u32)); + sigblock->header_crc = msg->hdr.crc; + sigblock->front_crc = msg->footer.front_crc; + sigblock->middle_crc = msg->footer.middle_crc; + sigblock->data_crc = msg->footer.data_crc; + + ret = ceph_x_encrypt(&au->session_key, enc_buf, + CEPHX_AU_ENC_BUF_LEN, sizeof(*sigblock)); + if (ret < 0) + return ret; + + *psig = *(__le64 *)(enc_buf + sizeof(u32)); + } else { + struct { + __le32 header_crc; + __le32 front_crc; + __le32 front_len; + __le32 middle_crc; + __le32 middle_len; + __le32 data_crc; + __le32 data_len; + __le32 seq_lower_word; + } __packed *sigblock = enc_buf; + struct { + __le64 a, b, c, d; + } __packed *penc = enc_buf; + int ciphertext_len; + + sigblock->header_crc = msg->hdr.crc; + sigblock->front_crc = msg->footer.front_crc; + sigblock->front_len = msg->hdr.front_len; + sigblock->middle_crc = msg->footer.middle_crc; + sigblock->middle_len = msg->hdr.middle_len; + sigblock->data_crc = msg->footer.data_crc; + sigblock->data_len = msg->hdr.data_len; + sigblock->seq_lower_word = *(__le32 *)&msg->hdr.seq; + + /* no leading len, no ceph_x_encrypt_header */ + ret = ceph_crypt(&au->session_key, true, enc_buf, + CEPHX_AU_ENC_BUF_LEN, sizeof(*sigblock), + &ciphertext_len); + if (ret) + return ret; + + *psig = penc->a ^ penc->b ^ penc->c ^ penc->d; + } - *psig = *(__le64 *)(enc_buf + sizeof(u32)); return 0; } -- GitLab From 83b570c004da47b51d7417ac18d8491d9fc91420 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 15 May 2018 09:27:05 -0700 Subject: [PATCH 1808/2269] bpf: Prevent memory disambiguation attack commit af86ca4e3088fe5eacf2f7e58c01fa68ca067672 upstream. Detect code patterns where malicious 'speculative store bypass' can be used and sanitize such patterns. 39: (bf) r3 = r10 40: (07) r3 += -216 41: (79) r8 = *(u64 *)(r7 +0) // slow read 42: (7a) *(u64 *)(r10 -72) = 0 // verifier inserts this instruction 43: (7b) *(u64 *)(r8 +0) = r3 // this store becomes slow due to r8 44: (79) r1 = *(u64 *)(r6 +0) // cpu speculatively executes this load 45: (71) r2 = *(u8 *)(r1 +0) // speculatively arbitrary 'load byte' // is now sanitized Above code after x86 JIT becomes: e5: mov %rbp,%rdx e8: add $0xffffffffffffff28,%rdx ef: mov 0x0(%r13),%r14 f3: movq $0x0,-0x48(%rbp) fb: mov %rdx,0x0(%r14) ff: mov 0x0(%rbx),%rdi 103: movzbq 0x0(%rdi),%rsi Signed-off-by: Alexei Starovoitov Signed-off-by: Thomas Gleixner [bwh: Backported to 4.14: - Add bpf_verifier_env parameter to check_stack_write() - Look up stack slot_types with state->stack_slot_type[] rather than state->stack[].slot_type[] - Drop bpf_verifier_env argument to verbose() - Adjust context] Signed-off-by: Ben Hutchings Signed-off-by: Sasha Levin --- include/linux/bpf_verifier.h | 1 + kernel/bpf/verifier.c | 62 +++++++++++++++++++++++++++++++++--- 2 files changed, 59 insertions(+), 4 deletions(-) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index a3333004fd2b3..8458cc5fbce57 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -113,6 +113,7 @@ struct bpf_insn_aux_data { struct bpf_map *map_ptr; /* pointer for call insn into lookup_elem */ }; int ctx_field_size; /* the ctx field size for load insn, maybe 0 */ + int sanitize_stack_off; /* stack slot to be cleared */ bool seen; /* this insn was processed by the verifier */ }; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 013b0cd1958ea..f6755fd5bae28 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -717,8 +717,9 @@ static bool is_spillable_regtype(enum bpf_reg_type type) /* check_stack_read/write functions track spill/fill of registers, * stack boundary and alignment are checked in check_mem_access() */ -static int check_stack_write(struct bpf_verifier_state *state, int off, - int size, int value_regno) +static int check_stack_write(struct bpf_verifier_env *env, + struct bpf_verifier_state *state, int off, + int size, int value_regno, int insn_idx) { int i, spi = (MAX_BPF_STACK + off) / BPF_REG_SIZE; /* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0, @@ -738,8 +739,32 @@ static int check_stack_write(struct bpf_verifier_state *state, int off, state->spilled_regs[spi] = state->regs[value_regno]; state->spilled_regs[spi].live |= REG_LIVE_WRITTEN; - for (i = 0; i < BPF_REG_SIZE; i++) + for (i = 0; i < BPF_REG_SIZE; i++) { + if (state->stack_slot_type[MAX_BPF_STACK + off + i] == STACK_MISC && + !env->allow_ptr_leaks) { + int *poff = &env->insn_aux_data[insn_idx].sanitize_stack_off; + int soff = (-spi - 1) * BPF_REG_SIZE; + + /* detected reuse of integer stack slot with a pointer + * which means either llvm is reusing stack slot or + * an attacker is trying to exploit CVE-2018-3639 + * (speculative store bypass) + * Have to sanitize that slot with preemptive + * store of zero. + */ + if (*poff && *poff != soff) { + /* disallow programs where single insn stores + * into two different stack slots, since verifier + * cannot sanitize them + */ + verbose("insn %d cannot access two stack slots fp%d and fp%d", + insn_idx, *poff, soff); + return -EINVAL; + } + *poff = soff; + } state->stack_slot_type[MAX_BPF_STACK + off + i] = STACK_SPILL; + } } else { /* regular write of data into stack */ state->spilled_regs[spi] = (struct bpf_reg_state) {}; @@ -1216,7 +1241,8 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn verbose("attempt to corrupt spilled pointer on stack\n"); return -EACCES; } - err = check_stack_write(state, off, size, value_regno); + err = check_stack_write(env, state, off, size, + value_regno, insn_idx); } else { err = check_stack_read(state, off, size, value_regno); } @@ -4270,6 +4296,34 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) else continue; + if (type == BPF_WRITE && + env->insn_aux_data[i + delta].sanitize_stack_off) { + struct bpf_insn patch[] = { + /* Sanitize suspicious stack slot with zero. + * There are no memory dependencies for this store, + * since it's only using frame pointer and immediate + * constant of zero + */ + BPF_ST_MEM(BPF_DW, BPF_REG_FP, + env->insn_aux_data[i + delta].sanitize_stack_off, + 0), + /* the original STX instruction will immediately + * overwrite the same stack slot with appropriate value + */ + *insn, + }; + + cnt = ARRAY_SIZE(patch); + new_prog = bpf_patch_insn_data(env, i + delta, patch, cnt); + if (!new_prog) + return -ENOMEM; + + delta += cnt - 1; + env->prog = new_prog; + insn = new_prog->insnsi + i + delta; + continue; + } + if (env->insn_aux_data[i + delta].ptr_type != PTR_TO_CTX) continue; -- GitLab From 25f03991a5210f31491e1e2fccec7cf0d080772e Mon Sep 17 00:00:00 2001 From: Ilya Lesokhin Date: Mon, 13 Nov 2017 10:22:45 +0200 Subject: [PATCH 1809/2269] tls: Add function to update the TLS socket configuration commit 6d88207fcfddc002afe3e2e4a455e5201089d5d9 upstream. The tx configuration is now stored in ctx->tx_conf. And sk->sk_prot is updated trough a function This will simplify things when we add rx and support for different possible tx and rx cross configurations. Signed-off-by: Ilya Lesokhin Signed-off-by: David S. Miller Signed-off-by: Ben Hutchings Signed-off-by: Sasha Levin --- include/net/tls.h | 2 ++ net/tls/tls_main.c | 46 ++++++++++++++++++++++++++++++++-------------- 2 files changed, 34 insertions(+), 14 deletions(-) diff --git a/include/net/tls.h b/include/net/tls.h index 86ed3dd80fe70..0c3ab2af74d3b 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -89,6 +89,8 @@ struct tls_context { void *priv_ctx; + u8 tx_conf:2; + u16 prepend_size; u16 tag_size; u16 overhead_size; diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 4f2971f528dbd..191a8adee3ea8 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -46,8 +46,18 @@ MODULE_DESCRIPTION("Transport Layer Security Support"); MODULE_LICENSE("Dual BSD/GPL"); MODULE_ALIAS_TCP_ULP("tls"); -static struct proto tls_base_prot; -static struct proto tls_sw_prot; +enum { + TLS_BASE_TX, + TLS_SW_TX, + TLS_NUM_CONFIG, +}; + +static struct proto tls_prots[TLS_NUM_CONFIG]; + +static inline void update_sk_prot(struct sock *sk, struct tls_context *ctx) +{ + sk->sk_prot = &tls_prots[ctx->tx_conf]; +} int wait_on_pending_writer(struct sock *sk, long *timeo) { @@ -364,8 +374,8 @@ static int do_tls_setsockopt_tx(struct sock *sk, char __user *optval, { struct tls_crypto_info *crypto_info, tmp_crypto_info; struct tls_context *ctx = tls_get_ctx(sk); - struct proto *prot = NULL; int rc = 0; + int tx_conf; if (!optval || (optlen < sizeof(*crypto_info))) { rc = -EINVAL; @@ -422,11 +432,12 @@ static int do_tls_setsockopt_tx(struct sock *sk, char __user *optval, /* currently SW is default, we will have ethtool in future */ rc = tls_set_sw_offload(sk, ctx); - prot = &tls_sw_prot; + tx_conf = TLS_SW_TX; if (rc) goto err_crypto_info; - sk->sk_prot = prot; + ctx->tx_conf = tx_conf; + update_sk_prot(sk, ctx); goto out; err_crypto_info: @@ -488,7 +499,9 @@ static int tls_init(struct sock *sk) icsk->icsk_ulp_data = ctx; ctx->setsockopt = sk->sk_prot->setsockopt; ctx->getsockopt = sk->sk_prot->getsockopt; - sk->sk_prot = &tls_base_prot; + + ctx->tx_conf = TLS_BASE_TX; + update_sk_prot(sk, ctx); out: return rc; } @@ -499,16 +512,21 @@ static struct tcp_ulp_ops tcp_tls_ulp_ops __read_mostly = { .init = tls_init, }; +static void build_protos(struct proto *prot, struct proto *base) +{ + prot[TLS_BASE_TX] = *base; + prot[TLS_BASE_TX].setsockopt = tls_setsockopt; + prot[TLS_BASE_TX].getsockopt = tls_getsockopt; + + prot[TLS_SW_TX] = prot[TLS_BASE_TX]; + prot[TLS_SW_TX].close = tls_sk_proto_close; + prot[TLS_SW_TX].sendmsg = tls_sw_sendmsg; + prot[TLS_SW_TX].sendpage = tls_sw_sendpage; +} + static int __init tls_register(void) { - tls_base_prot = tcp_prot; - tls_base_prot.setsockopt = tls_setsockopt; - tls_base_prot.getsockopt = tls_getsockopt; - - tls_sw_prot = tls_base_prot; - tls_sw_prot.sendmsg = tls_sw_sendmsg; - tls_sw_prot.sendpage = tls_sw_sendpage; - tls_sw_prot.close = tls_sk_proto_close; + build_protos(tls_prots, &tcp_prot); tcp_register_ulp(&tcp_tls_ulp_ops); -- GitLab From 797b8bb47fb27ee49a3b59ad110e5264585415aa Mon Sep 17 00:00:00 2001 From: Ilya Lesokhin Date: Mon, 13 Nov 2017 10:22:46 +0200 Subject: [PATCH 1810/2269] tls: Fix TLS ulp context leak, when TLS_TX setsockopt is not used. commit ff45d820a2df163957ad8ab459b6eb6976144c18 upstream. Previously the TLS ulp context would leak if we attached a TLS ulp to a socket but did not use the TLS_TX setsockopt, or did use it but it failed. This patch solves the issue by overriding prot[TLS_BASE_TX].close and fixing tls_sk_proto_close to work properly when its called with ctx->tx_conf == TLS_BASE_TX. This patch also removes ctx->free_resources as we can use ctx->tx_conf to obtain the relevant information. Fixes: 3c4d7559159b ('tls: kernel TLS support') Signed-off-by: Ilya Lesokhin Signed-off-by: David S. Miller [bwh: Backported to 4.14: Keep using tls_ctx_free() as introduced by the earlier backport of "tls: zero the crypto information from tls_context before freeing"] Signed-off-by: Ben Hutchings Signed-off-by: Sasha Levin --- include/net/tls.h | 2 +- net/tls/tls_main.c | 24 ++++++++++++++++-------- net/tls/tls_sw.c | 3 +-- 3 files changed, 18 insertions(+), 11 deletions(-) diff --git a/include/net/tls.h b/include/net/tls.h index 0c3ab2af74d3b..604fd982da191 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -106,7 +106,6 @@ struct tls_context { u16 pending_open_record_frags; int (*push_pending_record)(struct sock *sk, int flags); - void (*free_resources)(struct sock *sk); void (*sk_write_space)(struct sock *sk); void (*sk_proto_close)(struct sock *sk, long timeout); @@ -131,6 +130,7 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size); int tls_sw_sendpage(struct sock *sk, struct page *page, int offset, size_t size, int flags); void tls_sw_close(struct sock *sk, long timeout); +void tls_sw_free_tx_resources(struct sock *sk); void tls_sk_destruct(struct sock *sk, struct tls_context *ctx); void tls_icsk_clean_acked(struct sock *sk); diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 191a8adee3ea8..b5f9c578bcf09 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -249,6 +249,12 @@ static void tls_sk_proto_close(struct sock *sk, long timeout) void (*sk_proto_close)(struct sock *sk, long timeout); lock_sock(sk); + sk_proto_close = ctx->sk_proto_close; + + if (ctx->tx_conf == TLS_BASE_TX) { + tls_ctx_free(ctx); + goto skip_tx_cleanup; + } if (!tls_complete_pending_work(sk, ctx, 0, &timeo)) tls_handle_open_record(sk, 0); @@ -265,13 +271,16 @@ static void tls_sk_proto_close(struct sock *sk, long timeout) sg++; } } - ctx->free_resources(sk); + kfree(ctx->rec_seq); kfree(ctx->iv); - sk_proto_close = ctx->sk_proto_close; - tls_ctx_free(ctx); + if (ctx->tx_conf == TLS_SW_TX) { + tls_sw_free_tx_resources(sk); + tls_ctx_free(ctx); + } +skip_tx_cleanup: release_sock(sk); sk_proto_close(sk, timeout); } @@ -428,8 +437,6 @@ static int do_tls_setsockopt_tx(struct sock *sk, char __user *optval, ctx->sk_write_space = sk->sk_write_space; sk->sk_write_space = tls_write_space; - ctx->sk_proto_close = sk->sk_prot->close; - /* currently SW is default, we will have ethtool in future */ rc = tls_set_sw_offload(sk, ctx); tx_conf = TLS_SW_TX; @@ -499,6 +506,7 @@ static int tls_init(struct sock *sk) icsk->icsk_ulp_data = ctx; ctx->setsockopt = sk->sk_prot->setsockopt; ctx->getsockopt = sk->sk_prot->getsockopt; + ctx->sk_proto_close = sk->sk_prot->close; ctx->tx_conf = TLS_BASE_TX; update_sk_prot(sk, ctx); @@ -515,11 +523,11 @@ static struct tcp_ulp_ops tcp_tls_ulp_ops __read_mostly = { static void build_protos(struct proto *prot, struct proto *base) { prot[TLS_BASE_TX] = *base; - prot[TLS_BASE_TX].setsockopt = tls_setsockopt; - prot[TLS_BASE_TX].getsockopt = tls_getsockopt; + prot[TLS_BASE_TX].setsockopt = tls_setsockopt; + prot[TLS_BASE_TX].getsockopt = tls_getsockopt; + prot[TLS_BASE_TX].close = tls_sk_proto_close; prot[TLS_SW_TX] = prot[TLS_BASE_TX]; - prot[TLS_SW_TX].close = tls_sk_proto_close; prot[TLS_SW_TX].sendmsg = tls_sw_sendmsg; prot[TLS_SW_TX].sendpage = tls_sw_sendpage; } diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 6ae9ca567d6ca..5996fb5756e11 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -646,7 +646,7 @@ sendpage_end: return ret; } -static void tls_sw_free_resources(struct sock *sk) +void tls_sw_free_tx_resources(struct sock *sk) { struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx); @@ -685,7 +685,6 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx) } ctx->priv_ctx = (struct tls_offload_context *)sw_ctx; - ctx->free_resources = tls_sw_free_resources; crypto_info = &ctx->crypto_send.info; switch (crypto_info->cipher_type) { -- GitLab From 93f16446c8ddacbf26ae8408ecd9c9c397b8d5b4 Mon Sep 17 00:00:00 2001 From: Ilya Lesokhin Date: Mon, 13 Nov 2017 10:22:48 +0200 Subject: [PATCH 1811/2269] tls: Avoid copying crypto_info again after cipher_type check. commit 196c31b4b54474b31dee3c30352c45c2a93e9226 upstream. Avoid copying crypto_info again after cipher_type check to avoid a TOCTOU exploits. The temporary array on the stack is removed as we don't really need it Fixes: 3c4d7559159b ('tls: kernel TLS support') Signed-off-by: Ilya Lesokhin Signed-off-by: David S. Miller [bwh: Backported to 4.14: Preserve changes made by earlier backports of "tls: return -EBUSY if crypto_info is already set" and "tls: zero the crypto information from tls_context before freeing"] Signed-off-by: Ben Hutchings Signed-off-by: Sasha Levin --- net/tls/tls_main.c | 33 ++++++++++++++------------------- 1 file changed, 14 insertions(+), 19 deletions(-) diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index b5f9c578bcf09..f88df514ad5fc 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -381,7 +381,7 @@ static int tls_getsockopt(struct sock *sk, int level, int optname, static int do_tls_setsockopt_tx(struct sock *sk, char __user *optval, unsigned int optlen) { - struct tls_crypto_info *crypto_info, tmp_crypto_info; + struct tls_crypto_info *crypto_info; struct tls_context *ctx = tls_get_ctx(sk); int rc = 0; int tx_conf; @@ -391,38 +391,33 @@ static int do_tls_setsockopt_tx(struct sock *sk, char __user *optval, goto out; } - rc = copy_from_user(&tmp_crypto_info, optval, sizeof(*crypto_info)); + crypto_info = &ctx->crypto_send.info; + /* Currently we don't support set crypto info more than one time */ + if (TLS_CRYPTO_INFO_READY(crypto_info)) { + rc = -EBUSY; + goto out; + } + + rc = copy_from_user(crypto_info, optval, sizeof(*crypto_info)); if (rc) { rc = -EFAULT; goto out; } /* check version */ - if (tmp_crypto_info.version != TLS_1_2_VERSION) { + if (crypto_info->version != TLS_1_2_VERSION) { rc = -ENOTSUPP; - goto out; - } - - /* get user crypto info */ - crypto_info = &ctx->crypto_send.info; - - /* Currently we don't support set crypto info more than one time */ - if (TLS_CRYPTO_INFO_READY(crypto_info)) { - rc = -EBUSY; - goto out; + goto err_crypto_info; } - switch (tmp_crypto_info.cipher_type) { + switch (crypto_info->cipher_type) { case TLS_CIPHER_AES_GCM_128: { if (optlen != sizeof(struct tls12_crypto_info_aes_gcm_128)) { rc = -EINVAL; goto err_crypto_info; } - rc = copy_from_user( - crypto_info, - optval, - sizeof(struct tls12_crypto_info_aes_gcm_128)); - + rc = copy_from_user(crypto_info + 1, optval + sizeof(*crypto_info), + optlen - sizeof(*crypto_info)); if (rc) { rc = -EFAULT; goto err_crypto_info; -- GitLab From 2b8b2e76222f1241cec381eb0ec599ee33e9cd00 Mon Sep 17 00:00:00 2001 From: Ilya Lesokhin Date: Mon, 13 Nov 2017 10:22:49 +0200 Subject: [PATCH 1812/2269] tls: don't override sk_write_space if tls_set_sw_offload fails. commit ee181e5201e640a4b92b217e9eab2531dab57d2c upstream. If we fail to enable tls in the kernel we shouldn't override the sk_write_space callback Fixes: 3c4d7559159b ('tls: kernel TLS support') Signed-off-by: Ilya Lesokhin Signed-off-by: David S. Miller Signed-off-by: Ben Hutchings Signed-off-by: Sasha Levin --- net/tls/tls_main.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index f88df514ad5fc..33187e34599bc 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -429,9 +429,6 @@ static int do_tls_setsockopt_tx(struct sock *sk, char __user *optval, goto err_crypto_info; } - ctx->sk_write_space = sk->sk_write_space; - sk->sk_write_space = tls_write_space; - /* currently SW is default, we will have ethtool in future */ rc = tls_set_sw_offload(sk, ctx); tx_conf = TLS_SW_TX; @@ -440,6 +437,8 @@ static int do_tls_setsockopt_tx(struct sock *sk, char __user *optval, ctx->tx_conf = tx_conf; update_sk_prot(sk, ctx); + ctx->sk_write_space = sk->sk_write_space; + sk->sk_write_space = tls_write_space; goto out; err_crypto_info: -- GitLab From 2a0f5919e1e6a1c0423d895ab75eb15f94a67c69 Mon Sep 17 00:00:00 2001 From: Boris Pismenny Date: Tue, 27 Feb 2018 14:18:39 +0200 Subject: [PATCH 1813/2269] tls: Use correct sk->sk_prot for IPV6 commit c113187d38ff85dc302a1bb55864b203ebb2ba10 upstream. The tls ulp overrides sk->prot with a new tls specific proto structs. The tls specific structs were previously based on the ipv4 specific tcp_prot sturct. As a result, attaching the tls ulp to an ipv6 tcp socket replaced some ipv6 callback with the ipv4 equivalents. This patch adds ipv6 tls proto structs and uses them when attached to ipv6 sockets. Fixes: 3c4d7559159b ('tls: kernel TLS support') Signed-off-by: Boris Pismenny Signed-off-by: Ilya Lesokhin Signed-off-by: David S. Miller Signed-off-by: Ben Hutchings Signed-off-by: Sasha Levin --- net/tls/tls_main.c | 52 +++++++++++++++++++++++++++++++++------------- 1 file changed, 37 insertions(+), 15 deletions(-) diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 33187e34599bc..e903bdd39b9f4 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -46,17 +46,27 @@ MODULE_DESCRIPTION("Transport Layer Security Support"); MODULE_LICENSE("Dual BSD/GPL"); MODULE_ALIAS_TCP_ULP("tls"); +enum { + TLSV4, + TLSV6, + TLS_NUM_PROTS, +}; + enum { TLS_BASE_TX, TLS_SW_TX, TLS_NUM_CONFIG, }; -static struct proto tls_prots[TLS_NUM_CONFIG]; +static struct proto *saved_tcpv6_prot; +static DEFINE_MUTEX(tcpv6_prot_mutex); +static struct proto tls_prots[TLS_NUM_PROTS][TLS_NUM_CONFIG]; static inline void update_sk_prot(struct sock *sk, struct tls_context *ctx) { - sk->sk_prot = &tls_prots[ctx->tx_conf]; + int ip_ver = sk->sk_family == AF_INET6 ? TLSV6 : TLSV4; + + sk->sk_prot = &tls_prots[ip_ver][ctx->tx_conf]; } int wait_on_pending_writer(struct sock *sk, long *timeo) @@ -476,8 +486,21 @@ static int tls_setsockopt(struct sock *sk, int level, int optname, return do_tls_setsockopt(sk, optname, optval, optlen); } +static void build_protos(struct proto *prot, struct proto *base) +{ + prot[TLS_BASE_TX] = *base; + prot[TLS_BASE_TX].setsockopt = tls_setsockopt; + prot[TLS_BASE_TX].getsockopt = tls_getsockopt; + prot[TLS_BASE_TX].close = tls_sk_proto_close; + + prot[TLS_SW_TX] = prot[TLS_BASE_TX]; + prot[TLS_SW_TX].sendmsg = tls_sw_sendmsg; + prot[TLS_SW_TX].sendpage = tls_sw_sendpage; +} + static int tls_init(struct sock *sk) { + int ip_ver = sk->sk_family == AF_INET6 ? TLSV6 : TLSV4; struct inet_connection_sock *icsk = inet_csk(sk); struct tls_context *ctx; int rc = 0; @@ -502,6 +525,17 @@ static int tls_init(struct sock *sk) ctx->getsockopt = sk->sk_prot->getsockopt; ctx->sk_proto_close = sk->sk_prot->close; + /* Build IPv6 TLS whenever the address of tcpv6_prot changes */ + if (ip_ver == TLSV6 && + unlikely(sk->sk_prot != smp_load_acquire(&saved_tcpv6_prot))) { + mutex_lock(&tcpv6_prot_mutex); + if (likely(sk->sk_prot != saved_tcpv6_prot)) { + build_protos(tls_prots[TLSV6], sk->sk_prot); + smp_store_release(&saved_tcpv6_prot, sk->sk_prot); + } + mutex_unlock(&tcpv6_prot_mutex); + } + ctx->tx_conf = TLS_BASE_TX; update_sk_prot(sk, ctx); out: @@ -514,21 +548,9 @@ static struct tcp_ulp_ops tcp_tls_ulp_ops __read_mostly = { .init = tls_init, }; -static void build_protos(struct proto *prot, struct proto *base) -{ - prot[TLS_BASE_TX] = *base; - prot[TLS_BASE_TX].setsockopt = tls_setsockopt; - prot[TLS_BASE_TX].getsockopt = tls_getsockopt; - prot[TLS_BASE_TX].close = tls_sk_proto_close; - - prot[TLS_SW_TX] = prot[TLS_BASE_TX]; - prot[TLS_SW_TX].sendmsg = tls_sw_sendmsg; - prot[TLS_SW_TX].sendpage = tls_sw_sendpage; -} - static int __init tls_register(void) { - build_protos(tls_prots, &tcp_prot); + build_protos(tls_prots[TLSV4], &tcp_prot); tcp_register_ulp(&tcp_tls_ulp_ops); -- GitLab From 39d9e1c62e3f8b5569a0d9bf4d47a3d5d8ae02e2 Mon Sep 17 00:00:00 2001 From: Vakul Garg Date: Mon, 10 Sep 2018 22:53:46 +0530 Subject: [PATCH 1814/2269] net/tls: Fixed return value when tls_complete_pending_work() fails commit 150085791afb8054e11d2e080d4b9cd755dd7f69 upstream. In tls_sw_sendmsg() and tls_sw_sendpage(), the variable 'ret' has been set to return value of tls_complete_pending_work(). This allows return of proper error code if tls_complete_pending_work() fails. Fixes: 3c4d7559159b ("tls: kernel TLS support") Signed-off-by: Vakul Garg Signed-off-by: David S. Miller [bwh: Backported to 4.14: adjust context] Signed-off-by: Ben Hutchings Signed-off-by: Sasha Levin --- net/tls/tls_sw.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 5996fb5756e11..d18d4a478e4f1 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -388,7 +388,7 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) { struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx); - int ret = 0; + int ret; int required_size; long timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); bool eor = !(msg->msg_flags & MSG_MORE); @@ -403,7 +403,8 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) lock_sock(sk); - if (tls_complete_pending_work(sk, tls_ctx, msg->msg_flags, &timeo)) + ret = tls_complete_pending_work(sk, tls_ctx, msg->msg_flags, &timeo); + if (ret) goto send_end; if (unlikely(msg->msg_controllen)) { @@ -539,7 +540,7 @@ int tls_sw_sendpage(struct sock *sk, struct page *page, { struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx); - int ret = 0; + int ret; long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); bool eor; size_t orig_size = size; @@ -559,7 +560,8 @@ int tls_sw_sendpage(struct sock *sk, struct page *page, sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk); - if (tls_complete_pending_work(sk, tls_ctx, flags, &timeo)) + ret = tls_complete_pending_work(sk, tls_ctx, flags, &timeo); + if (ret) goto sendpage_end; /* Call the sk_stream functions to manage the sndbuf mem. */ -- GitLab From 107b02c81a8761f1f7efc1e8b54d435324ccd13e Mon Sep 17 00:00:00 2001 From: Lior David Date: Tue, 14 Nov 2017 15:25:39 +0200 Subject: [PATCH 1815/2269] wil6210: missing length check in wmi_set_ie commit b5a8ffcae4103a9d823ea3aa3a761f65779fbe2a upstream. Add a length check in wmi_set_ie to detect unsigned integer overflow. Signed-off-by: Lior David Signed-off-by: Maya Erez Signed-off-by: Kalle Valo Signed-off-by: Ben Hutchings Signed-off-by: Sasha Levin --- drivers/net/wireless/ath/wil6210/wmi.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/wil6210/wmi.c b/drivers/net/wireless/ath/wil6210/wmi.c index ffdd2fa401b15..d63d7c3268018 100644 --- a/drivers/net/wireless/ath/wil6210/wmi.c +++ b/drivers/net/wireless/ath/wil6210/wmi.c @@ -1380,8 +1380,14 @@ int wmi_set_ie(struct wil6210_priv *wil, u8 type, u16 ie_len, const void *ie) }; int rc; u16 len = sizeof(struct wmi_set_appie_cmd) + ie_len; - struct wmi_set_appie_cmd *cmd = kzalloc(len, GFP_KERNEL); + struct wmi_set_appie_cmd *cmd; + if (len < ie_len) { + rc = -EINVAL; + goto out; + } + + cmd = kzalloc(len, GFP_KERNEL); if (!cmd) { rc = -ENOMEM; goto out; -- GitLab From f7eef132ccc95c9af50b647c5da0511d2b8492f8 Mon Sep 17 00:00:00 2001 From: Gu Jinxiang Date: Wed, 4 Jul 2018 18:16:39 +0800 Subject: [PATCH 1816/2269] btrfs: validate type when reading a chunk commit 315409b0098fb2651d86553f0436b70502b29bb2 upstream. Reported in https://bugzilla.kernel.org/show_bug.cgi?id=199839, with an image that has an invalid chunk type but does not return an error. Add chunk type check in btrfs_check_chunk_valid, to detect the wrong type combinations. Link: https://bugzilla.kernel.org/show_bug.cgi?id=199839 Reported-by: Xu Wen Reviewed-by: Qu Wenruo Signed-off-by: Gu Jinxiang Signed-off-by: David Sterba Signed-off-by: Ben Hutchings Signed-off-by: Sasha Levin --- fs/btrfs/volumes.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index a0947f4a3e87f..cfd5728e7519c 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -6353,6 +6353,8 @@ static int btrfs_check_chunk_valid(struct btrfs_fs_info *fs_info, u16 num_stripes; u16 sub_stripes; u64 type; + u64 features; + bool mixed = false; length = btrfs_chunk_length(leaf, chunk); stripe_len = btrfs_chunk_stripe_len(leaf, chunk); @@ -6391,6 +6393,32 @@ static int btrfs_check_chunk_valid(struct btrfs_fs_info *fs_info, btrfs_chunk_type(leaf, chunk)); return -EIO; } + + if ((type & BTRFS_BLOCK_GROUP_TYPE_MASK) == 0) { + btrfs_err(fs_info, "missing chunk type flag: 0x%llx", type); + return -EIO; + } + + if ((type & BTRFS_BLOCK_GROUP_SYSTEM) && + (type & (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA))) { + btrfs_err(fs_info, + "system chunk with data or metadata type: 0x%llx", type); + return -EIO; + } + + features = btrfs_super_incompat_flags(fs_info->super_copy); + if (features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS) + mixed = true; + + if (!mixed) { + if ((type & BTRFS_BLOCK_GROUP_METADATA) && + (type & BTRFS_BLOCK_GROUP_DATA)) { + btrfs_err(fs_info, + "mixed chunk type in non-mixed mode: 0x%llx", type); + return -EIO; + } + } + if ((type & BTRFS_BLOCK_GROUP_RAID10 && sub_stripes != 2) || (type & BTRFS_BLOCK_GROUP_RAID1 && num_stripes < 1) || (type & BTRFS_BLOCK_GROUP_RAID5 && num_stripes < 2) || -- GitLab From 895586ecb7a4528336d41f81d0ce3985e8abbed6 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Wed, 1 Aug 2018 10:37:17 +0800 Subject: [PATCH 1817/2269] btrfs: Verify that every chunk has corresponding block group at mount time commit 7ef49515fa6727cb4b6f2f5b0ffbc5fc20a9f8c6 upstream. If a crafted image has missing block group items, it could cause unexpected behavior and breaks the assumption of 1:1 chunk<->block group mapping. Although we have the block group -> chunk mapping check, we still need chunk -> block group mapping check. This patch will do extra check to ensure each chunk has its corresponding block group. Link: https://bugzilla.kernel.org/show_bug.cgi?id=199847 Reported-by: Xu Wen Signed-off-by: Qu Wenruo Reviewed-by: Gu Jinxiang Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Ben Hutchings Signed-off-by: Sasha Levin --- fs/btrfs/extent-tree.c | 58 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 57 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 2cb3569ac5481..fdc42eddccc2b 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -10092,6 +10092,62 @@ btrfs_create_block_group_cache(struct btrfs_fs_info *fs_info, return cache; } + +/* + * Iterate all chunks and verify that each of them has the corresponding block + * group + */ +static int check_chunk_block_group_mappings(struct btrfs_fs_info *fs_info) +{ + struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree; + struct extent_map *em; + struct btrfs_block_group_cache *bg; + u64 start = 0; + int ret = 0; + + while (1) { + read_lock(&map_tree->map_tree.lock); + /* + * lookup_extent_mapping will return the first extent map + * intersecting the range, so setting @len to 1 is enough to + * get the first chunk. + */ + em = lookup_extent_mapping(&map_tree->map_tree, start, 1); + read_unlock(&map_tree->map_tree.lock); + if (!em) + break; + + bg = btrfs_lookup_block_group(fs_info, em->start); + if (!bg) { + btrfs_err(fs_info, + "chunk start=%llu len=%llu doesn't have corresponding block group", + em->start, em->len); + ret = -EUCLEAN; + free_extent_map(em); + break; + } + if (bg->key.objectid != em->start || + bg->key.offset != em->len || + (bg->flags & BTRFS_BLOCK_GROUP_TYPE_MASK) != + (em->map_lookup->type & BTRFS_BLOCK_GROUP_TYPE_MASK)) { + btrfs_err(fs_info, +"chunk start=%llu len=%llu flags=0x%llx doesn't match block group start=%llu len=%llu flags=0x%llx", + em->start, em->len, + em->map_lookup->type & BTRFS_BLOCK_GROUP_TYPE_MASK, + bg->key.objectid, bg->key.offset, + bg->flags & BTRFS_BLOCK_GROUP_TYPE_MASK); + ret = -EUCLEAN; + free_extent_map(em); + btrfs_put_block_group(bg); + break; + } + start = em->start + em->len; + free_extent_map(em); + btrfs_put_block_group(bg); + } + return ret; +} + int btrfs_read_block_groups(struct btrfs_fs_info *info) { struct btrfs_path *path; @@ -10264,7 +10320,7 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info) } init_global_block_rsv(info); - ret = 0; + ret = check_chunk_block_group_mappings(info); error: btrfs_free_path(path); return ret; -- GitLab From a5cc85fe139c181e53cc557230c5ea4001a90c80 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Wed, 23 Aug 2017 16:57:56 +0900 Subject: [PATCH 1818/2269] btrfs: Refactor check_leaf function for later expansion commit c3267bbaa9cae09b62960eafe33ad19196803285 upstream. Current check_leaf() function does a good job checking key order and item offset/size. However it only checks from slot 0 to the last but one slot, this is good but makes later expansion hard. So this refactoring iterates from slot 0 to the last slot. For key comparison, it uses a key with all 0 as initial key, so all valid keys should be larger than that. And for item size/offset checks, it compares current item end with previous item offset. For slot 0, use leaf end as a special case. This makes later item/key offset checks and item size checks easier to be implemented. Also, makes check_leaf() to return -EUCLEAN other than -EIO to indicate error. Signed-off-by: Qu Wenruo Reviewed-by: Nikolay Borisov Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Ben Hutchings Signed-off-by: Sasha Levin --- fs/btrfs/disk-io.c | 50 +++++++++++++++++++++++++--------------------- 1 file changed, 27 insertions(+), 23 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 0e67cee73c532..4a1e63df1183b 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -554,8 +554,9 @@ static noinline int check_leaf(struct btrfs_root *root, struct extent_buffer *leaf) { struct btrfs_fs_info *fs_info = root->fs_info; + /* No valid key type is 0, so all key should be larger than this key */ + struct btrfs_key prev_key = {0, 0, 0}; struct btrfs_key key; - struct btrfs_key leaf_key; u32 nritems = btrfs_header_nritems(leaf); int slot; @@ -588,7 +589,7 @@ static noinline int check_leaf(struct btrfs_root *root, CORRUPT("non-root leaf's nritems is 0", leaf, check_root, 0); free_extent_buffer(eb); - return -EIO; + return -EUCLEAN; } free_extent_buffer(eb); } @@ -598,28 +599,23 @@ static noinline int check_leaf(struct btrfs_root *root, if (nritems == 0) return 0; - /* Check the 0 item */ - if (btrfs_item_offset_nr(leaf, 0) + btrfs_item_size_nr(leaf, 0) != - BTRFS_LEAF_DATA_SIZE(fs_info)) { - CORRUPT("invalid item offset size pair", leaf, root, 0); - return -EIO; - } - /* - * Check to make sure each items keys are in the correct order and their - * offsets make sense. We only have to loop through nritems-1 because - * we check the current slot against the next slot, which verifies the - * next slot's offset+size makes sense and that the current's slot - * offset is correct. + * Check the following things to make sure this is a good leaf, and + * leaf users won't need to bother with similar sanity checks: + * + * 1) key order + * 2) item offset and size + * No overlap, no hole, all inside the leaf. */ - for (slot = 0; slot < nritems - 1; slot++) { - btrfs_item_key_to_cpu(leaf, &leaf_key, slot); - btrfs_item_key_to_cpu(leaf, &key, slot + 1); + for (slot = 0; slot < nritems; slot++) { + u32 item_end_expected; + + btrfs_item_key_to_cpu(leaf, &key, slot); /* Make sure the keys are in the right order */ - if (btrfs_comp_cpu_keys(&leaf_key, &key) >= 0) { + if (btrfs_comp_cpu_keys(&prev_key, &key) >= 0) { CORRUPT("bad key order", leaf, root, slot); - return -EIO; + return -EUCLEAN; } /* @@ -627,10 +623,14 @@ static noinline int check_leaf(struct btrfs_root *root, * item data starts at the end of the leaf and grows towards the * front. */ - if (btrfs_item_offset_nr(leaf, slot) != - btrfs_item_end_nr(leaf, slot + 1)) { + if (slot == 0) + item_end_expected = BTRFS_LEAF_DATA_SIZE(fs_info); + else + item_end_expected = btrfs_item_offset_nr(leaf, + slot - 1); + if (btrfs_item_end_nr(leaf, slot) != item_end_expected) { CORRUPT("slot offset bad", leaf, root, slot); - return -EIO; + return -EUCLEAN; } /* @@ -641,8 +641,12 @@ static noinline int check_leaf(struct btrfs_root *root, if (btrfs_item_end_nr(leaf, slot) > BTRFS_LEAF_DATA_SIZE(fs_info)) { CORRUPT("slot end outside of leaf", leaf, root, slot); - return -EIO; + return -EUCLEAN; } + + prev_key.objectid = key.objectid; + prev_key.type = key.type; + prev_key.offset = key.offset; } return 0; -- GitLab From ac6ea50bb6306ed056222a5967b2bad3c3a9e2dd Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Wed, 23 Aug 2017 16:57:57 +0900 Subject: [PATCH 1819/2269] btrfs: Check if item pointer overlaps with the item itself commit 7f43d4affb2a254d421ab20b0cf65ac2569909fb upstream. Function check_leaf() checks if any item pointer points outside of the leaf, but it doesn't check if the pointer overlaps with the item itself. Normally only the last item may be the victim, but adding such check is never a bad idea anyway. Signed-off-by: Qu Wenruo Reviewed-by: Nikolay Borisov Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Ben Hutchings Signed-off-by: Sasha Levin --- fs/btrfs/disk-io.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 4a1e63df1183b..da7b2039e4cb3 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -644,6 +644,13 @@ static noinline int check_leaf(struct btrfs_root *root, return -EUCLEAN; } + /* Also check if the item pointer overlaps with btrfs item. */ + if (btrfs_item_nr_offset(slot) + sizeof(struct btrfs_item) > + btrfs_item_ptr_offset(leaf, slot)) { + CORRUPT("slot overlap with its data", leaf, root, slot); + return -EUCLEAN; + } + prev_key.objectid = key.objectid; prev_key.type = key.type; prev_key.offset = key.offset; -- GitLab From fa5d29e6d7cbfac45e3b7bdb7bb3d48f124bbe55 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Wed, 23 Aug 2017 16:57:58 +0900 Subject: [PATCH 1820/2269] btrfs: Add sanity check for EXTENT_DATA when reading out leaf commit 40c3c40947324d9f40bf47830c92c59a9bbadf4a upstream. Add extra checks for item with EXTENT_DATA type. This checks the following thing: 0) Key offset All key offsets must be aligned to sectorsize. Inline extent must have 0 for key offset. 1) Item size Uncompressed inline file extent size must match item size. (Compressed inline file extent has no information about its on-disk size.) Regular/preallocated file extent size must be a fixed value. 2) Every member of regular file extent item Including alignment for bytenr and offset, possible value for compression/encryption/type. 3) Type/compression/encode must be one of the valid values. This should be the most comprehensive and strict check in the context of btrfs_item for EXTENT_DATA. Signed-off-by: Qu Wenruo Reviewed-by: Nikolay Borisov Reviewed-by: David Sterba [ switch to BTRFS_FILE_EXTENT_TYPES, similar to what BTRFS_COMPRESS_TYPES does ] Signed-off-by: David Sterba Signed-off-by: Ben Hutchings Signed-off-by: Sasha Levin --- fs/btrfs/disk-io.c | 103 ++++++++++++++++++++++++++++++++ include/uapi/linux/btrfs_tree.h | 1 + 2 files changed, 104 insertions(+) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index da7b2039e4cb3..ab8925b2efd1b 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -550,6 +550,100 @@ static int check_tree_block_fsid(struct btrfs_fs_info *fs_info, btrfs_header_level(eb) == 0 ? "leaf" : "node", \ reason, btrfs_header_bytenr(eb), root->objectid, slot) +static int check_extent_data_item(struct btrfs_root *root, + struct extent_buffer *leaf, + struct btrfs_key *key, int slot) +{ + struct btrfs_file_extent_item *fi; + u32 sectorsize = root->fs_info->sectorsize; + u32 item_size = btrfs_item_size_nr(leaf, slot); + + if (!IS_ALIGNED(key->offset, sectorsize)) { + CORRUPT("unaligned key offset for file extent", + leaf, root, slot); + return -EUCLEAN; + } + + fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); + + if (btrfs_file_extent_type(leaf, fi) > BTRFS_FILE_EXTENT_TYPES) { + CORRUPT("invalid file extent type", leaf, root, slot); + return -EUCLEAN; + } + + /* + * Support for new compression/encrption must introduce incompat flag, + * and must be caught in open_ctree(). + */ + if (btrfs_file_extent_compression(leaf, fi) > BTRFS_COMPRESS_TYPES) { + CORRUPT("invalid file extent compression", leaf, root, slot); + return -EUCLEAN; + } + if (btrfs_file_extent_encryption(leaf, fi)) { + CORRUPT("invalid file extent encryption", leaf, root, slot); + return -EUCLEAN; + } + if (btrfs_file_extent_type(leaf, fi) == BTRFS_FILE_EXTENT_INLINE) { + /* Inline extent must have 0 as key offset */ + if (key->offset) { + CORRUPT("inline extent has non-zero key offset", + leaf, root, slot); + return -EUCLEAN; + } + + /* Compressed inline extent has no on-disk size, skip it */ + if (btrfs_file_extent_compression(leaf, fi) != + BTRFS_COMPRESS_NONE) + return 0; + + /* Uncompressed inline extent size must match item size */ + if (item_size != BTRFS_FILE_EXTENT_INLINE_DATA_START + + btrfs_file_extent_ram_bytes(leaf, fi)) { + CORRUPT("plaintext inline extent has invalid size", + leaf, root, slot); + return -EUCLEAN; + } + return 0; + } + + /* Regular or preallocated extent has fixed item size */ + if (item_size != sizeof(*fi)) { + CORRUPT( + "regluar or preallocated extent data item size is invalid", + leaf, root, slot); + return -EUCLEAN; + } + if (!IS_ALIGNED(btrfs_file_extent_ram_bytes(leaf, fi), sectorsize) || + !IS_ALIGNED(btrfs_file_extent_disk_bytenr(leaf, fi), sectorsize) || + !IS_ALIGNED(btrfs_file_extent_disk_num_bytes(leaf, fi), sectorsize) || + !IS_ALIGNED(btrfs_file_extent_offset(leaf, fi), sectorsize) || + !IS_ALIGNED(btrfs_file_extent_num_bytes(leaf, fi), sectorsize)) { + CORRUPT( + "regular or preallocated extent data item has unaligned value", + leaf, root, slot); + return -EUCLEAN; + } + + return 0; +} + +/* + * Common point to switch the item-specific validation. + */ +static int check_leaf_item(struct btrfs_root *root, + struct extent_buffer *leaf, + struct btrfs_key *key, int slot) +{ + int ret = 0; + + switch (key->type) { + case BTRFS_EXTENT_DATA_KEY: + ret = check_extent_data_item(root, leaf, key, slot); + break; + } + return ret; +} + static noinline int check_leaf(struct btrfs_root *root, struct extent_buffer *leaf) { @@ -606,9 +700,13 @@ static noinline int check_leaf(struct btrfs_root *root, * 1) key order * 2) item offset and size * No overlap, no hole, all inside the leaf. + * 3) item content + * If possible, do comprehensive sanity check. + * NOTE: All checks must only rely on the item data itself. */ for (slot = 0; slot < nritems; slot++) { u32 item_end_expected; + int ret; btrfs_item_key_to_cpu(leaf, &key, slot); @@ -651,6 +749,11 @@ static noinline int check_leaf(struct btrfs_root *root, return -EUCLEAN; } + /* Check if the item size and content meet other criteria */ + ret = check_leaf_item(root, leaf, &key, slot); + if (ret < 0) + return ret; + prev_key.objectid = key.objectid; prev_key.type = key.type; prev_key.offset = key.offset; diff --git a/include/uapi/linux/btrfs_tree.h b/include/uapi/linux/btrfs_tree.h index 7115838fbf2a3..38ab0e06259ab 100644 --- a/include/uapi/linux/btrfs_tree.h +++ b/include/uapi/linux/btrfs_tree.h @@ -734,6 +734,7 @@ struct btrfs_balance_item { #define BTRFS_FILE_EXTENT_INLINE 0 #define BTRFS_FILE_EXTENT_REG 1 #define BTRFS_FILE_EXTENT_PREALLOC 2 +#define BTRFS_FILE_EXTENT_TYPES 2 struct btrfs_file_extent_item { /* -- GitLab From 64948fd63f662180c00f0fe70b623eab51108a89 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Wed, 23 Aug 2017 16:57:59 +0900 Subject: [PATCH 1821/2269] btrfs: Add checker for EXTENT_CSUM commit 4b865cab96fe2a30ed512cf667b354bd291b3b0a upstream. EXTENT_CSUM checker is a relatively easy one, only needs to check: 1) Objectid Fixed to BTRFS_EXTENT_CSUM_OBJECTID 2) Key offset alignment Must be aligned to sectorsize 3) Item size alignedment Must be aligned to csum size Signed-off-by: Qu Wenruo Reviewed-by: Nikolay Borisov Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Ben Hutchings Signed-off-by: Sasha Levin --- fs/btrfs/disk-io.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index ab8925b2efd1b..53841d773a403 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -627,6 +627,27 @@ static int check_extent_data_item(struct btrfs_root *root, return 0; } +static int check_csum_item(struct btrfs_root *root, struct extent_buffer *leaf, + struct btrfs_key *key, int slot) +{ + u32 sectorsize = root->fs_info->sectorsize; + u32 csumsize = btrfs_super_csum_size(root->fs_info->super_copy); + + if (key->objectid != BTRFS_EXTENT_CSUM_OBJECTID) { + CORRUPT("invalid objectid for csum item", leaf, root, slot); + return -EUCLEAN; + } + if (!IS_ALIGNED(key->offset, sectorsize)) { + CORRUPT("unaligned key offset for csum item", leaf, root, slot); + return -EUCLEAN; + } + if (!IS_ALIGNED(btrfs_item_size_nr(leaf, slot), csumsize)) { + CORRUPT("unaligned csum item size", leaf, root, slot); + return -EUCLEAN; + } + return 0; +} + /* * Common point to switch the item-specific validation. */ @@ -640,6 +661,9 @@ static int check_leaf_item(struct btrfs_root *root, case BTRFS_EXTENT_DATA_KEY: ret = check_extent_data_item(root, leaf, key, slot); break; + case BTRFS_EXTENT_CSUM_KEY: + ret = check_csum_item(root, leaf, key, slot); + break; } return ret; } -- GitLab From eb3493e2476c5ce60c6a376a6b50d465f2864968 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Mon, 9 Oct 2017 01:51:02 +0000 Subject: [PATCH 1822/2269] btrfs: Move leaf and node validation checker to tree-checker.c commit 557ea5dd003d371536f6b4e8f7c8209a2b6fd4e3 upstream. It's no doubt the comprehensive tree block checker will become larger, so moving them into their own files is quite reasonable. Signed-off-by: Qu Wenruo [ wording adjustments ] Signed-off-by: David Sterba Signed-off-by: Ben Hutchings Signed-off-by: Sasha Levin --- fs/btrfs/Makefile | 2 +- fs/btrfs/disk-io.c | 285 +----------------------------------- fs/btrfs/tree-checker.c | 309 ++++++++++++++++++++++++++++++++++++++++ fs/btrfs/tree-checker.h | 26 ++++ 4 files changed, 340 insertions(+), 282 deletions(-) create mode 100644 fs/btrfs/tree-checker.c create mode 100644 fs/btrfs/tree-checker.h diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index f2cd9dedb0373..195229df5ba06 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -10,7 +10,7 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ export.o tree-log.o free-space-cache.o zlib.o lzo.o zstd.o \ compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o \ reada.o backref.o ulist.o qgroup.o send.o dev-replace.o raid56.o \ - uuid-tree.o props.o hash.o free-space-tree.o + uuid-tree.o props.o hash.o free-space-tree.o tree-checker.o btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 53841d773a403..2df5e906db08b 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -50,6 +50,7 @@ #include "sysfs.h" #include "qgroup.h" #include "compression.h" +#include "tree-checker.h" #ifdef CONFIG_X86 #include @@ -544,284 +545,6 @@ static int check_tree_block_fsid(struct btrfs_fs_info *fs_info, return ret; } -#define CORRUPT(reason, eb, root, slot) \ - btrfs_crit(root->fs_info, \ - "corrupt %s, %s: block=%llu, root=%llu, slot=%d", \ - btrfs_header_level(eb) == 0 ? "leaf" : "node", \ - reason, btrfs_header_bytenr(eb), root->objectid, slot) - -static int check_extent_data_item(struct btrfs_root *root, - struct extent_buffer *leaf, - struct btrfs_key *key, int slot) -{ - struct btrfs_file_extent_item *fi; - u32 sectorsize = root->fs_info->sectorsize; - u32 item_size = btrfs_item_size_nr(leaf, slot); - - if (!IS_ALIGNED(key->offset, sectorsize)) { - CORRUPT("unaligned key offset for file extent", - leaf, root, slot); - return -EUCLEAN; - } - - fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); - - if (btrfs_file_extent_type(leaf, fi) > BTRFS_FILE_EXTENT_TYPES) { - CORRUPT("invalid file extent type", leaf, root, slot); - return -EUCLEAN; - } - - /* - * Support for new compression/encrption must introduce incompat flag, - * and must be caught in open_ctree(). - */ - if (btrfs_file_extent_compression(leaf, fi) > BTRFS_COMPRESS_TYPES) { - CORRUPT("invalid file extent compression", leaf, root, slot); - return -EUCLEAN; - } - if (btrfs_file_extent_encryption(leaf, fi)) { - CORRUPT("invalid file extent encryption", leaf, root, slot); - return -EUCLEAN; - } - if (btrfs_file_extent_type(leaf, fi) == BTRFS_FILE_EXTENT_INLINE) { - /* Inline extent must have 0 as key offset */ - if (key->offset) { - CORRUPT("inline extent has non-zero key offset", - leaf, root, slot); - return -EUCLEAN; - } - - /* Compressed inline extent has no on-disk size, skip it */ - if (btrfs_file_extent_compression(leaf, fi) != - BTRFS_COMPRESS_NONE) - return 0; - - /* Uncompressed inline extent size must match item size */ - if (item_size != BTRFS_FILE_EXTENT_INLINE_DATA_START + - btrfs_file_extent_ram_bytes(leaf, fi)) { - CORRUPT("plaintext inline extent has invalid size", - leaf, root, slot); - return -EUCLEAN; - } - return 0; - } - - /* Regular or preallocated extent has fixed item size */ - if (item_size != sizeof(*fi)) { - CORRUPT( - "regluar or preallocated extent data item size is invalid", - leaf, root, slot); - return -EUCLEAN; - } - if (!IS_ALIGNED(btrfs_file_extent_ram_bytes(leaf, fi), sectorsize) || - !IS_ALIGNED(btrfs_file_extent_disk_bytenr(leaf, fi), sectorsize) || - !IS_ALIGNED(btrfs_file_extent_disk_num_bytes(leaf, fi), sectorsize) || - !IS_ALIGNED(btrfs_file_extent_offset(leaf, fi), sectorsize) || - !IS_ALIGNED(btrfs_file_extent_num_bytes(leaf, fi), sectorsize)) { - CORRUPT( - "regular or preallocated extent data item has unaligned value", - leaf, root, slot); - return -EUCLEAN; - } - - return 0; -} - -static int check_csum_item(struct btrfs_root *root, struct extent_buffer *leaf, - struct btrfs_key *key, int slot) -{ - u32 sectorsize = root->fs_info->sectorsize; - u32 csumsize = btrfs_super_csum_size(root->fs_info->super_copy); - - if (key->objectid != BTRFS_EXTENT_CSUM_OBJECTID) { - CORRUPT("invalid objectid for csum item", leaf, root, slot); - return -EUCLEAN; - } - if (!IS_ALIGNED(key->offset, sectorsize)) { - CORRUPT("unaligned key offset for csum item", leaf, root, slot); - return -EUCLEAN; - } - if (!IS_ALIGNED(btrfs_item_size_nr(leaf, slot), csumsize)) { - CORRUPT("unaligned csum item size", leaf, root, slot); - return -EUCLEAN; - } - return 0; -} - -/* - * Common point to switch the item-specific validation. - */ -static int check_leaf_item(struct btrfs_root *root, - struct extent_buffer *leaf, - struct btrfs_key *key, int slot) -{ - int ret = 0; - - switch (key->type) { - case BTRFS_EXTENT_DATA_KEY: - ret = check_extent_data_item(root, leaf, key, slot); - break; - case BTRFS_EXTENT_CSUM_KEY: - ret = check_csum_item(root, leaf, key, slot); - break; - } - return ret; -} - -static noinline int check_leaf(struct btrfs_root *root, - struct extent_buffer *leaf) -{ - struct btrfs_fs_info *fs_info = root->fs_info; - /* No valid key type is 0, so all key should be larger than this key */ - struct btrfs_key prev_key = {0, 0, 0}; - struct btrfs_key key; - u32 nritems = btrfs_header_nritems(leaf); - int slot; - - /* - * Extent buffers from a relocation tree have a owner field that - * corresponds to the subvolume tree they are based on. So just from an - * extent buffer alone we can not find out what is the id of the - * corresponding subvolume tree, so we can not figure out if the extent - * buffer corresponds to the root of the relocation tree or not. So skip - * this check for relocation trees. - */ - if (nritems == 0 && !btrfs_header_flag(leaf, BTRFS_HEADER_FLAG_RELOC)) { - struct btrfs_root *check_root; - - key.objectid = btrfs_header_owner(leaf); - key.type = BTRFS_ROOT_ITEM_KEY; - key.offset = (u64)-1; - - check_root = btrfs_get_fs_root(fs_info, &key, false); - /* - * The only reason we also check NULL here is that during - * open_ctree() some roots has not yet been set up. - */ - if (!IS_ERR_OR_NULL(check_root)) { - struct extent_buffer *eb; - - eb = btrfs_root_node(check_root); - /* if leaf is the root, then it's fine */ - if (leaf != eb) { - CORRUPT("non-root leaf's nritems is 0", - leaf, check_root, 0); - free_extent_buffer(eb); - return -EUCLEAN; - } - free_extent_buffer(eb); - } - return 0; - } - - if (nritems == 0) - return 0; - - /* - * Check the following things to make sure this is a good leaf, and - * leaf users won't need to bother with similar sanity checks: - * - * 1) key order - * 2) item offset and size - * No overlap, no hole, all inside the leaf. - * 3) item content - * If possible, do comprehensive sanity check. - * NOTE: All checks must only rely on the item data itself. - */ - for (slot = 0; slot < nritems; slot++) { - u32 item_end_expected; - int ret; - - btrfs_item_key_to_cpu(leaf, &key, slot); - - /* Make sure the keys are in the right order */ - if (btrfs_comp_cpu_keys(&prev_key, &key) >= 0) { - CORRUPT("bad key order", leaf, root, slot); - return -EUCLEAN; - } - - /* - * Make sure the offset and ends are right, remember that the - * item data starts at the end of the leaf and grows towards the - * front. - */ - if (slot == 0) - item_end_expected = BTRFS_LEAF_DATA_SIZE(fs_info); - else - item_end_expected = btrfs_item_offset_nr(leaf, - slot - 1); - if (btrfs_item_end_nr(leaf, slot) != item_end_expected) { - CORRUPT("slot offset bad", leaf, root, slot); - return -EUCLEAN; - } - - /* - * Check to make sure that we don't point outside of the leaf, - * just in case all the items are consistent to each other, but - * all point outside of the leaf. - */ - if (btrfs_item_end_nr(leaf, slot) > - BTRFS_LEAF_DATA_SIZE(fs_info)) { - CORRUPT("slot end outside of leaf", leaf, root, slot); - return -EUCLEAN; - } - - /* Also check if the item pointer overlaps with btrfs item. */ - if (btrfs_item_nr_offset(slot) + sizeof(struct btrfs_item) > - btrfs_item_ptr_offset(leaf, slot)) { - CORRUPT("slot overlap with its data", leaf, root, slot); - return -EUCLEAN; - } - - /* Check if the item size and content meet other criteria */ - ret = check_leaf_item(root, leaf, &key, slot); - if (ret < 0) - return ret; - - prev_key.objectid = key.objectid; - prev_key.type = key.type; - prev_key.offset = key.offset; - } - - return 0; -} - -static int check_node(struct btrfs_root *root, struct extent_buffer *node) -{ - unsigned long nr = btrfs_header_nritems(node); - struct btrfs_key key, next_key; - int slot; - u64 bytenr; - int ret = 0; - - if (nr == 0 || nr > BTRFS_NODEPTRS_PER_BLOCK(root->fs_info)) { - btrfs_crit(root->fs_info, - "corrupt node: block %llu root %llu nritems %lu", - node->start, root->objectid, nr); - return -EIO; - } - - for (slot = 0; slot < nr - 1; slot++) { - bytenr = btrfs_node_blockptr(node, slot); - btrfs_node_key_to_cpu(node, &key, slot); - btrfs_node_key_to_cpu(node, &next_key, slot + 1); - - if (!bytenr) { - CORRUPT("invalid item slot", node, root, slot); - ret = -EIO; - goto out; - } - - if (btrfs_comp_cpu_keys(&key, &next_key) >= 0) { - CORRUPT("bad key order", node, root, slot); - ret = -EIO; - goto out; - } - } -out: - return ret; -} - static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio, u64 phy_offset, struct page *page, u64 start, u64 end, int mirror) @@ -887,12 +610,12 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio, * that we don't try and read the other copies of this block, just * return -EIO. */ - if (found_level == 0 && check_leaf(root, eb)) { + if (found_level == 0 && btrfs_check_leaf(root, eb)) { set_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags); ret = -EIO; } - if (found_level > 0 && check_node(root, eb)) + if (found_level > 0 && btrfs_check_node(root, eb)) ret = -EIO; if (!ret) @@ -4147,7 +3870,7 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf) buf->len, fs_info->dirty_metadata_batch); #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY - if (btrfs_header_level(buf) == 0 && check_leaf(root, buf)) { + if (btrfs_header_level(buf) == 0 && btrfs_check_leaf(root, buf)) { btrfs_print_leaf(buf); ASSERT(0); } diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c new file mode 100644 index 0000000000000..56e25a6301039 --- /dev/null +++ b/fs/btrfs/tree-checker.c @@ -0,0 +1,309 @@ +/* + * Copyright (C) Qu Wenruo 2017. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program. + */ + +/* + * The module is used to catch unexpected/corrupted tree block data. + * Such behavior can be caused either by a fuzzed image or bugs. + * + * The objective is to do leaf/node validation checks when tree block is read + * from disk, and check *every* possible member, so other code won't + * need to checking them again. + * + * Due to the potential and unwanted damage, every checker needs to be + * carefully reviewed otherwise so it does not prevent mount of valid images. + */ + +#include "ctree.h" +#include "tree-checker.h" +#include "disk-io.h" +#include "compression.h" + +#define CORRUPT(reason, eb, root, slot) \ + btrfs_crit(root->fs_info, \ + "corrupt %s, %s: block=%llu, root=%llu, slot=%d", \ + btrfs_header_level(eb) == 0 ? "leaf" : "node", \ + reason, btrfs_header_bytenr(eb), root->objectid, slot) + +static int check_extent_data_item(struct btrfs_root *root, + struct extent_buffer *leaf, + struct btrfs_key *key, int slot) +{ + struct btrfs_file_extent_item *fi; + u32 sectorsize = root->fs_info->sectorsize; + u32 item_size = btrfs_item_size_nr(leaf, slot); + + if (!IS_ALIGNED(key->offset, sectorsize)) { + CORRUPT("unaligned key offset for file extent", + leaf, root, slot); + return -EUCLEAN; + } + + fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); + + if (btrfs_file_extent_type(leaf, fi) > BTRFS_FILE_EXTENT_TYPES) { + CORRUPT("invalid file extent type", leaf, root, slot); + return -EUCLEAN; + } + + /* + * Support for new compression/encrption must introduce incompat flag, + * and must be caught in open_ctree(). + */ + if (btrfs_file_extent_compression(leaf, fi) > BTRFS_COMPRESS_TYPES) { + CORRUPT("invalid file extent compression", leaf, root, slot); + return -EUCLEAN; + } + if (btrfs_file_extent_encryption(leaf, fi)) { + CORRUPT("invalid file extent encryption", leaf, root, slot); + return -EUCLEAN; + } + if (btrfs_file_extent_type(leaf, fi) == BTRFS_FILE_EXTENT_INLINE) { + /* Inline extent must have 0 as key offset */ + if (key->offset) { + CORRUPT("inline extent has non-zero key offset", + leaf, root, slot); + return -EUCLEAN; + } + + /* Compressed inline extent has no on-disk size, skip it */ + if (btrfs_file_extent_compression(leaf, fi) != + BTRFS_COMPRESS_NONE) + return 0; + + /* Uncompressed inline extent size must match item size */ + if (item_size != BTRFS_FILE_EXTENT_INLINE_DATA_START + + btrfs_file_extent_ram_bytes(leaf, fi)) { + CORRUPT("plaintext inline extent has invalid size", + leaf, root, slot); + return -EUCLEAN; + } + return 0; + } + + /* Regular or preallocated extent has fixed item size */ + if (item_size != sizeof(*fi)) { + CORRUPT( + "regluar or preallocated extent data item size is invalid", + leaf, root, slot); + return -EUCLEAN; + } + if (!IS_ALIGNED(btrfs_file_extent_ram_bytes(leaf, fi), sectorsize) || + !IS_ALIGNED(btrfs_file_extent_disk_bytenr(leaf, fi), sectorsize) || + !IS_ALIGNED(btrfs_file_extent_disk_num_bytes(leaf, fi), sectorsize) || + !IS_ALIGNED(btrfs_file_extent_offset(leaf, fi), sectorsize) || + !IS_ALIGNED(btrfs_file_extent_num_bytes(leaf, fi), sectorsize)) { + CORRUPT( + "regular or preallocated extent data item has unaligned value", + leaf, root, slot); + return -EUCLEAN; + } + + return 0; +} + +static int check_csum_item(struct btrfs_root *root, struct extent_buffer *leaf, + struct btrfs_key *key, int slot) +{ + u32 sectorsize = root->fs_info->sectorsize; + u32 csumsize = btrfs_super_csum_size(root->fs_info->super_copy); + + if (key->objectid != BTRFS_EXTENT_CSUM_OBJECTID) { + CORRUPT("invalid objectid for csum item", leaf, root, slot); + return -EUCLEAN; + } + if (!IS_ALIGNED(key->offset, sectorsize)) { + CORRUPT("unaligned key offset for csum item", leaf, root, slot); + return -EUCLEAN; + } + if (!IS_ALIGNED(btrfs_item_size_nr(leaf, slot), csumsize)) { + CORRUPT("unaligned csum item size", leaf, root, slot); + return -EUCLEAN; + } + return 0; +} + +/* + * Common point to switch the item-specific validation. + */ +static int check_leaf_item(struct btrfs_root *root, + struct extent_buffer *leaf, + struct btrfs_key *key, int slot) +{ + int ret = 0; + + switch (key->type) { + case BTRFS_EXTENT_DATA_KEY: + ret = check_extent_data_item(root, leaf, key, slot); + break; + case BTRFS_EXTENT_CSUM_KEY: + ret = check_csum_item(root, leaf, key, slot); + break; + } + return ret; +} + +int btrfs_check_leaf(struct btrfs_root *root, struct extent_buffer *leaf) +{ + struct btrfs_fs_info *fs_info = root->fs_info; + /* No valid key type is 0, so all key should be larger than this key */ + struct btrfs_key prev_key = {0, 0, 0}; + struct btrfs_key key; + u32 nritems = btrfs_header_nritems(leaf); + int slot; + + /* + * Extent buffers from a relocation tree have a owner field that + * corresponds to the subvolume tree they are based on. So just from an + * extent buffer alone we can not find out what is the id of the + * corresponding subvolume tree, so we can not figure out if the extent + * buffer corresponds to the root of the relocation tree or not. So + * skip this check for relocation trees. + */ + if (nritems == 0 && !btrfs_header_flag(leaf, BTRFS_HEADER_FLAG_RELOC)) { + struct btrfs_root *check_root; + + key.objectid = btrfs_header_owner(leaf); + key.type = BTRFS_ROOT_ITEM_KEY; + key.offset = (u64)-1; + + check_root = btrfs_get_fs_root(fs_info, &key, false); + /* + * The only reason we also check NULL here is that during + * open_ctree() some roots has not yet been set up. + */ + if (!IS_ERR_OR_NULL(check_root)) { + struct extent_buffer *eb; + + eb = btrfs_root_node(check_root); + /* if leaf is the root, then it's fine */ + if (leaf != eb) { + CORRUPT("non-root leaf's nritems is 0", + leaf, check_root, 0); + free_extent_buffer(eb); + return -EUCLEAN; + } + free_extent_buffer(eb); + } + return 0; + } + + if (nritems == 0) + return 0; + + /* + * Check the following things to make sure this is a good leaf, and + * leaf users won't need to bother with similar sanity checks: + * + * 1) key ordering + * 2) item offset and size + * No overlap, no hole, all inside the leaf. + * 3) item content + * If possible, do comprehensive sanity check. + * NOTE: All checks must only rely on the item data itself. + */ + for (slot = 0; slot < nritems; slot++) { + u32 item_end_expected; + int ret; + + btrfs_item_key_to_cpu(leaf, &key, slot); + + /* Make sure the keys are in the right order */ + if (btrfs_comp_cpu_keys(&prev_key, &key) >= 0) { + CORRUPT("bad key order", leaf, root, slot); + return -EUCLEAN; + } + + /* + * Make sure the offset and ends are right, remember that the + * item data starts at the end of the leaf and grows towards the + * front. + */ + if (slot == 0) + item_end_expected = BTRFS_LEAF_DATA_SIZE(fs_info); + else + item_end_expected = btrfs_item_offset_nr(leaf, + slot - 1); + if (btrfs_item_end_nr(leaf, slot) != item_end_expected) { + CORRUPT("slot offset bad", leaf, root, slot); + return -EUCLEAN; + } + + /* + * Check to make sure that we don't point outside of the leaf, + * just in case all the items are consistent to each other, but + * all point outside of the leaf. + */ + if (btrfs_item_end_nr(leaf, slot) > + BTRFS_LEAF_DATA_SIZE(fs_info)) { + CORRUPT("slot end outside of leaf", leaf, root, slot); + return -EUCLEAN; + } + + /* Also check if the item pointer overlaps with btrfs item. */ + if (btrfs_item_nr_offset(slot) + sizeof(struct btrfs_item) > + btrfs_item_ptr_offset(leaf, slot)) { + CORRUPT("slot overlap with its data", leaf, root, slot); + return -EUCLEAN; + } + + /* Check if the item size and content meet other criteria */ + ret = check_leaf_item(root, leaf, &key, slot); + if (ret < 0) + return ret; + + prev_key.objectid = key.objectid; + prev_key.type = key.type; + prev_key.offset = key.offset; + } + + return 0; +} + +int btrfs_check_node(struct btrfs_root *root, struct extent_buffer *node) +{ + unsigned long nr = btrfs_header_nritems(node); + struct btrfs_key key, next_key; + int slot; + u64 bytenr; + int ret = 0; + + if (nr == 0 || nr > BTRFS_NODEPTRS_PER_BLOCK(root->fs_info)) { + btrfs_crit(root->fs_info, + "corrupt node: block %llu root %llu nritems %lu", + node->start, root->objectid, nr); + return -EIO; + } + + for (slot = 0; slot < nr - 1; slot++) { + bytenr = btrfs_node_blockptr(node, slot); + btrfs_node_key_to_cpu(node, &key, slot); + btrfs_node_key_to_cpu(node, &next_key, slot + 1); + + if (!bytenr) { + CORRUPT("invalid item slot", node, root, slot); + ret = -EIO; + goto out; + } + + if (btrfs_comp_cpu_keys(&key, &next_key) >= 0) { + CORRUPT("bad key order", node, root, slot); + ret = -EIO; + goto out; + } + } +out: + return ret; +} diff --git a/fs/btrfs/tree-checker.h b/fs/btrfs/tree-checker.h new file mode 100644 index 0000000000000..96c486e95d704 --- /dev/null +++ b/fs/btrfs/tree-checker.h @@ -0,0 +1,26 @@ +/* + * Copyright (C) Qu Wenruo 2017. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program. + */ + +#ifndef __BTRFS_TREE_CHECKER__ +#define __BTRFS_TREE_CHECKER__ + +#include "ctree.h" +#include "extent_io.h" + +int btrfs_check_leaf(struct btrfs_root *root, struct extent_buffer *leaf); +int btrfs_check_node(struct btrfs_root *root, struct extent_buffer *node); + +#endif -- GitLab From b3032dc25fb4d7ffdbb61df875cc09b3ec662c85 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Mon, 9 Oct 2017 01:51:03 +0000 Subject: [PATCH 1823/2269] btrfs: tree-checker: Enhance btrfs_check_node output commit bba4f29896c986c4cec17bc0f19f2ce644fceae1 upstream. Use inline function to replace macro since we don't need stringification. (Macro still exists until all callers get updated) And add more info about the error, and replace EIO with EUCLEAN. For nr_items error, report if it's too large or too small, and output the valid value range. For node block pointer, added a new alignment checker. For key order, also output the next key to make the problem more obvious. Signed-off-by: Qu Wenruo [ wording adjustments, unindented long strings ] Signed-off-by: David Sterba Signed-off-by: Ben Hutchings Signed-off-by: Sasha Levin --- fs/btrfs/tree-checker.c | 68 ++++++++++++++++++++++++++++++++++++----- 1 file changed, 61 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index 56e25a6301039..5acdf3355a3ff 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -37,6 +37,46 @@ btrfs_header_level(eb) == 0 ? "leaf" : "node", \ reason, btrfs_header_bytenr(eb), root->objectid, slot) +/* + * Error message should follow the following format: + * corrupt : , [, ] + * + * @type: leaf or node + * @identifier: the necessary info to locate the leaf/node. + * It's recommened to decode key.objecitd/offset if it's + * meaningful. + * @reason: describe the error + * @bad_value: optional, it's recommened to output bad value and its + * expected value (range). + * + * Since comma is used to separate the components, only space is allowed + * inside each component. + */ + +/* + * Append generic "corrupt leaf/node root=%llu block=%llu slot=%d: " to @fmt. + * Allows callers to customize the output. + */ +__printf(4, 5) +static void generic_err(const struct btrfs_root *root, + const struct extent_buffer *eb, int slot, + const char *fmt, ...) +{ + struct va_format vaf; + va_list args; + + va_start(args, fmt); + + vaf.fmt = fmt; + vaf.va = &args; + + btrfs_crit(root->fs_info, + "corrupt %s: root=%llu block=%llu slot=%d, %pV", + btrfs_header_level(eb) == 0 ? "leaf" : "node", + root->objectid, btrfs_header_bytenr(eb), slot, &vaf); + va_end(args); +} + static int check_extent_data_item(struct btrfs_root *root, struct extent_buffer *leaf, struct btrfs_key *key, int slot) @@ -282,9 +322,11 @@ int btrfs_check_node(struct btrfs_root *root, struct extent_buffer *node) if (nr == 0 || nr > BTRFS_NODEPTRS_PER_BLOCK(root->fs_info)) { btrfs_crit(root->fs_info, - "corrupt node: block %llu root %llu nritems %lu", - node->start, root->objectid, nr); - return -EIO; +"corrupt node: root=%llu block=%llu, nritems too %s, have %lu expect range [1,%u]", + root->objectid, node->start, + nr == 0 ? "small" : "large", nr, + BTRFS_NODEPTRS_PER_BLOCK(root->fs_info)); + return -EUCLEAN; } for (slot = 0; slot < nr - 1; slot++) { @@ -293,14 +335,26 @@ int btrfs_check_node(struct btrfs_root *root, struct extent_buffer *node) btrfs_node_key_to_cpu(node, &next_key, slot + 1); if (!bytenr) { - CORRUPT("invalid item slot", node, root, slot); - ret = -EIO; + generic_err(root, node, slot, + "invalid NULL node pointer"); + ret = -EUCLEAN; + goto out; + } + if (!IS_ALIGNED(bytenr, root->fs_info->sectorsize)) { + generic_err(root, node, slot, + "unaligned pointer, have %llu should be aligned to %u", + bytenr, root->fs_info->sectorsize); + ret = -EUCLEAN; goto out; } if (btrfs_comp_cpu_keys(&key, &next_key) >= 0) { - CORRUPT("bad key order", node, root, slot); - ret = -EIO; + generic_err(root, node, slot, + "bad key order, current (%llu %u %llu) next (%llu %u %llu)", + key.objectid, key.type, key.offset, + next_key.objectid, next_key.type, + next_key.offset); + ret = -EUCLEAN; goto out; } } -- GitLab From b6a07f903543e45d3660d5fbc8385f89da763c81 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Wed, 8 Nov 2017 08:54:24 +0800 Subject: [PATCH 1824/2269] btrfs: tree-checker: Fix false panic for sanity test commit 69fc6cbbac542c349b3d350d10f6e394c253c81d upstream. [BUG] If we run btrfs with CONFIG_BTRFS_FS_RUN_SANITY_TESTS=y, it will instantly cause kernel panic like: ------ ... assertion failed: 0, file: fs/btrfs/disk-io.c, line: 3853 ... Call Trace: btrfs_mark_buffer_dirty+0x187/0x1f0 [btrfs] setup_items_for_insert+0x385/0x650 [btrfs] __btrfs_drop_extents+0x129a/0x1870 [btrfs] ... ----- [Cause] Btrfs will call btrfs_check_leaf() in btrfs_mark_buffer_dirty() to check if the leaf is valid with CONFIG_BTRFS_FS_RUN_SANITY_TESTS=y. However quite some btrfs_mark_buffer_dirty() callers(*) don't really initialize its item data but only initialize its item pointers, leaving item data uninitialized. This makes tree-checker catch uninitialized data as error, causing such panic. *: These callers include but not limited to setup_items_for_insert() btrfs_split_item() btrfs_expand_item() [Fix] Add a new parameter @check_item_data to btrfs_check_leaf(). With @check_item_data set to false, item data check will be skipped and fallback to old btrfs_check_leaf() behavior. So we can still get early warning if we screw up item pointers, and avoid false panic. Cc: Filipe Manana Reported-by: Lakshmipathi.G Signed-off-by: Qu Wenruo Reviewed-by: Liu Bo Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Ben Hutchings Signed-off-by: Sasha Levin --- fs/btrfs/disk-io.c | 10 ++++++++-- fs/btrfs/tree-checker.c | 27 ++++++++++++++++++++++----- fs/btrfs/tree-checker.h | 14 +++++++++++++- 3 files changed, 43 insertions(+), 8 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 2df5e906db08b..e42673477c25e 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -610,7 +610,7 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio, * that we don't try and read the other copies of this block, just * return -EIO. */ - if (found_level == 0 && btrfs_check_leaf(root, eb)) { + if (found_level == 0 && btrfs_check_leaf_full(root, eb)) { set_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags); ret = -EIO; } @@ -3870,7 +3870,13 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf) buf->len, fs_info->dirty_metadata_batch); #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY - if (btrfs_header_level(buf) == 0 && btrfs_check_leaf(root, buf)) { + /* + * Since btrfs_mark_buffer_dirty() can be called with item pointer set + * but item data not updated. + * So here we should only check item pointers, not item data. + */ + if (btrfs_header_level(buf) == 0 && + btrfs_check_leaf_relaxed(root, buf)) { btrfs_print_leaf(buf); ASSERT(0); } diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index 5acdf3355a3ff..ff4fa8f905d37 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -195,7 +195,8 @@ static int check_leaf_item(struct btrfs_root *root, return ret; } -int btrfs_check_leaf(struct btrfs_root *root, struct extent_buffer *leaf) +static int check_leaf(struct btrfs_root *root, struct extent_buffer *leaf, + bool check_item_data) { struct btrfs_fs_info *fs_info = root->fs_info; /* No valid key type is 0, so all key should be larger than this key */ @@ -299,10 +300,15 @@ int btrfs_check_leaf(struct btrfs_root *root, struct extent_buffer *leaf) return -EUCLEAN; } - /* Check if the item size and content meet other criteria */ - ret = check_leaf_item(root, leaf, &key, slot); - if (ret < 0) - return ret; + if (check_item_data) { + /* + * Check if the item size and content meet other + * criteria + */ + ret = check_leaf_item(root, leaf, &key, slot); + if (ret < 0) + return ret; + } prev_key.objectid = key.objectid; prev_key.type = key.type; @@ -312,6 +318,17 @@ int btrfs_check_leaf(struct btrfs_root *root, struct extent_buffer *leaf) return 0; } +int btrfs_check_leaf_full(struct btrfs_root *root, struct extent_buffer *leaf) +{ + return check_leaf(root, leaf, true); +} + +int btrfs_check_leaf_relaxed(struct btrfs_root *root, + struct extent_buffer *leaf) +{ + return check_leaf(root, leaf, false); +} + int btrfs_check_node(struct btrfs_root *root, struct extent_buffer *node) { unsigned long nr = btrfs_header_nritems(node); diff --git a/fs/btrfs/tree-checker.h b/fs/btrfs/tree-checker.h index 96c486e95d704..3d53e8d6fda0c 100644 --- a/fs/btrfs/tree-checker.h +++ b/fs/btrfs/tree-checker.h @@ -20,7 +20,19 @@ #include "ctree.h" #include "extent_io.h" -int btrfs_check_leaf(struct btrfs_root *root, struct extent_buffer *leaf); +/* + * Comprehensive leaf checker. + * Will check not only the item pointers, but also every possible member + * in item data. + */ +int btrfs_check_leaf_full(struct btrfs_root *root, struct extent_buffer *leaf); + +/* + * Less strict leaf checker. + * Will only check item pointers, not reading item data. + */ +int btrfs_check_leaf_relaxed(struct btrfs_root *root, + struct extent_buffer *leaf); int btrfs_check_node(struct btrfs_root *root, struct extent_buffer *node); #endif -- GitLab From fe09fe216e502b17716689721a5c59d6a21277ac Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Wed, 8 Nov 2017 08:54:25 +0800 Subject: [PATCH 1825/2269] btrfs: tree-checker: Add checker for dir item commit ad7b0368f33cffe67fecd302028915926e50ef7e upstream. Add checker for dir item, for key types DIR_ITEM, DIR_INDEX and XATTR_ITEM. This checker does comprehensive checks for: 1) dir_item header and its data size Against item boundary and maximum name/xattr length. This part is mostly the same as old verify_dir_item(). 2) dir_type Against maximum file types, and against key type. Since XATTR key should only have FT_XATTR dir item, and normal dir item type should not have XATTR key. The check between key->type and dir_type is newly introduced by this patch. 3) name hash For XATTR and DIR_ITEM key, key->offset is name hash (crc32c). Check the hash of the name against the key to ensure it's correct. The name hash check is only found in btrfs-progs before this patch. Signed-off-by: Qu Wenruo Reviewed-by: Nikolay Borisov Reviewed-by: Su Yue Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Ben Hutchings Signed-off-by: Sasha Levin --- fs/btrfs/tree-checker.c | 141 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 141 insertions(+) diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index ff4fa8f905d37..b32df86de9bf6 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -30,6 +30,7 @@ #include "tree-checker.h" #include "disk-io.h" #include "compression.h" +#include "hash.h" #define CORRUPT(reason, eb, root, slot) \ btrfs_crit(root->fs_info, \ @@ -175,6 +176,141 @@ static int check_csum_item(struct btrfs_root *root, struct extent_buffer *leaf, return 0; } +/* + * Customized reported for dir_item, only important new info is key->objectid, + * which represents inode number + */ +__printf(4, 5) +static void dir_item_err(const struct btrfs_root *root, + const struct extent_buffer *eb, int slot, + const char *fmt, ...) +{ + struct btrfs_key key; + struct va_format vaf; + va_list args; + + btrfs_item_key_to_cpu(eb, &key, slot); + va_start(args, fmt); + + vaf.fmt = fmt; + vaf.va = &args; + + btrfs_crit(root->fs_info, + "corrupt %s: root=%llu block=%llu slot=%d ino=%llu, %pV", + btrfs_header_level(eb) == 0 ? "leaf" : "node", root->objectid, + btrfs_header_bytenr(eb), slot, key.objectid, &vaf); + va_end(args); +} + +static int check_dir_item(struct btrfs_root *root, + struct extent_buffer *leaf, + struct btrfs_key *key, int slot) +{ + struct btrfs_dir_item *di; + u32 item_size = btrfs_item_size_nr(leaf, slot); + u32 cur = 0; + + di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item); + while (cur < item_size) { + char namebuf[max(BTRFS_NAME_LEN, XATTR_NAME_MAX)]; + u32 name_len; + u32 data_len; + u32 max_name_len; + u32 total_size; + u32 name_hash; + u8 dir_type; + + /* header itself should not cross item boundary */ + if (cur + sizeof(*di) > item_size) { + dir_item_err(root, leaf, slot, + "dir item header crosses item boundary, have %lu boundary %u", + cur + sizeof(*di), item_size); + return -EUCLEAN; + } + + /* dir type check */ + dir_type = btrfs_dir_type(leaf, di); + if (dir_type >= BTRFS_FT_MAX) { + dir_item_err(root, leaf, slot, + "invalid dir item type, have %u expect [0, %u)", + dir_type, BTRFS_FT_MAX); + return -EUCLEAN; + } + + if (key->type == BTRFS_XATTR_ITEM_KEY && + dir_type != BTRFS_FT_XATTR) { + dir_item_err(root, leaf, slot, + "invalid dir item type for XATTR key, have %u expect %u", + dir_type, BTRFS_FT_XATTR); + return -EUCLEAN; + } + if (dir_type == BTRFS_FT_XATTR && + key->type != BTRFS_XATTR_ITEM_KEY) { + dir_item_err(root, leaf, slot, + "xattr dir type found for non-XATTR key"); + return -EUCLEAN; + } + if (dir_type == BTRFS_FT_XATTR) + max_name_len = XATTR_NAME_MAX; + else + max_name_len = BTRFS_NAME_LEN; + + /* Name/data length check */ + name_len = btrfs_dir_name_len(leaf, di); + data_len = btrfs_dir_data_len(leaf, di); + if (name_len > max_name_len) { + dir_item_err(root, leaf, slot, + "dir item name len too long, have %u max %u", + name_len, max_name_len); + return -EUCLEAN; + } + if (name_len + data_len > BTRFS_MAX_XATTR_SIZE(root->fs_info)) { + dir_item_err(root, leaf, slot, + "dir item name and data len too long, have %u max %u", + name_len + data_len, + BTRFS_MAX_XATTR_SIZE(root->fs_info)); + return -EUCLEAN; + } + + if (data_len && dir_type != BTRFS_FT_XATTR) { + dir_item_err(root, leaf, slot, + "dir item with invalid data len, have %u expect 0", + data_len); + return -EUCLEAN; + } + + total_size = sizeof(*di) + name_len + data_len; + + /* header and name/data should not cross item boundary */ + if (cur + total_size > item_size) { + dir_item_err(root, leaf, slot, + "dir item data crosses item boundary, have %u boundary %u", + cur + total_size, item_size); + return -EUCLEAN; + } + + /* + * Special check for XATTR/DIR_ITEM, as key->offset is name + * hash, should match its name + */ + if (key->type == BTRFS_DIR_ITEM_KEY || + key->type == BTRFS_XATTR_ITEM_KEY) { + read_extent_buffer(leaf, namebuf, + (unsigned long)(di + 1), name_len); + name_hash = btrfs_name_hash(namebuf, name_len); + if (key->offset != name_hash) { + dir_item_err(root, leaf, slot, + "name hash mismatch with key, have 0x%016x expect 0x%016llx", + name_hash, key->offset); + return -EUCLEAN; + } + } + cur += total_size; + di = (struct btrfs_dir_item *)((void *)di + total_size); + } + return 0; +} + /* * Common point to switch the item-specific validation. */ @@ -191,6 +327,11 @@ static int check_leaf_item(struct btrfs_root *root, case BTRFS_EXTENT_CSUM_KEY: ret = check_csum_item(root, leaf, key, slot); break; + case BTRFS_DIR_ITEM_KEY: + case BTRFS_DIR_INDEX_KEY: + case BTRFS_XATTR_ITEM_KEY: + ret = check_dir_item(root, leaf, key, slot); + break; } return ret; } -- GitLab From 52ea16655aeed2571b9042c18db3eb089c37910c Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 6 Dec 2017 15:18:14 +0100 Subject: [PATCH 1826/2269] btrfs: tree-checker: use %zu format string for size_t commit 7cfad65297bfe0aa2996cd72d21c898aa84436d9 upstream. The return value of sizeof() is of type size_t, so we must print it using the %z format modifier rather than %l to avoid this warning on some architectures: fs/btrfs/tree-checker.c: In function 'check_dir_item': fs/btrfs/tree-checker.c:273:50: error: format '%lu' expects argument of type 'long unsigned int', but argument 5 has type 'u32' {aka 'unsigned int'} [-Werror=format=] Fixes: 005887f2e3e0 ("btrfs: tree-checker: Add checker for dir item") Signed-off-by: Arnd Bergmann Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Ben Hutchings Signed-off-by: Sasha Levin --- fs/btrfs/tree-checker.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index b32df86de9bf6..9376739a4cc81 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -223,7 +223,7 @@ static int check_dir_item(struct btrfs_root *root, /* header itself should not cross item boundary */ if (cur + sizeof(*di) > item_size) { dir_item_err(root, leaf, slot, - "dir item header crosses item boundary, have %lu boundary %u", + "dir item header crosses item boundary, have %zu boundary %u", cur + sizeof(*di), item_size); return -EUCLEAN; } -- GitLab From e07e1c7561a7e087a5e512c972aa7d3e1388c057 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 10 Jan 2018 15:13:07 +0100 Subject: [PATCH 1827/2269] btrfs: tree-check: reduce stack consumption in check_dir_item commit e2683fc9d219430f5b78889b50cde7f40efeba7b upstream. I've noticed that the updated item checker stack consumption increased dramatically in 542f5385e20cf97447 ("btrfs: tree-checker: Add checker for dir item") tree-checker.c:check_leaf +552 (176 -> 728) The array is 255 bytes long, dynamic allocation would slow down the sanity checks so it's more reasonable to keep it on-stack. Moving the variable to the scope of use reduces the stack usage again tree-checker.c:check_leaf -264 (728 -> 464) Reviewed-by: Josef Bacik Reviewed-by: Qu Wenruo Signed-off-by: David Sterba Signed-off-by: Ben Hutchings Signed-off-by: Sasha Levin --- fs/btrfs/tree-checker.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index 9376739a4cc81..f7e7a455b710e 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -212,7 +212,6 @@ static int check_dir_item(struct btrfs_root *root, di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item); while (cur < item_size) { - char namebuf[max(BTRFS_NAME_LEN, XATTR_NAME_MAX)]; u32 name_len; u32 data_len; u32 max_name_len; @@ -295,6 +294,8 @@ static int check_dir_item(struct btrfs_root *root, */ if (key->type == BTRFS_DIR_ITEM_KEY || key->type == BTRFS_XATTR_ITEM_KEY) { + char namebuf[max(BTRFS_NAME_LEN, XATTR_NAME_MAX)]; + read_extent_buffer(leaf, namebuf, (unsigned long)(di + 1), name_len); name_hash = btrfs_name_hash(namebuf, name_len); -- GitLab From 9f268b5cf2d6a716779dfe11f4bc02d6461db693 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Tue, 3 Jul 2018 17:10:05 +0800 Subject: [PATCH 1828/2269] btrfs: tree-checker: Verify block_group_item commit fce466eab7ac6baa9d2dcd88abcf945be3d4a089 upstream. A crafted image with invalid block group items could make free space cache code to cause panic. We could detect such invalid block group item by checking: 1) Item size Known fixed value. 2) Block group size (key.offset) We have an upper limit on block group item (10G) 3) Chunk objectid Known fixed value. 4) Type Only 4 valid type values, DATA, METADATA, SYSTEM and DATA|METADATA. No more than 1 bit set for profile type. 5) Used space No more than the block group size. This should allow btrfs to detect and refuse to mount the crafted image. Link: https://bugzilla.kernel.org/show_bug.cgi?id=199849 Reported-by: Xu Wen Signed-off-by: Qu Wenruo Reviewed-by: Gu Jinxiang Reviewed-by: Nikolay Borisov Tested-by: Gu Jinxiang Reviewed-by: David Sterba Signed-off-by: David Sterba [bwh: Backported to 4.14: - In check_leaf_item(), pass root->fs_info to check_block_group_item() - Adjust context] Signed-off-by: Ben Hutchings Signed-off-by: Sasha Levin --- fs/btrfs/tree-checker.c | 100 ++++++++++++++++++++++++++++++++++++++++ fs/btrfs/volumes.c | 2 +- fs/btrfs/volumes.h | 2 + 3 files changed, 103 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index f7e7a455b710e..cf9b10a071349 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -31,6 +31,7 @@ #include "disk-io.h" #include "compression.h" #include "hash.h" +#include "volumes.h" #define CORRUPT(reason, eb, root, slot) \ btrfs_crit(root->fs_info, \ @@ -312,6 +313,102 @@ static int check_dir_item(struct btrfs_root *root, return 0; } +__printf(4, 5) +__cold +static void block_group_err(const struct btrfs_fs_info *fs_info, + const struct extent_buffer *eb, int slot, + const char *fmt, ...) +{ + struct btrfs_key key; + struct va_format vaf; + va_list args; + + btrfs_item_key_to_cpu(eb, &key, slot); + va_start(args, fmt); + + vaf.fmt = fmt; + vaf.va = &args; + + btrfs_crit(fs_info, + "corrupt %s: root=%llu block=%llu slot=%d bg_start=%llu bg_len=%llu, %pV", + btrfs_header_level(eb) == 0 ? "leaf" : "node", + btrfs_header_owner(eb), btrfs_header_bytenr(eb), slot, + key.objectid, key.offset, &vaf); + va_end(args); +} + +static int check_block_group_item(struct btrfs_fs_info *fs_info, + struct extent_buffer *leaf, + struct btrfs_key *key, int slot) +{ + struct btrfs_block_group_item bgi; + u32 item_size = btrfs_item_size_nr(leaf, slot); + u64 flags; + u64 type; + + /* + * Here we don't really care about alignment since extent allocator can + * handle it. We care more about the size, as if one block group is + * larger than maximum size, it's must be some obvious corruption. + */ + if (key->offset > BTRFS_MAX_DATA_CHUNK_SIZE || key->offset == 0) { + block_group_err(fs_info, leaf, slot, + "invalid block group size, have %llu expect (0, %llu]", + key->offset, BTRFS_MAX_DATA_CHUNK_SIZE); + return -EUCLEAN; + } + + if (item_size != sizeof(bgi)) { + block_group_err(fs_info, leaf, slot, + "invalid item size, have %u expect %zu", + item_size, sizeof(bgi)); + return -EUCLEAN; + } + + read_extent_buffer(leaf, &bgi, btrfs_item_ptr_offset(leaf, slot), + sizeof(bgi)); + if (btrfs_block_group_chunk_objectid(&bgi) != + BTRFS_FIRST_CHUNK_TREE_OBJECTID) { + block_group_err(fs_info, leaf, slot, + "invalid block group chunk objectid, have %llu expect %llu", + btrfs_block_group_chunk_objectid(&bgi), + BTRFS_FIRST_CHUNK_TREE_OBJECTID); + return -EUCLEAN; + } + + if (btrfs_block_group_used(&bgi) > key->offset) { + block_group_err(fs_info, leaf, slot, + "invalid block group used, have %llu expect [0, %llu)", + btrfs_block_group_used(&bgi), key->offset); + return -EUCLEAN; + } + + flags = btrfs_block_group_flags(&bgi); + if (hweight64(flags & BTRFS_BLOCK_GROUP_PROFILE_MASK) > 1) { + block_group_err(fs_info, leaf, slot, +"invalid profile flags, have 0x%llx (%lu bits set) expect no more than 1 bit set", + flags & BTRFS_BLOCK_GROUP_PROFILE_MASK, + hweight64(flags & BTRFS_BLOCK_GROUP_PROFILE_MASK)); + return -EUCLEAN; + } + + type = flags & BTRFS_BLOCK_GROUP_TYPE_MASK; + if (type != BTRFS_BLOCK_GROUP_DATA && + type != BTRFS_BLOCK_GROUP_METADATA && + type != BTRFS_BLOCK_GROUP_SYSTEM && + type != (BTRFS_BLOCK_GROUP_METADATA | + BTRFS_BLOCK_GROUP_DATA)) { + block_group_err(fs_info, leaf, slot, +"invalid type, have 0x%llx (%lu bits set) expect either 0x%llx, 0x%llx, 0x%llu or 0x%llx", + type, hweight64(type), + BTRFS_BLOCK_GROUP_DATA, BTRFS_BLOCK_GROUP_METADATA, + BTRFS_BLOCK_GROUP_SYSTEM, + BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA); + return -EUCLEAN; + } + return 0; +} + /* * Common point to switch the item-specific validation. */ @@ -333,6 +430,9 @@ static int check_leaf_item(struct btrfs_root *root, case BTRFS_XATTR_ITEM_KEY: ret = check_dir_item(root, leaf, key, slot); break; + case BTRFS_BLOCK_GROUP_ITEM_KEY: + ret = check_block_group_item(root->fs_info, leaf, key, slot); + break; } return ret; } diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index cfd5728e7519c..9663b6aa2a56e 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -4647,7 +4647,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, if (type & BTRFS_BLOCK_GROUP_DATA) { max_stripe_size = SZ_1G; - max_chunk_size = 10 * max_stripe_size; + max_chunk_size = BTRFS_MAX_DATA_CHUNK_SIZE; if (!devs_max) devs_max = BTRFS_MAX_DEVS(info->chunk_root); } else if (type & BTRFS_BLOCK_GROUP_METADATA) { diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index c5dd48eb7b3d5..76fb6e84f2017 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -24,6 +24,8 @@ #include #include "async-thread.h" +#define BTRFS_MAX_DATA_CHUNK_SIZE (10ULL * SZ_1G) + extern struct mutex uuid_mutex; #define BTRFS_STRIPE_LEN SZ_64K -- GitLab From c0dfb99847851fb830d1e8ea7d5e0571f50c325a Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Tue, 3 Jul 2018 17:10:06 +0800 Subject: [PATCH 1829/2269] btrfs: tree-checker: Detect invalid and empty essential trees commit ba480dd4db9f1798541eb2d1c423fc95feee8d36 upstream. A crafted image has empty root tree block, which will later cause NULL pointer dereference. The following trees should never be empty: 1) Tree root Must contain at least root items for extent tree, device tree and fs tree 2) Chunk tree Or we can't even bootstrap as it contains the mapping. 3) Fs tree At least inode item for top level inode (.). 4) Device tree Dev extents for chunks 5) Extent tree Must have corresponding extent for each chunk. If any of them is empty, we are sure the fs is corrupted and no need to mount it. Link: https://bugzilla.kernel.org/show_bug.cgi?id=199847 Reported-by: Xu Wen Signed-off-by: Qu Wenruo Tested-by: Gu Jinxiang Reviewed-by: David Sterba Signed-off-by: David Sterba [bwh: Backported to 4.14: Pass root instead of fs_info to generic_err()] Signed-off-by: Ben Hutchings Signed-off-by: Sasha Levin --- fs/btrfs/tree-checker.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index cf9b10a071349..31756bac75b46 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -456,9 +456,22 @@ static int check_leaf(struct btrfs_root *root, struct extent_buffer *leaf, * skip this check for relocation trees. */ if (nritems == 0 && !btrfs_header_flag(leaf, BTRFS_HEADER_FLAG_RELOC)) { + u64 owner = btrfs_header_owner(leaf); struct btrfs_root *check_root; - key.objectid = btrfs_header_owner(leaf); + /* These trees must never be empty */ + if (owner == BTRFS_ROOT_TREE_OBJECTID || + owner == BTRFS_CHUNK_TREE_OBJECTID || + owner == BTRFS_EXTENT_TREE_OBJECTID || + owner == BTRFS_DEV_TREE_OBJECTID || + owner == BTRFS_FS_TREE_OBJECTID || + owner == BTRFS_DATA_RELOC_TREE_OBJECTID) { + generic_err(root, leaf, 0, + "invalid root, root %llu must never be empty", + owner); + return -EUCLEAN; + } + key.objectid = owner; key.type = BTRFS_ROOT_ITEM_KEY; key.offset = (u64)-1; -- GitLab From 34407a175a59b668a1a2bbf0d0e495d87a7777d8 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Wed, 1 Aug 2018 10:37:16 +0800 Subject: [PATCH 1830/2269] btrfs: Check that each block group has corresponding chunk at mount time commit 514c7dca85a0bf40be984dab0b477403a6db901f upstream. A crafted btrfs image with incorrect chunk<->block group mapping will trigger a lot of unexpected things as the mapping is essential. Although the problem can be caught by block group item checker added in "btrfs: tree-checker: Verify block_group_item", it's still not sufficient. A sufficiently valid block group item can pass the check added by the mentioned patch but could fail to match the existing chunk. This patch will add extra block group -> chunk mapping check, to ensure we have a completely matching (start, len, flags) chunk for each block group at mount time. Here we reuse the original helper find_first_block_group(), which is already doing the basic bg -> chunk checks, adding further checks of the start/len and type flags. Link: https://bugzilla.kernel.org/show_bug.cgi?id=199837 Reported-by: Xu Wen Signed-off-by: Qu Wenruo Reviewed-by: Su Yue Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Ben Hutchings Signed-off-by: Sasha Levin --- fs/btrfs/extent-tree.c | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index fdc42eddccc2b..83791d13c204c 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -9828,6 +9828,8 @@ static int find_first_block_group(struct btrfs_fs_info *fs_info, int ret = 0; struct btrfs_key found_key; struct extent_buffer *leaf; + struct btrfs_block_group_item bg; + u64 flags; int slot; ret = btrfs_search_slot(NULL, root, key, path, 0, 0); @@ -9862,8 +9864,32 @@ static int find_first_block_group(struct btrfs_fs_info *fs_info, "logical %llu len %llu found bg but no related chunk", found_key.objectid, found_key.offset); ret = -ENOENT; + } else if (em->start != found_key.objectid || + em->len != found_key.offset) { + btrfs_err(fs_info, + "block group %llu len %llu mismatch with chunk %llu len %llu", + found_key.objectid, found_key.offset, + em->start, em->len); + ret = -EUCLEAN; } else { - ret = 0; + read_extent_buffer(leaf, &bg, + btrfs_item_ptr_offset(leaf, slot), + sizeof(bg)); + flags = btrfs_block_group_flags(&bg) & + BTRFS_BLOCK_GROUP_TYPE_MASK; + + if (flags != (em->map_lookup->type & + BTRFS_BLOCK_GROUP_TYPE_MASK)) { + btrfs_err(fs_info, +"block group %llu len %llu type flags 0x%llx mismatch with chunk type flags 0x%llx", + found_key.objectid, + found_key.offset, flags, + (BTRFS_BLOCK_GROUP_TYPE_MASK & + em->map_lookup->type)); + ret = -EUCLEAN; + } else { + ret = 0; + } } free_extent_map(em); goto out; -- GitLab From cf968bbccba9860eef9480ae560711fbc5cbc1b5 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Fri, 28 Sep 2018 07:59:34 +0800 Subject: [PATCH 1831/2269] btrfs: tree-checker: Check level for leaves and nodes commit f556faa46eb4e96d0d0772e74ecf66781e132f72 upstream. Although we have tree level check at tree read runtime, it's completely based on its parent level. We still need to do accurate level check to avoid invalid tree blocks sneak into kernel space. The check itself is simple, for leaf its level should always be 0. For nodes its level should be in range [1, BTRFS_MAX_LEVEL - 1]. Signed-off-by: Qu Wenruo Reviewed-by: Su Yue Reviewed-by: David Sterba Signed-off-by: David Sterba [bwh: Backported to 4.14: - Pass root instead of fs_info to generic_err() - Adjust context] Signed-off-by: Ben Hutchings Signed-off-by: Sasha Levin --- fs/btrfs/tree-checker.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index 31756bac75b46..fa8f64119e6f0 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -447,6 +447,13 @@ static int check_leaf(struct btrfs_root *root, struct extent_buffer *leaf, u32 nritems = btrfs_header_nritems(leaf); int slot; + if (btrfs_header_level(leaf) != 0) { + generic_err(root, leaf, 0, + "invalid level for leaf, have %d expect 0", + btrfs_header_level(leaf)); + return -EUCLEAN; + } + /* * Extent buffers from a relocation tree have a owner field that * corresponds to the subvolume tree they are based on. So just from an @@ -589,9 +596,16 @@ int btrfs_check_node(struct btrfs_root *root, struct extent_buffer *node) unsigned long nr = btrfs_header_nritems(node); struct btrfs_key key, next_key; int slot; + int level = btrfs_header_level(node); u64 bytenr; int ret = 0; + if (level <= 0 || level >= BTRFS_MAX_LEVEL) { + generic_err(root, node, 0, + "invalid level for node, have %d expect [1, %d]", + level, BTRFS_MAX_LEVEL - 1); + return -EUCLEAN; + } if (nr == 0 || nr > BTRFS_NODEPTRS_PER_BLOCK(root->fs_info)) { btrfs_crit(root->fs_info, "corrupt node: root=%llu block=%llu, nritems too %s, have %lu expect range [1,%u]", -- GitLab From 4b356df11ba2b44627e2fad7441e05abee46a628 Mon Sep 17 00:00:00 2001 From: Shaokun Zhang Date: Mon, 5 Nov 2018 18:49:09 +0800 Subject: [PATCH 1832/2269] btrfs: tree-checker: Fix misleading group system information commit 761333f2f50ccc887aa9957ae829300262c0d15b upstream. block_group_err shows the group system as a decimal value with a '0x' prefix, which is somewhat misleading. Fix it to print hexadecimal, as was intended. Fixes: fce466eab7ac6 ("btrfs: tree-checker: Verify block_group_item") Reviewed-by: Nikolay Borisov Reviewed-by: Qu Wenruo Signed-off-by: Shaokun Zhang Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Ben Hutchings Signed-off-by: Sasha Levin --- fs/btrfs/tree-checker.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index fa8f64119e6f0..f206aec1525d4 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -399,7 +399,7 @@ static int check_block_group_item(struct btrfs_fs_info *fs_info, type != (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA)) { block_group_err(fs_info, leaf, slot, -"invalid type, have 0x%llx (%lu bits set) expect either 0x%llx, 0x%llx, 0x%llu or 0x%llx", +"invalid type, have 0x%llx (%lu bits set) expect either 0x%llx, 0x%llx, 0x%llx or 0x%llx", type, hweight64(type), BTRFS_BLOCK_GROUP_DATA, BTRFS_BLOCK_GROUP_METADATA, BTRFS_BLOCK_GROUP_SYSTEM, -- GitLab From aec6ccb3dcc1ad9b8cfe12d086430ffcce887dda Mon Sep 17 00:00:00 2001 From: Yunlei He Date: Thu, 8 Mar 2018 16:29:13 +0800 Subject: [PATCH 1833/2269] f2fs: check blkaddr more accuratly before issue a bio commit 0833721ec3658a4e9d5e58b6fa82cf9edc431e59 upstream. This patch check blkaddr more accuratly before issue a write or read bio. Signed-off-by: Yunlei He Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Ben Hutchings Signed-off-by: Sasha Levin --- fs/f2fs/checkpoint.c | 2 ++ fs/f2fs/data.c | 5 +++-- fs/f2fs/f2fs.h | 1 + fs/f2fs/segment.h | 25 +++++++++++++++++++------ 4 files changed, 25 insertions(+), 8 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 41fce930f44ce..1bd4cd8c79c60 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -69,6 +69,7 @@ static struct page *__get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index, .old_blkaddr = index, .new_blkaddr = index, .encrypted_page = NULL, + .is_meta = is_meta, }; if (unlikely(!is_meta)) @@ -163,6 +164,7 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, .op_flags = sync ? (REQ_META | REQ_PRIO) : REQ_RAHEAD, .encrypted_page = NULL, .in_list = false, + .is_meta = (type != META_POR), }; struct blk_plug plug; diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 6fbb6d75318a3..5913de3f661d0 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -369,6 +369,7 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio) struct page *page = fio->encrypted_page ? fio->encrypted_page : fio->page; + verify_block_addr(fio, fio->new_blkaddr); trace_f2fs_submit_page_bio(page, fio); f2fs_trace_ios(fio, 0); @@ -413,8 +414,8 @@ next: } if (fio->old_blkaddr != NEW_ADDR) - verify_block_addr(sbi, fio->old_blkaddr); - verify_block_addr(sbi, fio->new_blkaddr); + verify_block_addr(fio, fio->old_blkaddr); + verify_block_addr(fio, fio->new_blkaddr); bio_page = fio->encrypted_page ? fio->encrypted_page : fio->page; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 54f8520ad7a2f..aa7b033af1b08 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -910,6 +910,7 @@ struct f2fs_io_info { bool submitted; /* indicate IO submission */ int need_lock; /* indicate we need to lock cp_rwsem */ bool in_list; /* indicate fio is in io_list */ + bool is_meta; /* indicate borrow meta inode mapping or not */ enum iostat_type io_type; /* io type */ }; diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 4dfb5080098ff..4b635e8c91b0c 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -53,13 +53,19 @@ ((secno) == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno / \ (sbi)->segs_per_sec)) \ -#define MAIN_BLKADDR(sbi) (SM_I(sbi)->main_blkaddr) -#define SEG0_BLKADDR(sbi) (SM_I(sbi)->seg0_blkaddr) +#define MAIN_BLKADDR(sbi) \ + (SM_I(sbi) ? SM_I(sbi)->main_blkaddr : \ + le32_to_cpu(F2FS_RAW_SUPER(sbi)->main_blkaddr)) +#define SEG0_BLKADDR(sbi) \ + (SM_I(sbi) ? SM_I(sbi)->seg0_blkaddr : \ + le32_to_cpu(F2FS_RAW_SUPER(sbi)->segment0_blkaddr)) #define MAIN_SEGS(sbi) (SM_I(sbi)->main_segments) #define MAIN_SECS(sbi) ((sbi)->total_sections) -#define TOTAL_SEGS(sbi) (SM_I(sbi)->segment_count) +#define TOTAL_SEGS(sbi) \ + (SM_I(sbi) ? SM_I(sbi)->segment_count : \ + le32_to_cpu(F2FS_RAW_SUPER(sbi)->segment_count)) #define TOTAL_BLKS(sbi) (TOTAL_SEGS(sbi) << (sbi)->log_blocks_per_seg) #define MAX_BLKADDR(sbi) (SEG0_BLKADDR(sbi) + TOTAL_BLKS(sbi)) @@ -619,10 +625,17 @@ static inline void check_seg_range(struct f2fs_sb_info *sbi, unsigned int segno) f2fs_bug_on(sbi, segno > TOTAL_SEGS(sbi) - 1); } -static inline void verify_block_addr(struct f2fs_sb_info *sbi, block_t blk_addr) +static inline void verify_block_addr(struct f2fs_io_info *fio, block_t blk_addr) { - BUG_ON(blk_addr < SEG0_BLKADDR(sbi) - || blk_addr >= MAX_BLKADDR(sbi)); + struct f2fs_sb_info *sbi = fio->sbi; + + if (PAGE_TYPE_OF_BIO(fio->type) == META && + (!is_read_io(fio->op) || fio->is_meta)) + BUG_ON(blk_addr < SEG0_BLKADDR(sbi) || + blk_addr >= MAIN_BLKADDR(sbi)); + else + BUG_ON(blk_addr < MAIN_BLKADDR(sbi) || + blk_addr >= MAX_BLKADDR(sbi)); } /* -- GitLab From a8f40be69f80c7c5f969ae5cd93497dc7a764358 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 24 Apr 2018 15:44:16 -0600 Subject: [PATCH 1834/2269] f2fs: sanity check on sit entry commit b2ca374f33bd33fd822eb871876e4888cf79dc97 upstream. syzbot hit the following crash on upstream commit 87ef12027b9b1dd0e0b12cf311fbcb19f9d92539 (Wed Apr 18 19:48:17 2018 +0000) Merge tag 'ceph-for-4.17-rc2' of git://github.com/ceph/ceph-client syzbot dashboard link: https://syzkaller.appspot.com/bug?extid=83699adeb2d13579c31e C reproducer: https://syzkaller.appspot.com/x/repro.c?id=5805208181407744 syzkaller reproducer: https://syzkaller.appspot.com/x/repro.syz?id=6005073343676416 Raw console output: https://syzkaller.appspot.com/x/log.txt?id=6555047731134464 Kernel config: https://syzkaller.appspot.com/x/.config?id=1808800213120130118 compiler: gcc (GCC) 8.0.1 20180413 (experimental) IMPORTANT: if you fix the bug, please add the following tag to the commit: Reported-by: syzbot+83699adeb2d13579c31e@syzkaller.appspotmail.com It will help syzbot understand when the bug is fixed. See footer for details. If you forward the report, please keep this part and the footer. F2FS-fs (loop0): Magic Mismatch, valid(0xf2f52010) - read(0x0) F2FS-fs (loop0): Can't find valid F2FS filesystem in 1th superblock F2FS-fs (loop0): invalid crc value BUG: unable to handle kernel paging request at ffffed006b2a50c0 PGD 21ffee067 P4D 21ffee067 PUD 21fbeb067 PMD 0 Oops: 0000 [#1] SMP KASAN Dumping ftrace buffer: (ftrace buffer empty) Modules linked in: CPU: 0 PID: 4514 Comm: syzkaller989480 Not tainted 4.17.0-rc1+ #8 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:build_sit_entries fs/f2fs/segment.c:3653 [inline] RIP: 0010:build_segment_manager+0x7ef7/0xbf70 fs/f2fs/segment.c:3852 RSP: 0018:ffff8801b102e5b0 EFLAGS: 00010a06 RAX: 1ffff1006b2a50c0 RBX: 0000000000000004 RCX: 0000000000000001 RDX: 0000000000000000 RSI: 0000000000000001 RDI: ffff8801ac74243e RBP: ffff8801b102f410 R08: ffff8801acbd46c0 R09: fffffbfff14d9af8 R10: fffffbfff14d9af8 R11: ffff8801acbd46c0 R12: ffff8801ac742a80 R13: ffff8801d9519100 R14: dffffc0000000000 R15: ffff880359528600 FS: 0000000001e04880(0000) GS:ffff8801dae00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffffed006b2a50c0 CR3: 00000001ac6ac000 CR4: 00000000001406f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: f2fs_fill_super+0x4095/0x7bf0 fs/f2fs/super.c:2803 mount_bdev+0x30c/0x3e0 fs/super.c:1165 f2fs_mount+0x34/0x40 fs/f2fs/super.c:3020 mount_fs+0xae/0x328 fs/super.c:1268 vfs_kern_mount.part.34+0xd4/0x4d0 fs/namespace.c:1037 vfs_kern_mount fs/namespace.c:1027 [inline] do_new_mount fs/namespace.c:2517 [inline] do_mount+0x564/0x3070 fs/namespace.c:2847 ksys_mount+0x12d/0x140 fs/namespace.c:3063 __do_sys_mount fs/namespace.c:3077 [inline] __se_sys_mount fs/namespace.c:3074 [inline] __x64_sys_mount+0xbe/0x150 fs/namespace.c:3074 do_syscall_64+0x1b1/0x800 arch/x86/entry/common.c:287 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x443d6a RSP: 002b:00007ffd312813c8 EFLAGS: 00000297 ORIG_RAX: 00000000000000a5 RAX: ffffffffffffffda RBX: 0000000020000c00 RCX: 0000000000443d6a RDX: 0000000020000000 RSI: 0000000020000100 RDI: 00007ffd312813d0 RBP: 0000000000000003 R08: 0000000020016a00 R09: 000000000000000a R10: 0000000000000000 R11: 0000000000000297 R12: 0000000000000004 R13: 0000000000402c60 R14: 0000000000000000 R15: 0000000000000000 RIP: build_sit_entries fs/f2fs/segment.c:3653 [inline] RSP: ffff8801b102e5b0 RIP: build_segment_manager+0x7ef7/0xbf70 fs/f2fs/segment.c:3852 RSP: ffff8801b102e5b0 CR2: ffffed006b2a50c0 ---[ end trace a2034989e196ff17 ]--- Reported-and-tested-by: syzbot+83699adeb2d13579c31e@syzkaller.appspotmail.com Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Ben Hutchings Signed-off-by: Sasha Levin --- fs/f2fs/segment.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 3c7bbbae0afa4..1104a6c802515 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -3304,6 +3304,15 @@ static int build_sit_entries(struct f2fs_sb_info *sbi) unsigned int old_valid_blocks; start = le32_to_cpu(segno_in_journal(journal, i)); + if (start >= MAIN_SEGS(sbi)) { + f2fs_msg(sbi->sb, KERN_ERR, + "Wrong journal entry on segno %u", + start); + set_sbi_flag(sbi, SBI_NEED_FSCK); + err = -EINVAL; + break; + } + se = &sit_i->sentries[start]; sit = sit_in_journal(journal, i); -- GitLab From e60b97231950fb5b55d2a981b080e3a6f7516a00 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 27 Apr 2018 19:03:22 -0700 Subject: [PATCH 1835/2269] f2fs: enhance sanity_check_raw_super() to avoid potential overflow commit 0cfe75c5b011994651a4ca6d74f20aa997bfc69a upstream. In order to avoid the below overflow issue, we should have checked the boundaries in superblock before reaching out to allocation. As Linus suggested, the right place should be sanity_check_raw_super(). Dr Silvio Cesare of InfoSect reported: There are integer overflows with using the cp_payload superblock field in the f2fs filesystem potentially leading to memory corruption. include/linux/f2fs_fs.h struct f2fs_super_block { ... __le32 cp_payload; fs/f2fs/f2fs.h typedef u32 block_t; /* * should not change u32, since it is the on-disk block * address format, __le32. */ ... static inline block_t __cp_payload(struct f2fs_sb_info *sbi) { return le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload); } fs/f2fs/checkpoint.c block_t start_blk, orphan_blocks, i, j; ... start_blk = __start_cp_addr(sbi) + 1 + __cp_payload(sbi); orphan_blocks = __start_sum_addr(sbi) - 1 - __cp_payload(sbi); +++ integer overflows ... unsigned int cp_blks = 1 + __cp_payload(sbi); ... sbi->ckpt = kzalloc(cp_blks * blk_size, GFP_KERNEL); +++ integer overflow leading to incorrect heap allocation. int cp_payload_blks = __cp_payload(sbi); ... ckpt->cp_pack_start_sum = cpu_to_le32(1 + cp_payload_blks + orphan_blocks); +++ sign bug and integer overflow ... for (i = 1; i < 1 + cp_payload_blks; i++) +++ integer overflow ... sbi->max_orphans = (sbi->blocks_per_seg - F2FS_CP_PACKS - NR_CURSEG_TYPE - __cp_payload(sbi)) * F2FS_ORPHANS_PER_BLOCK; +++ integer overflow Reported-by: Greg KH Reported-by: Silvio Cesare Suggested-by: Linus Torvalds Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim [bwh: Backported to 4.14: No hot file extension support] Signed-off-by: Ben Hutchings Signed-off-by: Sasha Levin --- fs/f2fs/super.c | 71 ++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 64 insertions(+), 7 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 7cda685296b27..c3e1090e7c0af 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1807,6 +1807,8 @@ static inline bool sanity_check_area_boundary(struct f2fs_sb_info *sbi, static int sanity_check_raw_super(struct f2fs_sb_info *sbi, struct buffer_head *bh) { + block_t segment_count, segs_per_sec, secs_per_zone; + block_t total_sections, blocks_per_seg; struct f2fs_super_block *raw_super = (struct f2fs_super_block *) (bh->b_data + F2FS_SUPER_OFFSET); struct super_block *sb = sbi->sb; @@ -1863,6 +1865,68 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi, return 1; } + segment_count = le32_to_cpu(raw_super->segment_count); + segs_per_sec = le32_to_cpu(raw_super->segs_per_sec); + secs_per_zone = le32_to_cpu(raw_super->secs_per_zone); + total_sections = le32_to_cpu(raw_super->section_count); + + /* blocks_per_seg should be 512, given the above check */ + blocks_per_seg = 1 << le32_to_cpu(raw_super->log_blocks_per_seg); + + if (segment_count > F2FS_MAX_SEGMENT || + segment_count < F2FS_MIN_SEGMENTS) { + f2fs_msg(sb, KERN_INFO, + "Invalid segment count (%u)", + segment_count); + return 1; + } + + if (total_sections > segment_count || + total_sections < F2FS_MIN_SEGMENTS || + segs_per_sec > segment_count || !segs_per_sec) { + f2fs_msg(sb, KERN_INFO, + "Invalid segment/section count (%u, %u x %u)", + segment_count, total_sections, segs_per_sec); + return 1; + } + + if ((segment_count / segs_per_sec) < total_sections) { + f2fs_msg(sb, KERN_INFO, + "Small segment_count (%u < %u * %u)", + segment_count, segs_per_sec, total_sections); + return 1; + } + + if (segment_count > (le32_to_cpu(raw_super->block_count) >> 9)) { + f2fs_msg(sb, KERN_INFO, + "Wrong segment_count / block_count (%u > %u)", + segment_count, le32_to_cpu(raw_super->block_count)); + return 1; + } + + if (secs_per_zone > total_sections) { + f2fs_msg(sb, KERN_INFO, + "Wrong secs_per_zone (%u > %u)", + secs_per_zone, total_sections); + return 1; + } + if (le32_to_cpu(raw_super->extension_count) > F2FS_MAX_EXTENSION) { + f2fs_msg(sb, KERN_INFO, + "Corrupted extension count (%u > %u)", + le32_to_cpu(raw_super->extension_count), + F2FS_MAX_EXTENSION); + return 1; + } + + if (le32_to_cpu(raw_super->cp_payload) > + (blocks_per_seg - F2FS_CP_PACKS)) { + f2fs_msg(sb, KERN_INFO, + "Insane cp_payload (%u > %u)", + le32_to_cpu(raw_super->cp_payload), + blocks_per_seg - F2FS_CP_PACKS); + return 1; + } + /* check reserved ino info */ if (le32_to_cpu(raw_super->node_ino) != 1 || le32_to_cpu(raw_super->meta_ino) != 2 || @@ -1875,13 +1939,6 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi, return 1; } - if (le32_to_cpu(raw_super->segment_count) > F2FS_MAX_SEGMENT) { - f2fs_msg(sb, KERN_INFO, - "Invalid segment count (%u)", - le32_to_cpu(raw_super->segment_count)); - return 1; - } - /* check CP/SIT/NAT/SSA/MAIN_AREA area boundary */ if (sanity_check_area_boundary(sbi, bh)) return 1; -- GitLab From 9e6c4a8557e27c4f074a47e27ce981edfb1f8b91 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 23 May 2018 22:25:08 +0800 Subject: [PATCH 1836/2269] f2fs: clean up with is_valid_blkaddr() commit 7b525dd01365c6764018e374d391c92466be1b7a upstream. - rename is_valid_blkaddr() to is_valid_meta_blkaddr() for readability. - introduce is_valid_blkaddr() for cleanup. No logic change in this patch. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Ben Hutchings Signed-off-by: Sasha Levin --- fs/f2fs/checkpoint.c | 4 ++-- fs/f2fs/data.c | 18 +++++------------- fs/f2fs/f2fs.h | 9 ++++++++- fs/f2fs/file.c | 2 +- fs/f2fs/inode.c | 2 +- fs/f2fs/node.c | 5 ++--- fs/f2fs/recovery.c | 6 +++--- fs/f2fs/segment.c | 4 ++-- fs/f2fs/segment.h | 2 +- 9 files changed, 25 insertions(+), 27 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 1bd4cd8c79c60..2b951046657e0 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -118,7 +118,7 @@ struct page *get_tmp_page(struct f2fs_sb_info *sbi, pgoff_t index) return __get_meta_page(sbi, index, false); } -bool is_valid_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type) +bool is_valid_meta_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type) { switch (type) { case META_NAT: @@ -174,7 +174,7 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, blk_start_plug(&plug); for (; nrpages-- > 0; blkno++) { - if (!is_valid_blkaddr(sbi, blkno, type)) + if (!is_valid_meta_blkaddr(sbi, blkno, type)) goto out; switch (type) { diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 5913de3f661d0..f7b2909e9c5f2 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -413,7 +413,7 @@ next: spin_unlock(&io->io_lock); } - if (fio->old_blkaddr != NEW_ADDR) + if (is_valid_blkaddr(fio->old_blkaddr)) verify_block_addr(fio, fio->old_blkaddr); verify_block_addr(fio, fio->new_blkaddr); @@ -946,7 +946,7 @@ next_dnode: next_block: blkaddr = datablock_addr(dn.inode, dn.node_page, dn.ofs_in_node); - if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR) { + if (!is_valid_blkaddr(blkaddr)) { if (create) { if (unlikely(f2fs_cp_error(sbi))) { err = -EIO; @@ -1388,15 +1388,6 @@ static inline bool need_inplace_update(struct f2fs_io_info *fio) return need_inplace_update_policy(inode, fio); } -static inline bool valid_ipu_blkaddr(struct f2fs_io_info *fio) -{ - if (fio->old_blkaddr == NEW_ADDR) - return false; - if (fio->old_blkaddr == NULL_ADDR) - return false; - return true; -} - int do_write_data_page(struct f2fs_io_info *fio) { struct page *page = fio->page; @@ -1411,7 +1402,7 @@ int do_write_data_page(struct f2fs_io_info *fio) f2fs_lookup_extent_cache(inode, page->index, &ei)) { fio->old_blkaddr = ei.blk + page->index - ei.fofs; - if (valid_ipu_blkaddr(fio)) { + if (is_valid_blkaddr(fio->old_blkaddr)) { ipu_force = true; fio->need_lock = LOCK_DONE; goto got_it; @@ -1438,7 +1429,8 @@ got_it: * If current allocation needs SSR, * it had better in-place writes for updated data. */ - if (ipu_force || (valid_ipu_blkaddr(fio) && need_inplace_update(fio))) { + if (ipu_force || (is_valid_blkaddr(fio->old_blkaddr) && + need_inplace_update(fio))) { err = encrypt_one_page(fio); if (err) goto out_writepage; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index aa7b033af1b08..d74c77b51b71a 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2355,6 +2355,13 @@ static inline void f2fs_update_iostat(struct f2fs_sb_info *sbi, spin_unlock(&sbi->iostat_lock); } +static inline bool is_valid_blkaddr(block_t blkaddr) +{ + if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR) + return false; + return true; +} + /* * file.c */ @@ -2565,7 +2572,7 @@ void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io); struct page *grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index); struct page *get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index); struct page *get_tmp_page(struct f2fs_sb_info *sbi, pgoff_t index); -bool is_valid_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type); +bool is_valid_meta_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type); int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, int type, bool sync); void ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 6f589730782d5..d368eda462bb0 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -334,7 +334,7 @@ static bool __found_offset(block_t blkaddr, pgoff_t dirty, pgoff_t pgofs, switch (whence) { case SEEK_DATA: if ((blkaddr == NEW_ADDR && dirty == pgofs) || - (blkaddr != NEW_ADDR && blkaddr != NULL_ADDR)) + is_valid_blkaddr(blkaddr)) return true; break; case SEEK_HOLE: diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 259b0aa283f09..2bcbbf566f0cd 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -66,7 +66,7 @@ static bool __written_first_block(struct f2fs_inode *ri) { block_t addr = le32_to_cpu(ri->i_addr[offset_in_addr(ri)]); - if (addr != NEW_ADDR && addr != NULL_ADDR) + if (is_valid_blkaddr(addr)) return true; return false; } diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 712505ec5de48..5f212fb2d62b4 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -334,8 +334,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, new_blkaddr == NULL_ADDR); f2fs_bug_on(sbi, nat_get_blkaddr(e) == NEW_ADDR && new_blkaddr == NEW_ADDR); - f2fs_bug_on(sbi, nat_get_blkaddr(e) != NEW_ADDR && - nat_get_blkaddr(e) != NULL_ADDR && + f2fs_bug_on(sbi, is_valid_blkaddr(nat_get_blkaddr(e)) && new_blkaddr == NEW_ADDR); /* increment version no as node is removed */ @@ -350,7 +349,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, /* change address */ nat_set_blkaddr(e, new_blkaddr); - if (new_blkaddr == NEW_ADDR || new_blkaddr == NULL_ADDR) + if (!is_valid_blkaddr(new_blkaddr)) set_nat_flag(e, IS_CHECKPOINTED, false); __set_nat_cache_dirty(nm_i, e); diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 765fadf954afc..53f41ad4cbe1b 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -236,7 +236,7 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head, while (1) { struct fsync_inode_entry *entry; - if (!is_valid_blkaddr(sbi, blkaddr, META_POR)) + if (!is_valid_meta_blkaddr(sbi, blkaddr, META_POR)) return 0; page = get_tmp_page(sbi, blkaddr); @@ -479,7 +479,7 @@ retry_dn: } /* dest is valid block, try to recover from src to dest */ - if (is_valid_blkaddr(sbi, dest, META_POR)) { + if (is_valid_meta_blkaddr(sbi, dest, META_POR)) { if (src == NULL_ADDR) { err = reserve_new_block(&dn); @@ -540,7 +540,7 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list, while (1) { struct fsync_inode_entry *entry; - if (!is_valid_blkaddr(sbi, blkaddr, META_POR)) + if (!is_valid_meta_blkaddr(sbi, blkaddr, META_POR)) break; ra_meta_pages_cond(sbi, blkaddr); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 1104a6c802515..483d7a8696798 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1758,7 +1758,7 @@ bool is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr) struct seg_entry *se; bool is_cp = false; - if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR) + if (!is_valid_blkaddr(blkaddr)) return true; mutex_lock(&sit_i->sentry_lock); @@ -2571,7 +2571,7 @@ void f2fs_wait_on_block_writeback(struct f2fs_sb_info *sbi, block_t blkaddr) { struct page *cpage; - if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR) + if (!is_valid_blkaddr(blkaddr)) return; cpage = find_lock_page(META_MAPPING(sbi), blkaddr); diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 4b635e8c91b0c..f977774338c33 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -85,7 +85,7 @@ (GET_SEGOFF_FROM_SEG0(sbi, blk_addr) & ((sbi)->blocks_per_seg - 1)) #define GET_SEGNO(sbi, blk_addr) \ - ((((blk_addr) == NULL_ADDR) || ((blk_addr) == NEW_ADDR)) ? \ + ((!is_valid_blkaddr(blk_addr)) ? \ NULL_SEGNO : GET_L2R_SEGNO(FREE_I(sbi), \ GET_SEGNO_FROM_SEG0(sbi, blk_addr))) #define BLKS_PER_SEC(sbi) \ -- GitLab From eea715704673314769f2613b1ead310306b7de05 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 5 Jun 2018 17:44:11 +0800 Subject: [PATCH 1837/2269] f2fs: introduce and spread verify_blkaddr commit e1da7872f6eda977bd812346bf588c35e4495a1e upstream. This patch introduces verify_blkaddr to check meta/data block address with valid range to detect bug earlier. In addition, once we encounter an invalid blkaddr, notice user to run fsck to fix, and let the kernel panic. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim [bwh: Backported to 4.14: I skipped an earlier renaming of is_valid_meta_blkaddr() to f2fs_is_valid_meta_blkaddr()] Signed-off-by: Ben Hutchings Signed-off-by: Sasha Levin --- fs/f2fs/checkpoint.c | 11 +++++++++-- fs/f2fs/data.c | 8 ++++---- fs/f2fs/f2fs.h | 32 +++++++++++++++++++++++++++++--- fs/f2fs/file.c | 9 +++++---- fs/f2fs/inode.c | 7 ++++--- fs/f2fs/node.c | 4 ++-- fs/f2fs/recovery.c | 6 +++--- fs/f2fs/segment.c | 4 ++-- fs/f2fs/segment.h | 8 +++----- 9 files changed, 61 insertions(+), 28 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 2b951046657e0..d7bd9745e8836 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -118,7 +118,8 @@ struct page *get_tmp_page(struct f2fs_sb_info *sbi, pgoff_t index) return __get_meta_page(sbi, index, false); } -bool is_valid_meta_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type) +bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi, + block_t blkaddr, int type) { switch (type) { case META_NAT: @@ -138,10 +139,16 @@ bool is_valid_meta_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type) return false; break; case META_POR: + case DATA_GENERIC: if (unlikely(blkaddr >= MAX_BLKADDR(sbi) || blkaddr < MAIN_BLKADDR(sbi))) return false; break; + case META_GENERIC: + if (unlikely(blkaddr < SEG0_BLKADDR(sbi) || + blkaddr >= MAIN_BLKADDR(sbi))) + return false; + break; default: BUG(); } @@ -174,7 +181,7 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, blk_start_plug(&plug); for (; nrpages-- > 0; blkno++) { - if (!is_valid_meta_blkaddr(sbi, blkno, type)) + if (!f2fs_is_valid_blkaddr(sbi, blkno, type)) goto out; switch (type) { diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index f7b2909e9c5f2..6158788066110 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -413,7 +413,7 @@ next: spin_unlock(&io->io_lock); } - if (is_valid_blkaddr(fio->old_blkaddr)) + if (__is_valid_data_blkaddr(fio->old_blkaddr)) verify_block_addr(fio, fio->old_blkaddr); verify_block_addr(fio, fio->new_blkaddr); @@ -946,7 +946,7 @@ next_dnode: next_block: blkaddr = datablock_addr(dn.inode, dn.node_page, dn.ofs_in_node); - if (!is_valid_blkaddr(blkaddr)) { + if (!is_valid_data_blkaddr(sbi, blkaddr)) { if (create) { if (unlikely(f2fs_cp_error(sbi))) { err = -EIO; @@ -1402,7 +1402,7 @@ int do_write_data_page(struct f2fs_io_info *fio) f2fs_lookup_extent_cache(inode, page->index, &ei)) { fio->old_blkaddr = ei.blk + page->index - ei.fofs; - if (is_valid_blkaddr(fio->old_blkaddr)) { + if (is_valid_data_blkaddr(fio->sbi, fio->old_blkaddr)) { ipu_force = true; fio->need_lock = LOCK_DONE; goto got_it; @@ -1429,7 +1429,7 @@ got_it: * If current allocation needs SSR, * it had better in-place writes for updated data. */ - if (ipu_force || (is_valid_blkaddr(fio->old_blkaddr) && + if (ipu_force || (is_valid_data_blkaddr(fio->sbi, fio->old_blkaddr) && need_inplace_update(fio))) { err = encrypt_one_page(fio); if (err) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index d74c77b51b71a..d15d79457f5c3 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -162,7 +162,7 @@ struct cp_control { }; /* - * For CP/NAT/SIT/SSA readahead + * indicate meta/data type */ enum { META_CP, @@ -170,6 +170,8 @@ enum { META_SIT, META_SSA, META_POR, + DATA_GENERIC, + META_GENERIC, }; /* for the list of ino */ @@ -2355,13 +2357,36 @@ static inline void f2fs_update_iostat(struct f2fs_sb_info *sbi, spin_unlock(&sbi->iostat_lock); } -static inline bool is_valid_blkaddr(block_t blkaddr) +bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi, + block_t blkaddr, int type); +void f2fs_msg(struct super_block *sb, const char *level, const char *fmt, ...); +static inline void verify_blkaddr(struct f2fs_sb_info *sbi, + block_t blkaddr, int type) +{ + if (!f2fs_is_valid_blkaddr(sbi, blkaddr, type)) { + f2fs_msg(sbi->sb, KERN_ERR, + "invalid blkaddr: %u, type: %d, run fsck to fix.", + blkaddr, type); + f2fs_bug_on(sbi, 1); + } +} + +static inline bool __is_valid_data_blkaddr(block_t blkaddr) { if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR) return false; return true; } +static inline bool is_valid_data_blkaddr(struct f2fs_sb_info *sbi, + block_t blkaddr) +{ + if (!__is_valid_data_blkaddr(blkaddr)) + return false; + verify_blkaddr(sbi, blkaddr, DATA_GENERIC); + return true; +} + /* * file.c */ @@ -2572,7 +2597,8 @@ void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io); struct page *grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index); struct page *get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index); struct page *get_tmp_page(struct f2fs_sb_info *sbi, pgoff_t index); -bool is_valid_meta_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type); +bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi, + block_t blkaddr, int type); int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, int type, bool sync); void ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index d368eda462bb0..e9c575ef70b5c 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -328,13 +328,13 @@ static pgoff_t __get_first_dirty_index(struct address_space *mapping, return pgofs; } -static bool __found_offset(block_t blkaddr, pgoff_t dirty, pgoff_t pgofs, - int whence) +static bool __found_offset(struct f2fs_sb_info *sbi, block_t blkaddr, + pgoff_t dirty, pgoff_t pgofs, int whence) { switch (whence) { case SEEK_DATA: if ((blkaddr == NEW_ADDR && dirty == pgofs) || - is_valid_blkaddr(blkaddr)) + is_valid_data_blkaddr(sbi, blkaddr)) return true; break; case SEEK_HOLE: @@ -397,7 +397,8 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence) blkaddr = datablock_addr(dn.inode, dn.node_page, dn.ofs_in_node); - if (__found_offset(blkaddr, dirty, pgofs, whence)) { + if (__found_offset(F2FS_I_SB(inode), blkaddr, dirty, + pgofs, whence)) { f2fs_put_dnode(&dn); goto found; } diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 2bcbbf566f0cd..b1c0eb433841f 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -62,11 +62,12 @@ static void __get_inode_rdev(struct inode *inode, struct f2fs_inode *ri) } } -static bool __written_first_block(struct f2fs_inode *ri) +static bool __written_first_block(struct f2fs_sb_info *sbi, + struct f2fs_inode *ri) { block_t addr = le32_to_cpu(ri->i_addr[offset_in_addr(ri)]); - if (is_valid_blkaddr(addr)) + if (is_valid_data_blkaddr(sbi, addr)) return true; return false; } @@ -235,7 +236,7 @@ static int do_read_inode(struct inode *inode) /* get rdev by using inline_info */ __get_inode_rdev(inode, ri); - if (__written_first_block(ri)) + if (__written_first_block(sbi, ri)) set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN); if (!need_inode_block_update(sbi, inode->i_ino)) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 5f212fb2d62b4..999814c8cbead 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -334,7 +334,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, new_blkaddr == NULL_ADDR); f2fs_bug_on(sbi, nat_get_blkaddr(e) == NEW_ADDR && new_blkaddr == NEW_ADDR); - f2fs_bug_on(sbi, is_valid_blkaddr(nat_get_blkaddr(e)) && + f2fs_bug_on(sbi, is_valid_data_blkaddr(sbi, nat_get_blkaddr(e)) && new_blkaddr == NEW_ADDR); /* increment version no as node is removed */ @@ -349,7 +349,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, /* change address */ nat_set_blkaddr(e, new_blkaddr); - if (!is_valid_blkaddr(new_blkaddr)) + if (!is_valid_data_blkaddr(sbi, new_blkaddr)) set_nat_flag(e, IS_CHECKPOINTED, false); __set_nat_cache_dirty(nm_i, e); diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 53f41ad4cbe1b..6ea445377767d 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -236,7 +236,7 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head, while (1) { struct fsync_inode_entry *entry; - if (!is_valid_meta_blkaddr(sbi, blkaddr, META_POR)) + if (!f2fs_is_valid_blkaddr(sbi, blkaddr, META_POR)) return 0; page = get_tmp_page(sbi, blkaddr); @@ -479,7 +479,7 @@ retry_dn: } /* dest is valid block, try to recover from src to dest */ - if (is_valid_meta_blkaddr(sbi, dest, META_POR)) { + if (f2fs_is_valid_blkaddr(sbi, dest, META_POR)) { if (src == NULL_ADDR) { err = reserve_new_block(&dn); @@ -540,7 +540,7 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list, while (1) { struct fsync_inode_entry *entry; - if (!is_valid_meta_blkaddr(sbi, blkaddr, META_POR)) + if (!f2fs_is_valid_blkaddr(sbi, blkaddr, META_POR)) break; ra_meta_pages_cond(sbi, blkaddr); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 483d7a8696798..5c698757e116d 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1758,7 +1758,7 @@ bool is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr) struct seg_entry *se; bool is_cp = false; - if (!is_valid_blkaddr(blkaddr)) + if (!is_valid_data_blkaddr(sbi, blkaddr)) return true; mutex_lock(&sit_i->sentry_lock); @@ -2571,7 +2571,7 @@ void f2fs_wait_on_block_writeback(struct f2fs_sb_info *sbi, block_t blkaddr) { struct page *cpage; - if (!is_valid_blkaddr(blkaddr)) + if (!is_valid_data_blkaddr(sbi, blkaddr)) return; cpage = find_lock_page(META_MAPPING(sbi), blkaddr); diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index f977774338c33..dd8f977fc273e 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -85,7 +85,7 @@ (GET_SEGOFF_FROM_SEG0(sbi, blk_addr) & ((sbi)->blocks_per_seg - 1)) #define GET_SEGNO(sbi, blk_addr) \ - ((!is_valid_blkaddr(blk_addr)) ? \ + ((!is_valid_data_blkaddr(sbi, blk_addr)) ? \ NULL_SEGNO : GET_L2R_SEGNO(FREE_I(sbi), \ GET_SEGNO_FROM_SEG0(sbi, blk_addr))) #define BLKS_PER_SEC(sbi) \ @@ -631,11 +631,9 @@ static inline void verify_block_addr(struct f2fs_io_info *fio, block_t blk_addr) if (PAGE_TYPE_OF_BIO(fio->type) == META && (!is_read_io(fio->op) || fio->is_meta)) - BUG_ON(blk_addr < SEG0_BLKADDR(sbi) || - blk_addr >= MAIN_BLKADDR(sbi)); + verify_blkaddr(sbi, blk_addr, META_GENERIC); else - BUG_ON(blk_addr < MAIN_BLKADDR(sbi) || - blk_addr >= MAX_BLKADDR(sbi)); + verify_blkaddr(sbi, blk_addr, DATA_GENERIC); } /* -- GitLab From f3d6361a96a455c8ba12226a04efa67a0ada4966 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 23 Jun 2018 00:12:36 +0800 Subject: [PATCH 1838/2269] f2fs: fix to do sanity check with secs_per_zone commit 42bf546c1fe3f3654bdf914e977acbc2b80a5be5 upstream. As Wen Xu reported in below link: https://bugzilla.kernel.org/show_bug.cgi?id=200183 - Overview Divide zero in reset_curseg() when mounting a crafted f2fs image - Reproduce - Kernel message [ 588.281510] divide error: 0000 [#1] SMP KASAN PTI [ 588.282701] CPU: 0 PID: 1293 Comm: mount Not tainted 4.18.0-rc1+ #4 [ 588.284000] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 [ 588.286178] RIP: 0010:reset_curseg+0x94/0x1a0 [ 588.298166] RSP: 0018:ffff8801e88d7940 EFLAGS: 00010246 [ 588.299360] RAX: 0000000000000014 RBX: ffff8801e1d46d00 RCX: ffffffffb88bf60b [ 588.300809] RDX: 0000000000000000 RSI: dffffc0000000000 RDI: ffff8801e1d46d64 [ 588.305272] R13: 0000000000000000 R14: 0000000000000014 R15: 0000000000000000 [ 588.306822] FS: 00007fad85008840(0000) GS:ffff8801f6e00000(0000) knlGS:0000000000000000 [ 588.308456] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 588.309623] CR2: 0000000001705078 CR3: 00000001f30f8000 CR4: 00000000000006f0 [ 588.311085] Call Trace: [ 588.311637] f2fs_build_segment_manager+0x103f/0x3410 [ 588.316136] ? f2fs_commit_super+0x1b0/0x1b0 [ 588.317031] ? set_blocksize+0x90/0x140 [ 588.319473] f2fs_mount+0x15/0x20 [ 588.320166] mount_fs+0x60/0x1a0 [ 588.320847] ? alloc_vfsmnt+0x309/0x360 [ 588.321647] vfs_kern_mount+0x6b/0x1a0 [ 588.322432] do_mount+0x34a/0x18c0 [ 588.323175] ? strndup_user+0x46/0x70 [ 588.323937] ? copy_mount_string+0x20/0x20 [ 588.324793] ? memcg_kmem_put_cache+0x1b/0xa0 [ 588.325702] ? kasan_check_write+0x14/0x20 [ 588.326562] ? _copy_from_user+0x6a/0x90 [ 588.327375] ? memdup_user+0x42/0x60 [ 588.328118] ksys_mount+0x83/0xd0 [ 588.328808] __x64_sys_mount+0x67/0x80 [ 588.329607] do_syscall_64+0x78/0x170 [ 588.330400] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 588.331461] RIP: 0033:0x7fad848e8b9a [ 588.336022] RSP: 002b:00007ffd7c5b6be8 EFLAGS: 00000206 ORIG_RAX: 00000000000000a5 [ 588.337547] RAX: ffffffffffffffda RBX: 00000000016f8030 RCX: 00007fad848e8b9a [ 588.338999] RDX: 00000000016f8210 RSI: 00000000016f9f30 RDI: 0000000001700ec0 [ 588.340442] RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000013 [ 588.341887] R10: 00000000c0ed0000 R11: 0000000000000206 R12: 0000000001700ec0 [ 588.343341] R13: 00000000016f8210 R14: 0000000000000000 R15: 0000000000000003 [ 588.354891] ---[ end trace 4ce02f25ff7d3df5 ]--- [ 588.355862] RIP: 0010:reset_curseg+0x94/0x1a0 [ 588.360742] RSP: 0018:ffff8801e88d7940 EFLAGS: 00010246 [ 588.361812] RAX: 0000000000000014 RBX: ffff8801e1d46d00 RCX: ffffffffb88bf60b [ 588.363485] RDX: 0000000000000000 RSI: dffffc0000000000 RDI: ffff8801e1d46d64 [ 588.365213] RBP: ffff8801e88d7968 R08: ffffed003c32266f R09: ffffed003c32266f [ 588.366661] R10: 0000000000000001 R11: ffffed003c32266e R12: ffff8801f0337700 [ 588.368110] R13: 0000000000000000 R14: 0000000000000014 R15: 0000000000000000 [ 588.370057] FS: 00007fad85008840(0000) GS:ffff8801f6e00000(0000) knlGS:0000000000000000 [ 588.372099] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 588.373291] CR2: 0000000001705078 CR3: 00000001f30f8000 CR4: 00000000000006f0 - Location https://elixir.bootlin.com/linux/latest/source/fs/f2fs/segment.c#L2147 curseg->zone = GET_ZONE_FROM_SEG(sbi, curseg->segno); If secs_per_zone is corrupted due to fuzzing test, it will cause divide zero operation when using GET_ZONE_FROM_SEG macro, so we should do more sanity check with secs_per_zone during mount to avoid this issue. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Ben Hutchings Signed-off-by: Sasha Levin --- fs/f2fs/super.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index c3e1090e7c0af..084ff9c82a37e 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1904,9 +1904,9 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi, return 1; } - if (secs_per_zone > total_sections) { + if (secs_per_zone > total_sections || !secs_per_zone) { f2fs_msg(sb, KERN_INFO, - "Wrong secs_per_zone (%u > %u)", + "Wrong secs_per_zone / total_sections (%u, %u)", secs_per_zone, total_sections); return 1; } -- GitLab From 2598fc56ed65f69e303ccee6d5fe756a56f87779 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Thu, 29 Nov 2018 19:17:34 +0000 Subject: [PATCH 1839/2269] f2fs: Add sanity_check_inode() function This was done as part of commit 5d64600d4f33 "f2fs: avoid bug_on on corrupted inode" upstream, but the specific check that commit added is not applicable to 4.14. Cc: Jaegeuk Kim Cc: Chao Yu Signed-off-by: Ben Hutchings Signed-off-by: Sasha Levin --- fs/f2fs/inode.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index b1c0eb433841f..917a6c3d56494 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -180,6 +180,13 @@ void f2fs_inode_chksum_set(struct f2fs_sb_info *sbi, struct page *page) ri->i_inode_checksum = cpu_to_le32(f2fs_inode_chksum(sbi, page)); } +static bool sanity_check_inode(struct inode *inode) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + + return true; +} + static int do_read_inode(struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); @@ -281,6 +288,10 @@ struct inode *f2fs_iget(struct super_block *sb, unsigned long ino) ret = do_read_inode(inode); if (ret) goto bad_inode; + if (!sanity_check_inode(inode)) { + ret = -EINVAL; + goto bad_inode; + } make_now: if (ino == F2FS_NODE_INO(sbi)) { inode->i_mapping->a_ops = &f2fs_node_aops; -- GitLab From 0081c90ebacebb3a82d0d24bf0f42273ce2d902e Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 25 Jun 2018 23:29:49 +0800 Subject: [PATCH 1840/2269] f2fs: fix to do sanity check with extra_attr feature commit 76d56d4ab4f2a9e4f085c7d77172194ddaccf7d2 upstream. If FI_EXTRA_ATTR is set in inode by fuzzing, inode.i_addr[0] will be parsed as inode.i_extra_isize, then in __recover_inline_status, inline data address will beyond boundary of page, result in accessing invalid memory. So in this condition, during reading inode page, let's do sanity check with EXTRA_ATTR feature of fs and extra_attr bit of inode, if they're inconsistent, deny to load this inode. - Overview Out-of-bound access in f2fs_iget() when mounting a corrupted f2fs image - Reproduce The following message will be got in KASAN build of 4.18 upstream kernel. [ 819.392227] ================================================================== [ 819.393901] BUG: KASAN: slab-out-of-bounds in f2fs_iget+0x736/0x1530 [ 819.395329] Read of size 4 at addr ffff8801f099c968 by task mount/1292 [ 819.397079] CPU: 1 PID: 1292 Comm: mount Not tainted 4.18.0-rc1+ #4 [ 819.397082] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 [ 819.397088] Call Trace: [ 819.397124] dump_stack+0x7b/0xb5 [ 819.397154] print_address_description+0x70/0x290 [ 819.397159] kasan_report+0x291/0x390 [ 819.397163] ? f2fs_iget+0x736/0x1530 [ 819.397176] check_memory_region+0x139/0x190 [ 819.397182] __asan_loadN+0xf/0x20 [ 819.397185] f2fs_iget+0x736/0x1530 [ 819.397197] f2fs_fill_super+0x1b4f/0x2b40 [ 819.397202] ? f2fs_fill_super+0x1b4f/0x2b40 [ 819.397208] ? f2fs_commit_super+0x1b0/0x1b0 [ 819.397227] ? set_blocksize+0x90/0x140 [ 819.397241] mount_bdev+0x1c5/0x210 [ 819.397245] ? f2fs_commit_super+0x1b0/0x1b0 [ 819.397252] f2fs_mount+0x15/0x20 [ 819.397256] mount_fs+0x60/0x1a0 [ 819.397267] ? alloc_vfsmnt+0x309/0x360 [ 819.397272] vfs_kern_mount+0x6b/0x1a0 [ 819.397282] do_mount+0x34a/0x18c0 [ 819.397300] ? lockref_put_or_lock+0xcf/0x160 [ 819.397306] ? copy_mount_string+0x20/0x20 [ 819.397318] ? memcg_kmem_put_cache+0x1b/0xa0 [ 819.397324] ? kasan_check_write+0x14/0x20 [ 819.397334] ? _copy_from_user+0x6a/0x90 [ 819.397353] ? memdup_user+0x42/0x60 [ 819.397359] ksys_mount+0x83/0xd0 [ 819.397365] __x64_sys_mount+0x67/0x80 [ 819.397388] do_syscall_64+0x78/0x170 [ 819.397403] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 819.397422] RIP: 0033:0x7f54c667cb9a [ 819.397424] Code: 48 8b 0d 01 c3 2b 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 49 89 ca b8 a5 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d ce c2 2b 00 f7 d8 64 89 01 48 [ 819.397483] RSP: 002b:00007ffd8f46cd08 EFLAGS: 00000202 ORIG_RAX: 00000000000000a5 [ 819.397496] RAX: ffffffffffffffda RBX: 0000000000dfa030 RCX: 00007f54c667cb9a [ 819.397498] RDX: 0000000000dfa210 RSI: 0000000000dfbf30 RDI: 0000000000e02ec0 [ 819.397501] RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000013 [ 819.397503] R10: 00000000c0ed0000 R11: 0000000000000202 R12: 0000000000e02ec0 [ 819.397505] R13: 0000000000dfa210 R14: 0000000000000000 R15: 0000000000000003 [ 819.397866] Allocated by task 139: [ 819.398702] save_stack+0x46/0xd0 [ 819.398705] kasan_kmalloc+0xad/0xe0 [ 819.398709] kasan_slab_alloc+0x11/0x20 [ 819.398713] kmem_cache_alloc+0xd1/0x1e0 [ 819.398717] dup_fd+0x50/0x4c0 [ 819.398740] copy_process.part.37+0xbed/0x32e0 [ 819.398744] _do_fork+0x16e/0x590 [ 819.398748] __x64_sys_clone+0x69/0x80 [ 819.398752] do_syscall_64+0x78/0x170 [ 819.398756] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 819.399097] Freed by task 159: [ 819.399743] save_stack+0x46/0xd0 [ 819.399747] __kasan_slab_free+0x13c/0x1a0 [ 819.399750] kasan_slab_free+0xe/0x10 [ 819.399754] kmem_cache_free+0x89/0x1e0 [ 819.399757] put_files_struct+0x132/0x150 [ 819.399761] exit_files+0x62/0x70 [ 819.399766] do_exit+0x47b/0x1390 [ 819.399770] do_group_exit+0x86/0x130 [ 819.399774] __x64_sys_exit_group+0x2c/0x30 [ 819.399778] do_syscall_64+0x78/0x170 [ 819.399782] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 819.400115] The buggy address belongs to the object at ffff8801f099c680 which belongs to the cache files_cache of size 704 [ 819.403234] The buggy address is located 40 bytes to the right of 704-byte region [ffff8801f099c680, ffff8801f099c940) [ 819.405689] The buggy address belongs to the page: [ 819.406709] page:ffffea0007c26700 count:1 mapcount:0 mapping:ffff8801f69a3340 index:0xffff8801f099d380 compound_mapcount: 0 [ 819.408984] flags: 0x2ffff0000008100(slab|head) [ 819.409932] raw: 02ffff0000008100 ffffea00077fb600 0000000200000002 ffff8801f69a3340 [ 819.411514] raw: ffff8801f099d380 0000000080130000 00000001ffffffff 0000000000000000 [ 819.413073] page dumped because: kasan: bad access detected [ 819.414539] Memory state around the buggy address: [ 819.415521] ffff8801f099c800: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 819.416981] ffff8801f099c880: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 819.418454] >ffff8801f099c900: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc [ 819.419921] ^ [ 819.421265] ffff8801f099c980: fc fc fc fc fc fc fc fc fb fb fb fb fb fb fb fb [ 819.422745] ffff8801f099ca00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 819.424206] ================================================================== [ 819.425668] Disabling lock debugging due to kernel taint [ 819.457463] F2FS-fs (loop0): Mounted with checkpoint version = 3 The kernel still mounts the image. If you run the following program on the mounted folder mnt, (poc.c) static void activity(char *mpoint) { char *foo_bar_baz; int err; static int buf[8192]; memset(buf, 0, sizeof(buf)); err = asprintf(&foo_bar_baz, "%s/foo/bar/baz", mpoint); int fd = open(foo_bar_baz, O_RDONLY, 0); if (fd >= 0) { read(fd, (char *)buf, 11); close(fd); } } int main(int argc, char *argv[]) { activity(argv[1]); return 0; } You can get kernel crash: [ 819.457463] F2FS-fs (loop0): Mounted with checkpoint version = 3 [ 918.028501] BUG: unable to handle kernel paging request at ffffed0048000d82 [ 918.044020] PGD 23ffee067 P4D 23ffee067 PUD 23fbef067 PMD 0 [ 918.045207] Oops: 0000 [#1] SMP KASAN PTI [ 918.046048] CPU: 0 PID: 1309 Comm: poc Tainted: G B 4.18.0-rc1+ #4 [ 918.047573] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 [ 918.049552] RIP: 0010:check_memory_region+0x5e/0x190 [ 918.050565] Code: f8 49 c1 e8 03 49 89 db 49 c1 eb 03 4d 01 cb 4d 01 c1 4d 8d 63 01 4c 89 c8 4d 89 e2 4d 29 ca 49 83 fa 10 7f 3d 4d 85 d2 74 32 <41> 80 39 00 75 23 48 b8 01 00 00 00 00 fc ff df 4d 01 d1 49 01 c0 [ 918.054322] RSP: 0018:ffff8801e3a1f258 EFLAGS: 00010202 [ 918.055400] RAX: ffffed0048000d82 RBX: ffff880240006c11 RCX: ffffffffb8867d14 [ 918.056832] RDX: 0000000000000000 RSI: 0000000000000002 RDI: ffff880240006c10 [ 918.058253] RBP: ffff8801e3a1f268 R08: 1ffff10048000d82 R09: ffffed0048000d82 [ 918.059717] R10: 0000000000000001 R11: ffffed0048000d82 R12: ffffed0048000d83 [ 918.061159] R13: ffff8801e3a1f390 R14: 0000000000000000 R15: ffff880240006c08 [ 918.062614] FS: 00007fac9732c700(0000) GS:ffff8801f6e00000(0000) knlGS:0000000000000000 [ 918.064246] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 918.065412] CR2: ffffed0048000d82 CR3: 00000001df77a000 CR4: 00000000000006f0 [ 918.066882] Call Trace: [ 918.067410] __asan_loadN+0xf/0x20 [ 918.068149] f2fs_find_target_dentry+0xf4/0x270 [ 918.069083] ? __get_node_page+0x331/0x5b0 [ 918.069925] f2fs_find_in_inline_dir+0x24b/0x310 [ 918.070881] ? f2fs_recover_inline_data+0x4c0/0x4c0 [ 918.071905] ? unwind_next_frame.part.5+0x34f/0x490 [ 918.072901] ? unwind_dump+0x290/0x290 [ 918.073695] ? is_bpf_text_address+0xe/0x20 [ 918.074566] __f2fs_find_entry+0x599/0x670 [ 918.075408] ? kasan_unpoison_shadow+0x36/0x50 [ 918.076315] ? kasan_kmalloc+0xad/0xe0 [ 918.077100] ? memcg_kmem_put_cache+0x55/0xa0 [ 918.077998] ? f2fs_find_target_dentry+0x270/0x270 [ 918.079006] ? d_set_d_op+0x30/0x100 [ 918.079749] ? __d_lookup_rcu+0x69/0x2e0 [ 918.080556] ? __d_alloc+0x275/0x450 [ 918.081297] ? kasan_check_write+0x14/0x20 [ 918.082135] ? memset+0x31/0x40 [ 918.082820] ? fscrypt_setup_filename+0x1ec/0x4c0 [ 918.083782] ? d_alloc_parallel+0x5bb/0x8c0 [ 918.084640] f2fs_find_entry+0xe9/0x110 [ 918.085432] ? __f2fs_find_entry+0x670/0x670 [ 918.086308] ? kasan_check_write+0x14/0x20 [ 918.087163] f2fs_lookup+0x297/0x590 [ 918.087902] ? f2fs_link+0x2b0/0x2b0 [ 918.088646] ? legitimize_path.isra.29+0x61/0xa0 [ 918.089589] __lookup_slow+0x12e/0x240 [ 918.090371] ? may_delete+0x2b0/0x2b0 [ 918.091123] ? __nd_alloc_stack+0xa0/0xa0 [ 918.091944] lookup_slow+0x44/0x60 [ 918.092642] walk_component+0x3ee/0xa40 [ 918.093428] ? is_bpf_text_address+0xe/0x20 [ 918.094283] ? pick_link+0x3e0/0x3e0 [ 918.095047] ? in_group_p+0xa5/0xe0 [ 918.095771] ? generic_permission+0x53/0x1e0 [ 918.096666] ? security_inode_permission+0x1d/0x70 [ 918.097646] ? inode_permission+0x7a/0x1f0 [ 918.098497] link_path_walk+0x2a2/0x7b0 [ 918.099298] ? apparmor_capget+0x3d0/0x3d0 [ 918.100140] ? walk_component+0xa40/0xa40 [ 918.100958] ? path_init+0x2e6/0x580 [ 918.101695] path_openat+0x1bb/0x2160 [ 918.102471] ? __save_stack_trace+0x92/0x100 [ 918.103352] ? save_stack+0xb5/0xd0 [ 918.104070] ? vfs_unlink+0x250/0x250 [ 918.104822] ? save_stack+0x46/0xd0 [ 918.105538] ? kasan_slab_alloc+0x11/0x20 [ 918.106370] ? kmem_cache_alloc+0xd1/0x1e0 [ 918.107213] ? getname_flags+0x76/0x2c0 [ 918.107997] ? getname+0x12/0x20 [ 918.108677] ? do_sys_open+0x14b/0x2c0 [ 918.109450] ? __x64_sys_open+0x4c/0x60 [ 918.110255] ? do_syscall_64+0x78/0x170 [ 918.111083] ? entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 918.112148] ? entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 918.113204] ? f2fs_empty_inline_dir+0x1e0/0x1e0 [ 918.114150] ? timespec64_trunc+0x5c/0x90 [ 918.114993] ? wb_io_lists_depopulated+0x1a/0xc0 [ 918.115937] ? inode_io_list_move_locked+0x102/0x110 [ 918.116949] do_filp_open+0x12b/0x1d0 [ 918.117709] ? may_open_dev+0x50/0x50 [ 918.118475] ? kasan_kmalloc+0xad/0xe0 [ 918.119246] do_sys_open+0x17c/0x2c0 [ 918.119983] ? do_sys_open+0x17c/0x2c0 [ 918.120751] ? filp_open+0x60/0x60 [ 918.121463] ? task_work_run+0x4d/0xf0 [ 918.122237] __x64_sys_open+0x4c/0x60 [ 918.123001] do_syscall_64+0x78/0x170 [ 918.123759] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 918.124802] RIP: 0033:0x7fac96e3e040 [ 918.125537] Code: 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 83 3d 09 27 2d 00 00 75 10 b8 02 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 31 c3 48 83 ec 08 e8 7e e0 01 00 48 89 04 24 [ 918.129341] RSP: 002b:00007fff1b37f848 EFLAGS: 00000246 ORIG_RAX: 0000000000000002 [ 918.130870] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007fac96e3e040 [ 918.132295] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 000000000122d080 [ 918.133748] RBP: 00007fff1b37f9b0 R08: 00007fac9710bbd8 R09: 0000000000000001 [ 918.135209] R10: 000000000000069d R11: 0000000000000246 R12: 0000000000400c20 [ 918.136650] R13: 00007fff1b37fab0 R14: 0000000000000000 R15: 0000000000000000 [ 918.138093] Modules linked in: snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hwdep snd_hda_core snd_pcm snd_timer snd mac_hid i2c_piix4 soundcore ib_iser rdma_cm iw_cm ib_cm ib_core iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx raid1 raid0 multipath linear 8139too crct10dif_pclmul crc32_pclmul qxl drm_kms_helper syscopyarea aesni_intel sysfillrect sysimgblt fb_sys_fops ttm drm aes_x86_64 crypto_simd cryptd 8139cp glue_helper mii pata_acpi floppy [ 918.147924] CR2: ffffed0048000d82 [ 918.148619] ---[ end trace 4ce02f25ff7d3df5 ]--- [ 918.149563] RIP: 0010:check_memory_region+0x5e/0x190 [ 918.150576] Code: f8 49 c1 e8 03 49 89 db 49 c1 eb 03 4d 01 cb 4d 01 c1 4d 8d 63 01 4c 89 c8 4d 89 e2 4d 29 ca 49 83 fa 10 7f 3d 4d 85 d2 74 32 <41> 80 39 00 75 23 48 b8 01 00 00 00 00 fc ff df 4d 01 d1 49 01 c0 [ 918.154360] RSP: 0018:ffff8801e3a1f258 EFLAGS: 00010202 [ 918.155411] RAX: ffffed0048000d82 RBX: ffff880240006c11 RCX: ffffffffb8867d14 [ 918.156833] RDX: 0000000000000000 RSI: 0000000000000002 RDI: ffff880240006c10 [ 918.158257] RBP: ffff8801e3a1f268 R08: 1ffff10048000d82 R09: ffffed0048000d82 [ 918.159722] R10: 0000000000000001 R11: ffffed0048000d82 R12: ffffed0048000d83 [ 918.161149] R13: ffff8801e3a1f390 R14: 0000000000000000 R15: ffff880240006c08 [ 918.162587] FS: 00007fac9732c700(0000) GS:ffff8801f6e00000(0000) knlGS:0000000000000000 [ 918.164203] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 918.165356] CR2: ffffed0048000d82 CR3: 00000001df77a000 CR4: 00000000000006f0 Reported-by: Wen Xu Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim [bwh: Backported to 4.14: adjust context] Signed-off-by: Ben Hutchings Signed-off-by: Sasha Levin --- fs/f2fs/inode.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 917a6c3d56494..428d502f23e82 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -184,6 +184,15 @@ static bool sanity_check_inode(struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + if (f2fs_has_extra_attr(inode) && + !f2fs_sb_has_extra_attr(sbi->sb)) { + set_sbi_flag(sbi, SBI_NEED_FSCK); + f2fs_msg(sbi->sb, KERN_WARNING, + "%s: inode (ino=%lx) is with extra_attr, " + "but extra_attr feature is off", + __func__, inode->i_ino); + return false; + } return true; } @@ -233,6 +242,11 @@ static int do_read_inode(struct inode *inode) get_inline_info(inode, ri); + if (!sanity_check_inode(inode)) { + f2fs_put_page(node_page, 1); + return -EINVAL; + } + fi->i_extra_isize = f2fs_has_extra_attr(inode) ? le16_to_cpu(ri->i_extra_isize) : 0; @@ -288,10 +302,6 @@ struct inode *f2fs_iget(struct super_block *sb, unsigned long ino) ret = do_read_inode(inode); if (ret) goto bad_inode; - if (!sanity_check_inode(inode)) { - ret = -EINVAL; - goto bad_inode; - } make_now: if (ino == F2FS_NODE_INO(sbi)) { inode->i_mapping->a_ops = &f2fs_node_aops; -- GitLab From f9cf5462b51d98026275cc51437fc531e808b64a Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 27 Jun 2018 18:05:54 +0800 Subject: [PATCH 1841/2269] f2fs: fix to do sanity check with user_block_count commit 9dc956b2c8523aed39d1e6508438be9fea28c8fc upstream. This patch fixs to do sanity check with user_block_count. - Overview Divide zero in utilization when mount() a corrupted f2fs image - Reproduce (4.18 upstream kernel) - Kernel message [ 564.099503] F2FS-fs (loop0): invalid crc value [ 564.101991] divide error: 0000 [#1] SMP KASAN PTI [ 564.103103] CPU: 1 PID: 1298 Comm: f2fs_discard-7: Not tainted 4.18.0-rc1+ #4 [ 564.104584] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 [ 564.106624] RIP: 0010:issue_discard_thread+0x248/0x5c0 [ 564.107692] Code: ff ff 48 8b bd e8 fe ff ff 41 8b 9d 4c 04 00 00 e8 cd b8 ad ff 41 8b 85 50 04 00 00 31 d2 48 8d 04 80 48 8d 04 80 48 c1 e0 02 <48> f7 f3 83 f8 50 7e 16 41 c7 86 7c ff ff ff 01 00 00 00 41 c7 86 [ 564.111686] RSP: 0018:ffff8801f3117dc0 EFLAGS: 00010206 [ 564.112775] RAX: 0000000000000384 RBX: 0000000000000000 RCX: ffffffffb88c1e03 [ 564.114250] RDX: 0000000000000000 RSI: dffffc0000000000 RDI: ffff8801e3aa4850 [ 564.115706] RBP: ffff8801f3117f00 R08: 1ffffffff751a1d0 R09: fffffbfff751a1d0 [ 564.117177] R10: 0000000000000001 R11: fffffbfff751a1d0 R12: 00000000fffffffc [ 564.118634] R13: ffff8801e3aa4400 R14: ffff8801f3117ed8 R15: ffff8801e2050000 [ 564.120094] FS: 0000000000000000(0000) GS:ffff8801f6f00000(0000) knlGS:0000000000000000 [ 564.121748] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 564.122923] CR2: 000000000202b078 CR3: 00000001f11ac000 CR4: 00000000000006e0 [ 564.124383] Call Trace: [ 564.124924] ? __issue_discard_cmd+0x480/0x480 [ 564.125882] ? __sched_text_start+0x8/0x8 [ 564.126756] ? __kthread_parkme+0xcb/0x100 [ 564.127620] ? kthread_blkcg+0x70/0x70 [ 564.128412] kthread+0x180/0x1d0 [ 564.129105] ? __issue_discard_cmd+0x480/0x480 [ 564.130029] ? kthread_associate_blkcg+0x150/0x150 [ 564.131033] ret_from_fork+0x35/0x40 [ 564.131794] Modules linked in: snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hwdep snd_hda_core snd_pcm snd_timer snd mac_hid i2c_piix4 soundcore ib_iser rdma_cm iw_cm ib_cm ib_core iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx raid1 raid0 multipath linear 8139too crct10dif_pclmul crc32_pclmul qxl drm_kms_helper syscopyarea aesni_intel sysfillrect sysimgblt fb_sys_fops ttm drm aes_x86_64 crypto_simd cryptd 8139cp glue_helper mii pata_acpi floppy [ 564.141798] ---[ end trace 4ce02f25ff7d3df5 ]--- [ 564.142773] RIP: 0010:issue_discard_thread+0x248/0x5c0 [ 564.143885] Code: ff ff 48 8b bd e8 fe ff ff 41 8b 9d 4c 04 00 00 e8 cd b8 ad ff 41 8b 85 50 04 00 00 31 d2 48 8d 04 80 48 8d 04 80 48 c1 e0 02 <48> f7 f3 83 f8 50 7e 16 41 c7 86 7c ff ff ff 01 00 00 00 41 c7 86 [ 564.147776] RSP: 0018:ffff8801f3117dc0 EFLAGS: 00010206 [ 564.148856] RAX: 0000000000000384 RBX: 0000000000000000 RCX: ffffffffb88c1e03 [ 564.150424] RDX: 0000000000000000 RSI: dffffc0000000000 RDI: ffff8801e3aa4850 [ 564.151906] RBP: ffff8801f3117f00 R08: 1ffffffff751a1d0 R09: fffffbfff751a1d0 [ 564.153463] R10: 0000000000000001 R11: fffffbfff751a1d0 R12: 00000000fffffffc [ 564.154915] R13: ffff8801e3aa4400 R14: ffff8801f3117ed8 R15: ffff8801e2050000 [ 564.156405] FS: 0000000000000000(0000) GS:ffff8801f6f00000(0000) knlGS:0000000000000000 [ 564.158070] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 564.159279] CR2: 000000000202b078 CR3: 00000001f11ac000 CR4: 00000000000006e0 [ 564.161043] ================================================================== [ 564.162587] BUG: KASAN: stack-out-of-bounds in from_kuid_munged+0x1d/0x50 [ 564.163994] Read of size 4 at addr ffff8801f3117c84 by task f2fs_discard-7:/1298 [ 564.165852] CPU: 1 PID: 1298 Comm: f2fs_discard-7: Tainted: G D 4.18.0-rc1+ #4 [ 564.167593] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 [ 564.169522] Call Trace: [ 564.170057] dump_stack+0x7b/0xb5 [ 564.170778] print_address_description+0x70/0x290 [ 564.171765] kasan_report+0x291/0x390 [ 564.172540] ? from_kuid_munged+0x1d/0x50 [ 564.173408] __asan_load4+0x78/0x80 [ 564.174148] from_kuid_munged+0x1d/0x50 [ 564.174962] do_notify_parent+0x1f5/0x4f0 [ 564.175808] ? send_sigqueue+0x390/0x390 [ 564.176639] ? css_set_move_task+0x152/0x340 [ 564.184197] do_exit+0x1290/0x1390 [ 564.184950] ? __issue_discard_cmd+0x480/0x480 [ 564.185884] ? mm_update_next_owner+0x380/0x380 [ 564.186829] ? __sched_text_start+0x8/0x8 [ 564.187672] ? __kthread_parkme+0xcb/0x100 [ 564.188528] ? kthread_blkcg+0x70/0x70 [ 564.189333] ? kthread+0x180/0x1d0 [ 564.190052] ? __issue_discard_cmd+0x480/0x480 [ 564.190983] rewind_stack_do_exit+0x17/0x20 [ 564.192190] The buggy address belongs to the page: [ 564.193213] page:ffffea0007cc45c0 count:0 mapcount:0 mapping:0000000000000000 index:0x0 [ 564.194856] flags: 0x2ffff0000000000() [ 564.195644] raw: 02ffff0000000000 0000000000000000 dead000000000200 0000000000000000 [ 564.197247] raw: 0000000000000000 0000000000000000 00000000ffffffff 0000000000000000 [ 564.198826] page dumped because: kasan: bad access detected [ 564.200299] Memory state around the buggy address: [ 564.201306] ffff8801f3117b80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 564.202779] ffff8801f3117c00: 00 00 00 00 00 00 00 00 00 00 00 f3 f3 f3 f3 f3 [ 564.204252] >ffff8801f3117c80: f3 f3 f3 00 00 00 00 00 00 00 00 00 f1 f1 f1 f1 [ 564.205742] ^ [ 564.206424] ffff8801f3117d00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 564.207908] ffff8801f3117d80: f3 f3 f3 f3 f3 f3 f3 f3 00 00 00 00 00 00 00 00 [ 564.209389] ================================================================== [ 564.231795] F2FS-fs (loop0): Mounted with checkpoint version = 2 - Location https://elixir.bootlin.com/linux/v4.18-rc1/source/fs/f2fs/segment.h#L586 return div_u64((u64)valid_user_blocks(sbi) * 100, sbi->user_block_count); Missing checks on sbi->user_block_count. Reported-by: Wen Xu Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Ben Hutchings Signed-off-by: Sasha Levin --- fs/f2fs/super.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 084ff9c82a37e..9fafb1404f392 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1956,6 +1956,8 @@ int sanity_check_ckpt(struct f2fs_sb_info *sbi) unsigned int sit_segs, nat_segs; unsigned int sit_bitmap_size, nat_bitmap_size; unsigned int log_blocks_per_seg; + unsigned int segment_count_main; + block_t user_block_count; int i; total = le32_to_cpu(raw_super->segment_count); @@ -1980,6 +1982,16 @@ int sanity_check_ckpt(struct f2fs_sb_info *sbi) return 1; } + user_block_count = le64_to_cpu(ckpt->user_block_count); + segment_count_main = le32_to_cpu(raw_super->segment_count_main); + log_blocks_per_seg = le32_to_cpu(raw_super->log_blocks_per_seg); + if (!user_block_count || user_block_count >= + segment_count_main << log_blocks_per_seg) { + f2fs_msg(sbi->sb, KERN_ERR, + "Wrong user_block_count: %u", user_block_count); + return 1; + } + main_segs = le32_to_cpu(raw_super->segment_count_main); blocks_per_seg = sbi->blocks_per_seg; @@ -1996,7 +2008,6 @@ int sanity_check_ckpt(struct f2fs_sb_info *sbi) sit_bitmap_size = le32_to_cpu(ckpt->sit_ver_bitmap_bytesize); nat_bitmap_size = le32_to_cpu(ckpt->nat_ver_bitmap_bytesize); - log_blocks_per_seg = le32_to_cpu(raw_super->log_blocks_per_seg); if (sit_bitmap_size != ((sit_segs / 2) << log_blocks_per_seg) / 8 || nat_bitmap_size != ((nat_segs / 2) << log_blocks_per_seg) / 8) { -- GitLab From b8321ccd045710ee04fd5322c34cadd13a5e58af Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 29 Jun 2018 13:55:22 +0800 Subject: [PATCH 1842/2269] f2fs: fix to do sanity check with node footer and iblocks commit e34438c903b653daca2b2a7de95aed46226f8ed3 upstream. This patch adds to do sanity check with below fields of inode to avoid reported panic. - node footer - iblocks https://bugzilla.kernel.org/show_bug.cgi?id=200223 - Overview BUG() triggered in f2fs_truncate_inode_blocks() when un-mounting a mounted f2fs image after writing to it - Reproduce - POC (poc.c) static void activity(char *mpoint) { char *foo_bar_baz; int err; static int buf[8192]; memset(buf, 0, sizeof(buf)); err = asprintf(&foo_bar_baz, "%s/foo/bar/baz", mpoint); // open / write / read int fd = open(foo_bar_baz, O_RDWR | O_TRUNC, 0777); if (fd >= 0) { write(fd, (char *)buf, 517); write(fd, (char *)buf, sizeof(buf)); close(fd); } } int main(int argc, char *argv[]) { activity(argv[1]); return 0; } - Kernel meesage [ 552.479723] F2FS-fs (loop0): Mounted with checkpoint version = 2 [ 556.451891] ------------[ cut here ]------------ [ 556.451899] kernel BUG at fs/f2fs/node.c:987! [ 556.452920] invalid opcode: 0000 [#1] SMP KASAN PTI [ 556.453936] CPU: 1 PID: 1310 Comm: umount Not tainted 4.18.0-rc1+ #4 [ 556.455213] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 [ 556.457140] RIP: 0010:f2fs_truncate_inode_blocks+0x4a7/0x6f0 [ 556.458280] Code: e8 ae ea ff ff 41 89 c7 c1 e8 1f 84 c0 74 0a 41 83 ff fe 0f 85 35 ff ff ff 81 85 b0 fe ff ff fb 03 00 00 e9 f7 fd ff ff 0f 0b <0f> 0b e8 62 b7 9a 00 48 8b bd a0 fe ff ff e8 56 54 ae ff 48 8b b5 [ 556.462015] RSP: 0018:ffff8801f292f808 EFLAGS: 00010286 [ 556.463068] RAX: ffffed003e73242d RBX: ffff8801f292f958 RCX: ffffffffb88b81bc [ 556.464479] RDX: 0000000000000000 RSI: 0000000000000004 RDI: ffff8801f3992164 [ 556.465901] RBP: ffff8801f292f980 R08: ffffed003e73242d R09: ffffed003e73242d [ 556.467311] R10: 0000000000000001 R11: ffffed003e73242c R12: 00000000fffffc64 [ 556.468706] R13: ffff8801f3992000 R14: 0000000000000058 R15: 00000000ffff8801 [ 556.470117] FS: 00007f8029297840(0000) GS:ffff8801f6f00000(0000) knlGS:0000000000000000 [ 556.471702] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 556.472838] CR2: 000055f5f57305d8 CR3: 00000001f18b0000 CR4: 00000000000006e0 [ 556.474265] Call Trace: [ 556.474782] ? f2fs_alloc_nid_failed+0xf0/0xf0 [ 556.475686] ? truncate_nodes+0x980/0x980 [ 556.476516] ? pagecache_get_page+0x21f/0x2f0 [ 556.477412] ? __asan_loadN+0xf/0x20 [ 556.478153] ? __get_node_page+0x331/0x5b0 [ 556.478992] ? reweight_entity+0x1e6/0x3b0 [ 556.479826] f2fs_truncate_blocks+0x55e/0x740 [ 556.480709] ? f2fs_truncate_data_blocks+0x20/0x20 [ 556.481689] ? __radix_tree_lookup+0x34/0x160 [ 556.482630] ? radix_tree_lookup+0xd/0x10 [ 556.483445] f2fs_truncate+0xd4/0x1a0 [ 556.484206] f2fs_evict_inode+0x5ce/0x630 [ 556.485032] evict+0x16f/0x290 [ 556.485664] iput+0x280/0x300 [ 556.486300] dentry_unlink_inode+0x165/0x1e0 [ 556.487169] __dentry_kill+0x16a/0x260 [ 556.487936] dentry_kill+0x70/0x250 [ 556.488651] shrink_dentry_list+0x125/0x260 [ 556.489504] shrink_dcache_parent+0xc1/0x110 [ 556.490379] ? shrink_dcache_sb+0x200/0x200 [ 556.491231] ? bit_wait_timeout+0xc0/0xc0 [ 556.492047] do_one_tree+0x12/0x40 [ 556.492743] shrink_dcache_for_umount+0x3f/0xa0 [ 556.493656] generic_shutdown_super+0x43/0x1c0 [ 556.494561] kill_block_super+0x52/0x80 [ 556.495341] kill_f2fs_super+0x62/0x70 [ 556.496105] deactivate_locked_super+0x6f/0xa0 [ 556.497004] deactivate_super+0x5e/0x80 [ 556.497785] cleanup_mnt+0x61/0xa0 [ 556.498492] __cleanup_mnt+0x12/0x20 [ 556.499218] task_work_run+0xc8/0xf0 [ 556.499949] exit_to_usermode_loop+0x125/0x130 [ 556.500846] do_syscall_64+0x138/0x170 [ 556.501609] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 556.502659] RIP: 0033:0x7f8028b77487 [ 556.503384] Code: 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 31 f6 e9 09 00 00 00 66 0f 1f 84 00 00 00 00 00 b8 a6 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d e1 c9 2b 00 f7 d8 64 89 01 48 [ 556.507137] RSP: 002b:00007fff9f2e3598 EFLAGS: 00000246 ORIG_RAX: 00000000000000a6 [ 556.508637] RAX: 0000000000000000 RBX: 0000000000ebd030 RCX: 00007f8028b77487 [ 556.510069] RDX: 0000000000000001 RSI: 0000000000000000 RDI: 0000000000ec41e0 [ 556.511481] RBP: 0000000000ec41e0 R08: 0000000000000000 R09: 0000000000000014 [ 556.512892] R10: 00000000000006b2 R11: 0000000000000246 R12: 00007f802908083c [ 556.514320] R13: 0000000000000000 R14: 0000000000ebd210 R15: 00007fff9f2e3820 [ 556.515745] Modules linked in: snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hwdep snd_hda_core snd_pcm snd_timer snd mac_hid i2c_piix4 soundcore ib_iser rdma_cm iw_cm ib_cm ib_core iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx raid1 raid0 multipath linear 8139too crct10dif_pclmul crc32_pclmul qxl drm_kms_helper syscopyarea aesni_intel sysfillrect sysimgblt fb_sys_fops ttm drm aes_x86_64 crypto_simd cryptd 8139cp glue_helper mii pata_acpi floppy [ 556.529276] ---[ end trace 4ce02f25ff7d3df5 ]--- [ 556.530340] RIP: 0010:f2fs_truncate_inode_blocks+0x4a7/0x6f0 [ 556.531513] Code: e8 ae ea ff ff 41 89 c7 c1 e8 1f 84 c0 74 0a 41 83 ff fe 0f 85 35 ff ff ff 81 85 b0 fe ff ff fb 03 00 00 e9 f7 fd ff ff 0f 0b <0f> 0b e8 62 b7 9a 00 48 8b bd a0 fe ff ff e8 56 54 ae ff 48 8b b5 [ 556.535330] RSP: 0018:ffff8801f292f808 EFLAGS: 00010286 [ 556.536395] RAX: ffffed003e73242d RBX: ffff8801f292f958 RCX: ffffffffb88b81bc [ 556.537824] RDX: 0000000000000000 RSI: 0000000000000004 RDI: ffff8801f3992164 [ 556.539290] RBP: ffff8801f292f980 R08: ffffed003e73242d R09: ffffed003e73242d [ 556.540709] R10: 0000000000000001 R11: ffffed003e73242c R12: 00000000fffffc64 [ 556.542131] R13: ffff8801f3992000 R14: 0000000000000058 R15: 00000000ffff8801 [ 556.543579] FS: 00007f8029297840(0000) GS:ffff8801f6f00000(0000) knlGS:0000000000000000 [ 556.545180] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 556.546338] CR2: 000055f5f57305d8 CR3: 00000001f18b0000 CR4: 00000000000006e0 [ 556.547809] ================================================================== [ 556.549248] BUG: KASAN: stack-out-of-bounds in arch_tlb_gather_mmu+0x52/0x170 [ 556.550672] Write of size 8 at addr ffff8801f292fd10 by task umount/1310 [ 556.552338] CPU: 1 PID: 1310 Comm: umount Tainted: G D 4.18.0-rc1+ #4 [ 556.553886] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 [ 556.555756] Call Trace: [ 556.556264] dump_stack+0x7b/0xb5 [ 556.556944] print_address_description+0x70/0x290 [ 556.557903] kasan_report+0x291/0x390 [ 556.558649] ? arch_tlb_gather_mmu+0x52/0x170 [ 556.559537] __asan_store8+0x57/0x90 [ 556.560268] arch_tlb_gather_mmu+0x52/0x170 [ 556.561110] tlb_gather_mmu+0x12/0x40 [ 556.561862] exit_mmap+0x123/0x2a0 [ 556.562555] ? __ia32_sys_munmap+0x50/0x50 [ 556.563384] ? exit_aio+0x98/0x230 [ 556.564079] ? __x32_compat_sys_io_submit+0x260/0x260 [ 556.565099] ? taskstats_exit+0x1f4/0x640 [ 556.565925] ? kasan_check_read+0x11/0x20 [ 556.566739] ? mm_update_next_owner+0x322/0x380 [ 556.567652] mmput+0x8b/0x1d0 [ 556.568260] do_exit+0x43a/0x1390 [ 556.568937] ? mm_update_next_owner+0x380/0x380 [ 556.569855] ? deactivate_super+0x5e/0x80 [ 556.570668] ? cleanup_mnt+0x61/0xa0 [ 556.571395] ? __cleanup_mnt+0x12/0x20 [ 556.572156] ? task_work_run+0xc8/0xf0 [ 556.572917] ? exit_to_usermode_loop+0x125/0x130 [ 556.573861] rewind_stack_do_exit+0x17/0x20 [ 556.574707] RIP: 0033:0x7f8028b77487 [ 556.575428] Code: Bad RIP value. [ 556.576106] RSP: 002b:00007fff9f2e3598 EFLAGS: 00000246 ORIG_RAX: 00000000000000a6 [ 556.577599] RAX: 0000000000000000 RBX: 0000000000ebd030 RCX: 00007f8028b77487 [ 556.579020] RDX: 0000000000000001 RSI: 0000000000000000 RDI: 0000000000ec41e0 [ 556.580422] RBP: 0000000000ec41e0 R08: 0000000000000000 R09: 0000000000000014 [ 556.581833] R10: 00000000000006b2 R11: 0000000000000246 R12: 00007f802908083c [ 556.583252] R13: 0000000000000000 R14: 0000000000ebd210 R15: 00007fff9f2e3820 [ 556.584983] The buggy address belongs to the page: [ 556.585961] page:ffffea0007ca4bc0 count:0 mapcount:0 mapping:0000000000000000 index:0x0 [ 556.587540] flags: 0x2ffff0000000000() [ 556.588296] raw: 02ffff0000000000 0000000000000000 dead000000000200 0000000000000000 [ 556.589822] raw: 0000000000000000 0000000000000000 00000000ffffffff 0000000000000000 [ 556.591359] page dumped because: kasan: bad access detected [ 556.592786] Memory state around the buggy address: [ 556.593753] ffff8801f292fc00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 556.595191] ffff8801f292fc80: 00 00 00 00 00 00 00 00 f1 f1 f1 f1 00 00 00 00 [ 556.596613] >ffff8801f292fd00: 00 00 f3 00 00 00 00 f3 f3 00 00 00 00 f4 f4 f4 [ 556.598044] ^ [ 556.598797] ffff8801f292fd80: f3 f3 f3 f3 00 00 00 00 00 00 00 00 00 00 00 00 [ 556.600225] ffff8801f292fe00: 00 00 00 00 00 00 00 00 f1 f1 f1 f1 00 f4 f4 f4 [ 556.601647] ================================================================== - Location https://elixir.bootlin.com/linux/v4.18-rc1/source/fs/f2fs/node.c#L987 case NODE_DIND_BLOCK: err = truncate_nodes(&dn, nofs, offset[1], 3); cont = 0; break; default: BUG(); <--- } Reported-by Wen Xu Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Ben Hutchings Signed-off-by: Sasha Levin --- fs/f2fs/inode.c | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 428d502f23e82..be7d9773d2917 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -180,9 +180,30 @@ void f2fs_inode_chksum_set(struct f2fs_sb_info *sbi, struct page *page) ri->i_inode_checksum = cpu_to_le32(f2fs_inode_chksum(sbi, page)); } -static bool sanity_check_inode(struct inode *inode) +static bool sanity_check_inode(struct inode *inode, struct page *node_page) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + unsigned long long iblocks; + + iblocks = le64_to_cpu(F2FS_INODE(node_page)->i_blocks); + if (!iblocks) { + set_sbi_flag(sbi, SBI_NEED_FSCK); + f2fs_msg(sbi->sb, KERN_WARNING, + "%s: corrupted inode i_blocks i_ino=%lx iblocks=%llu, " + "run fsck to fix.", + __func__, inode->i_ino, iblocks); + return false; + } + + if (ino_of_node(node_page) != nid_of_node(node_page)) { + set_sbi_flag(sbi, SBI_NEED_FSCK); + f2fs_msg(sbi->sb, KERN_WARNING, + "%s: corrupted inode footer i_ino=%lx, ino,nid: " + "[%u, %u] run fsck to fix.", + __func__, inode->i_ino, + ino_of_node(node_page), nid_of_node(node_page)); + return false; + } if (f2fs_has_extra_attr(inode) && !f2fs_sb_has_extra_attr(sbi->sb)) { @@ -242,7 +263,7 @@ static int do_read_inode(struct inode *inode) get_inline_info(inode, ri); - if (!sanity_check_inode(inode)) { + if (!sanity_check_inode(inode, node_page)) { f2fs_put_page(node_page, 1); return -EINVAL; } -- GitLab From ad19d1e78fd51f5b4bf3ebbcbf3a133c8c03c49d Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 1 Aug 2018 19:13:44 +0800 Subject: [PATCH 1843/2269] f2fs: fix to do sanity check with block address in main area commit c9b60788fc760d136211853f10ce73dc152d1f4a upstream. This patch add to do sanity check with below field: - cp_pack_total_block_count - blkaddr of data/node - extent info - Overview BUG() in verify_block_addr() when writing to a corrupted f2fs image - Reproduce (4.18 upstream kernel) - POC (poc.c) static void activity(char *mpoint) { char *foo_bar_baz; int err; static int buf[8192]; memset(buf, 0, sizeof(buf)); err = asprintf(&foo_bar_baz, "%s/foo/bar/baz", mpoint); int fd = open(foo_bar_baz, O_RDWR | O_TRUNC, 0777); if (fd >= 0) { write(fd, (char *)buf, sizeof(buf)); fdatasync(fd); close(fd); } } int main(int argc, char *argv[]) { activity(argv[1]); return 0; } - Kernel message [ 689.349473] F2FS-fs (loop0): Mounted with checkpoint version = 3 [ 699.728662] WARNING: CPU: 0 PID: 1309 at fs/f2fs/segment.c:2860 f2fs_inplace_write_data+0x232/0x240 [ 699.728670] Modules linked in: snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hwdep snd_hda_core snd_pcm snd_timer snd mac_hid i2c_piix4 soundcore ib_iser rdma_cm iw_cm ib_cm ib_core iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx raid1 raid0 multipath linear 8139too crct10dif_pclmul crc32_pclmul qxl drm_kms_helper syscopyarea aesni_intel sysfillrect sysimgblt fb_sys_fops ttm drm aes_x86_64 crypto_simd cryptd 8139cp glue_helper mii pata_acpi floppy [ 699.729056] CPU: 0 PID: 1309 Comm: a.out Not tainted 4.18.0-rc1+ #4 [ 699.729064] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 [ 699.729074] RIP: 0010:f2fs_inplace_write_data+0x232/0x240 [ 699.729076] Code: ff e9 cf fe ff ff 49 8d 7d 10 e8 39 45 ad ff 4d 8b 7d 10 be 04 00 00 00 49 8d 7f 48 e8 07 49 ad ff 45 8b 7f 48 e9 fb fe ff ff <0f> 0b f0 41 80 4d 48 04 e9 65 fe ff ff 90 66 66 66 66 90 55 48 8d [ 699.729130] RSP: 0018:ffff8801f43af568 EFLAGS: 00010202 [ 699.729139] RAX: 000000000000003f RBX: ffff8801f43af7b8 RCX: ffffffffb88c9113 [ 699.729142] RDX: 0000000000000003 RSI: dffffc0000000000 RDI: ffff8802024e5540 [ 699.729144] RBP: ffff8801f43af590 R08: 0000000000000009 R09: ffffffffffffffe8 [ 699.729147] R10: 0000000000000001 R11: ffffed0039b0596a R12: ffff8802024e5540 [ 699.729149] R13: ffff8801f0335500 R14: ffff8801e3e7a700 R15: ffff8801e1ee4450 [ 699.729154] FS: 00007f9bf97f5700(0000) GS:ffff8801f6e00000(0000) knlGS:0000000000000000 [ 699.729156] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 699.729159] CR2: 00007f9bf925d170 CR3: 00000001f0c34000 CR4: 00000000000006f0 [ 699.729171] Call Trace: [ 699.729192] f2fs_do_write_data_page+0x2e2/0xe00 [ 699.729203] ? f2fs_should_update_outplace+0xd0/0xd0 [ 699.729238] ? memcg_drain_all_list_lrus+0x280/0x280 [ 699.729269] ? __radix_tree_replace+0xa3/0x120 [ 699.729276] __write_data_page+0x5c7/0xe30 [ 699.729291] ? kasan_check_read+0x11/0x20 [ 699.729310] ? page_mapped+0x8a/0x110 [ 699.729321] ? page_mkclean+0xe9/0x160 [ 699.729327] ? f2fs_do_write_data_page+0xe00/0xe00 [ 699.729331] ? invalid_page_referenced_vma+0x130/0x130 [ 699.729345] ? clear_page_dirty_for_io+0x332/0x450 [ 699.729351] f2fs_write_cache_pages+0x4ca/0x860 [ 699.729358] ? __write_data_page+0xe30/0xe30 [ 699.729374] ? percpu_counter_add_batch+0x22/0xa0 [ 699.729380] ? kasan_check_write+0x14/0x20 [ 699.729391] ? _raw_spin_lock+0x17/0x40 [ 699.729403] ? f2fs_mark_inode_dirty_sync.part.18+0x16/0x30 [ 699.729413] ? iov_iter_advance+0x113/0x640 [ 699.729418] ? f2fs_write_end+0x133/0x2e0 [ 699.729423] ? balance_dirty_pages_ratelimited+0x239/0x640 [ 699.729428] f2fs_write_data_pages+0x329/0x520 [ 699.729433] ? generic_perform_write+0x250/0x320 [ 699.729438] ? f2fs_write_cache_pages+0x860/0x860 [ 699.729454] ? current_time+0x110/0x110 [ 699.729459] ? f2fs_preallocate_blocks+0x1ef/0x370 [ 699.729464] do_writepages+0x37/0xb0 [ 699.729468] ? f2fs_write_cache_pages+0x860/0x860 [ 699.729472] ? do_writepages+0x37/0xb0 [ 699.729478] __filemap_fdatawrite_range+0x19a/0x1f0 [ 699.729483] ? delete_from_page_cache_batch+0x4e0/0x4e0 [ 699.729496] ? __vfs_write+0x2b2/0x410 [ 699.729501] file_write_and_wait_range+0x66/0xb0 [ 699.729506] f2fs_do_sync_file+0x1f9/0xd90 [ 699.729511] ? truncate_partial_data_page+0x290/0x290 [ 699.729521] ? __sb_end_write+0x30/0x50 [ 699.729526] ? vfs_write+0x20f/0x260 [ 699.729530] f2fs_sync_file+0x9a/0xb0 [ 699.729534] ? f2fs_do_sync_file+0xd90/0xd90 [ 699.729548] vfs_fsync_range+0x68/0x100 [ 699.729554] ? __fget_light+0xc9/0xe0 [ 699.729558] do_fsync+0x3d/0x70 [ 699.729562] __x64_sys_fdatasync+0x24/0x30 [ 699.729585] do_syscall_64+0x78/0x170 [ 699.729595] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 699.729613] RIP: 0033:0x7f9bf930d800 [ 699.729615] Code: 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 83 3d 49 bf 2c 00 00 75 10 b8 4b 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 31 c3 48 83 ec 08 e8 be 78 01 00 48 89 04 24 [ 699.729668] RSP: 002b:00007ffee3606c68 EFLAGS: 00000246 ORIG_RAX: 000000000000004b [ 699.729673] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f9bf930d800 [ 699.729675] RDX: 0000000000008000 RSI: 00000000006010a0 RDI: 0000000000000003 [ 699.729678] RBP: 00007ffee3606ca0 R08: 0000000001503010 R09: 0000000000000000 [ 699.729680] R10: 00000000000002e8 R11: 0000000000000246 R12: 0000000000400610 [ 699.729683] R13: 00007ffee3606da0 R14: 0000000000000000 R15: 0000000000000000 [ 699.729687] ---[ end trace 4ce02f25ff7d3df5 ]--- [ 699.729782] ------------[ cut here ]------------ [ 699.729785] kernel BUG at fs/f2fs/segment.h:654! [ 699.731055] invalid opcode: 0000 [#1] SMP KASAN PTI [ 699.732104] CPU: 0 PID: 1309 Comm: a.out Tainted: G W 4.18.0-rc1+ #4 [ 699.733684] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 [ 699.735611] RIP: 0010:f2fs_submit_page_bio+0x29b/0x730 [ 699.736649] Code: 54 49 8d bd 18 04 00 00 e8 b2 59 af ff 41 8b 8d 18 04 00 00 8b 45 b8 41 d3 e6 44 01 f0 4c 8d 73 14 41 39 c7 0f 82 37 fe ff ff <0f> 0b 65 8b 05 2c 04 77 47 89 c0 48 0f a3 05 52 c1 d5 01 0f 92 c0 [ 699.740524] RSP: 0018:ffff8801f43af508 EFLAGS: 00010283 [ 699.741573] RAX: 0000000000000000 RBX: ffff8801f43af7b8 RCX: ffffffffb88a7cef [ 699.743006] RDX: 0000000000000007 RSI: dffffc0000000000 RDI: ffff8801e3e7a64c [ 699.744426] RBP: ffff8801f43af558 R08: ffffed003e066b55 R09: ffffed003e066b55 [ 699.745833] R10: 0000000000000001 R11: ffffed003e066b54 R12: ffffea0007876940 [ 699.747256] R13: ffff8801f0335500 R14: ffff8801e3e7a600 R15: 0000000000000001 [ 699.748683] FS: 00007f9bf97f5700(0000) GS:ffff8801f6e00000(0000) knlGS:0000000000000000 [ 699.750293] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 699.751462] CR2: 00007f9bf925d170 CR3: 00000001f0c34000 CR4: 00000000000006f0 [ 699.752874] Call Trace: [ 699.753386] ? f2fs_inplace_write_data+0x93/0x240 [ 699.754341] f2fs_inplace_write_data+0xd2/0x240 [ 699.755271] f2fs_do_write_data_page+0x2e2/0xe00 [ 699.756214] ? f2fs_should_update_outplace+0xd0/0xd0 [ 699.757215] ? memcg_drain_all_list_lrus+0x280/0x280 [ 699.758209] ? __radix_tree_replace+0xa3/0x120 [ 699.759164] __write_data_page+0x5c7/0xe30 [ 699.760002] ? kasan_check_read+0x11/0x20 [ 699.760823] ? page_mapped+0x8a/0x110 [ 699.761573] ? page_mkclean+0xe9/0x160 [ 699.762345] ? f2fs_do_write_data_page+0xe00/0xe00 [ 699.763332] ? invalid_page_referenced_vma+0x130/0x130 [ 699.764374] ? clear_page_dirty_for_io+0x332/0x450 [ 699.765347] f2fs_write_cache_pages+0x4ca/0x860 [ 699.766276] ? __write_data_page+0xe30/0xe30 [ 699.767161] ? percpu_counter_add_batch+0x22/0xa0 [ 699.768112] ? kasan_check_write+0x14/0x20 [ 699.768951] ? _raw_spin_lock+0x17/0x40 [ 699.769739] ? f2fs_mark_inode_dirty_sync.part.18+0x16/0x30 [ 699.770885] ? iov_iter_advance+0x113/0x640 [ 699.771743] ? f2fs_write_end+0x133/0x2e0 [ 699.772569] ? balance_dirty_pages_ratelimited+0x239/0x640 [ 699.773680] f2fs_write_data_pages+0x329/0x520 [ 699.774603] ? generic_perform_write+0x250/0x320 [ 699.775544] ? f2fs_write_cache_pages+0x860/0x860 [ 699.776510] ? current_time+0x110/0x110 [ 699.777299] ? f2fs_preallocate_blocks+0x1ef/0x370 [ 699.778279] do_writepages+0x37/0xb0 [ 699.779026] ? f2fs_write_cache_pages+0x860/0x860 [ 699.779978] ? do_writepages+0x37/0xb0 [ 699.780755] __filemap_fdatawrite_range+0x19a/0x1f0 [ 699.781746] ? delete_from_page_cache_batch+0x4e0/0x4e0 [ 699.782820] ? __vfs_write+0x2b2/0x410 [ 699.783597] file_write_and_wait_range+0x66/0xb0 [ 699.784540] f2fs_do_sync_file+0x1f9/0xd90 [ 699.785381] ? truncate_partial_data_page+0x290/0x290 [ 699.786415] ? __sb_end_write+0x30/0x50 [ 699.787204] ? vfs_write+0x20f/0x260 [ 699.787941] f2fs_sync_file+0x9a/0xb0 [ 699.788694] ? f2fs_do_sync_file+0xd90/0xd90 [ 699.789572] vfs_fsync_range+0x68/0x100 [ 699.790360] ? __fget_light+0xc9/0xe0 [ 699.791128] do_fsync+0x3d/0x70 [ 699.791779] __x64_sys_fdatasync+0x24/0x30 [ 699.792614] do_syscall_64+0x78/0x170 [ 699.793371] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 699.794406] RIP: 0033:0x7f9bf930d800 [ 699.795134] Code: 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 83 3d 49 bf 2c 00 00 75 10 b8 4b 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 31 c3 48 83 ec 08 e8 be 78 01 00 48 89 04 24 [ 699.798960] RSP: 002b:00007ffee3606c68 EFLAGS: 00000246 ORIG_RAX: 000000000000004b [ 699.800483] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f9bf930d800 [ 699.801923] RDX: 0000000000008000 RSI: 00000000006010a0 RDI: 0000000000000003 [ 699.803373] RBP: 00007ffee3606ca0 R08: 0000000001503010 R09: 0000000000000000 [ 699.804798] R10: 00000000000002e8 R11: 0000000000000246 R12: 0000000000400610 [ 699.806233] R13: 00007ffee3606da0 R14: 0000000000000000 R15: 0000000000000000 [ 699.807667] Modules linked in: snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hwdep snd_hda_core snd_pcm snd_timer snd mac_hid i2c_piix4 soundcore ib_iser rdma_cm iw_cm ib_cm ib_core iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx raid1 raid0 multipath linear 8139too crct10dif_pclmul crc32_pclmul qxl drm_kms_helper syscopyarea aesni_intel sysfillrect sysimgblt fb_sys_fops ttm drm aes_x86_64 crypto_simd cryptd 8139cp glue_helper mii pata_acpi floppy [ 699.817079] ---[ end trace 4ce02f25ff7d3df6 ]--- [ 699.818068] RIP: 0010:f2fs_submit_page_bio+0x29b/0x730 [ 699.819114] Code: 54 49 8d bd 18 04 00 00 e8 b2 59 af ff 41 8b 8d 18 04 00 00 8b 45 b8 41 d3 e6 44 01 f0 4c 8d 73 14 41 39 c7 0f 82 37 fe ff ff <0f> 0b 65 8b 05 2c 04 77 47 89 c0 48 0f a3 05 52 c1 d5 01 0f 92 c0 [ 699.822919] RSP: 0018:ffff8801f43af508 EFLAGS: 00010283 [ 699.823977] RAX: 0000000000000000 RBX: ffff8801f43af7b8 RCX: ffffffffb88a7cef [ 699.825436] RDX: 0000000000000007 RSI: dffffc0000000000 RDI: ffff8801e3e7a64c [ 699.826881] RBP: ffff8801f43af558 R08: ffffed003e066b55 R09: ffffed003e066b55 [ 699.828292] R10: 0000000000000001 R11: ffffed003e066b54 R12: ffffea0007876940 [ 699.829750] R13: ffff8801f0335500 R14: ffff8801e3e7a600 R15: 0000000000000001 [ 699.831192] FS: 00007f9bf97f5700(0000) GS:ffff8801f6e00000(0000) knlGS:0000000000000000 [ 699.832793] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 699.833981] CR2: 00007f9bf925d170 CR3: 00000001f0c34000 CR4: 00000000000006f0 [ 699.835556] ================================================================== [ 699.837029] BUG: KASAN: stack-out-of-bounds in update_stack_state+0x38c/0x3e0 [ 699.838462] Read of size 8 at addr ffff8801f43af970 by task a.out/1309 [ 699.840086] CPU: 0 PID: 1309 Comm: a.out Tainted: G D W 4.18.0-rc1+ #4 [ 699.841603] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 [ 699.843475] Call Trace: [ 699.843982] dump_stack+0x7b/0xb5 [ 699.844661] print_address_description+0x70/0x290 [ 699.845607] kasan_report+0x291/0x390 [ 699.846351] ? update_stack_state+0x38c/0x3e0 [ 699.853831] __asan_load8+0x54/0x90 [ 699.854569] update_stack_state+0x38c/0x3e0 [ 699.855428] ? __read_once_size_nocheck.constprop.7+0x20/0x20 [ 699.856601] ? __save_stack_trace+0x5e/0x100 [ 699.857476] unwind_next_frame.part.5+0x18e/0x490 [ 699.858448] ? unwind_dump+0x290/0x290 [ 699.859217] ? clear_page_dirty_for_io+0x332/0x450 [ 699.860185] __unwind_start+0x106/0x190 [ 699.860974] __save_stack_trace+0x5e/0x100 [ 699.861808] ? __save_stack_trace+0x5e/0x100 [ 699.862691] ? unlink_anon_vmas+0xba/0x2c0 [ 699.863525] save_stack_trace+0x1f/0x30 [ 699.864312] save_stack+0x46/0xd0 [ 699.864993] ? __alloc_pages_slowpath+0x1420/0x1420 [ 699.865990] ? flush_tlb_mm_range+0x15e/0x220 [ 699.866889] ? kasan_check_write+0x14/0x20 [ 699.867724] ? __dec_node_state+0x92/0xb0 [ 699.868543] ? lock_page_memcg+0x85/0xf0 [ 699.869350] ? unlock_page_memcg+0x16/0x80 [ 699.870185] ? page_remove_rmap+0x198/0x520 [ 699.871048] ? mark_page_accessed+0x133/0x200 [ 699.871930] ? _cond_resched+0x1a/0x50 [ 699.872700] ? unmap_page_range+0xcd4/0xe50 [ 699.873551] ? rb_next+0x58/0x80 [ 699.874217] ? rb_next+0x58/0x80 [ 699.874895] __kasan_slab_free+0x13c/0x1a0 [ 699.875734] ? unlink_anon_vmas+0xba/0x2c0 [ 699.876563] kasan_slab_free+0xe/0x10 [ 699.877315] kmem_cache_free+0x89/0x1e0 [ 699.878095] unlink_anon_vmas+0xba/0x2c0 [ 699.878913] free_pgtables+0x101/0x1b0 [ 699.879677] exit_mmap+0x146/0x2a0 [ 699.880378] ? __ia32_sys_munmap+0x50/0x50 [ 699.881214] ? kasan_check_read+0x11/0x20 [ 699.882052] ? mm_update_next_owner+0x322/0x380 [ 699.882985] mmput+0x8b/0x1d0 [ 699.883602] do_exit+0x43a/0x1390 [ 699.884288] ? mm_update_next_owner+0x380/0x380 [ 699.885212] ? f2fs_sync_file+0x9a/0xb0 [ 699.885995] ? f2fs_do_sync_file+0xd90/0xd90 [ 699.886877] ? vfs_fsync_range+0x68/0x100 [ 699.887694] ? __fget_light+0xc9/0xe0 [ 699.888442] ? do_fsync+0x3d/0x70 [ 699.889118] ? __x64_sys_fdatasync+0x24/0x30 [ 699.889996] rewind_stack_do_exit+0x17/0x20 [ 699.890860] RIP: 0033:0x7f9bf930d800 [ 699.891585] Code: Bad RIP value. [ 699.892268] RSP: 002b:00007ffee3606c68 EFLAGS: 00000246 ORIG_RAX: 000000000000004b [ 699.893781] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f9bf930d800 [ 699.895220] RDX: 0000000000008000 RSI: 00000000006010a0 RDI: 0000000000000003 [ 699.896643] RBP: 00007ffee3606ca0 R08: 0000000001503010 R09: 0000000000000000 [ 699.898069] R10: 00000000000002e8 R11: 0000000000000246 R12: 0000000000400610 [ 699.899505] R13: 00007ffee3606da0 R14: 0000000000000000 R15: 0000000000000000 [ 699.901241] The buggy address belongs to the page: [ 699.902215] page:ffffea0007d0ebc0 count:0 mapcount:0 mapping:0000000000000000 index:0x0 [ 699.903811] flags: 0x2ffff0000000000() [ 699.904585] raw: 02ffff0000000000 0000000000000000 ffffffff07d00101 0000000000000000 [ 699.906125] raw: 0000000000000000 0000000000240000 00000000ffffffff 0000000000000000 [ 699.907673] page dumped because: kasan: bad access detected [ 699.909108] Memory state around the buggy address: [ 699.910077] ffff8801f43af800: 00 f1 f1 f1 f1 00 f4 f4 f4 f3 f3 f3 f3 00 00 00 [ 699.911528] ffff8801f43af880: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 699.912953] >ffff8801f43af900: 00 00 00 00 00 00 00 00 f1 01 f4 f4 f4 f2 f2 f2 [ 699.914392] ^ [ 699.915758] ffff8801f43af980: f2 00 f4 f4 00 00 00 00 f2 00 00 00 00 00 00 00 [ 699.917193] ffff8801f43afa00: 00 00 00 00 00 00 00 00 00 f3 f3 f3 00 00 00 00 [ 699.918634] ================================================================== - Location https://elixir.bootlin.com/linux/v4.18-rc1/source/fs/f2fs/segment.h#L644 Reported-by Wen Xu Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim [bwh: Backported to 4.14: - Error label is different in validate_checkpoint() due to the earlier backport of "f2fs: fix invalid memory access" - Adjust context] Signed-off-by: Ben Hutchings Signed-off-by: Sasha Levin --- fs/f2fs/checkpoint.c | 22 +++++++++++++++++++--- fs/f2fs/data.c | 33 +++++++++++++++++++++++++++------ fs/f2fs/f2fs.h | 3 +++ fs/f2fs/file.c | 12 ++++++++++++ fs/f2fs/inode.c | 17 +++++++++++++++++ fs/f2fs/node.c | 4 ++++ fs/f2fs/segment.h | 3 +-- 7 files changed, 83 insertions(+), 11 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index d7bd9745e8836..c81cd5057b8ee 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -86,8 +86,10 @@ repeat: fio.page = page; if (f2fs_submit_page_bio(&fio)) { - f2fs_put_page(page, 1); - goto repeat; + memset(page_address(page), 0, PAGE_SIZE); + f2fs_stop_checkpoint(sbi, false); + f2fs_bug_on(sbi, 1); + return page; } lock_page(page); @@ -141,8 +143,14 @@ bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi, case META_POR: case DATA_GENERIC: if (unlikely(blkaddr >= MAX_BLKADDR(sbi) || - blkaddr < MAIN_BLKADDR(sbi))) + blkaddr < MAIN_BLKADDR(sbi))) { + if (type == DATA_GENERIC) { + f2fs_msg(sbi->sb, KERN_WARNING, + "access invalid blkaddr:%u", blkaddr); + WARN_ON(1); + } return false; + } break; case META_GENERIC: if (unlikely(blkaddr < SEG0_BLKADDR(sbi) || @@ -746,6 +754,14 @@ static struct page *validate_checkpoint(struct f2fs_sb_info *sbi, &cp_page_1, version); if (err) return NULL; + + if (le32_to_cpu(cp_block->cp_pack_total_block_count) > + sbi->blocks_per_seg) { + f2fs_msg(sbi->sb, KERN_WARNING, + "invalid cp_pack_total_block_count:%u", + le32_to_cpu(cp_block->cp_pack_total_block_count)); + goto invalid_cp; + } pre_version = *version; cp_addr += le32_to_cpu(cp_block->cp_pack_total_block_count) - 1; diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 6158788066110..8f6e7c3a10f83 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -369,7 +369,10 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio) struct page *page = fio->encrypted_page ? fio->encrypted_page : fio->page; - verify_block_addr(fio, fio->new_blkaddr); + if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr, + __is_meta_io(fio) ? META_GENERIC : DATA_GENERIC)) + return -EFAULT; + trace_f2fs_submit_page_bio(page, fio); f2fs_trace_ios(fio, 0); @@ -946,6 +949,12 @@ next_dnode: next_block: blkaddr = datablock_addr(dn.inode, dn.node_page, dn.ofs_in_node); + if (__is_valid_data_blkaddr(blkaddr) && + !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC)) { + err = -EFAULT; + goto sync_out; + } + if (!is_valid_data_blkaddr(sbi, blkaddr)) { if (create) { if (unlikely(f2fs_cp_error(sbi))) { @@ -1264,6 +1273,10 @@ got_it: SetPageUptodate(page); goto confused; } + + if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), block_nr, + DATA_GENERIC)) + goto set_error_page; } else { zero_user_segment(page, 0, PAGE_SIZE); if (!PageUptodate(page)) @@ -1402,11 +1415,13 @@ int do_write_data_page(struct f2fs_io_info *fio) f2fs_lookup_extent_cache(inode, page->index, &ei)) { fio->old_blkaddr = ei.blk + page->index - ei.fofs; - if (is_valid_data_blkaddr(fio->sbi, fio->old_blkaddr)) { - ipu_force = true; - fio->need_lock = LOCK_DONE; - goto got_it; - } + if (!f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr, + DATA_GENERIC)) + return -EFAULT; + + ipu_force = true; + fio->need_lock = LOCK_DONE; + goto got_it; } /* Deadlock due to between page->lock and f2fs_lock_op */ @@ -1425,6 +1440,12 @@ int do_write_data_page(struct f2fs_io_info *fio) goto out_writepage; } got_it: + if (__is_valid_data_blkaddr(fio->old_blkaddr) && + !f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr, + DATA_GENERIC)) { + err = -EFAULT; + goto out_writepage; + } /* * If current allocation needs SSR, * it had better in-place writes for updated data. diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index d15d79457f5c3..3f1a446960362 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2357,6 +2357,9 @@ static inline void f2fs_update_iostat(struct f2fs_sb_info *sbi, spin_unlock(&sbi->iostat_lock); } +#define __is_meta_io(fio) (PAGE_TYPE_OF_BIO(fio->type) == META && \ + (!is_read_io(fio->op) || fio->is_meta)) + bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type); void f2fs_msg(struct super_block *sb, const char *level, const char *fmt, ...); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index e9c575ef70b5c..7d3189f1941cc 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -397,6 +397,13 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence) blkaddr = datablock_addr(dn.inode, dn.node_page, dn.ofs_in_node); + if (__is_valid_data_blkaddr(blkaddr) && + !f2fs_is_valid_blkaddr(F2FS_I_SB(inode), + blkaddr, DATA_GENERIC)) { + f2fs_put_dnode(&dn); + goto fail; + } + if (__found_offset(F2FS_I_SB(inode), blkaddr, dirty, pgofs, whence)) { f2fs_put_dnode(&dn); @@ -496,6 +503,11 @@ int truncate_data_blocks_range(struct dnode_of_data *dn, int count) dn->data_blkaddr = NULL_ADDR; set_data_blkaddr(dn); + + if (__is_valid_data_blkaddr(blkaddr) && + !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC)) + continue; + invalidate_blocks(sbi, blkaddr); if (dn->ofs_in_node == 0 && IS_INODE(dn->node_page)) clear_inode_flag(dn->inode, FI_FIRST_BLOCK_WRITTEN); diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index be7d9773d2917..aeed9943836a3 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -214,6 +214,23 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page) __func__, inode->i_ino); return false; } + + if (F2FS_I(inode)->extent_tree) { + struct extent_info *ei = &F2FS_I(inode)->extent_tree->largest; + + if (ei->len && + (!f2fs_is_valid_blkaddr(sbi, ei->blk, DATA_GENERIC) || + !f2fs_is_valid_blkaddr(sbi, ei->blk + ei->len - 1, + DATA_GENERIC))) { + set_sbi_flag(sbi, SBI_NEED_FSCK); + f2fs_msg(sbi->sb, KERN_WARNING, + "%s: inode (ino=%lx) extent info [%u, %u, %u] " + "is incorrect, run fsck to fix", + __func__, inode->i_ino, + ei->blk, ei->fofs, ei->len); + return false; + } + } return true; } diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 999814c8cbead..6adb6c60f017c 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1398,6 +1398,10 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted, return 0; } + if (__is_valid_data_blkaddr(ni.blk_addr) && + !f2fs_is_valid_blkaddr(sbi, ni.blk_addr, DATA_GENERIC)) + goto redirty_out; + if (atomic && !test_opt(sbi, NOBARRIER)) fio.op_flags |= REQ_PREFLUSH | REQ_FUA; diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index dd8f977fc273e..47348d98165b2 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -629,8 +629,7 @@ static inline void verify_block_addr(struct f2fs_io_info *fio, block_t blk_addr) { struct f2fs_sb_info *sbi = fio->sbi; - if (PAGE_TYPE_OF_BIO(fio->type) == META && - (!is_read_io(fio->op) || fio->is_meta)) + if (__is_meta_io(fio)) verify_blkaddr(sbi, blk_addr, META_GENERIC); else verify_blkaddr(sbi, blk_addr, DATA_GENERIC); -- GitLab From d7d9d29a837358636e12fe09c90a7882b53b2220 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sun, 8 Jul 2018 22:16:55 +0800 Subject: [PATCH 1844/2269] f2fs: fix to do sanity check with i_extra_isize commit 18dd6470c2d14d10f5a2dd926925dc80dbd3abfd upstream. If inode.i_extra_isize was fuzzed to an abnormal value, when calculating inline data size, the result will overflow, result in accessing invalid memory area when operating inline data. Let's do sanity check with i_extra_isize during inode loading for fixing. https://bugzilla.kernel.org/show_bug.cgi?id=200421 - Reproduce - POC (poc.c) #define _GNU_SOURCE #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static void activity(char *mpoint) { char *foo_bar_baz; char *foo_baz; char *xattr; int err; err = asprintf(&foo_bar_baz, "%s/foo/bar/baz", mpoint); err = asprintf(&foo_baz, "%s/foo/baz", mpoint); err = asprintf(&xattr, "%s/foo/bar/xattr", mpoint); rename(foo_bar_baz, foo_baz); char buf2[113]; memset(buf2, 0, sizeof(buf2)); listxattr(xattr, buf2, sizeof(buf2)); removexattr(xattr, "user.mime_type"); } int main(int argc, char *argv[]) { activity(argv[1]); return 0; } - Kernel message Umount the image will leave the following message [ 2910.995489] F2FS-fs (loop0): Mounted with checkpoint version = 2 [ 2918.416465] ================================================================== [ 2918.416807] BUG: KASAN: slab-out-of-bounds in f2fs_iget+0xcb9/0x1a80 [ 2918.417009] Read of size 4 at addr ffff88018efc2068 by task a.out/1229 [ 2918.417311] CPU: 1 PID: 1229 Comm: a.out Not tainted 4.17.0+ #1 [ 2918.417314] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 [ 2918.417323] Call Trace: [ 2918.417366] dump_stack+0x71/0xab [ 2918.417401] print_address_description+0x6b/0x290 [ 2918.417407] kasan_report+0x28e/0x390 [ 2918.417411] ? f2fs_iget+0xcb9/0x1a80 [ 2918.417415] f2fs_iget+0xcb9/0x1a80 [ 2918.417422] ? f2fs_lookup+0x2e7/0x580 [ 2918.417425] f2fs_lookup+0x2e7/0x580 [ 2918.417433] ? __recover_dot_dentries+0x400/0x400 [ 2918.417447] ? legitimize_path.isra.29+0x5a/0xa0 [ 2918.417453] __lookup_slow+0x11c/0x220 [ 2918.417457] ? may_delete+0x2a0/0x2a0 [ 2918.417475] ? deref_stack_reg+0xe0/0xe0 [ 2918.417479] ? __lookup_hash+0xb0/0xb0 [ 2918.417483] lookup_slow+0x3e/0x60 [ 2918.417488] walk_component+0x3ac/0x990 [ 2918.417492] ? generic_permission+0x51/0x1e0 [ 2918.417495] ? inode_permission+0x51/0x1d0 [ 2918.417499] ? pick_link+0x3e0/0x3e0 [ 2918.417502] ? link_path_walk+0x4b1/0x770 [ 2918.417513] ? _raw_spin_lock_irqsave+0x25/0x50 [ 2918.417518] ? walk_component+0x990/0x990 [ 2918.417522] ? path_init+0x2e6/0x580 [ 2918.417526] path_lookupat+0x13f/0x430 [ 2918.417531] ? trailing_symlink+0x3a0/0x3a0 [ 2918.417534] ? do_renameat2+0x270/0x7b0 [ 2918.417538] ? __kasan_slab_free+0x14c/0x190 [ 2918.417541] ? do_renameat2+0x270/0x7b0 [ 2918.417553] ? kmem_cache_free+0x85/0x1e0 [ 2918.417558] ? do_renameat2+0x270/0x7b0 [ 2918.417563] filename_lookup+0x13c/0x280 [ 2918.417567] ? filename_parentat+0x2b0/0x2b0 [ 2918.417572] ? kasan_unpoison_shadow+0x31/0x40 [ 2918.417575] ? kasan_kmalloc+0xa6/0xd0 [ 2918.417593] ? strncpy_from_user+0xaa/0x1c0 [ 2918.417598] ? getname_flags+0x101/0x2b0 [ 2918.417614] ? path_listxattr+0x87/0x110 [ 2918.417619] path_listxattr+0x87/0x110 [ 2918.417623] ? listxattr+0xc0/0xc0 [ 2918.417637] ? mm_fault_error+0x1b0/0x1b0 [ 2918.417654] do_syscall_64+0x73/0x160 [ 2918.417660] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 2918.417676] RIP: 0033:0x7f2f3a3480d7 [ 2918.417677] Code: f0 ff ff 73 01 c3 48 8b 0d be dd 2b 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 b8 c2 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 91 dd 2b 00 f7 d8 64 89 01 48 [ 2918.417732] RSP: 002b:00007fff4095b7d8 EFLAGS: 00000206 ORIG_RAX: 00000000000000c2 [ 2918.417744] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f2f3a3480d7 [ 2918.417746] RDX: 0000000000000071 RSI: 00007fff4095b810 RDI: 000000000126a0c0 [ 2918.417749] RBP: 00007fff4095b890 R08: 000000000126a010 R09: 0000000000000000 [ 2918.417751] R10: 00000000000001ab R11: 0000000000000206 R12: 00000000004005e0 [ 2918.417753] R13: 00007fff4095b990 R14: 0000000000000000 R15: 0000000000000000 [ 2918.417853] Allocated by task 329: [ 2918.418002] kasan_kmalloc+0xa6/0xd0 [ 2918.418007] kmem_cache_alloc+0xc8/0x1e0 [ 2918.418023] mempool_init_node+0x194/0x230 [ 2918.418027] mempool_init+0x12/0x20 [ 2918.418042] bioset_init+0x2bd/0x380 [ 2918.418052] blk_alloc_queue_node+0xe9/0x540 [ 2918.418075] dm_create+0x2c0/0x800 [ 2918.418080] dev_create+0xd2/0x530 [ 2918.418083] ctl_ioctl+0x2a3/0x5b0 [ 2918.418087] dm_ctl_ioctl+0xa/0x10 [ 2918.418092] do_vfs_ioctl+0x13e/0x8c0 [ 2918.418095] ksys_ioctl+0x66/0x70 [ 2918.418098] __x64_sys_ioctl+0x3d/0x50 [ 2918.418102] do_syscall_64+0x73/0x160 [ 2918.418106] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 2918.418204] Freed by task 0: [ 2918.418301] (stack is not available) [ 2918.418521] The buggy address belongs to the object at ffff88018efc0000 which belongs to the cache biovec-max of size 8192 [ 2918.418894] The buggy address is located 104 bytes to the right of 8192-byte region [ffff88018efc0000, ffff88018efc2000) [ 2918.419257] The buggy address belongs to the page: [ 2918.419431] page:ffffea00063bf000 count:1 mapcount:0 mapping:ffff8801f2242540 index:0x0 compound_mapcount: 0 [ 2918.419702] flags: 0x17fff8000008100(slab|head) [ 2918.419879] raw: 017fff8000008100 dead000000000100 dead000000000200 ffff8801f2242540 [ 2918.420101] raw: 0000000000000000 0000000000030003 00000001ffffffff 0000000000000000 [ 2918.420322] page dumped because: kasan: bad access detected [ 2918.420599] Memory state around the buggy address: [ 2918.420764] ffff88018efc1f00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 2918.420975] ffff88018efc1f80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 2918.421194] >ffff88018efc2000: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 2918.421406] ^ [ 2918.421627] ffff88018efc2080: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 2918.421838] ffff88018efc2100: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 2918.422046] ================================================================== [ 2918.422264] Disabling lock debugging due to kernel taint [ 2923.901641] BUG: unable to handle kernel paging request at ffff88018f0db000 [ 2923.901884] PGD 22226a067 P4D 22226a067 PUD 222273067 PMD 18e642063 PTE 800000018f0db061 [ 2923.902120] Oops: 0003 [#1] SMP KASAN PTI [ 2923.902274] CPU: 1 PID: 1231 Comm: umount Tainted: G B 4.17.0+ #1 [ 2923.902490] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 [ 2923.902761] RIP: 0010:__memset+0x24/0x30 [ 2923.902906] Code: 90 90 90 90 90 90 66 66 90 66 90 49 89 f9 48 89 d1 83 e2 07 48 c1 e9 03 40 0f b6 f6 48 b8 01 01 01 01 01 01 01 01 48 0f af c6 48 ab 89 d1 f3 aa 4c 89 c8 c3 90 49 89 f9 40 88 f0 48 89 d1 f3 [ 2923.903446] RSP: 0018:ffff88018ddf7ae0 EFLAGS: 00010206 [ 2923.903622] RAX: 0000000000000000 RBX: ffff8801d549d888 RCX: 1ffffffffffdaffb [ 2923.903833] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff88018f0daffc [ 2923.904062] RBP: ffff88018efc206c R08: 1ffff10031df840d R09: ffff88018efc206c [ 2923.904273] R10: ffffffffffffe1ee R11: ffffed0031df65fa R12: 0000000000000000 [ 2923.904485] R13: ffff8801d549dc98 R14: 00000000ffffc3db R15: ffffea00063bec80 [ 2923.904693] FS: 00007fa8b2f8a840(0000) GS:ffff8801f3b00000(0000) knlGS:0000000000000000 [ 2923.904937] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 2923.910080] CR2: ffff88018f0db000 CR3: 000000018f892000 CR4: 00000000000006e0 [ 2923.914930] Call Trace: [ 2923.919724] f2fs_truncate_inline_inode+0x114/0x170 [ 2923.924487] f2fs_truncate_blocks+0x11b/0x7c0 [ 2923.929178] ? f2fs_truncate_data_blocks+0x10/0x10 [ 2923.933834] ? dqget+0x670/0x670 [ 2923.938437] ? f2fs_destroy_extent_tree+0xd6/0x270 [ 2923.943107] ? __radix_tree_lookup+0x2f/0x150 [ 2923.947772] f2fs_truncate+0xd4/0x1a0 [ 2923.952491] f2fs_evict_inode+0x5ab/0x610 [ 2923.957204] evict+0x15f/0x280 [ 2923.961898] __dentry_kill+0x161/0x250 [ 2923.966634] shrink_dentry_list+0xf3/0x250 [ 2923.971897] shrink_dcache_parent+0xa9/0x100 [ 2923.976561] ? shrink_dcache_sb+0x1f0/0x1f0 [ 2923.981177] ? wait_for_completion+0x8a/0x210 [ 2923.985781] ? migrate_swap_stop+0x2d0/0x2d0 [ 2923.990332] do_one_tree+0xe/0x40 [ 2923.994735] shrink_dcache_for_umount+0x3a/0xa0 [ 2923.999077] generic_shutdown_super+0x3e/0x1c0 [ 2924.003350] kill_block_super+0x4b/0x70 [ 2924.007619] deactivate_locked_super+0x65/0x90 [ 2924.011812] cleanup_mnt+0x5c/0xa0 [ 2924.015995] task_work_run+0xce/0xf0 [ 2924.020174] exit_to_usermode_loop+0x115/0x120 [ 2924.024293] do_syscall_64+0x12f/0x160 [ 2924.028479] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 2924.032709] RIP: 0033:0x7fa8b2868487 [ 2924.036888] Code: 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 31 f6 e9 09 00 00 00 66 0f 1f 84 00 00 00 00 00 b8 a6 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d e1 c9 2b 00 f7 d8 64 89 01 48 [ 2924.045750] RSP: 002b:00007ffc39824d58 EFLAGS: 00000246 ORIG_RAX: 00000000000000a6 [ 2924.050190] RAX: 0000000000000000 RBX: 00000000008ea030 RCX: 00007fa8b2868487 [ 2924.054604] RDX: 0000000000000001 RSI: 0000000000000000 RDI: 00000000008f4360 [ 2924.058940] RBP: 00000000008f4360 R08: 0000000000000000 R09: 0000000000000014 [ 2924.063186] R10: 00000000000006b2 R11: 0000000000000246 R12: 00007fa8b2d7183c [ 2924.067418] R13: 0000000000000000 R14: 00000000008ea210 R15: 00007ffc39824fe0 [ 2924.071534] Modules linked in: snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hda_core snd_hwdep snd_pcm snd_timer joydev input_leds serio_raw snd soundcore mac_hid i2c_piix4 ib_iser rdma_cm iw_cm ib_cm ib_core configfs iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi btrfs zstd_decompress zstd_compress xxhash raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx xor raid6_pq libcrc32c raid1 raid0 multipath linear 8139too qxl ttm drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops drm crct10dif_pclmul crc32_pclmul ghash_clmulni_intel pcbc aesni_intel psmouse aes_x86_64 8139cp crypto_simd cryptd mii glue_helper pata_acpi floppy [ 2924.098044] CR2: ffff88018f0db000 [ 2924.102520] ---[ end trace a8e0d899985faf31 ]--- [ 2924.107012] RIP: 0010:__memset+0x24/0x30 [ 2924.111448] Code: 90 90 90 90 90 90 66 66 90 66 90 49 89 f9 48 89 d1 83 e2 07 48 c1 e9 03 40 0f b6 f6 48 b8 01 01 01 01 01 01 01 01 48 0f af c6 48 ab 89 d1 f3 aa 4c 89 c8 c3 90 49 89 f9 40 88 f0 48 89 d1 f3 [ 2924.120724] RSP: 0018:ffff88018ddf7ae0 EFLAGS: 00010206 [ 2924.125312] RAX: 0000000000000000 RBX: ffff8801d549d888 RCX: 1ffffffffffdaffb [ 2924.129931] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff88018f0daffc [ 2924.134537] RBP: ffff88018efc206c R08: 1ffff10031df840d R09: ffff88018efc206c [ 2924.139175] R10: ffffffffffffe1ee R11: ffffed0031df65fa R12: 0000000000000000 [ 2924.143825] R13: ffff8801d549dc98 R14: 00000000ffffc3db R15: ffffea00063bec80 [ 2924.148500] FS: 00007fa8b2f8a840(0000) GS:ffff8801f3b00000(0000) knlGS:0000000000000000 [ 2924.153247] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 2924.158003] CR2: ffff88018f0db000 CR3: 000000018f892000 CR4: 00000000000006e0 [ 2924.164641] BUG: Bad rss-counter state mm:00000000fa04621e idx:0 val:4 [ 2924.170007] BUG: Bad rss-counter tate mm:00000000fa04621e idx:1 val:2 - Location https://elixir.bootlin.com/linux/v4.18-rc3/source/fs/f2fs/inline.c#L78 memset(addr + from, 0, MAX_INLINE_DATA(inode) - from); Here the length can be negative. Reported-by Wen Xu Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim [bwh: Backported to 4.14: adjust context] Signed-off-by: Ben Hutchings Signed-off-by: Sasha Levin --- fs/f2fs/inode.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index aeed9943836a3..9a40724dbaa62 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -183,6 +183,7 @@ void f2fs_inode_chksum_set(struct f2fs_sb_info *sbi, struct page *page) static bool sanity_check_inode(struct inode *inode, struct page *node_page) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct f2fs_inode_info *fi = F2FS_I(inode); unsigned long long iblocks; iblocks = le64_to_cpu(F2FS_INODE(node_page)->i_blocks); @@ -215,6 +216,17 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page) return false; } + if (fi->i_extra_isize > F2FS_TOTAL_EXTRA_ATTR_SIZE || + fi->i_extra_isize % sizeof(__le32)) { + set_sbi_flag(sbi, SBI_NEED_FSCK); + f2fs_msg(sbi->sb, KERN_WARNING, + "%s: inode (ino=%lx) has corrupted i_extra_isize: %d, " + "max: %zu", + __func__, inode->i_ino, fi->i_extra_isize, + F2FS_TOTAL_EXTRA_ATTR_SIZE); + return false; + } + if (F2FS_I(inode)->extent_tree) { struct extent_info *ei = &F2FS_I(inode)->extent_tree->largest; @@ -280,14 +292,14 @@ static int do_read_inode(struct inode *inode) get_inline_info(inode, ri); + fi->i_extra_isize = f2fs_has_extra_attr(inode) ? + le16_to_cpu(ri->i_extra_isize) : 0; + if (!sanity_check_inode(inode, node_page)) { f2fs_put_page(node_page, 1); return -EINVAL; } - fi->i_extra_isize = f2fs_has_extra_attr(inode) ? - le16_to_cpu(ri->i_extra_isize) : 0; - /* check data exist */ if (f2fs_has_inline_data(inode) && !f2fs_exist_data(inode)) __recover_inline_status(inode, node_page); -- GitLab From 30130700acfad8a705c109325379f5bbe21b3ccc Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 1 Aug 2018 19:16:11 +0800 Subject: [PATCH 1845/2269] f2fs: fix to do sanity check with cp_pack_start_sum commit e494c2f995d6181d6e29c4927d68e0f295ecf75b upstream. After fuzzing, cp_pack_start_sum could be corrupted, so current log's summary info should be wrong due to loading incorrect summary block. Then, if segment's type in current log is exceeded NR_CURSEG_TYPE, it can lead accessing invalid dirty_i->dirty_segmap bitmap finally. Add sanity check for cp_pack_start_sum to fix this issue. https://bugzilla.kernel.org/show_bug.cgi?id=200419 - Reproduce - Kernel message (f2fs-dev w/ KASAN) [ 3117.578432] F2FS-fs (loop0): Invalid log blocks per segment (8) [ 3117.578445] F2FS-fs (loop0): Can't find valid F2FS filesystem in 2th superblock [ 3117.581364] F2FS-fs (loop0): invalid crc_offset: 30716 [ 3117.583564] WARNING: CPU: 1 PID: 1225 at fs/f2fs/checkpoint.c:90 __get_meta_page+0x448/0x4b0 [ 3117.583570] Modules linked in: snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hda_core snd_hwdep snd_pcm snd_timer joydev input_leds serio_raw snd soundcore mac_hid i2c_piix4 ib_iser rdma_cm iw_cm ib_cm ib_core configfs iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi btrfs zstd_decompress zstd_compress xxhash raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx xor raid6_pq libcrc32c raid1 raid0 multipath linear 8139too qxl ttm drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops drm crct10dif_pclmul crc32_pclmul ghash_clmulni_intel pcbc aesni_intel psmouse aes_x86_64 8139cp crypto_simd cryptd mii glue_helper pata_acpi floppy [ 3117.584014] CPU: 1 PID: 1225 Comm: mount Not tainted 4.17.0+ #1 [ 3117.584017] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 [ 3117.584022] RIP: 0010:__get_meta_page+0x448/0x4b0 [ 3117.584023] Code: 00 49 8d bc 24 84 00 00 00 e8 74 54 da ff 41 83 8c 24 84 00 00 00 08 4c 89 f6 4c 89 ef e8 c0 d9 95 00 48 89 ef e8 18 e3 00 00 <0f> 0b f0 80 4d 48 04 e9 0f fe ff ff 0f 0b 48 89 c7 48 89 04 24 e8 [ 3117.584072] RSP: 0018:ffff88018eb678c0 EFLAGS: 00010286 [ 3117.584082] RAX: ffff88018f0a6a78 RBX: ffffea0007a46600 RCX: ffffffff9314d1b2 [ 3117.584085] RDX: ffffffff00000001 RSI: 0000000000000000 RDI: ffff88018f0a6a98 [ 3117.584087] RBP: ffff88018ebe9980 R08: 0000000000000002 R09: 0000000000000001 [ 3117.584090] R10: 0000000000000001 R11: ffffed00326e4450 R12: ffff880193722200 [ 3117.584092] R13: ffff88018ebe9afc R14: 0000000000000206 R15: ffff88018eb67900 [ 3117.584096] FS: 00007f5694636840(0000) GS:ffff8801f3b00000(0000) knlGS:0000000000000000 [ 3117.584098] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 3117.584101] CR2: 00000000016f21b8 CR3: 0000000191c22000 CR4: 00000000000006e0 [ 3117.584112] Call Trace: [ 3117.584121] ? f2fs_set_meta_page_dirty+0x150/0x150 [ 3117.584127] ? f2fs_build_segment_manager+0xbf9/0x3190 [ 3117.584133] ? f2fs_npages_for_summary_flush+0x75/0x120 [ 3117.584145] f2fs_build_segment_manager+0xda8/0x3190 [ 3117.584151] ? f2fs_get_valid_checkpoint+0x298/0xa00 [ 3117.584156] ? f2fs_flush_sit_entries+0x10e0/0x10e0 [ 3117.584184] ? map_id_range_down+0x17c/0x1b0 [ 3117.584188] ? __put_user_ns+0x30/0x30 [ 3117.584206] ? find_next_bit+0x53/0x90 [ 3117.584237] ? cpumask_next+0x16/0x20 [ 3117.584249] f2fs_fill_super+0x1948/0x2b40 [ 3117.584258] ? f2fs_commit_super+0x1a0/0x1a0 [ 3117.584279] ? sget_userns+0x65e/0x690 [ 3117.584296] ? set_blocksize+0x88/0x130 [ 3117.584302] ? f2fs_commit_super+0x1a0/0x1a0 [ 3117.584305] mount_bdev+0x1c0/0x200 [ 3117.584310] mount_fs+0x5c/0x190 [ 3117.584320] vfs_kern_mount+0x64/0x190 [ 3117.584330] do_mount+0x2e4/0x1450 [ 3117.584343] ? lockref_put_return+0x130/0x130 [ 3117.584347] ? copy_mount_string+0x20/0x20 [ 3117.584357] ? kasan_unpoison_shadow+0x31/0x40 [ 3117.584362] ? kasan_kmalloc+0xa6/0xd0 [ 3117.584373] ? memcg_kmem_put_cache+0x16/0x90 [ 3117.584377] ? __kmalloc_track_caller+0x196/0x210 [ 3117.584383] ? _copy_from_user+0x61/0x90 [ 3117.584396] ? memdup_user+0x3e/0x60 [ 3117.584401] ksys_mount+0x7e/0xd0 [ 3117.584405] __x64_sys_mount+0x62/0x70 [ 3117.584427] do_syscall_64+0x73/0x160 [ 3117.584440] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 3117.584455] RIP: 0033:0x7f5693f14b9a [ 3117.584456] Code: 48 8b 0d 01 c3 2b 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 49 89 ca b8 a5 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d ce c2 2b 00 f7 d8 64 89 01 48 [ 3117.584505] RSP: 002b:00007fff27346488 EFLAGS: 00000206 ORIG_RAX: 00000000000000a5 [ 3117.584510] RAX: ffffffffffffffda RBX: 00000000016e2030 RCX: 00007f5693f14b9a [ 3117.584512] RDX: 00000000016e2210 RSI: 00000000016e3f30 RDI: 00000000016ee040 [ 3117.584514] RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000013 [ 3117.584516] R10: 00000000c0ed0000 R11: 0000000000000206 R12: 00000000016ee040 [ 3117.584519] R13: 00000000016e2210 R14: 0000000000000000 R15: 0000000000000003 [ 3117.584523] ---[ end trace a8e0d899985faf31 ]--- [ 3117.685663] F2FS-fs (loop0): f2fs_check_nid_range: out-of-range nid=2, run fsck to fix. [ 3117.685673] F2FS-fs (loop0): recover_data: ino = 2 (i_size: recover) recovered = 1, err = 0 [ 3117.685707] ================================================================== [ 3117.685955] BUG: KASAN: slab-out-of-bounds in __remove_dirty_segment+0xdd/0x1e0 [ 3117.686175] Read of size 8 at addr ffff88018f0a63d0 by task mount/1225 [ 3117.686477] CPU: 0 PID: 1225 Comm: mount Tainted: G W 4.17.0+ #1 [ 3117.686481] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 [ 3117.686483] Call Trace: [ 3117.686494] dump_stack+0x71/0xab [ 3117.686512] print_address_description+0x6b/0x290 [ 3117.686517] kasan_report+0x28e/0x390 [ 3117.686522] ? __remove_dirty_segment+0xdd/0x1e0 [ 3117.686527] __remove_dirty_segment+0xdd/0x1e0 [ 3117.686532] locate_dirty_segment+0x189/0x190 [ 3117.686538] f2fs_allocate_new_segments+0xa9/0xe0 [ 3117.686543] recover_data+0x703/0x2c20 [ 3117.686547] ? f2fs_recover_fsync_data+0x48f/0xd50 [ 3117.686553] ? ksys_mount+0x7e/0xd0 [ 3117.686564] ? policy_nodemask+0x1a/0x90 [ 3117.686567] ? policy_node+0x56/0x70 [ 3117.686571] ? add_fsync_inode+0xf0/0xf0 [ 3117.686592] ? blk_finish_plug+0x44/0x60 [ 3117.686597] ? f2fs_ra_meta_pages+0x38b/0x5e0 [ 3117.686602] ? find_inode_fast+0xac/0xc0 [ 3117.686606] ? f2fs_is_valid_blkaddr+0x320/0x320 [ 3117.686618] ? __radix_tree_lookup+0x150/0x150 [ 3117.686633] ? dqget+0x670/0x670 [ 3117.686648] ? pagecache_get_page+0x29/0x410 [ 3117.686656] ? kmem_cache_alloc+0x176/0x1e0 [ 3117.686660] ? f2fs_is_valid_blkaddr+0x11d/0x320 [ 3117.686664] f2fs_recover_fsync_data+0xc23/0xd50 [ 3117.686670] ? f2fs_space_for_roll_forward+0x60/0x60 [ 3117.686674] ? rb_insert_color+0x323/0x3d0 [ 3117.686678] ? f2fs_recover_orphan_inodes+0xa5/0x700 [ 3117.686683] ? proc_register+0x153/0x1d0 [ 3117.686686] ? f2fs_remove_orphan_inode+0x10/0x10 [ 3117.686695] ? f2fs_attr_store+0x50/0x50 [ 3117.686700] ? proc_create_single_data+0x52/0x60 [ 3117.686707] f2fs_fill_super+0x1d06/0x2b40 [ 3117.686728] ? f2fs_commit_super+0x1a0/0x1a0 [ 3117.686735] ? sget_userns+0x65e/0x690 [ 3117.686740] ? set_blocksize+0x88/0x130 [ 3117.686745] ? f2fs_commit_super+0x1a0/0x1a0 [ 3117.686748] mount_bdev+0x1c0/0x200 [ 3117.686753] mount_fs+0x5c/0x190 [ 3117.686758] vfs_kern_mount+0x64/0x190 [ 3117.686762] do_mount+0x2e4/0x1450 [ 3117.686769] ? lockref_put_return+0x130/0x130 [ 3117.686773] ? copy_mount_string+0x20/0x20 [ 3117.686777] ? kasan_unpoison_shadow+0x31/0x40 [ 3117.686780] ? kasan_kmalloc+0xa6/0xd0 [ 3117.686786] ? memcg_kmem_put_cache+0x16/0x90 [ 3117.686790] ? __kmalloc_track_caller+0x196/0x210 [ 3117.686795] ? _copy_from_user+0x61/0x90 [ 3117.686801] ? memdup_user+0x3e/0x60 [ 3117.686804] ksys_mount+0x7e/0xd0 [ 3117.686809] __x64_sys_mount+0x62/0x70 [ 3117.686816] do_syscall_64+0x73/0x160 [ 3117.686824] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 3117.686829] RIP: 0033:0x7f5693f14b9a [ 3117.686830] Code: 48 8b 0d 01 c3 2b 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 49 89 ca b8 a5 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d ce c2 2b 00 f7 d8 64 89 01 48 [ 3117.686887] RSP: 002b:00007fff27346488 EFLAGS: 00000206 ORIG_RAX: 00000000000000a5 [ 3117.686892] RAX: ffffffffffffffda RBX: 00000000016e2030 RCX: 00007f5693f14b9a [ 3117.686894] RDX: 00000000016e2210 RSI: 00000000016e3f30 RDI: 00000000016ee040 [ 3117.686896] RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000013 [ 3117.686899] R10: 00000000c0ed0000 R11: 0000000000000206 R12: 00000000016ee040 [ 3117.686901] R13: 00000000016e2210 R14: 0000000000000000 R15: 0000000000000003 [ 3117.687005] Allocated by task 1225: [ 3117.687152] kasan_kmalloc+0xa6/0xd0 [ 3117.687157] kmem_cache_alloc_trace+0xfd/0x200 [ 3117.687161] f2fs_build_segment_manager+0x2d09/0x3190 [ 3117.687165] f2fs_fill_super+0x1948/0x2b40 [ 3117.687168] mount_bdev+0x1c0/0x200 [ 3117.687171] mount_fs+0x5c/0x190 [ 3117.687174] vfs_kern_mount+0x64/0x190 [ 3117.687177] do_mount+0x2e4/0x1450 [ 3117.687180] ksys_mount+0x7e/0xd0 [ 3117.687182] __x64_sys_mount+0x62/0x70 [ 3117.687186] do_syscall_64+0x73/0x160 [ 3117.687190] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 3117.687285] Freed by task 19: [ 3117.687412] __kasan_slab_free+0x137/0x190 [ 3117.687416] kfree+0x8b/0x1b0 [ 3117.687460] ttm_bo_man_put_node+0x61/0x80 [ttm] [ 3117.687476] ttm_bo_cleanup_refs+0x15f/0x250 [ttm] [ 3117.687492] ttm_bo_delayed_delete+0x2f0/0x300 [ttm] [ 3117.687507] ttm_bo_delayed_workqueue+0x17/0x50 [ttm] [ 3117.687528] process_one_work+0x2f9/0x740 [ 3117.687531] worker_thread+0x78/0x6b0 [ 3117.687541] kthread+0x177/0x1c0 [ 3117.687545] ret_from_fork+0x35/0x40 [ 3117.687638] The buggy address belongs to the object at ffff88018f0a6300 which belongs to the cache kmalloc-192 of size 192 [ 3117.688014] The buggy address is located 16 bytes to the right of 192-byte region [ffff88018f0a6300, ffff88018f0a63c0) [ 3117.688382] The buggy address belongs to the page: [ 3117.688554] page:ffffea00063c2980 count:1 mapcount:0 mapping:ffff8801f3403180 index:0x0 [ 3117.688788] flags: 0x17fff8000000100(slab) [ 3117.688944] raw: 017fff8000000100 ffffea00063c2840 0000000e0000000e ffff8801f3403180 [ 3117.689166] raw: 0000000000000000 0000000080100010 00000001ffffffff 0000000000000000 [ 3117.689386] page dumped because: kasan: bad access detected [ 3117.689653] Memory state around the buggy address: [ 3117.689816] ffff88018f0a6280: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc [ 3117.690027] ffff88018f0a6300: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 3117.690239] >ffff88018f0a6380: 00 00 fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 3117.690448] ^ [ 3117.690644] ffff88018f0a6400: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 3117.690868] ffff88018f0a6480: 00 00 fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 3117.691077] ================================================================== [ 3117.691290] Disabling lock debugging due to kernel taint [ 3117.693893] BUG: unable to handle kernel NULL pointer dereference at 0000000000000000 [ 3117.694120] PGD 80000001f01bc067 P4D 80000001f01bc067 PUD 1d9638067 PMD 0 [ 3117.694338] Oops: 0002 [#1] SMP KASAN PTI [ 3117.694490] CPU: 1 PID: 1225 Comm: mount Tainted: G B W 4.17.0+ #1 [ 3117.694703] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 [ 3117.695073] RIP: 0010:__remove_dirty_segment+0xe2/0x1e0 [ 3117.695246] Code: c4 48 89 c7 e8 cf bb d7 ff 45 0f b6 24 24 41 83 e4 3f 44 88 64 24 07 41 83 e4 3f 4a 8d 7c e3 08 e8 b3 bc d7 ff 4a 8b 4c e3 08 4c 0f b3 29 0f 82 94 00 00 00 48 8d bd 20 04 00 00 e8 97 bb d7 [ 3117.695793] RSP: 0018:ffff88018eb67638 EFLAGS: 00010292 [ 3117.695969] RAX: 0000000000000000 RBX: ffff88018f0a6300 RCX: 0000000000000000 [ 3117.696182] RDX: 0000000000000000 RSI: 0000000000000297 RDI: 0000000000000297 [ 3117.696391] RBP: ffff88018ebe9980 R08: ffffed003e743ebb R09: ffffed003e743ebb [ 3117.696604] R10: 0000000000000001 R11: ffffed003e743eba R12: 0000000000000019 [ 3117.696813] R13: 0000000000000014 R14: 0000000000000320 R15: ffff88018ebe99e0 [ 3117.697032] FS: 00007f5694636840(0000) GS:ffff8801f3b00000(0000) knlGS:0000000000000000 [ 3117.697280] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 3117.702357] CR2: 00007fe89bb1a000 CR3: 0000000191c22000 CR4: 00000000000006e0 [ 3117.707235] Call Trace: [ 3117.712077] locate_dirty_segment+0x189/0x190 [ 3117.716891] f2fs_allocate_new_segments+0xa9/0xe0 [ 3117.721617] recover_data+0x703/0x2c20 [ 3117.726316] ? f2fs_recover_fsync_data+0x48f/0xd50 [ 3117.730957] ? ksys_mount+0x7e/0xd0 [ 3117.735573] ? policy_nodemask+0x1a/0x90 [ 3117.740198] ? policy_node+0x56/0x70 [ 3117.744829] ? add_fsync_inode+0xf0/0xf0 [ 3117.749487] ? blk_finish_plug+0x44/0x60 [ 3117.754152] ? f2fs_ra_meta_pages+0x38b/0x5e0 [ 3117.758831] ? find_inode_fast+0xac/0xc0 [ 3117.763448] ? f2fs_is_valid_blkaddr+0x320/0x320 [ 3117.768046] ? __radix_tree_lookup+0x150/0x150 [ 3117.772603] ? dqget+0x670/0x670 [ 3117.777159] ? pagecache_get_page+0x29/0x410 [ 3117.781648] ? kmem_cache_alloc+0x176/0x1e0 [ 3117.786067] ? f2fs_is_valid_blkaddr+0x11d/0x320 [ 3117.790476] f2fs_recover_fsync_data+0xc23/0xd50 [ 3117.794790] ? f2fs_space_for_roll_forward+0x60/0x60 [ 3117.799086] ? rb_insert_color+0x323/0x3d0 [ 3117.803304] ? f2fs_recover_orphan_inodes+0xa5/0x700 [ 3117.807563] ? proc_register+0x153/0x1d0 [ 3117.811766] ? f2fs_remove_orphan_inode+0x10/0x10 [ 3117.815947] ? f2fs_attr_store+0x50/0x50 [ 3117.820087] ? proc_create_single_data+0x52/0x60 [ 3117.824262] f2fs_fill_super+0x1d06/0x2b40 [ 3117.828367] ? f2fs_commit_super+0x1a0/0x1a0 [ 3117.832432] ? sget_userns+0x65e/0x690 [ 3117.836500] ? set_blocksize+0x88/0x130 [ 3117.840501] ? f2fs_commit_super+0x1a0/0x1a0 [ 3117.844420] mount_bdev+0x1c0/0x200 [ 3117.848275] mount_fs+0x5c/0x190 [ 3117.852053] vfs_kern_mount+0x64/0x190 [ 3117.855810] do_mount+0x2e4/0x1450 [ 3117.859441] ? lockref_put_return+0x130/0x130 [ 3117.862996] ? copy_mount_string+0x20/0x20 [ 3117.866417] ? kasan_unpoison_shadow+0x31/0x40 [ 3117.869719] ? kasan_kmalloc+0xa6/0xd0 [ 3117.872948] ? memcg_kmem_put_cache+0x16/0x90 [ 3117.876121] ? __kmalloc_track_caller+0x196/0x210 [ 3117.879333] ? _copy_from_user+0x61/0x90 [ 3117.882467] ? memdup_user+0x3e/0x60 [ 3117.885604] ksys_mount+0x7e/0xd0 [ 3117.888700] __x64_sys_mount+0x62/0x70 [ 3117.891742] do_syscall_64+0x73/0x160 [ 3117.894692] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 3117.897669] RIP: 0033:0x7f5693f14b9a [ 3117.900563] Code: 48 8b 0d 01 c3 2b 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 49 89 ca b8 a5 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d ce c2 2b 00 f7 d8 64 89 01 48 [ 3117.906922] RSP: 002b:00007fff27346488 EFLAGS: 00000206 ORIG_RAX: 00000000000000a5 [ 3117.910159] RAX: ffffffffffffffda RBX: 00000000016e2030 RCX: 00007f5693f14b9a [ 3117.913469] RDX: 00000000016e2210 RSI: 00000000016e3f30 RDI: 00000000016ee040 [ 3117.916764] RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000013 [ 3117.920071] R10: 00000000c0ed0000 R11: 0000000000000206 R12: 00000000016ee040 [ 3117.923393] R13: 00000000016e2210 R14: 0000000000000000 R15: 0000000000000003 [ 3117.926680] Modules linked in: snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hda_core snd_hwdep snd_pcm snd_timer joydev input_leds serio_raw snd soundcore mac_hid i2c_piix4 ib_iser rdma_cm iw_cm ib_cm ib_core configfs iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi btrfs zstd_decompress zstd_compress xxhash raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx xor raid6_pq libcrc32c raid1 raid0 multipath linear 8139too qxl ttm drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops drm crct10dif_pclmul crc32_pclmul ghash_clmulni_intel pcbc aesni_intel psmouse aes_x86_64 8139cp crypto_simd cryptd mii glue_helper pata_acpi floppy [ 3117.949979] CR2: 0000000000000000 [ 3117.954283] ---[ end trace a8e0d899985faf32 ]--- [ 3117.958575] RIP: 0010:__remove_dirty_segment+0xe2/0x1e0 [ 3117.962810] Code: c4 48 89 c7 e8 cf bb d7 ff 45 0f b6 24 24 41 83 e4 3f 44 88 64 24 07 41 83 e4 3f 4a 8d 7c e3 08 e8 b3 bc d7 ff 4a 8b 4c e3 08 4c 0f b3 29 0f 82 94 00 00 00 48 8d bd 20 04 00 00 e8 97 bb d7 [ 3117.971789] RSP: 0018:ffff88018eb67638 EFLAGS: 00010292 [ 3117.976333] RAX: 0000000000000000 RBX: ffff88018f0a6300 RCX: 0000000000000000 [ 3117.980926] RDX: 0000000000000000 RSI: 0000000000000297 RDI: 0000000000000297 [ 3117.985497] RBP: ffff88018ebe9980 R08: ffffed003e743ebb R09: ffffed003e743ebb [ 3117.990098] R10: 0000000000000001 R11: ffffed003e743eba R12: 0000000000000019 [ 3117.994761] R13: 0000000000000014 R14: 0000000000000320 R15: ffff88018ebe99e0 [ 3117.999392] FS: 00007f5694636840(0000) GS:ffff8801f3b00000(0000) knlGS:0000000000000000 [ 3118.004096] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 3118.008816] CR2: 00007fe89bb1a000 CR3: 0000000191c22000 CR4: 00000000000006e0 - Location https://elixir.bootlin.com/linux/v4.18-rc3/source/fs/f2fs/segment.c#L775 if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t])) dirty_i->nr_dirty[t]--; Here dirty_i->dirty_segmap[t] can be NULL which leads to crash in test_and_clear_bit() Reported-by Wen Xu Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim [bwh: Backported to 4.14: The function is called sanity_check_ckpt()] Signed-off-by: Ben Hutchings Signed-off-by: Sasha Levin --- fs/f2fs/checkpoint.c | 8 ++++---- fs/f2fs/super.c | 12 ++++++++++++ 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index c81cd5057b8ee..624817eeb25e3 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -825,15 +825,15 @@ int get_valid_checkpoint(struct f2fs_sb_info *sbi) cp_block = (struct f2fs_checkpoint *)page_address(cur_page); memcpy(sbi->ckpt, cp_block, blk_size); - /* Sanity checking of checkpoint */ - if (sanity_check_ckpt(sbi)) - goto free_fail_no_cp; - if (cur_page == cp1) sbi->cur_cp_pack = 1; else sbi->cur_cp_pack = 2; + /* Sanity checking of checkpoint */ + if (sanity_check_ckpt(sbi)) + goto free_fail_no_cp; + if (cp_blks <= 1) goto done; diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 9fafb1404f392..de4de4ebe64c5 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1957,6 +1957,7 @@ int sanity_check_ckpt(struct f2fs_sb_info *sbi) unsigned int sit_bitmap_size, nat_bitmap_size; unsigned int log_blocks_per_seg; unsigned int segment_count_main; + unsigned int cp_pack_start_sum, cp_payload; block_t user_block_count; int i; @@ -2017,6 +2018,17 @@ int sanity_check_ckpt(struct f2fs_sb_info *sbi) return 1; } + cp_pack_start_sum = __start_sum_addr(sbi); + cp_payload = __cp_payload(sbi); + if (cp_pack_start_sum < cp_payload + 1 || + cp_pack_start_sum > blocks_per_seg - 1 - + NR_CURSEG_TYPE) { + f2fs_msg(sbi->sb, KERN_ERR, + "Wrong cp_pack_start_sum: %u", + cp_pack_start_sum); + return 1; + } + if (unlikely(f2fs_cp_error(sbi))) { f2fs_msg(sbi->sb, KERN_ERR, "A bug case: need to run fsck"); return 1; -- GitLab From cb7ccb9924bb3596f211badf0d2becf131a979cd Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 17 Apr 2018 19:10:15 -0700 Subject: [PATCH 1846/2269] xfs: don't fail when converting shortform attr to long form during ATTR_REPLACE commit 7b38460dc8e4eafba06c78f8e37099d3b34d473c upstream. Kanda Motohiro reported that expanding a tiny xattr into a large xattr fails on XFS because we remove the tiny xattr from a shortform fork and then try to re-add it after converting the fork to extents format having not removed the ATTR_REPLACE flag. This fails because the attr is no longer present, causing a fs shutdown. This is derived from the patch in his bug report, but we really shouldn't ignore a nonzero retval from the remove call. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=199119 Reported-by: kanda.motohiro@gmail.com Reviewed-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Darrick J. Wong Signed-off-by: Ben Hutchings Signed-off-by: Sasha Levin --- fs/xfs/libxfs/xfs_attr.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c index 6249c92671deb..ea66f04f46f7e 100644 --- a/fs/xfs/libxfs/xfs_attr.c +++ b/fs/xfs/libxfs/xfs_attr.c @@ -501,7 +501,14 @@ xfs_attr_shortform_addname(xfs_da_args_t *args) if (args->flags & ATTR_CREATE) return retval; retval = xfs_attr_shortform_remove(args); - ASSERT(retval == 0); + if (retval) + return retval; + /* + * Since we have removed the old attr, clear ATTR_REPLACE so + * that the leaf format add routine won't trip over the attr + * not being around. + */ + args->flags &= ~ATTR_REPLACE; } if (args->namelen >= XFS_ATTR_SF_ENTSIZE_MAX || -- GitLab From 04d269610ea35656fe4d9609f5a5fa043daa866e Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Sun, 2 Dec 2018 10:03:24 -0500 Subject: [PATCH 1847/2269] Revert "wlcore: Add missing PM call for wlcore_cmd_wait_for_event_or_timeout()" This reverts commit e87efc44dd36ba3db59847c418354711ebad779b which was upstream commit 4ec7cece87b3ed21ffcd407c62fb2f151a366bc1. From Dietmar May's report on the stable mailing list (https://www.spinics.net/lists/stable/msg272201.html): > I've run into some problems which appear due to (a) recent patch(es) on > the wlcore wifi driver. > > 4.4.160 - commit 3fdd34643ffc378b5924941fad40352c04610294 > 4.9.131 - commit afeeecc764436f31d4447575bb9007732333818c > > Earlier versions (4.9.130 and 4.4.159 - tested back to 4.4.49) do not > exhibit this problem. It is still present in 4.9.141. > > master as of 4.20.0-rc4 does not exhibit this problem. > > Basically, during client association when in AP mode (running hostapd), > handshake may or may not complete following a noticeable delay. If > successful, then the driver fails consistently in warn_slowpath_null > during disassociation. If unsuccessful, the wifi client attempts multiple > times, sometimes failing repeatedly. I've had clients unable to connect > for 3-5 minutes during testing, with the syslog filled with dozens of > backtraces. syslog details are below. > > I'm working on an embedded device with a TI 3352 ARM processor and a > murata wl1271 module in sdio mode. We're running a fully patched ubuntu > 18.04 ARM build, with a kernel built from kernel.org's stable/linux repo . > Relevant parts of the kernel config are included below. > > The commit message states: > > > /I've only seen this few times with the runtime PM patches enabled so > > this one is probably not needed before that. This seems to work > > currently based on the current PM implementation timer. Let's apply > > this separately though in case others are hitting this issue./ > We're not doing anything explicit with power management. The device is an > IoT edge gateway with battery backup, normally running on wall power. The > battery is currently used solely to shut down the system cleanly to avoid > filesystem corruption. > > The device tree is configured to keep power in suspend; but the device > should never suspend, so in our case, there is no need to call > wl1271_ps_elp_wakeup() or wl1271_ps_elp_sleep(), as occurs in the patch. Signed-off-by: Sasha Levin --- drivers/net/wireless/ti/wlcore/cmd.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/net/wireless/ti/wlcore/cmd.c b/drivers/net/wireless/ti/wlcore/cmd.c index f48c3f62966d4..761cf8573a805 100644 --- a/drivers/net/wireless/ti/wlcore/cmd.c +++ b/drivers/net/wireless/ti/wlcore/cmd.c @@ -35,7 +35,6 @@ #include "wl12xx_80211.h" #include "cmd.h" #include "event.h" -#include "ps.h" #include "tx.h" #include "hw_ops.h" @@ -192,10 +191,6 @@ int wlcore_cmd_wait_for_event_or_timeout(struct wl1271 *wl, timeout_time = jiffies + msecs_to_jiffies(WL1271_EVENT_TIMEOUT); - ret = wl1271_ps_elp_wakeup(wl); - if (ret < 0) - return ret; - do { if (time_after(jiffies, timeout_time)) { wl1271_debug(DEBUG_CMD, "timeout waiting for event %d", @@ -227,7 +222,6 @@ int wlcore_cmd_wait_for_event_or_timeout(struct wl1271 *wl, } while (!event); out: - wl1271_ps_elp_sleep(wl); kfree(events_vector); return ret; } -- GitLab From 3a8a411c927716fad4004a6953a5b2cf988eb2df Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Tue, 20 Nov 2018 11:39:56 +0000 Subject: [PATCH 1848/2269] net: skb_scrub_packet(): Scrub offload_fwd_mark [ Upstream commit b5dd186d10ba59e6b5ba60e42b3b083df56df6f3 ] When a packet is trapped and the corresponding SKB marked as already-forwarded, it retains this marking even after it is forwarded across veth links into another bridge. There, since it ingresses the bridge over veth, which doesn't have offload_fwd_mark, it triggers a warning in nbp_switchdev_frame_mark(). Then nbp_switchdev_allowed_egress() decides not to allow egress from this bridge through another veth, because the SKB is already marked, and the mark (of 0) of course matches. Thus the packet is incorrectly blocked. Solve by resetting offload_fwd_mark() in skb_scrub_packet(). That function is called from tunnels and also from veth, and thus catches the cases where traffic is forwarded between bridges and transformed in a way that invalidates the marking. Fixes: 6bc506b4fb06 ("bridge: switchdev: Add forward mark support for stacked devices") Fixes: abf4bb6b63d0 ("skbuff: Add the offload_mr_fwd_mark field") Signed-off-by: Petr Machata Suggested-by: Ido Schimmel Acked-by: Jiri Pirko Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/skbuff.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index c19a118f9f827..4067fa3fcbb22 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4882,6 +4882,10 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet) nf_reset(skb); nf_reset_trace(skb); +#ifdef CONFIG_NET_SWITCHDEV + skb->offload_fwd_mark = 0; +#endif + if (!xnet) return; -- GitLab From 44e4f3644934b65c91bc05245a9447dde2c7af40 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Wed, 21 Nov 2018 16:32:10 +0100 Subject: [PATCH 1849/2269] net: thunderx: set xdp_prog to NULL if bpf_prog_add fails [ Upstream commit 6d0f60b0f8588fd4380ea5df9601e12fddd55ce2 ] Set xdp_prog pointer to NULL if bpf_prog_add fails since that routine reports the error code instead of NULL in case of failure and xdp_prog pointer value is used in the driver to verify if XDP is currently enabled. Moreover report the error code to userspace if nicvf_xdp_setup fails Fixes: 05c773f52b96 ("net: thunderx: Add basic XDP support") Signed-off-by: Lorenzo Bianconi Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/cavium/thunder/nicvf_main.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c index 2237ef8e43447..f13256af8031b 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c @@ -1691,6 +1691,7 @@ static int nicvf_xdp_setup(struct nicvf *nic, struct bpf_prog *prog) bool if_up = netif_running(nic->netdev); struct bpf_prog *old_prog; bool bpf_attached = false; + int ret = 0; /* For now just support only the usual MTU sized frames */ if (prog && (dev->mtu > 1500)) { @@ -1724,8 +1725,12 @@ static int nicvf_xdp_setup(struct nicvf *nic, struct bpf_prog *prog) if (nic->xdp_prog) { /* Attach BPF program */ nic->xdp_prog = bpf_prog_add(nic->xdp_prog, nic->rx_queues - 1); - if (!IS_ERR(nic->xdp_prog)) + if (!IS_ERR(nic->xdp_prog)) { bpf_attached = true; + } else { + ret = PTR_ERR(nic->xdp_prog); + nic->xdp_prog = NULL; + } } /* Calculate Tx queues needed for XDP and network stack */ @@ -1737,7 +1742,7 @@ static int nicvf_xdp_setup(struct nicvf *nic, struct bpf_prog *prog) netif_trans_update(nic->netdev); } - return 0; + return ret; } static int nicvf_xdp(struct net_device *netdev, struct netdev_xdp *xdp) -- GitLab From e5cfda6c5d04a04e9ec9834925e590f7e921c57f Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Thu, 22 Nov 2018 14:36:30 +0800 Subject: [PATCH 1850/2269] virtio-net: disable guest csum during XDP set [ Upstream commit e59ff2c49ae16e1d179de679aca81405829aee6c ] We don't disable VIRTIO_NET_F_GUEST_CSUM if XDP was set. This means we can receive partial csumed packets with metadata kept in the vnet_hdr. This may have several side effects: - It could be overridden by header adjustment, thus is might be not correct after XDP processing. - There's no way to pass such metadata information through XDP_REDIRECT to another driver. - XDP does not support checksum offload right now. So simply disable guest csum if possible in this the case of XDP. Fixes: 3f93522ffab2d ("virtio-net: switch off offloads on demand if possible on XDP set") Reported-by: Jesper Dangaard Brouer Cc: Jesper Dangaard Brouer Cc: Pavel Popa Cc: David Ahern Signed-off-by: Jason Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/virtio_net.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index f528e9ac3413b..2ffa7b290591f 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -61,7 +61,8 @@ static const unsigned long guest_offloads[] = { VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6, VIRTIO_NET_F_GUEST_ECN, - VIRTIO_NET_F_GUEST_UFO + VIRTIO_NET_F_GUEST_UFO, + VIRTIO_NET_F_GUEST_CSUM }; struct virtnet_stats { @@ -1939,9 +1940,6 @@ static int virtnet_clear_guest_offloads(struct virtnet_info *vi) if (!vi->guest_offloads) return 0; - if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_CSUM)) - offloads = 1ULL << VIRTIO_NET_F_GUEST_CSUM; - return virtnet_set_guest_offloads(vi, offloads); } @@ -1951,8 +1949,6 @@ static int virtnet_restore_guest_offloads(struct virtnet_info *vi) if (!vi->guest_offloads) return 0; - if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_CSUM)) - offloads |= 1ULL << VIRTIO_NET_F_GUEST_CSUM; return virtnet_set_guest_offloads(vi, offloads); } -- GitLab From 0435cabc0d6a0f0ebb60589a9ad28d1a3aecd748 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Thu, 22 Nov 2018 14:36:31 +0800 Subject: [PATCH 1851/2269] virtio-net: fail XDP set if guest csum is negotiated [ Upstream commit 18ba58e1c234ea1a2d9835ac8c1735d965ce4640 ] We don't support partial csumed packet since its metadata will be lost or incorrect during XDP processing. So fail the XDP set if guest_csum feature is negotiated. Fixes: f600b6905015 ("virtio_net: Add XDP support") Reported-by: Jesper Dangaard Brouer Cc: Jesper Dangaard Brouer Cc: Pavel Popa Cc: David Ahern Signed-off-by: Jason Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/virtio_net.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 2ffa7b290591f..0e8e3be503327 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -1966,8 +1966,9 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog, && (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO4) || virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6) || virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) || - virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO))) { - NL_SET_ERR_MSG_MOD(extack, "Can't set XDP while host is implementing LRO, disable LRO first"); + virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO) || + virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_CSUM))) { + NL_SET_ERR_MSG_MOD(extack, "Can't set XDP while host is implementing LRO/CSUM, disable LRO/CSUM first"); return -EOPNOTSUPP; } -- GitLab From 42412faf1a9356e6967c5c5fc530fe0ab5e32e9f Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Fri, 23 Nov 2018 18:28:01 +0100 Subject: [PATCH 1852/2269] net: thunderx: set tso_hdrs pointer to NULL in nicvf_free_snd_queue [ Upstream commit ef2a7cf1d8831535b8991459567b385661eb4a36 ] Reset snd_queue tso_hdrs pointer to NULL in nicvf_free_snd_queue routine since it is used to check if tso dma descriptor queue has been previously allocated. The issue can be triggered with the following reproducer: $ip link set dev enP2p1s0v0 xdpdrv obj xdp_dummy.o $ip link set dev enP2p1s0v0 xdpdrv off [ 341.467649] WARNING: CPU: 74 PID: 2158 at mm/vmalloc.c:1511 __vunmap+0x98/0xe0 [ 341.515010] Hardware name: GIGABYTE H270-T70/MT70-HD0, BIOS T49 02/02/2018 [ 341.521874] pstate: 60400005 (nZCv daif +PAN -UAO) [ 341.526654] pc : __vunmap+0x98/0xe0 [ 341.530132] lr : __vunmap+0x98/0xe0 [ 341.533609] sp : ffff00001c5db860 [ 341.536913] x29: ffff00001c5db860 x28: 0000000000020000 [ 341.542214] x27: ffff810feb5090b0 x26: ffff000017e57000 [ 341.547515] x25: 0000000000000000 x24: 00000000fbd00000 [ 341.552816] x23: 0000000000000000 x22: ffff810feb5090b0 [ 341.558117] x21: 0000000000000000 x20: 0000000000000000 [ 341.563418] x19: ffff000017e57000 x18: 0000000000000000 [ 341.568719] x17: 0000000000000000 x16: 0000000000000000 [ 341.574020] x15: 0000000000000010 x14: ffffffffffffffff [ 341.579321] x13: ffff00008985eb27 x12: ffff00000985eb2f [ 341.584622] x11: ffff0000096b3000 x10: ffff00001c5db510 [ 341.589923] x9 : 00000000ffffffd0 x8 : ffff0000086868e8 [ 341.595224] x7 : 3430303030303030 x6 : 00000000000006ef [ 341.600525] x5 : 00000000003fffff x4 : 0000000000000000 [ 341.605825] x3 : 0000000000000000 x2 : ffffffffffffffff [ 341.611126] x1 : ffff0000096b3728 x0 : 0000000000000038 [ 341.616428] Call trace: [ 341.618866] __vunmap+0x98/0xe0 [ 341.621997] vunmap+0x3c/0x50 [ 341.624961] arch_dma_free+0x68/0xa0 [ 341.628534] dma_direct_free+0x50/0x80 [ 341.632285] nicvf_free_resources+0x160/0x2d8 [nicvf] [ 341.637327] nicvf_config_data_transfer+0x174/0x5e8 [nicvf] [ 341.642890] nicvf_stop+0x298/0x340 [nicvf] [ 341.647066] __dev_close_many+0x9c/0x108 [ 341.650977] dev_close_many+0xa4/0x158 [ 341.654720] rollback_registered_many+0x140/0x530 [ 341.659414] rollback_registered+0x54/0x80 [ 341.663499] unregister_netdevice_queue+0x9c/0xe8 [ 341.668192] unregister_netdev+0x28/0x38 [ 341.672106] nicvf_remove+0xa4/0xa8 [nicvf] [ 341.676280] nicvf_shutdown+0x20/0x30 [nicvf] [ 341.680630] pci_device_shutdown+0x44/0x88 [ 341.684720] device_shutdown+0x144/0x250 [ 341.688640] kernel_restart_prepare+0x44/0x50 [ 341.692986] kernel_restart+0x20/0x68 [ 341.696638] __se_sys_reboot+0x210/0x238 [ 341.700550] __arm64_sys_reboot+0x24/0x30 [ 341.704555] el0_svc_handler+0x94/0x110 [ 341.708382] el0_svc+0x8/0xc [ 341.711252] ---[ end trace 3f4019c8439959c9 ]--- [ 341.715874] page:ffff7e0003ef4000 count:0 mapcount:0 mapping:0000000000000000 index:0x4 [ 341.723872] flags: 0x1fffe000000000() [ 341.727527] raw: 001fffe000000000 ffff7e0003f1a008 ffff7e0003ef4048 0000000000000000 [ 341.735263] raw: 0000000000000004 0000000000000000 00000000ffffffff 0000000000000000 [ 341.742994] page dumped because: VM_BUG_ON_PAGE(page_ref_count(page) == 0) where xdp_dummy.c is a simple bpf program that forwards the incoming frames to the network stack (available here: https://github.com/altoor/xdp_walkthrough_examples/blob/master/sample_1/xdp_dummy.c) Fixes: 05c773f52b96 ("net: thunderx: Add basic XDP support") Fixes: 4863dea3fab0 ("net: Adding support for Cavium ThunderX network controller") Signed-off-by: Lorenzo Bianconi Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/cavium/thunder/nicvf_queues.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c index a3d12dbde95b6..09494e1c77c5c 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c @@ -585,10 +585,12 @@ static void nicvf_free_snd_queue(struct nicvf *nic, struct snd_queue *sq) if (!sq->dmem.base) return; - if (sq->tso_hdrs) + if (sq->tso_hdrs) { dma_free_coherent(&nic->pdev->dev, sq->dmem.q_len * TSO_HEADER_SIZE, sq->tso_hdrs, sq->tso_hdrs_phys); + sq->tso_hdrs = NULL; + } /* Free pending skbs in the queue */ smp_rmb(); -- GitLab From 67f6fba765ef8cb19089d476e3c367212a2fbdfd Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Tue, 20 Nov 2018 13:00:18 -0500 Subject: [PATCH 1853/2269] packet: copy user buffers before orphan or clone [ Upstream commit 5cd8d46ea1562be80063f53c7c6a5f40224de623 ] tpacket_snd sends packets with user pages linked into skb frags. It notifies that pages can be reused when the skb is released by setting skb->destructor to tpacket_destruct_skb. This can cause data corruption if the skb is orphaned (e.g., on transmit through veth) or cloned (e.g., on mirror to another psock). Create a kernel-private copy of data in these cases, same as tun/tap zerocopy transmission. Reuse that infrastructure: mark the skb as SKBTX_ZEROCOPY_FRAG, which will trigger copy in skb_orphan_frags(_rx). Unlike other zerocopy packets, do not set shinfo destructor_arg to struct ubuf_info. tpacket_destruct_skb already uses that ptr to notify when the original skb is released and a timestamp is recorded. Do not change this timestamp behavior. The ubuf_info->callback is not needed anyway, as no zerocopy notification is expected. Mark destructor_arg as not-a-uarg by setting the lower bit to 1. The resulting value is not a valid ubuf_info pointer, nor a valid tpacket_snd frame address. Add skb_zcopy_.._nouarg helpers for this. The fix relies on features introduced in commit 52267790ef52 ("sock: add MSG_ZEROCOPY"), so can be backported as is only to 4.14. Tested with from `./in_netns.sh ./txring_overwrite` from http://github.com/wdebruij/kerneltools/tests Fixes: 69e3c75f4d54 ("net: TX_RING and packet mmap") Reported-by: Anand H. Krishnan Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/linux/skbuff.h | 18 +++++++++++++++++- net/packet/af_packet.c | 4 ++-- 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index f64e884440826..f6250555ce7d2 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1288,6 +1288,22 @@ static inline void skb_zcopy_set(struct sk_buff *skb, struct ubuf_info *uarg) } } +static inline void skb_zcopy_set_nouarg(struct sk_buff *skb, void *val) +{ + skb_shinfo(skb)->destructor_arg = (void *)((uintptr_t) val | 0x1UL); + skb_shinfo(skb)->tx_flags |= SKBTX_ZEROCOPY_FRAG; +} + +static inline bool skb_zcopy_is_nouarg(struct sk_buff *skb) +{ + return (uintptr_t) skb_shinfo(skb)->destructor_arg & 0x1UL; +} + +static inline void *skb_zcopy_get_nouarg(struct sk_buff *skb) +{ + return (void *)((uintptr_t) skb_shinfo(skb)->destructor_arg & ~0x1UL); +} + /* Release a reference on a zerocopy structure */ static inline void skb_zcopy_clear(struct sk_buff *skb, bool zerocopy) { @@ -1297,7 +1313,7 @@ static inline void skb_zcopy_clear(struct sk_buff *skb, bool zerocopy) if (uarg->callback == sock_zerocopy_callback) { uarg->zerocopy = uarg->zerocopy && zerocopy; sock_zerocopy_put(uarg); - } else { + } else if (!skb_zcopy_is_nouarg(skb)) { uarg->callback(uarg, zerocopy); } diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 8d1a7c9003930..88d5b2645bb06 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2433,7 +2433,7 @@ static void tpacket_destruct_skb(struct sk_buff *skb) void *ph; __u32 ts; - ph = skb_shinfo(skb)->destructor_arg; + ph = skb_zcopy_get_nouarg(skb); packet_dec_pending(&po->tx_ring); ts = __packet_set_timestamp(po, ph, skb); @@ -2499,7 +2499,7 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, skb->priority = po->sk.sk_priority; skb->mark = po->sk.sk_mark; sock_tx_timestamp(&po->sk, sockc->tsflags, &skb_shinfo(skb)->tx_flags); - skb_shinfo(skb)->destructor_arg = ph.raw; + skb_zcopy_set_nouarg(skb, ph.raw); skb_reserve(skb, hlen); skb_reset_network_header(skb); -- GitLab From 47897682fe041a5898cfd30a5f24d3173e856d5a Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Wed, 28 Nov 2018 14:53:19 +0800 Subject: [PATCH 1854/2269] rapidio/rionet: do not free skb before reading its length [ Upstream commit cfc435198f53a6fa1f656d98466b24967ff457d0 ] skb is freed via dev_kfree_skb_any, however, skb->len is read then. This may result in a use-after-free bug. Fixes: e6161d64263 ("rapidio/rionet: rework driver initialization and removal") Signed-off-by: Pan Bian Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/rionet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/rionet.c b/drivers/net/rionet.c index e9f101c9bae2c..bfbb39f935545 100644 --- a/drivers/net/rionet.c +++ b/drivers/net/rionet.c @@ -216,9 +216,9 @@ static int rionet_start_xmit(struct sk_buff *skb, struct net_device *ndev) * it just report sending a packet to the target * (without actual packet transfer). */ - dev_kfree_skb_any(skb); ndev->stats.tx_packets++; ndev->stats.tx_bytes += skb->len; + dev_kfree_skb_any(skb); } } -- GitLab From eb09c6dbe4bb799ba153c5d729e2d26098a3b410 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Wed, 28 Nov 2018 16:20:50 +0100 Subject: [PATCH 1855/2269] s390/qeth: fix length check in SNMP processing [ Upstream commit 9a764c1e59684c0358e16ccaafd870629f2cfe67 ] The response for a SNMP request can consist of multiple parts, which the cmd callback stages into a kernel buffer until all parts have been received. If the callback detects that the staging buffer provides insufficient space, it bails out with error. This processing is buggy for the first part of the response - while it initially checks for a length of 'data_len', it later copies an additional amount of 'offsetof(struct qeth_snmp_cmd, data)' bytes. Fix the calculation of 'data_len' for the first part of the response. This also nicely cleans up the memcpy code. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Julian Wiedmann Reviewed-by: Ursula Braun Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/s390/net/qeth_core_main.c | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 169dd7127f9ef..69ef5f4060edb 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -4545,8 +4545,8 @@ static int qeth_snmp_command_cb(struct qeth_card *card, { struct qeth_ipa_cmd *cmd; struct qeth_arp_query_info *qinfo; - struct qeth_snmp_cmd *snmp; unsigned char *data; + void *snmp_data; __u16 data_len; QETH_CARD_TEXT(card, 3, "snpcmdcb"); @@ -4554,7 +4554,6 @@ static int qeth_snmp_command_cb(struct qeth_card *card, cmd = (struct qeth_ipa_cmd *) sdata; data = (unsigned char *)((char *)cmd - reply->offset); qinfo = (struct qeth_arp_query_info *) reply->param; - snmp = &cmd->data.setadapterparms.data.snmp; if (cmd->hdr.return_code) { QETH_CARD_TEXT_(card, 4, "scer1%x", cmd->hdr.return_code); @@ -4567,10 +4566,15 @@ static int qeth_snmp_command_cb(struct qeth_card *card, return 0; } data_len = *((__u16 *)QETH_IPA_PDU_LEN_PDU1(data)); - if (cmd->data.setadapterparms.hdr.seq_no == 1) - data_len -= (__u16)((char *)&snmp->data - (char *)cmd); - else - data_len -= (__u16)((char *)&snmp->request - (char *)cmd); + if (cmd->data.setadapterparms.hdr.seq_no == 1) { + snmp_data = &cmd->data.setadapterparms.data.snmp; + data_len -= offsetof(struct qeth_ipa_cmd, + data.setadapterparms.data.snmp); + } else { + snmp_data = &cmd->data.setadapterparms.data.snmp.request; + data_len -= offsetof(struct qeth_ipa_cmd, + data.setadapterparms.data.snmp.request); + } /* check if there is enough room in userspace */ if ((qinfo->udata_len - qinfo->udata_offset) < data_len) { @@ -4583,16 +4587,9 @@ static int qeth_snmp_command_cb(struct qeth_card *card, QETH_CARD_TEXT_(card, 4, "sseqn%i", cmd->data.setadapterparms.hdr.seq_no); /*copy entries to user buffer*/ - if (cmd->data.setadapterparms.hdr.seq_no == 1) { - memcpy(qinfo->udata + qinfo->udata_offset, - (char *)snmp, - data_len + offsetof(struct qeth_snmp_cmd, data)); - qinfo->udata_offset += offsetof(struct qeth_snmp_cmd, data); - } else { - memcpy(qinfo->udata + qinfo->udata_offset, - (char *)&snmp->request, data_len); - } + memcpy(qinfo->udata + qinfo->udata_offset, snmp_data, data_len); qinfo->udata_offset += data_len; + /* check if all replies received ... */ QETH_CARD_TEXT_(card, 4, "srtot%i", cmd->data.setadapterparms.hdr.used_total); -- GitLab From 72e5a2bdf753f5ba82ea6bfb7706aabc9587d0cb Mon Sep 17 00:00:00 2001 From: Bernd Eckstein <3erndeckstein@gmail.com> Date: Fri, 23 Nov 2018 13:51:26 +0100 Subject: [PATCH 1856/2269] usbnet: ipheth: fix potential recvmsg bug and recvmsg bug 2 [ Upstream commit 45611c61dd503454b2edae00aabe1e429ec49ebe ] The bug is not easily reproducable, as it may occur very infrequently (we had machines with 20minutes heavy downloading before it occurred) However, on a virual machine (VMWare on Windows 10 host) it occurred pretty frequently (1-2 seconds after a speedtest was started) dev->tx_skb mab be freed via dev_kfree_skb_irq on a callback before it is set. This causes the following problems: - double free of the skb or potential memory leak - in dmesg: 'recvmsg bug' and 'recvmsg bug 2' and eventually general protection fault Example dmesg output: [ 134.841986] ------------[ cut here ]------------ [ 134.841987] recvmsg bug: copied 9C24A555 seq 9C24B557 rcvnxt 9C25A6B3 fl 0 [ 134.841993] WARNING: CPU: 7 PID: 2629 at /build/linux-hwe-On9fm7/linux-hwe-4.15.0/net/ipv4/tcp.c:1865 tcp_recvmsg+0x44d/0xab0 [ 134.841994] Modules linked in: ipheth(OE) kvm_intel kvm irqbypass crct10dif_pclmul crc32_pclmul ghash_clmulni_intel pcbc aesni_intel aes_x86_64 crypto_simd glue_helper cryptd vmw_balloon intel_rapl_perf joydev input_leds serio_raw vmw_vsock_vmci_transport vsock shpchp i2c_piix4 mac_hid binfmt_misc vmw_vmci parport_pc ppdev lp parport autofs4 vmw_pvscsi vmxnet3 hid_generic usbhid hid vmwgfx ttm drm_kms_helper syscopyarea sysfillrect mptspi mptscsih sysimgblt ahci psmouse fb_sys_fops pata_acpi mptbase libahci e1000 drm scsi_transport_spi [ 134.842046] CPU: 7 PID: 2629 Comm: python Tainted: G W OE 4.15.0-34-generic #37~16.04.1-Ubuntu [ 134.842046] Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 05/19/2017 [ 134.842048] RIP: 0010:tcp_recvmsg+0x44d/0xab0 [ 134.842048] RSP: 0018:ffffa6630422bcc8 EFLAGS: 00010286 [ 134.842049] RAX: 0000000000000000 RBX: ffff997616f4f200 RCX: 0000000000000006 [ 134.842049] RDX: 0000000000000007 RSI: 0000000000000082 RDI: ffff9976257d6490 [ 134.842050] RBP: ffffa6630422bd98 R08: 0000000000000001 R09: 000000000004bba4 [ 134.842050] R10: 0000000001e00c6f R11: 000000000004bba4 R12: ffff99760dee3000 [ 134.842051] R13: 0000000000000000 R14: ffff99760dee3514 R15: 0000000000000000 [ 134.842051] FS: 00007fe332347700(0000) GS:ffff9976257c0000(0000) knlGS:0000000000000000 [ 134.842052] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 134.842053] CR2: 0000000001e41000 CR3: 000000020e9b4006 CR4: 00000000003606e0 [ 134.842055] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 134.842055] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 134.842057] Call Trace: [ 134.842060] ? aa_sk_perm+0x53/0x1a0 [ 134.842064] inet_recvmsg+0x51/0xc0 [ 134.842066] sock_recvmsg+0x43/0x50 [ 134.842070] SYSC_recvfrom+0xe4/0x160 [ 134.842072] ? __schedule+0x3de/0x8b0 [ 134.842075] ? ktime_get_ts64+0x4c/0xf0 [ 134.842079] SyS_recvfrom+0xe/0x10 [ 134.842082] do_syscall_64+0x73/0x130 [ 134.842086] entry_SYSCALL_64_after_hwframe+0x3d/0xa2 [ 134.842086] RIP: 0033:0x7fe331f5a81d [ 134.842088] RSP: 002b:00007ffe8da98398 EFLAGS: 00000246 ORIG_RAX: 000000000000002d [ 134.842090] RAX: ffffffffffffffda RBX: ffffffffffffffff RCX: 00007fe331f5a81d [ 134.842094] RDX: 00000000000003fb RSI: 0000000001e00874 RDI: 0000000000000003 [ 134.842095] RBP: 00007fe32f642c70 R08: 0000000000000000 R09: 0000000000000000 [ 134.842097] R10: 0000000000000000 R11: 0000000000000246 R12: 00007fe332347698 [ 134.842099] R13: 0000000001b7e0a0 R14: 0000000001e00874 R15: 0000000000000000 [ 134.842103] Code: 24 fd ff ff e9 cc fe ff ff 48 89 d8 41 8b 8c 24 10 05 00 00 44 8b 45 80 48 c7 c7 08 bd 59 8b 48 89 85 68 ff ff ff e8 b3 c4 7d ff <0f> 0b 48 8b 85 68 ff ff ff e9 e9 fe ff ff 41 8b 8c 24 10 05 00 [ 134.842126] ---[ end trace b7138fc08c83147f ]--- [ 134.842144] general protection fault: 0000 [#1] SMP PTI [ 134.842145] Modules linked in: ipheth(OE) kvm_intel kvm irqbypass crct10dif_pclmul crc32_pclmul ghash_clmulni_intel pcbc aesni_intel aes_x86_64 crypto_simd glue_helper cryptd vmw_balloon intel_rapl_perf joydev input_leds serio_raw vmw_vsock_vmci_transport vsock shpchp i2c_piix4 mac_hid binfmt_misc vmw_vmci parport_pc ppdev lp parport autofs4 vmw_pvscsi vmxnet3 hid_generic usbhid hid vmwgfx ttm drm_kms_helper syscopyarea sysfillrect mptspi mptscsih sysimgblt ahci psmouse fb_sys_fops pata_acpi mptbase libahci e1000 drm scsi_transport_spi [ 134.842161] CPU: 7 PID: 2629 Comm: python Tainted: G W OE 4.15.0-34-generic #37~16.04.1-Ubuntu [ 134.842162] Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 05/19/2017 [ 134.842164] RIP: 0010:tcp_close+0x2c6/0x440 [ 134.842165] RSP: 0018:ffffa6630422bde8 EFLAGS: 00010202 [ 134.842167] RAX: 0000000000000000 RBX: ffff99760dee3000 RCX: 0000000180400034 [ 134.842168] RDX: 5c4afd407207a6c4 RSI: ffffe868495bd300 RDI: ffff997616f4f200 [ 134.842169] RBP: ffffa6630422be08 R08: 0000000016f4d401 R09: 0000000180400034 [ 134.842169] R10: ffffa6630422bd98 R11: 0000000000000000 R12: 000000000000600c [ 134.842170] R13: 0000000000000000 R14: ffff99760dee30c8 R15: ffff9975bd44fe00 [ 134.842171] FS: 00007fe332347700(0000) GS:ffff9976257c0000(0000) knlGS:0000000000000000 [ 134.842173] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 134.842174] CR2: 0000000001e41000 CR3: 000000020e9b4006 CR4: 00000000003606e0 [ 134.842177] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 134.842178] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 134.842179] Call Trace: [ 134.842181] inet_release+0x42/0x70 [ 134.842183] __sock_release+0x42/0xb0 [ 134.842184] sock_close+0x15/0x20 [ 134.842187] __fput+0xea/0x220 [ 134.842189] ____fput+0xe/0x10 [ 134.842191] task_work_run+0x8a/0xb0 [ 134.842193] exit_to_usermode_loop+0xc4/0xd0 [ 134.842195] do_syscall_64+0xf4/0x130 [ 134.842197] entry_SYSCALL_64_after_hwframe+0x3d/0xa2 [ 134.842197] RIP: 0033:0x7fe331f5a560 [ 134.842198] RSP: 002b:00007ffe8da982e8 EFLAGS: 00000246 ORIG_RAX: 0000000000000003 [ 134.842200] RAX: 0000000000000000 RBX: 00007fe32f642c70 RCX: 00007fe331f5a560 [ 134.842201] RDX: 00000000008f5320 RSI: 0000000001cd4b50 RDI: 0000000000000003 [ 134.842202] RBP: 00007fe32f6500f8 R08: 000000000000003c R09: 00000000009343c0 [ 134.842203] R10: 0000000000000000 R11: 0000000000000246 R12: 00007fe32f6500d0 [ 134.842204] R13: 00000000008f5320 R14: 00000000008f5320 R15: 0000000001cd4770 [ 134.842205] Code: c8 00 00 00 45 31 e4 49 39 fe 75 4d eb 50 83 ab d8 00 00 00 01 48 8b 17 48 8b 47 08 48 c7 07 00 00 00 00 48 c7 47 08 00 00 00 00 <48> 89 42 08 48 89 10 0f b6 57 34 8b 47 2c 2b 47 28 83 e2 01 80 [ 134.842226] RIP: tcp_close+0x2c6/0x440 RSP: ffffa6630422bde8 [ 134.842227] ---[ end trace b7138fc08c831480 ]--- The proposed patch eliminates a potential racing condition. Before, usb_submit_urb was called and _after_ that, the skb was attached (dev->tx_skb). So, on a callback it was possible, however unlikely that the skb was freed before it was set. That way (because dev->tx_skb was not set to NULL after it was freed), it could happen that a skb from a earlier transmission was freed a second time (and the skb we should have freed did not get freed at all) Now we free the skb directly in ipheth_tx(). It is not passed to the callback anymore, eliminating the posibility of a double free of the same skb. Depending on the retval of usb_submit_urb() we use dev_kfree_skb_any() respectively dev_consume_skb_any() to free the skb. Signed-off-by: Oliver Zweigle Signed-off-by: Bernd Eckstein <3ernd.Eckstein@gmail.com> Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/ipheth.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/net/usb/ipheth.c b/drivers/net/usb/ipheth.c index d49c7103085e6..aabbcfb6e6da9 100644 --- a/drivers/net/usb/ipheth.c +++ b/drivers/net/usb/ipheth.c @@ -140,7 +140,6 @@ struct ipheth_device { struct usb_device *udev; struct usb_interface *intf; struct net_device *net; - struct sk_buff *tx_skb; struct urb *tx_urb; struct urb *rx_urb; unsigned char *tx_buf; @@ -229,6 +228,7 @@ static void ipheth_rcvbulk_callback(struct urb *urb) case -ENOENT: case -ECONNRESET: case -ESHUTDOWN: + case -EPROTO: return; case 0: break; @@ -280,7 +280,6 @@ static void ipheth_sndbulk_callback(struct urb *urb) dev_err(&dev->intf->dev, "%s: urb status: %d\n", __func__, status); - dev_kfree_skb_irq(dev->tx_skb); netif_wake_queue(dev->net); } @@ -410,7 +409,7 @@ static int ipheth_tx(struct sk_buff *skb, struct net_device *net) if (skb->len > IPHETH_BUF_SIZE) { WARN(1, "%s: skb too large: %d bytes\n", __func__, skb->len); dev->net->stats.tx_dropped++; - dev_kfree_skb_irq(skb); + dev_kfree_skb_any(skb); return NETDEV_TX_OK; } @@ -430,12 +429,11 @@ static int ipheth_tx(struct sk_buff *skb, struct net_device *net) dev_err(&dev->intf->dev, "%s: usb_submit_urb: %d\n", __func__, retval); dev->net->stats.tx_errors++; - dev_kfree_skb_irq(skb); + dev_kfree_skb_any(skb); } else { - dev->tx_skb = skb; - dev->net->stats.tx_packets++; dev->net->stats.tx_bytes += skb->len; + dev_consume_skb_any(skb); netif_stop_queue(net); } -- GitLab From e5d981df9a1d3f13dfff083ab0578daedbcd9e8a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 22 Jan 2018 22:53:28 +0100 Subject: [PATCH 1857/2269] sched/core: Fix cpu.max vs. cpuhotplug deadlock commit ce48c146495a1a50e48cdbfbfaba3e708be7c07c upstream Tejun reported the following cpu-hotplug lock (percpu-rwsem) read recursion: tg_set_cfs_bandwidth() get_online_cpus() cpus_read_lock() cfs_bandwidth_usage_inc() static_key_slow_inc() cpus_read_lock() Reported-by: Tejun Heo Tested-by: Tejun Heo Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20180122215328.GP3397@worktop Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- include/linux/jump_label.h | 7 +++++++ kernel/jump_label.c | 12 +++++++++--- kernel/sched/fair.c | 4 ++-- 3 files changed, 18 insertions(+), 5 deletions(-) diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index 3b7675bcca645..cd0d2270998fb 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -160,6 +160,8 @@ extern void arch_jump_label_transform_static(struct jump_entry *entry, extern int jump_label_text_reserved(void *start, void *end); extern void static_key_slow_inc(struct static_key *key); extern void static_key_slow_dec(struct static_key *key); +extern void static_key_slow_inc_cpuslocked(struct static_key *key); +extern void static_key_slow_dec_cpuslocked(struct static_key *key); extern void jump_label_apply_nops(struct module *mod); extern int static_key_count(struct static_key *key); extern void static_key_enable(struct static_key *key); @@ -222,6 +224,9 @@ static inline void static_key_slow_dec(struct static_key *key) atomic_dec(&key->enabled); } +#define static_key_slow_inc_cpuslocked(key) static_key_slow_inc(key) +#define static_key_slow_dec_cpuslocked(key) static_key_slow_dec(key) + static inline int jump_label_text_reserved(void *start, void *end) { return 0; @@ -416,6 +421,8 @@ extern bool ____wrong_branch_error(void); #define static_branch_inc(x) static_key_slow_inc(&(x)->key) #define static_branch_dec(x) static_key_slow_dec(&(x)->key) +#define static_branch_inc_cpuslocked(x) static_key_slow_inc_cpuslocked(&(x)->key) +#define static_branch_dec_cpuslocked(x) static_key_slow_dec_cpuslocked(&(x)->key) /* * Normal usage; boolean enable/disable. diff --git a/kernel/jump_label.c b/kernel/jump_label.c index 7c3774ac1d51f..70be35a19be25 100644 --- a/kernel/jump_label.c +++ b/kernel/jump_label.c @@ -79,7 +79,7 @@ int static_key_count(struct static_key *key) } EXPORT_SYMBOL_GPL(static_key_count); -static void static_key_slow_inc_cpuslocked(struct static_key *key) +void static_key_slow_inc_cpuslocked(struct static_key *key) { int v, v1; @@ -180,7 +180,7 @@ void static_key_disable(struct static_key *key) } EXPORT_SYMBOL_GPL(static_key_disable); -static void static_key_slow_dec_cpuslocked(struct static_key *key, +static void __static_key_slow_dec_cpuslocked(struct static_key *key, unsigned long rate_limit, struct delayed_work *work) { @@ -211,7 +211,7 @@ static void __static_key_slow_dec(struct static_key *key, struct delayed_work *work) { cpus_read_lock(); - static_key_slow_dec_cpuslocked(key, rate_limit, work); + __static_key_slow_dec_cpuslocked(key, rate_limit, work); cpus_read_unlock(); } @@ -229,6 +229,12 @@ void static_key_slow_dec(struct static_key *key) } EXPORT_SYMBOL_GPL(static_key_slow_dec); +void static_key_slow_dec_cpuslocked(struct static_key *key) +{ + STATIC_KEY_CHECK_USE(); + __static_key_slow_dec_cpuslocked(key, 0, NULL); +} + void static_key_slow_dec_deferred(struct static_key_deferred *key) { STATIC_KEY_CHECK_USE(); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 2d4d79420e36e..7240bb4a40902 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -4040,12 +4040,12 @@ static inline bool cfs_bandwidth_used(void) void cfs_bandwidth_usage_inc(void) { - static_key_slow_inc(&__cfs_bandwidth_used); + static_key_slow_inc_cpuslocked(&__cfs_bandwidth_used); } void cfs_bandwidth_usage_dec(void) { - static_key_slow_dec(&__cfs_bandwidth_used); + static_key_slow_dec_cpuslocked(&__cfs_bandwidth_used); } #else /* HAVE_JUMP_LABEL */ static bool cfs_bandwidth_used(void) -- GitLab From da06d6d14a0a14d398b1e7c8024b090e40f4ec89 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Fri, 1 Jun 2018 10:59:19 -0400 Subject: [PATCH 1858/2269] x86/bugs: Add AMD's variant of SSB_NO commit 24809860012e0130fbafe536709e08a22b3e959e upstream The AMD document outlining the SSBD handling 124441_AMD64_SpeculativeStoreBypassDisable_Whitepaper_final.pdf mentions that the CPUID 8000_0008.EBX[26] will mean that the speculative store bypass disable is no longer needed. A copy of this document is available at: https://bugzilla.kernel.org/show_bug.cgi?id=199889 Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Thomas Gleixner Cc: Tom Lendacky Cc: Janakarajan Natarajan Cc: kvm@vger.kernel.org Cc: andrew.cooper3@citrix.com Cc: Andy Lutomirski Cc: "H. Peter Anvin" Cc: Borislav Petkov Cc: David Woodhouse Link: https://lkml.kernel.org/r/20180601145921.9500-2-konrad.wilk@oracle.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/kernel/cpu/common.c | 3 ++- arch/x86/kvm/cpuid.c | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 673d6e988196b..21a653dfc01bf 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -285,6 +285,7 @@ #define X86_FEATURE_AMD_IBRS (13*32+14) /* "" Indirect Branch Restricted Speculation */ #define X86_FEATURE_AMD_STIBP (13*32+15) /* "" Single Thread Indirect Branch Predictors */ #define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */ +#define X86_FEATURE_AMD_SSB_NO (13*32+26) /* "" Speculative Store Bypass is fixed in hardware. */ /* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */ #define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 96643e2c75b8b..065582692446d 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -958,7 +958,8 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap); if (!x86_match_cpu(cpu_no_spec_store_bypass) && - !(ia32_cap & ARCH_CAP_SSB_NO)) + !(ia32_cap & ARCH_CAP_SSB_NO) && + !cpu_has(c, X86_FEATURE_AMD_SSB_NO)) setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS); if (x86_match_cpu(cpu_no_speculation)) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index d1f5c744142b2..95a98397beca0 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -367,7 +367,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, /* cpuid 0x80000008.ebx */ const u32 kvm_cpuid_8000_0008_ebx_x86_features = - F(AMD_IBPB) | F(AMD_IBRS) | F(VIRT_SSBD); + F(AMD_IBPB) | F(AMD_IBRS) | F(VIRT_SSBD) | F(AMD_SSB_NO); /* cpuid 0xC0000001.edx */ const u32 kvm_cpuid_C000_0001_edx_x86_features = -- GitLab From 54b65f8eaf75872052565976f524e91133778c54 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Fri, 1 Jun 2018 10:59:20 -0400 Subject: [PATCH 1859/2269] x86/bugs: Add AMD's SPEC_CTRL MSR usage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 6ac2f49edb1ef5446089c7c660017732886d62d6 upstream The AMD document outlining the SSBD handling 124441_AMD64_SpeculativeStoreBypassDisable_Whitepaper_final.pdf mentions that if CPUID 8000_0008.EBX[24] is set we should be using the SPEC_CTRL MSR (0x48) over the VIRT SPEC_CTRL MSR (0xC001_011f) for speculative store bypass disable. This in effect means we should clear the X86_FEATURE_VIRT_SSBD flag so that we would prefer the SPEC_CTRL MSR. See the document titled: 124441_AMD64_SpeculativeStoreBypassDisable_Whitepaper_final.pdf A copy of this document is available at https://bugzilla.kernel.org/show_bug.cgi?id=199889 Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Thomas Gleixner Cc: Tom Lendacky Cc: Janakarajan Natarajan Cc: kvm@vger.kernel.org Cc: KarimAllah Ahmed Cc: andrew.cooper3@citrix.com Cc: Joerg Roedel Cc: Radim Krčmář Cc: Andy Lutomirski Cc: "H. Peter Anvin" Cc: Paolo Bonzini Cc: Borislav Petkov Cc: David Woodhouse Cc: Kees Cook Link: https://lkml.kernel.org/r/20180601145921.9500-3-konrad.wilk@oracle.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/kernel/cpu/bugs.c | 12 +++++++----- arch/x86/kernel/cpu/common.c | 6 ++++++ arch/x86/kvm/cpuid.c | 10 ++++++++-- arch/x86/kvm/svm.c | 8 +++++--- 5 files changed, 27 insertions(+), 10 deletions(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 21a653dfc01bf..7d910827126be 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -284,6 +284,7 @@ #define X86_FEATURE_AMD_IBPB (13*32+12) /* "" Indirect Branch Prediction Barrier */ #define X86_FEATURE_AMD_IBRS (13*32+14) /* "" Indirect Branch Restricted Speculation */ #define X86_FEATURE_AMD_STIBP (13*32+15) /* "" Single Thread Indirect Branch Predictors */ +#define X86_FEATURE_AMD_SSBD (13*32+24) /* "" Speculative Store Bypass Disable */ #define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */ #define X86_FEATURE_AMD_SSB_NO (13*32+26) /* "" Speculative Store Bypass is fixed in hardware. */ diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index e92aedd938064..fd79e16dab073 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -532,18 +532,20 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void) if (mode == SPEC_STORE_BYPASS_DISABLE) { setup_force_cpu_cap(X86_FEATURE_SPEC_STORE_BYPASS_DISABLE); /* - * Intel uses the SPEC CTRL MSR Bit(2) for this, while AMD uses - * a completely different MSR and bit dependent on family. + * Intel uses the SPEC CTRL MSR Bit(2) for this, while AMD may + * use a completely different MSR and bit dependent on family. */ switch (boot_cpu_data.x86_vendor) { case X86_VENDOR_INTEL: + case X86_VENDOR_AMD: + if (!static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) { + x86_amd_ssb_disable(); + break; + } x86_spec_ctrl_base |= SPEC_CTRL_SSBD; x86_spec_ctrl_mask |= SPEC_CTRL_SSBD; wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); break; - case X86_VENDOR_AMD: - x86_amd_ssb_disable(); - break; } } diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 065582692446d..51e49f6fe8e13 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -760,6 +760,12 @@ static void init_speculation_control(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_STIBP); set_cpu_cap(c, X86_FEATURE_MSR_SPEC_CTRL); } + + if (cpu_has(c, X86_FEATURE_AMD_SSBD)) { + set_cpu_cap(c, X86_FEATURE_SSBD); + set_cpu_cap(c, X86_FEATURE_MSR_SPEC_CTRL); + clear_cpu_cap(c, X86_FEATURE_VIRT_SSBD); + } } void get_cpu_cap(struct cpuinfo_x86 *c) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 95a98397beca0..bbcd69c76d964 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -367,7 +367,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, /* cpuid 0x80000008.ebx */ const u32 kvm_cpuid_8000_0008_ebx_x86_features = - F(AMD_IBPB) | F(AMD_IBRS) | F(VIRT_SSBD) | F(AMD_SSB_NO); + F(AMD_IBPB) | F(AMD_IBRS) | F(AMD_SSBD) | F(VIRT_SSBD) | + F(AMD_SSB_NO); /* cpuid 0xC0000001.edx */ const u32 kvm_cpuid_C000_0001_edx_x86_features = @@ -649,7 +650,12 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, entry->ebx |= F(VIRT_SSBD); entry->ebx &= kvm_cpuid_8000_0008_ebx_x86_features; cpuid_mask(&entry->ebx, CPUID_8000_0008_EBX); - if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD)) + /* + * The preference is to use SPEC CTRL MSR instead of the + * VIRT_SPEC MSR. + */ + if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD) && + !boot_cpu_has(X86_FEATURE_AMD_SSBD)) entry->ebx |= F(VIRT_SSBD); break; } diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index f6bebcec60b4e..e9dbd62bb46e1 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -3644,7 +3644,8 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) break; case MSR_IA32_SPEC_CTRL: if (!msr_info->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS)) + !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS) && + !guest_cpuid_has(vcpu, X86_FEATURE_AMD_SSBD)) return 1; msr_info->data = svm->spec_ctrl; @@ -3749,11 +3750,12 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) break; case MSR_IA32_SPEC_CTRL: if (!msr->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS)) + !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS) && + !guest_cpuid_has(vcpu, X86_FEATURE_AMD_SSBD)) return 1; /* The STIBP bit doesn't fault even if it's not advertised */ - if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP)) + if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_SSBD)) return 1; svm->spec_ctrl = data; -- GitLab From 6d2533a6818274602ef1a783594f2d6fc65d8fc6 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Fri, 1 Jun 2018 10:59:21 -0400 Subject: [PATCH 1860/2269] x86/bugs: Switch the selection of mitigation from CPU vendor to CPU features commit 108fab4b5c8f12064ef86e02cb0459992affb30f upstream Both AMD and Intel can have SPEC_CTRL_MSR for SSBD. However AMD also has two more other ways of doing it - which are !SPEC_CTRL MSR ways. Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Thomas Gleixner Cc: Kees Cook Cc: kvm@vger.kernel.org Cc: KarimAllah Ahmed Cc: andrew.cooper3@citrix.com Cc: "H. Peter Anvin" Cc: Borislav Petkov Cc: David Woodhouse Link: https://lkml.kernel.org/r/20180601145921.9500-4-konrad.wilk@oracle.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index fd79e16dab073..63a9f3fd3a37f 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -535,17 +535,12 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void) * Intel uses the SPEC CTRL MSR Bit(2) for this, while AMD may * use a completely different MSR and bit dependent on family. */ - switch (boot_cpu_data.x86_vendor) { - case X86_VENDOR_INTEL: - case X86_VENDOR_AMD: - if (!static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) { - x86_amd_ssb_disable(); - break; - } + if (!static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) + x86_amd_ssb_disable(); + else { x86_spec_ctrl_base |= SPEC_CTRL_SSBD; x86_spec_ctrl_mask |= SPEC_CTRL_SSBD; wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); - break; } } -- GitLab From 0071cdff923922f85a8204fd9bff56a44af2b9a3 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Mon, 2 Jul 2018 16:35:53 -0500 Subject: [PATCH 1861/2269] x86/bugs: Update when to check for the LS_CFG SSBD mitigation commit 845d382bb15c6e7dc5026c0ff919c5b13fc7e11b upstream If either the X86_FEATURE_AMD_SSBD or X86_FEATURE_VIRT_SSBD features are present, then there is no need to perform the check for the LS_CFG SSBD mitigation support. Signed-off-by: Tom Lendacky Cc: Borislav Petkov Cc: David Woodhouse Cc: Konrad Rzeszutek Wilk Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20180702213553.29202.21089.stgit@tlendack-t1.amdoffice.net Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/amd.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index dda741bd57892..7e03515662c0c 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -554,7 +554,9 @@ static void bsp_init_amd(struct cpuinfo_x86 *c) nodes_per_socket = ((value >> 3) & 7) + 1; } - if (c->x86 >= 0x15 && c->x86 <= 0x17) { + if (!boot_cpu_has(X86_FEATURE_AMD_SSBD) && + !boot_cpu_has(X86_FEATURE_VIRT_SSBD) && + c->x86 >= 0x15 && c->x86 <= 0x17) { unsigned int bit; switch (c->x86) { -- GitLab From 82647e461639a7cae7beaf2391f6b58f69b81110 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Mon, 2 Jul 2018 16:36:02 -0500 Subject: [PATCH 1862/2269] x86/bugs: Fix the AMD SSBD usage of the SPEC_CTRL MSR commit 612bc3b3d4be749f73a513a17d9b3ee1330d3487 upstream On AMD, the presence of the MSR_SPEC_CTRL feature does not imply that the SSBD mitigation support should use the SPEC_CTRL MSR. Other features could have caused the MSR_SPEC_CTRL feature to be set, while a different SSBD mitigation option is in place. Update the SSBD support to check for the actual SSBD features that will use the SPEC_CTRL MSR. Signed-off-by: Tom Lendacky Cc: Borislav Petkov Cc: David Woodhouse Cc: Konrad Rzeszutek Wilk Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 6ac2f49edb1e ("x86/bugs: Add AMD's SPEC_CTRL MSR usage") Link: http://lkml.kernel.org/r/20180702213602.29202.33151.stgit@tlendack-t1.amdoffice.net Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 63a9f3fd3a37f..50941119e13ea 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -166,7 +166,8 @@ x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest) guestval |= guest_spec_ctrl & x86_spec_ctrl_mask; /* SSBD controlled in MSR_SPEC_CTRL */ - if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD)) + if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) || + static_cpu_has(X86_FEATURE_AMD_SSBD)) hostval |= ssbd_tif_to_spec_ctrl(ti->flags); if (hostval != guestval) { @@ -535,9 +536,10 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void) * Intel uses the SPEC CTRL MSR Bit(2) for this, while AMD may * use a completely different MSR and bit dependent on family. */ - if (!static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) + if (!static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) && + !static_cpu_has(X86_FEATURE_AMD_SSBD)) { x86_amd_ssb_disable(); - else { + } else { x86_spec_ctrl_base |= SPEC_CTRL_SSBD; x86_spec_ctrl_mask |= SPEC_CTRL_SSBD; wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); -- GitLab From 300a6f27dd8277843e39b37cca5e0c5b203743cb Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Tue, 25 Sep 2018 14:38:55 +0200 Subject: [PATCH 1863/2269] x86/speculation: Enable cross-hyperthread spectre v2 STIBP mitigation commit 53c613fe6349994f023245519265999eed75957f upstream STIBP is a feature provided by certain Intel ucodes / CPUs. This feature (once enabled) prevents cross-hyperthread control of decisions made by indirect branch predictors. Enable this feature if - the CPU is vulnerable to spectre v2 - the CPU supports SMT and has SMT siblings online - spectre_v2 mitigation autoselection is enabled (default) After some previous discussion, this leaves STIBP on all the time, as wrmsr on crossing kernel boundary is a no-no. This could perhaps later be a bit more optimized (like disabling it in NOHZ, experiment with disabling it in idle, etc) if needed. Note that the synchronization of the mask manipulation via newly added spec_ctrl_mutex is currently not strictly needed, as the only updater is already being serialized by cpu_add_remove_lock, but let's make this a little bit more future-proof. Signed-off-by: Jiri Kosina Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: "WoodhouseDavid" Cc: Andi Kleen Cc: Tim Chen Cc: "SchauflerCasey" Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/nycvar.YFH.7.76.1809251438240.15880@cbobk.fhfr.pm Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 57 ++++++++++++++++++++++++++++++++++---- kernel/cpu.c | 11 +++++++- 2 files changed, 61 insertions(+), 7 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 50941119e13ea..ef02230221c52 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -34,12 +34,10 @@ static void __init spectre_v2_select_mitigation(void); static void __init ssb_select_mitigation(void); static void __init l1tf_select_mitigation(void); -/* - * Our boot-time value of the SPEC_CTRL MSR. We read it once so that any - * writes to SPEC_CTRL contain whatever reserved bits have been set. - */ -u64 __ro_after_init x86_spec_ctrl_base; +/* The base value of the SPEC_CTRL MSR that always has to be preserved. */ +u64 x86_spec_ctrl_base; EXPORT_SYMBOL_GPL(x86_spec_ctrl_base); +static DEFINE_MUTEX(spec_ctrl_mutex); /* * The vendor and possibly platform specific bits which can be modified in @@ -324,6 +322,46 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) return cmd; } +static bool stibp_needed(void) +{ + if (spectre_v2_enabled == SPECTRE_V2_NONE) + return false; + + if (!boot_cpu_has(X86_FEATURE_STIBP)) + return false; + + return true; +} + +static void update_stibp_msr(void *info) +{ + wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); +} + +void arch_smt_update(void) +{ + u64 mask; + + if (!stibp_needed()) + return; + + mutex_lock(&spec_ctrl_mutex); + mask = x86_spec_ctrl_base; + if (cpu_smt_control == CPU_SMT_ENABLED) + mask |= SPEC_CTRL_STIBP; + else + mask &= ~SPEC_CTRL_STIBP; + + if (mask != x86_spec_ctrl_base) { + pr_info("Spectre v2 cross-process SMT mitigation: %s STIBP\n", + cpu_smt_control == CPU_SMT_ENABLED ? + "Enabling" : "Disabling"); + x86_spec_ctrl_base = mask; + on_each_cpu(update_stibp_msr, NULL, 1); + } + mutex_unlock(&spec_ctrl_mutex); +} + static void __init spectre_v2_select_mitigation(void) { enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline(); @@ -423,6 +461,9 @@ specv2_set_mode: setup_force_cpu_cap(X86_FEATURE_USE_IBRS_FW); pr_info("Enabling Restricted Speculation for firmware calls\n"); } + + /* Enable STIBP if appropriate */ + arch_smt_update(); } #undef pr_fmt @@ -813,6 +854,8 @@ static ssize_t l1tf_show_state(char *buf) static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr, char *buf, unsigned int bug) { + int ret; + if (!boot_cpu_has_bug(bug)) return sprintf(buf, "Not affected\n"); @@ -827,10 +870,12 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr return sprintf(buf, "Mitigation: __user pointer sanitization\n"); case X86_BUG_SPECTRE_V2: - return sprintf(buf, "%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], + ret = sprintf(buf, "%s%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "", boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "", + (x86_spec_ctrl_base & SPEC_CTRL_STIBP) ? ", STIBP" : "", spectre_v2_module_string()); + return ret; case X86_BUG_SPEC_STORE_BYPASS: return sprintf(buf, "%s\n", ssb_strings[ssb_mode]); diff --git a/kernel/cpu.c b/kernel/cpu.c index f3f389e333430..90cf6a04e08a2 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -2045,6 +2045,12 @@ static void cpuhp_online_cpu_device(unsigned int cpu) kobject_uevent(&dev->kobj, KOBJ_ONLINE); } +/* + * Architectures that need SMT-specific errata handling during SMT hotplug + * should override this. + */ +void __weak arch_smt_update(void) { }; + static int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval) { int cpu, ret = 0; @@ -2071,8 +2077,10 @@ static int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval) */ cpuhp_offline_cpu_device(cpu); } - if (!ret) + if (!ret) { cpu_smt_control = ctrlval; + arch_smt_update(); + } cpu_maps_update_done(); return ret; } @@ -2083,6 +2091,7 @@ static int cpuhp_smt_enable(void) cpu_maps_update_begin(); cpu_smt_control = CPU_SMT_ENABLED; + arch_smt_update(); for_each_present_cpu(cpu) { /* Skip online CPUs and CPUs on offline nodes */ if (cpu_online(cpu) || !node_online(cpu_to_node(cpu))) -- GitLab From 4741e31931b27b9eba1e9df02d781a2f808b7d82 Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Tue, 25 Sep 2018 14:38:18 +0200 Subject: [PATCH 1864/2269] x86/speculation: Apply IBPB more strictly to avoid cross-process data leak commit dbfe2953f63c640463c630746cd5d9de8b2f63ae upstream Currently, IBPB is only issued in cases when switching into a non-dumpable process, the rationale being to protect such 'important and security sensitive' processess (such as GPG) from data leaking into a different userspace process via spectre v2. This is however completely insufficient to provide proper userspace-to-userpace spectrev2 protection, as any process can poison branch buffers before being scheduled out, and the newly scheduled process immediately becomes spectrev2 victim. In order to minimize the performance impact (for usecases that do require spectrev2 protection), issue the barrier only in cases when switching between processess where the victim can't be ptraced by the potential attacker (as in such cases, the attacker doesn't have to bother with branch buffers at all). [ tglx: Split up PTRACE_MODE_NOACCESS_CHK into PTRACE_MODE_SCHED and PTRACE_MODE_IBPB to be able to do ptrace() context tracking reasonably fine-grained ] Fixes: 18bf3c3ea8 ("x86/speculation: Use Indirect Branch Prediction Barrier in context switch") Originally-by: Tim Chen Signed-off-by: Jiri Kosina Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: "WoodhouseDavid" Cc: Andi Kleen Cc: "SchauflerCasey" Link: https://lkml.kernel.org/r/nycvar.YFH.7.76.1809251437340.15880@cbobk.fhfr.pm Signed-off-by: Greg Kroah-Hartman --- arch/x86/mm/tlb.c | 31 ++++++++++++++++++++----------- include/linux/ptrace.h | 21 +++++++++++++++++++-- kernel/ptrace.c | 10 ++++++++++ 3 files changed, 49 insertions(+), 13 deletions(-) diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 83a3f4c935fc5..9e943de9d6287 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ -180,6 +181,19 @@ static void sync_current_stack_to_mm(struct mm_struct *mm) } } +static bool ibpb_needed(struct task_struct *tsk, u64 last_ctx_id) +{ + /* + * Check if the current (previous) task has access to the memory + * of the @tsk (next) task. If access is denied, make sure to + * issue a IBPB to stop user->user Spectre-v2 attacks. + * + * Note: __ptrace_may_access() returns 0 or -ERRNO. + */ + return (tsk && tsk->mm && tsk->mm->context.ctx_id != last_ctx_id && + ptrace_may_access_sched(tsk, PTRACE_MODE_SPEC_IBPB)); +} + void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk) { @@ -256,18 +270,13 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, * one process from doing Spectre-v2 attacks on another. * * As an optimization, flush indirect branches only when - * switching into processes that disable dumping. This - * protects high value processes like gpg, without having - * too high performance overhead. IBPB is *expensive*! - * - * This will not flush branches when switching into kernel - * threads. It will also not flush if we switch to idle - * thread and back to the same process. It will flush if we - * switch to a different non-dumpable process. + * switching into a processes that can't be ptrace by the + * current one (as in such case, attacker has much more + * convenient way how to tamper with the next process than + * branch buffer poisoning). */ - if (tsk && tsk->mm && - tsk->mm->context.ctx_id != last_ctx_id && - get_dumpable(tsk->mm) != SUID_DUMP_USER) + if (static_cpu_has(X86_FEATURE_USE_IBPB) && + ibpb_needed(tsk, last_ctx_id)) indirect_branch_prediction_barrier(); if (IS_ENABLED(CONFIG_VMAP_STACK)) { diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index 919b2a0b03074..b1d43b80abfa8 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -62,14 +62,17 @@ extern void exit_ptrace(struct task_struct *tracer, struct list_head *dead); #define PTRACE_MODE_READ 0x01 #define PTRACE_MODE_ATTACH 0x02 #define PTRACE_MODE_NOAUDIT 0x04 -#define PTRACE_MODE_FSCREDS 0x08 -#define PTRACE_MODE_REALCREDS 0x10 +#define PTRACE_MODE_FSCREDS 0x08 +#define PTRACE_MODE_REALCREDS 0x10 +#define PTRACE_MODE_SCHED 0x20 +#define PTRACE_MODE_IBPB 0x40 /* shorthands for READ/ATTACH and FSCREDS/REALCREDS combinations */ #define PTRACE_MODE_READ_FSCREDS (PTRACE_MODE_READ | PTRACE_MODE_FSCREDS) #define PTRACE_MODE_READ_REALCREDS (PTRACE_MODE_READ | PTRACE_MODE_REALCREDS) #define PTRACE_MODE_ATTACH_FSCREDS (PTRACE_MODE_ATTACH | PTRACE_MODE_FSCREDS) #define PTRACE_MODE_ATTACH_REALCREDS (PTRACE_MODE_ATTACH | PTRACE_MODE_REALCREDS) +#define PTRACE_MODE_SPEC_IBPB (PTRACE_MODE_ATTACH_REALCREDS | PTRACE_MODE_IBPB) /** * ptrace_may_access - check whether the caller is permitted to access @@ -87,6 +90,20 @@ extern void exit_ptrace(struct task_struct *tracer, struct list_head *dead); */ extern bool ptrace_may_access(struct task_struct *task, unsigned int mode); +/** + * ptrace_may_access - check whether the caller is permitted to access + * a target task. + * @task: target task + * @mode: selects type of access and caller credentials + * + * Returns true on success, false on denial. + * + * Similar to ptrace_may_access(). Only to be called from context switch + * code. Does not call into audit and the regular LSM hooks due to locking + * constraints. + */ +extern bool ptrace_may_access_sched(struct task_struct *task, unsigned int mode); + static inline int ptrace_reparented(struct task_struct *child) { return !same_thread_group(child->real_parent, child->parent); diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 84b1367935e41..d7be2159ac09a 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -261,6 +261,9 @@ static int ptrace_check_attach(struct task_struct *child, bool ignore_state) static int ptrace_has_cap(struct user_namespace *ns, unsigned int mode) { + if (mode & PTRACE_MODE_SCHED) + return false; + if (mode & PTRACE_MODE_NOAUDIT) return has_ns_capability_noaudit(current, ns, CAP_SYS_PTRACE); else @@ -328,9 +331,16 @@ ok: !ptrace_has_cap(mm->user_ns, mode))) return -EPERM; + if (mode & PTRACE_MODE_SCHED) + return 0; return security_ptrace_access_check(task, mode); } +bool ptrace_may_access_sched(struct task_struct *task, unsigned int mode) +{ + return __ptrace_may_access(task, mode | PTRACE_MODE_SCHED); +} + bool ptrace_may_access(struct task_struct *task, unsigned int mode) { int err; -- GitLab From 0d55dce78a0e96f0909f4b7395887e84ba637254 Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Tue, 25 Sep 2018 14:39:28 +0200 Subject: [PATCH 1865/2269] x86/speculation: Propagate information about RSB filling mitigation to sysfs commit bb4b3b7762735cdaba5a40fd94c9303d9ffa147a upstream If spectrev2 mitigation has been enabled, RSB is filled on context switch in order to protect from various classes of spectrev2 attacks. If this mitigation is enabled, say so in sysfs for spectrev2. Signed-off-by: Jiri Kosina Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: "WoodhouseDavid" Cc: Andi Kleen Cc: Tim Chen Cc: "SchauflerCasey" Link: https://lkml.kernel.org/r/nycvar.YFH.7.76.1809251438580.15880@cbobk.fhfr.pm Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index ef02230221c52..700d1149601e0 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -870,10 +870,11 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr return sprintf(buf, "Mitigation: __user pointer sanitization\n"); case X86_BUG_SPECTRE_V2: - ret = sprintf(buf, "%s%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], + ret = sprintf(buf, "%s%s%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "", boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "", (x86_spec_ctrl_base & SPEC_CTRL_STIBP) ? ", STIBP" : "", + boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? ", RSB filling" : "", spectre_v2_module_string()); return ret; -- GitLab From 46dfe55fbee9be96f70482f3053b084fad3858d0 Mon Sep 17 00:00:00 2001 From: Zhenzhong Duan Date: Tue, 18 Sep 2018 07:45:00 -0700 Subject: [PATCH 1866/2269] x86/speculation: Add RETPOLINE_AMD support to the inline asm CALL_NOSPEC variant commit 0cbb76d6285794f30953bfa3ab831714b59dd700 upstream ..so that they match their asm counterpart. Add the missing ANNOTATE_NOSPEC_ALTERNATIVE in CALL_NOSPEC, while at it. Signed-off-by: Zhenzhong Duan Signed-off-by: Borislav Petkov Cc: Daniel Borkmann Cc: David Woodhouse Cc: H. Peter Anvin Cc: Ingo Molnar Cc: Konrad Rzeszutek Wilk Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Wang YanQing Cc: dhaval.giani@oracle.com Cc: srinivas.eeda@oracle.com Link: http://lkml.kernel.org/r/c3975665-173e-4d70-8dee-06c926ac26ee@default Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/nospec-branch.h | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 1b4132161c1fe..7827f17e9a8a0 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -170,11 +170,15 @@ */ # define CALL_NOSPEC \ ANNOTATE_NOSPEC_ALTERNATIVE \ - ALTERNATIVE( \ + ALTERNATIVE_2( \ ANNOTATE_RETPOLINE_SAFE \ "call *%[thunk_target]\n", \ "call __x86_indirect_thunk_%V[thunk_target]\n", \ - X86_FEATURE_RETPOLINE) + X86_FEATURE_RETPOLINE, \ + "lfence;\n" \ + ANNOTATE_RETPOLINE_SAFE \ + "call *%[thunk_target]\n", \ + X86_FEATURE_RETPOLINE_AMD) # define THUNK_TARGET(addr) [thunk_target] "r" (addr) #elif defined(CONFIG_X86_32) && defined(CONFIG_RETPOLINE) @@ -184,7 +188,8 @@ * here, anyway. */ # define CALL_NOSPEC \ - ALTERNATIVE( \ + ANNOTATE_NOSPEC_ALTERNATIVE \ + ALTERNATIVE_2( \ ANNOTATE_RETPOLINE_SAFE \ "call *%[thunk_target]\n", \ " jmp 904f;\n" \ @@ -199,7 +204,11 @@ " ret;\n" \ " .align 16\n" \ "904: call 901b;\n", \ - X86_FEATURE_RETPOLINE) + X86_FEATURE_RETPOLINE, \ + "lfence;\n" \ + ANNOTATE_RETPOLINE_SAFE \ + "call *%[thunk_target]\n", \ + X86_FEATURE_RETPOLINE_AMD) # define THUNK_TARGET(addr) [thunk_target] "rm" (addr) #else /* No retpoline for C / inline asm */ -- GitLab From a9c90037af800bcc74bbdac338355c7f331bd913 Mon Sep 17 00:00:00 2001 From: Zhenzhong Duan Date: Fri, 2 Nov 2018 01:45:41 -0700 Subject: [PATCH 1867/2269] x86/retpoline: Make CONFIG_RETPOLINE depend on compiler support commit 4cd24de3a0980bf3100c9dcb08ef65ca7c31af48 upstream Since retpoline capable compilers are widely available, make CONFIG_RETPOLINE hard depend on the compiler capability. Break the build when CONFIG_RETPOLINE is enabled and the compiler does not support it. Emit an error message in that case: "arch/x86/Makefile:226: *** You are building kernel with non-retpoline compiler, please update your compiler.. Stop." [dwmw: Fail the build with non-retpoline compiler] Suggested-by: Peter Zijlstra Signed-off-by: Zhenzhong Duan Signed-off-by: Thomas Gleixner Cc: David Woodhouse Cc: Borislav Petkov Cc: Daniel Borkmann Cc: H. Peter Anvin Cc: Konrad Rzeszutek Wilk Cc: Andy Lutomirski Cc: Masahiro Yamada Cc: Michal Marek Cc: Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/cca0cb20-f9e2-4094-840b-fb0f8810cd34@default Signed-off-by: Greg Kroah-Hartman --- arch/x86/Kconfig | 4 ---- arch/x86/Makefile | 5 +++-- arch/x86/include/asm/nospec-branch.h | 10 ++++++---- arch/x86/kernel/cpu/bugs.c | 2 +- scripts/Makefile.build | 2 -- 5 files changed, 10 insertions(+), 13 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 2af0af33362a6..026c2f2ac702d 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -440,10 +440,6 @@ config RETPOLINE branches. Requires a compiler with -mindirect-branch=thunk-extern support for full protection. The kernel may run slower. - Without compiler support, at least indirect branches in assembler - code are eliminated. Since this includes the syscall entry path, - it is not entirely pointless. - config INTEL_RDT bool "Intel Resource Director Technology support" default n diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 1c4d012550ec5..ce3658dd98e82 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -241,9 +241,10 @@ KBUILD_CFLAGS += -fno-asynchronous-unwind-tables # Avoid indirect branches in kernel to deal with Spectre ifdef CONFIG_RETPOLINE -ifneq ($(RETPOLINE_CFLAGS),) - KBUILD_CFLAGS += $(RETPOLINE_CFLAGS) -DRETPOLINE +ifeq ($(RETPOLINE_CFLAGS),) + $(error You are building kernel with non-retpoline compiler, please update your compiler.) endif + KBUILD_CFLAGS += $(RETPOLINE_CFLAGS) endif archscripts: scripts_basic diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 7827f17e9a8a0..690c838e3a7e7 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -162,11 +162,12 @@ _ASM_PTR " 999b\n\t" \ ".popsection\n\t" -#if defined(CONFIG_X86_64) && defined(RETPOLINE) +#ifdef CONFIG_RETPOLINE +#ifdef CONFIG_X86_64 /* - * Since the inline asm uses the %V modifier which is only in newer GCC, - * the 64-bit one is dependent on RETPOLINE not CONFIG_RETPOLINE. + * Inline asm uses the %V modifier which is only in newer GCC + * which is ensured when CONFIG_RETPOLINE is defined. */ # define CALL_NOSPEC \ ANNOTATE_NOSPEC_ALTERNATIVE \ @@ -181,7 +182,7 @@ X86_FEATURE_RETPOLINE_AMD) # define THUNK_TARGET(addr) [thunk_target] "r" (addr) -#elif defined(CONFIG_X86_32) && defined(CONFIG_RETPOLINE) +#else /* CONFIG_X86_32 */ /* * For i386 we use the original ret-equivalent retpoline, because * otherwise we'll run out of registers. We don't care about CET @@ -211,6 +212,7 @@ X86_FEATURE_RETPOLINE_AMD) # define THUNK_TARGET(addr) [thunk_target] "rm" (addr) +#endif #else /* No retpoline for C / inline asm */ # define CALL_NOSPEC "call *%[thunk_target]\n" # define THUNK_TARGET(addr) [thunk_target] "rm" (addr) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 700d1149601e0..a0146ee52124e 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -251,7 +251,7 @@ static void __init spec2_print_if_secure(const char *reason) static inline bool retp_compiler(void) { - return __is_defined(RETPOLINE); + return __is_defined(CONFIG_RETPOLINE); } static inline bool match_option(const char *arg, int arglen, const char *opt) diff --git a/scripts/Makefile.build b/scripts/Makefile.build index 7143da06d702b..be9e5deb58ba2 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -272,10 +272,8 @@ else objtool_args += $(call cc-ifversion, -lt, 0405, --no-unreachable) endif ifdef CONFIG_RETPOLINE -ifneq ($(RETPOLINE_CFLAGS),) objtool_args += --retpoline endif -endif ifdef CONFIG_MODVERSIONS -- GitLab From 8bbb50b6de99c178acd648a73098009073139f46 Mon Sep 17 00:00:00 2001 From: Zhenzhong Duan Date: Fri, 2 Nov 2018 01:45:41 -0700 Subject: [PATCH 1868/2269] x86/retpoline: Remove minimal retpoline support commit ef014aae8f1cd2793e4e014bbb102bed53f852b7 upstream Now that CONFIG_RETPOLINE hard depends on compiler support, there is no reason to keep the minimal retpoline support around which only provided basic protection in the assembly files. Suggested-by: Peter Zijlstra Signed-off-by: Zhenzhong Duan Signed-off-by: Thomas Gleixner Cc: David Woodhouse Cc: Borislav Petkov Cc: H. Peter Anvin Cc: Konrad Rzeszutek Wilk Cc: Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/f06f0a89-5587-45db-8ed2-0a9d6638d5c0@default Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/nospec-branch.h | 3 --- arch/x86/kernel/cpu/bugs.c | 13 ++----------- 2 files changed, 2 insertions(+), 14 deletions(-) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 690c838e3a7e7..20d016b4e986f 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -221,11 +221,8 @@ /* The Spectre V2 mitigation variants */ enum spectre_v2_mitigation { SPECTRE_V2_NONE, - SPECTRE_V2_RETPOLINE_MINIMAL, - SPECTRE_V2_RETPOLINE_MINIMAL_AMD, SPECTRE_V2_RETPOLINE_GENERIC, SPECTRE_V2_RETPOLINE_AMD, - SPECTRE_V2_IBRS, SPECTRE_V2_IBRS_ENHANCED, }; diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index a0146ee52124e..dd87fe5397c33 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -134,8 +134,6 @@ enum spectre_v2_mitigation_cmd { static const char *spectre_v2_strings[] = { [SPECTRE_V2_NONE] = "Vulnerable", - [SPECTRE_V2_RETPOLINE_MINIMAL] = "Vulnerable: Minimal generic ASM retpoline", - [SPECTRE_V2_RETPOLINE_MINIMAL_AMD] = "Vulnerable: Minimal AMD ASM retpoline", [SPECTRE_V2_RETPOLINE_GENERIC] = "Mitigation: Full generic retpoline", [SPECTRE_V2_RETPOLINE_AMD] = "Mitigation: Full AMD retpoline", [SPECTRE_V2_IBRS_ENHANCED] = "Mitigation: Enhanced IBRS", @@ -249,11 +247,6 @@ static void __init spec2_print_if_secure(const char *reason) pr_info("%s selected on command line.\n", reason); } -static inline bool retp_compiler(void) -{ - return __is_defined(CONFIG_RETPOLINE); -} - static inline bool match_option(const char *arg, int arglen, const char *opt) { int len = strlen(opt); @@ -414,14 +407,12 @@ retpoline_auto: pr_err("Spectre mitigation: LFENCE not serializing, switching to generic retpoline\n"); goto retpoline_generic; } - mode = retp_compiler() ? SPECTRE_V2_RETPOLINE_AMD : - SPECTRE_V2_RETPOLINE_MINIMAL_AMD; + mode = SPECTRE_V2_RETPOLINE_AMD; setup_force_cpu_cap(X86_FEATURE_RETPOLINE_AMD); setup_force_cpu_cap(X86_FEATURE_RETPOLINE); } else { retpoline_generic: - mode = retp_compiler() ? SPECTRE_V2_RETPOLINE_GENERIC : - SPECTRE_V2_RETPOLINE_MINIMAL; + mode = SPECTRE_V2_RETPOLINE_GENERIC; setup_force_cpu_cap(X86_FEATURE_RETPOLINE); } -- GitLab From 7d422ca1950a1f04bc467e706ae6f89840b0bd16 Mon Sep 17 00:00:00 2001 From: Tim Chen Date: Sun, 25 Nov 2018 19:33:29 +0100 Subject: [PATCH 1869/2269] x86/speculation: Update the TIF_SSBD comment commit 8eb729b77faf83ac4c1f363a9ad68d042415f24c upstream "Reduced Data Speculation" is an obsolete term. The correct new name is "Speculative store bypass disable" - which is abbreviated into SSBD. Signed-off-by: Tim Chen Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181125185003.593893901@linutronix.de Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/thread_info.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 95ff2d7f553f3..2976312b826a8 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -81,7 +81,7 @@ struct thread_info { #define TIF_SIGPENDING 2 /* signal pending */ #define TIF_NEED_RESCHED 3 /* rescheduling necessary */ #define TIF_SINGLESTEP 4 /* reenable singlestep on user return*/ -#define TIF_SSBD 5 /* Reduced data speculation */ +#define TIF_SSBD 5 /* Speculative store bypass disable */ #define TIF_SYSCALL_EMU 6 /* syscall emulation active */ #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ #define TIF_SECCOMP 8 /* secure computing */ -- GitLab From 4398714cb7d595725a4840da137faeac47c10669 Mon Sep 17 00:00:00 2001 From: Tim Chen Date: Sun, 25 Nov 2018 19:33:30 +0100 Subject: [PATCH 1870/2269] x86/speculation: Clean up spectre_v2_parse_cmdline() commit 24848509aa55eac39d524b587b051f4e86df3c12 upstream Remove the unnecessary 'else' statement in spectre_v2_parse_cmdline() to save an indentation level. Signed-off-by: Tim Chen Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181125185003.688010903@linutronix.de Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index dd87fe5397c33..cb087dee51173 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -275,22 +275,21 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) if (cmdline_find_option_bool(boot_command_line, "nospectre_v2")) return SPECTRE_V2_CMD_NONE; - else { - ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, sizeof(arg)); - if (ret < 0) - return SPECTRE_V2_CMD_AUTO; - for (i = 0; i < ARRAY_SIZE(mitigation_options); i++) { - if (!match_option(arg, ret, mitigation_options[i].option)) - continue; - cmd = mitigation_options[i].cmd; - break; - } + ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, sizeof(arg)); + if (ret < 0) + return SPECTRE_V2_CMD_AUTO; - if (i >= ARRAY_SIZE(mitigation_options)) { - pr_err("unknown option (%s). Switching to AUTO select\n", arg); - return SPECTRE_V2_CMD_AUTO; - } + for (i = 0; i < ARRAY_SIZE(mitigation_options); i++) { + if (!match_option(arg, ret, mitigation_options[i].option)) + continue; + cmd = mitigation_options[i].cmd; + break; + } + + if (i >= ARRAY_SIZE(mitigation_options)) { + pr_err("unknown option (%s). Switching to AUTO select\n", arg); + return SPECTRE_V2_CMD_AUTO; } if ((cmd == SPECTRE_V2_CMD_RETPOLINE || -- GitLab From 05dd5dc4c4fba8a8457b0bcb5336d4fe22d5c013 Mon Sep 17 00:00:00 2001 From: Tim Chen Date: Sun, 25 Nov 2018 19:33:31 +0100 Subject: [PATCH 1871/2269] x86/speculation: Remove unnecessary ret variable in cpu_show_common() commit b86bda0426853bfe8a3506c7d2a5b332760ae46b upstream Signed-off-by: Tim Chen Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181125185003.783903657@linutronix.de Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index cb087dee51173..fcd710ab26b5a 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -844,8 +844,6 @@ static ssize_t l1tf_show_state(char *buf) static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr, char *buf, unsigned int bug) { - int ret; - if (!boot_cpu_has_bug(bug)) return sprintf(buf, "Not affected\n"); @@ -860,13 +858,12 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr return sprintf(buf, "Mitigation: __user pointer sanitization\n"); case X86_BUG_SPECTRE_V2: - ret = sprintf(buf, "%s%s%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], + return sprintf(buf, "%s%s%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "", boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "", (x86_spec_ctrl_base & SPEC_CTRL_STIBP) ? ", STIBP" : "", boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? ", RSB filling" : "", spectre_v2_module_string()); - return ret; case X86_BUG_SPEC_STORE_BYPASS: return sprintf(buf, "%s\n", ssb_strings[ssb_mode]); -- GitLab From 8588c7d4a487927356a5026982bd89b45a2092fe Mon Sep 17 00:00:00 2001 From: Tim Chen Date: Sun, 25 Nov 2018 19:33:32 +0100 Subject: [PATCH 1872/2269] x86/speculation: Move STIPB/IBPB string conditionals out of cpu_show_common() commit a8f76ae41cd633ac00be1b3019b1eb4741be3828 upstream The Spectre V2 printout in cpu_show_common() handles conditionals for the various mitigation methods directly in the sprintf() argument list. That's hard to read and will become unreadable if more complex decisions need to be made for a particular method. Move the conditionals for STIBP and IBPB string selection into helper functions, so they can be extended later on. Signed-off-by: Tim Chen Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181125185003.874479208@linutronix.de Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index fcd710ab26b5a..06494a0cb107d 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -841,6 +841,22 @@ static ssize_t l1tf_show_state(char *buf) } #endif +static char *stibp_state(void) +{ + if (x86_spec_ctrl_base & SPEC_CTRL_STIBP) + return ", STIBP"; + else + return ""; +} + +static char *ibpb_state(void) +{ + if (boot_cpu_has(X86_FEATURE_USE_IBPB)) + return ", IBPB"; + else + return ""; +} + static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr, char *buf, unsigned int bug) { @@ -859,9 +875,9 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr case X86_BUG_SPECTRE_V2: return sprintf(buf, "%s%s%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], - boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "", + ibpb_state(), boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "", - (x86_spec_ctrl_base & SPEC_CTRL_STIBP) ? ", STIBP" : "", + stibp_state(), boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? ", RSB filling" : "", spectre_v2_module_string()); -- GitLab From aca2ddbc23800652ec2f09c2df76f417823cda77 Mon Sep 17 00:00:00 2001 From: Tim Chen Date: Sun, 25 Nov 2018 19:33:33 +0100 Subject: [PATCH 1873/2269] x86/speculation: Disable STIBP when enhanced IBRS is in use commit 34bce7c9690b1d897686aac89604ba7adc365556 upstream If enhanced IBRS is active, STIBP is redundant for mitigating Spectre v2 user space exploits from hyperthread sibling. Disable STIBP when enhanced IBRS is used. Signed-off-by: Tim Chen Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181125185003.966801480@linutronix.de Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 06494a0cb107d..3b2c5f26bde2b 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -319,6 +319,10 @@ static bool stibp_needed(void) if (spectre_v2_enabled == SPECTRE_V2_NONE) return false; + /* Enhanced IBRS makes using STIBP unnecessary. */ + if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) + return false; + if (!boot_cpu_has(X86_FEATURE_STIBP)) return false; @@ -843,6 +847,9 @@ static ssize_t l1tf_show_state(char *buf) static char *stibp_state(void) { + if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) + return ""; + if (x86_spec_ctrl_base & SPEC_CTRL_STIBP) return ", STIBP"; else -- GitLab From 999b295ad1a9980754c6d63e9f8be109a3574118 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 25 Nov 2018 19:33:34 +0100 Subject: [PATCH 1874/2269] x86/speculation: Rename SSBD update functions commit 26c4d75b234040c11728a8acb796b3a85ba7507c upstream During context switch, the SSBD bit in SPEC_CTRL MSR is updated according to changes of the TIF_SSBD flag in the current and next running task. Currently, only the bit controlling speculative store bypass disable in SPEC_CTRL MSR is updated and the related update functions all have "speculative_store" or "ssb" in their names. For enhanced mitigation control other bits in SPEC_CTRL MSR need to be updated as well, which makes the SSB names inadequate. Rename the "speculative_store*" functions to a more generic name. No functional change. Signed-off-by: Tim Chen Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181125185004.058866968@linutronix.de Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/spec-ctrl.h | 6 +++--- arch/x86/kernel/cpu/bugs.c | 4 ++-- arch/x86/kernel/process.c | 12 ++++++------ 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h index ae7c2c5cd7f0e..8e2f8411c7a76 100644 --- a/arch/x86/include/asm/spec-ctrl.h +++ b/arch/x86/include/asm/spec-ctrl.h @@ -70,11 +70,11 @@ extern void speculative_store_bypass_ht_init(void); static inline void speculative_store_bypass_ht_init(void) { } #endif -extern void speculative_store_bypass_update(unsigned long tif); +extern void speculation_ctrl_update(unsigned long tif); -static inline void speculative_store_bypass_update_current(void) +static inline void speculation_ctrl_update_current(void) { - speculative_store_bypass_update(current_thread_info()->flags); + speculation_ctrl_update(current_thread_info()->flags); } #endif diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 3b2c5f26bde2b..a06ddbd25219e 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -199,7 +199,7 @@ x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest) tif = setguest ? ssbd_spec_ctrl_to_tif(guestval) : ssbd_spec_ctrl_to_tif(hostval); - speculative_store_bypass_update(tif); + speculation_ctrl_update(tif); } } EXPORT_SYMBOL_GPL(x86_virt_spec_ctrl); @@ -629,7 +629,7 @@ static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl) * mitigation until it is next scheduled. */ if (task == current && update) - speculative_store_bypass_update_current(); + speculation_ctrl_update_current(); return 0; } diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 988a98f34c664..48c4a0fbd7644 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -398,27 +398,27 @@ static __always_inline void amd_set_ssb_virt_state(unsigned long tifn) wrmsrl(MSR_AMD64_VIRT_SPEC_CTRL, ssbd_tif_to_spec_ctrl(tifn)); } -static __always_inline void intel_set_ssb_state(unsigned long tifn) +static __always_inline void spec_ctrl_update_msr(unsigned long tifn) { u64 msr = x86_spec_ctrl_base | ssbd_tif_to_spec_ctrl(tifn); wrmsrl(MSR_IA32_SPEC_CTRL, msr); } -static __always_inline void __speculative_store_bypass_update(unsigned long tifn) +static __always_inline void __speculation_ctrl_update(unsigned long tifn) { if (static_cpu_has(X86_FEATURE_VIRT_SSBD)) amd_set_ssb_virt_state(tifn); else if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD)) amd_set_core_ssb_state(tifn); else - intel_set_ssb_state(tifn); + spec_ctrl_update_msr(tifn); } -void speculative_store_bypass_update(unsigned long tif) +void speculation_ctrl_update(unsigned long tif) { preempt_disable(); - __speculative_store_bypass_update(tif); + __speculation_ctrl_update(tif); preempt_enable(); } @@ -455,7 +455,7 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, set_cpuid_faulting(!!(tifn & _TIF_NOCPUID)); if ((tifp ^ tifn) & _TIF_SSBD) - __speculative_store_bypass_update(tifn); + __speculation_ctrl_update(tifn); } /* -- GitLab From bc4aa78e6954bd71295543163271262fede5d46f Mon Sep 17 00:00:00 2001 From: Tim Chen Date: Sun, 25 Nov 2018 19:33:35 +0100 Subject: [PATCH 1875/2269] x86/speculation: Reorganize speculation control MSRs update commit 01daf56875ee0cd50ed496a09b20eb369b45dfa5 upstream The logic to detect whether there's a change in the previous and next task's flag relevant to update speculation control MSRs is spread out across multiple functions. Consolidate all checks needed for updating speculation control MSRs into the new __speculation_ctrl_update() helper function. This makes it easy to pick the right speculation control MSR and the bits in MSR_IA32_SPEC_CTRL that need updating based on TIF flags changes. Originally-by: Thomas Lendacky Signed-off-by: Tim Chen Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181125185004.151077005@linutronix.de Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/process.c | 46 ++++++++++++++++++++++++--------------- 1 file changed, 29 insertions(+), 17 deletions(-) diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 48c4a0fbd7644..65c9edd08c27b 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -398,27 +398,40 @@ static __always_inline void amd_set_ssb_virt_state(unsigned long tifn) wrmsrl(MSR_AMD64_VIRT_SPEC_CTRL, ssbd_tif_to_spec_ctrl(tifn)); } -static __always_inline void spec_ctrl_update_msr(unsigned long tifn) -{ - u64 msr = x86_spec_ctrl_base | ssbd_tif_to_spec_ctrl(tifn); - - wrmsrl(MSR_IA32_SPEC_CTRL, msr); -} +/* + * Update the MSRs managing speculation control, during context switch. + * + * tifp: Previous task's thread flags + * tifn: Next task's thread flags + */ +static __always_inline void __speculation_ctrl_update(unsigned long tifp, + unsigned long tifn) +{ + u64 msr = x86_spec_ctrl_base; + bool updmsr = false; + + /* If TIF_SSBD is different, select the proper mitigation method */ + if ((tifp ^ tifn) & _TIF_SSBD) { + if (static_cpu_has(X86_FEATURE_VIRT_SSBD)) { + amd_set_ssb_virt_state(tifn); + } else if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD)) { + amd_set_core_ssb_state(tifn); + } else if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) || + static_cpu_has(X86_FEATURE_AMD_SSBD)) { + msr |= ssbd_tif_to_spec_ctrl(tifn); + updmsr = true; + } + } -static __always_inline void __speculation_ctrl_update(unsigned long tifn) -{ - if (static_cpu_has(X86_FEATURE_VIRT_SSBD)) - amd_set_ssb_virt_state(tifn); - else if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD)) - amd_set_core_ssb_state(tifn); - else - spec_ctrl_update_msr(tifn); + if (updmsr) + wrmsrl(MSR_IA32_SPEC_CTRL, msr); } void speculation_ctrl_update(unsigned long tif) { + /* Forced update. Make sure all relevant TIF flags are different */ preempt_disable(); - __speculation_ctrl_update(tif); + __speculation_ctrl_update(~tif, tif); preempt_enable(); } @@ -454,8 +467,7 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, if ((tifp ^ tifn) & _TIF_NOCPUID) set_cpuid_faulting(!!(tifn & _TIF_NOCPUID)); - if ((tifp ^ tifn) & _TIF_SSBD) - __speculation_ctrl_update(tifn); + __speculation_ctrl_update(tifp, tifn); } /* -- GitLab From 01659361c63fdc91c0af239d08cdd211d590a656 Mon Sep 17 00:00:00 2001 From: "Peter Zijlstra (Intel)" Date: Sun, 25 Nov 2018 19:33:36 +0100 Subject: [PATCH 1876/2269] sched/smt: Make sched_smt_present track topology commit c5511d03ec090980732e929c318a7a6374b5550e upstream Currently the 'sched_smt_present' static key is enabled when at CPU bringup SMT topology is observed, but it is never disabled. However there is demand to also disable the key when the topology changes such that there is no SMT present anymore. Implement this by making the key count the number of cores that have SMT enabled. In particular, the SMT topology bits are set before interrrupts are enabled and similarly, are cleared after interrupts are disabled for the last time and the CPU dies. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Tim Chen Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181125185004.246110444@linutronix.de Signed-off-by: Greg Kroah-Hartman --- kernel/sched/core.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 3bc664662081c..0552ddbb25e2a 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -5617,15 +5617,10 @@ int sched_cpu_activate(unsigned int cpu) #ifdef CONFIG_SCHED_SMT /* - * The sched_smt_present static key needs to be evaluated on every - * hotplug event because at boot time SMT might be disabled when - * the number of booted CPUs is limited. - * - * If then later a sibling gets hotplugged, then the key would stay - * off and SMT scheduling would never be functional. + * When going up, increment the number of cores with SMT present. */ - if (cpumask_weight(cpu_smt_mask(cpu)) > 1) - static_branch_enable_cpuslocked(&sched_smt_present); + if (cpumask_weight(cpu_smt_mask(cpu)) == 2) + static_branch_inc_cpuslocked(&sched_smt_present); #endif set_cpu_active(cpu, true); @@ -5669,6 +5664,14 @@ int sched_cpu_deactivate(unsigned int cpu) */ synchronize_rcu_mult(call_rcu, call_rcu_sched); +#ifdef CONFIG_SCHED_SMT + /* + * When going down, decrement the number of cores with SMT present. + */ + if (cpumask_weight(cpu_smt_mask(cpu)) == 2) + static_branch_dec_cpuslocked(&sched_smt_present); +#endif + if (!sched_smp_initialized) return 0; -- GitLab From 44ac7cd01544dd12241bc0c0e0eb5604ad77379a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 25 Nov 2018 19:33:37 +0100 Subject: [PATCH 1877/2269] x86/Kconfig: Select SCHED_SMT if SMP enabled commit dbe733642e01dd108f71436aaea7b328cb28fd87 upstream CONFIG_SCHED_SMT is enabled by all distros, so there is not a real point to have it configurable. The runtime overhead in the core scheduler code is minimal because the actual SMT scheduling parts are conditional on a static key. This allows to expose the scheduler's SMT state static key to the speculation control code. Alternatively the scheduler's static key could be made always available when CONFIG_SMP is enabled, but that's just adding an unused static key to every other architecture for nothing. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Tim Chen Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181125185004.337452245@linutronix.de Signed-off-by: Greg Kroah-Hartman --- arch/x86/Kconfig | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 026c2f2ac702d..4f393eb9745f3 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -955,13 +955,7 @@ config NR_CPUS approximately eight kilobytes to the kernel image. config SCHED_SMT - bool "SMT (Hyperthreading) scheduler support" - depends on SMP - ---help--- - SMT scheduler support improves the CPU scheduler's decision making - when dealing with Intel Pentium 4 chips with HyperThreading at a - cost of slightly increased overhead in some places. If unsure say - N here. + def_bool y if SMP config SCHED_MC def_bool y -- GitLab From 0e797117f18573ab31f72aed0924ff7999d860ea Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 25 Nov 2018 19:33:38 +0100 Subject: [PATCH 1878/2269] sched/smt: Expose sched_smt_present static key commit 321a874a7ef85655e93b3206d0f36b4a6097f948 upstream Make the scheduler's 'sched_smt_present' static key globaly available, so it can be used in the x86 speculation control code. Provide a query function and a stub for the CONFIG_SMP=n case. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Tim Chen Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181125185004.430168326@linutronix.de Signed-off-by: Greg Kroah-Hartman --- include/linux/sched/smt.h | 18 ++++++++++++++++++ kernel/sched/sched.h | 4 +--- 2 files changed, 19 insertions(+), 3 deletions(-) create mode 100644 include/linux/sched/smt.h diff --git a/include/linux/sched/smt.h b/include/linux/sched/smt.h new file mode 100644 index 0000000000000..c9e0be5141104 --- /dev/null +++ b/include/linux/sched/smt.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_SCHED_SMT_H +#define _LINUX_SCHED_SMT_H + +#include + +#ifdef CONFIG_SCHED_SMT +extern struct static_key_false sched_smt_present; + +static __always_inline bool sched_smt_active(void) +{ + return static_branch_likely(&sched_smt_present); +} +#else +static inline bool sched_smt_active(void) { return false; } +#endif + +#endif diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 63d999dfec80c..b3ba6e5e99f20 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -825,9 +826,6 @@ static inline int cpu_of(struct rq *rq) #ifdef CONFIG_SCHED_SMT - -extern struct static_key_false sched_smt_present; - extern void __update_idle_core(struct rq *rq); static inline void update_idle_core(struct rq *rq) -- GitLab From 36a4c5fc92857711019dc56be2669e19f8c3c86e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 25 Nov 2018 19:33:39 +0100 Subject: [PATCH 1879/2269] x86/speculation: Rework SMT state change commit a74cfffb03b73d41e08f84c2e5c87dec0ce3db9f upstream arch_smt_update() is only called when the sysfs SMT control knob is changed. This means that when SMT is enabled in the sysfs control knob the system is considered to have SMT active even if all siblings are offline. To allow finegrained control of the speculation mitigations, the actual SMT state is more interesting than the fact that siblings could be enabled. Rework the code, so arch_smt_update() is invoked from each individual CPU hotplug function, and simplify the update function while at it. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Tim Chen Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181125185004.521974984@linutronix.de Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 11 +++++------ include/linux/sched/smt.h | 2 ++ kernel/cpu.c | 15 +++++++++------ 3 files changed, 16 insertions(+), 12 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index a06ddbd25219e..fa7a35ab831fa 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -342,16 +343,14 @@ void arch_smt_update(void) return; mutex_lock(&spec_ctrl_mutex); - mask = x86_spec_ctrl_base; - if (cpu_smt_control == CPU_SMT_ENABLED) + + mask = x86_spec_ctrl_base & ~SPEC_CTRL_STIBP; + if (sched_smt_active()) mask |= SPEC_CTRL_STIBP; - else - mask &= ~SPEC_CTRL_STIBP; if (mask != x86_spec_ctrl_base) { pr_info("Spectre v2 cross-process SMT mitigation: %s STIBP\n", - cpu_smt_control == CPU_SMT_ENABLED ? - "Enabling" : "Disabling"); + mask & SPEC_CTRL_STIBP ? "Enabling" : "Disabling"); x86_spec_ctrl_base = mask; on_each_cpu(update_stibp_msr, NULL, 1); } diff --git a/include/linux/sched/smt.h b/include/linux/sched/smt.h index c9e0be5141104..59d3736c454cf 100644 --- a/include/linux/sched/smt.h +++ b/include/linux/sched/smt.h @@ -15,4 +15,6 @@ static __always_inline bool sched_smt_active(void) static inline bool sched_smt_active(void) { return false; } #endif +void arch_smt_update(void); + #endif diff --git a/kernel/cpu.c b/kernel/cpu.c index 90cf6a04e08a2..5c907d96e3ddd 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -347,6 +348,12 @@ void cpu_hotplug_enable(void) EXPORT_SYMBOL_GPL(cpu_hotplug_enable); #endif /* CONFIG_HOTPLUG_CPU */ +/* + * Architectures that need SMT-specific errata handling during SMT hotplug + * should override this. + */ +void __weak arch_smt_update(void) { } + #ifdef CONFIG_HOTPLUG_SMT enum cpuhp_smt_control cpu_smt_control __read_mostly = CPU_SMT_ENABLED; EXPORT_SYMBOL_GPL(cpu_smt_control); @@ -998,6 +1005,7 @@ out: * concurrent CPU hotplug via cpu_add_remove_lock. */ lockup_detector_cleanup(); + arch_smt_update(); return ret; } @@ -1126,6 +1134,7 @@ static int _cpu_up(unsigned int cpu, int tasks_frozen, enum cpuhp_state target) ret = cpuhp_up_callbacks(cpu, st, target); out: cpus_write_unlock(); + arch_smt_update(); return ret; } @@ -2045,12 +2054,6 @@ static void cpuhp_online_cpu_device(unsigned int cpu) kobject_uevent(&dev->kobj, KOBJ_ONLINE); } -/* - * Architectures that need SMT-specific errata handling during SMT hotplug - * should override this. - */ -void __weak arch_smt_update(void) { }; - static int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval) { int cpu, ret = 0; -- GitLab From 8345d546238f68e7fe219df25eeecb200f34334b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 25 Nov 2018 19:33:40 +0100 Subject: [PATCH 1880/2269] x86/l1tf: Show actual SMT state commit 130d6f946f6f2a972ee3ec8540b7243ab99abe97 upstream Use the now exposed real SMT state, not the SMT sysfs control knob state. This reflects the state of the system when the mitigation status is queried. This does not change the warning in the VMX launch code. There the dependency on the control knob makes sense because siblings could be brought online anytime after launching the VM. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Tim Chen Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181125185004.613357354@linutronix.de Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index fa7a35ab831fa..ccd08d1276245 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -829,13 +829,14 @@ static ssize_t l1tf_show_state(char *buf) if (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_EPT_DISABLED || (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_NEVER && - cpu_smt_control == CPU_SMT_ENABLED)) + sched_smt_active())) { return sprintf(buf, "%s; VMX: %s\n", L1TF_DEFAULT_MSG, l1tf_vmx_states[l1tf_vmx_mitigation]); + } return sprintf(buf, "%s; VMX: %s, SMT %s\n", L1TF_DEFAULT_MSG, l1tf_vmx_states[l1tf_vmx_mitigation], - cpu_smt_control == CPU_SMT_ENABLED ? "vulnerable" : "disabled"); + sched_smt_active() ? "vulnerable" : "disabled"); } #else static ssize_t l1tf_show_state(char *buf) -- GitLab From 72f90a89be9e125c1dd44a4d70b83055d1c3753e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 25 Nov 2018 19:33:41 +0100 Subject: [PATCH 1881/2269] x86/speculation: Reorder the spec_v2 code commit 15d6b7aab0793b2de8a05d8a828777dd24db424e upstream Reorder the code so it is better grouped. No functional change. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Tim Chen Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181125185004.707122879@linutronix.de Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 168 ++++++++++++++++++------------------- 1 file changed, 84 insertions(+), 84 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index ccd08d1276245..9fea8ff20c1f9 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -123,29 +123,6 @@ void __init check_bugs(void) #endif } -/* The kernel command line selection */ -enum spectre_v2_mitigation_cmd { - SPECTRE_V2_CMD_NONE, - SPECTRE_V2_CMD_AUTO, - SPECTRE_V2_CMD_FORCE, - SPECTRE_V2_CMD_RETPOLINE, - SPECTRE_V2_CMD_RETPOLINE_GENERIC, - SPECTRE_V2_CMD_RETPOLINE_AMD, -}; - -static const char *spectre_v2_strings[] = { - [SPECTRE_V2_NONE] = "Vulnerable", - [SPECTRE_V2_RETPOLINE_GENERIC] = "Mitigation: Full generic retpoline", - [SPECTRE_V2_RETPOLINE_AMD] = "Mitigation: Full AMD retpoline", - [SPECTRE_V2_IBRS_ENHANCED] = "Mitigation: Enhanced IBRS", -}; - -#undef pr_fmt -#define pr_fmt(fmt) "Spectre V2 : " fmt - -static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init = - SPECTRE_V2_NONE; - void x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest) { @@ -215,6 +192,12 @@ static void x86_amd_ssb_disable(void) wrmsrl(MSR_AMD64_LS_CFG, msrval); } +#undef pr_fmt +#define pr_fmt(fmt) "Spectre V2 : " fmt + +static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init = + SPECTRE_V2_NONE; + #ifdef RETPOLINE static bool spectre_v2_bad_module; @@ -236,18 +219,6 @@ static inline const char *spectre_v2_module_string(void) static inline const char *spectre_v2_module_string(void) { return ""; } #endif -static void __init spec2_print_if_insecure(const char *reason) -{ - if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) - pr_info("%s selected on command line.\n", reason); -} - -static void __init spec2_print_if_secure(const char *reason) -{ - if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) - pr_info("%s selected on command line.\n", reason); -} - static inline bool match_option(const char *arg, int arglen, const char *opt) { int len = strlen(opt); @@ -255,24 +226,53 @@ static inline bool match_option(const char *arg, int arglen, const char *opt) return len == arglen && !strncmp(arg, opt, len); } +/* The kernel command line selection for spectre v2 */ +enum spectre_v2_mitigation_cmd { + SPECTRE_V2_CMD_NONE, + SPECTRE_V2_CMD_AUTO, + SPECTRE_V2_CMD_FORCE, + SPECTRE_V2_CMD_RETPOLINE, + SPECTRE_V2_CMD_RETPOLINE_GENERIC, + SPECTRE_V2_CMD_RETPOLINE_AMD, +}; + +static const char *spectre_v2_strings[] = { + [SPECTRE_V2_NONE] = "Vulnerable", + [SPECTRE_V2_RETPOLINE_GENERIC] = "Mitigation: Full generic retpoline", + [SPECTRE_V2_RETPOLINE_AMD] = "Mitigation: Full AMD retpoline", + [SPECTRE_V2_IBRS_ENHANCED] = "Mitigation: Enhanced IBRS", +}; + static const struct { const char *option; enum spectre_v2_mitigation_cmd cmd; bool secure; } mitigation_options[] = { - { "off", SPECTRE_V2_CMD_NONE, false }, - { "on", SPECTRE_V2_CMD_FORCE, true }, - { "retpoline", SPECTRE_V2_CMD_RETPOLINE, false }, - { "retpoline,amd", SPECTRE_V2_CMD_RETPOLINE_AMD, false }, - { "retpoline,generic", SPECTRE_V2_CMD_RETPOLINE_GENERIC, false }, - { "auto", SPECTRE_V2_CMD_AUTO, false }, + { "off", SPECTRE_V2_CMD_NONE, false }, + { "on", SPECTRE_V2_CMD_FORCE, true }, + { "retpoline", SPECTRE_V2_CMD_RETPOLINE, false }, + { "retpoline,amd", SPECTRE_V2_CMD_RETPOLINE_AMD, false }, + { "retpoline,generic", SPECTRE_V2_CMD_RETPOLINE_GENERIC, false }, + { "auto", SPECTRE_V2_CMD_AUTO, false }, }; +static void __init spec2_print_if_insecure(const char *reason) +{ + if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) + pr_info("%s selected on command line.\n", reason); +} + +static void __init spec2_print_if_secure(const char *reason) +{ + if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) + pr_info("%s selected on command line.\n", reason); +} + static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) { + enum spectre_v2_mitigation_cmd cmd = SPECTRE_V2_CMD_AUTO; char arg[20]; int ret, i; - enum spectre_v2_mitigation_cmd cmd = SPECTRE_V2_CMD_AUTO; if (cmdline_find_option_bool(boot_command_line, "nospectre_v2")) return SPECTRE_V2_CMD_NONE; @@ -315,48 +315,6 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) return cmd; } -static bool stibp_needed(void) -{ - if (spectre_v2_enabled == SPECTRE_V2_NONE) - return false; - - /* Enhanced IBRS makes using STIBP unnecessary. */ - if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) - return false; - - if (!boot_cpu_has(X86_FEATURE_STIBP)) - return false; - - return true; -} - -static void update_stibp_msr(void *info) -{ - wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); -} - -void arch_smt_update(void) -{ - u64 mask; - - if (!stibp_needed()) - return; - - mutex_lock(&spec_ctrl_mutex); - - mask = x86_spec_ctrl_base & ~SPEC_CTRL_STIBP; - if (sched_smt_active()) - mask |= SPEC_CTRL_STIBP; - - if (mask != x86_spec_ctrl_base) { - pr_info("Spectre v2 cross-process SMT mitigation: %s STIBP\n", - mask & SPEC_CTRL_STIBP ? "Enabling" : "Disabling"); - x86_spec_ctrl_base = mask; - on_each_cpu(update_stibp_msr, NULL, 1); - } - mutex_unlock(&spec_ctrl_mutex); -} - static void __init spectre_v2_select_mitigation(void) { enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline(); @@ -459,6 +417,48 @@ specv2_set_mode: arch_smt_update(); } +static bool stibp_needed(void) +{ + if (spectre_v2_enabled == SPECTRE_V2_NONE) + return false; + + /* Enhanced IBRS makes using STIBP unnecessary. */ + if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) + return false; + + if (!boot_cpu_has(X86_FEATURE_STIBP)) + return false; + + return true; +} + +static void update_stibp_msr(void *info) +{ + wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); +} + +void arch_smt_update(void) +{ + u64 mask; + + if (!stibp_needed()) + return; + + mutex_lock(&spec_ctrl_mutex); + + mask = x86_spec_ctrl_base & ~SPEC_CTRL_STIBP; + if (sched_smt_active()) + mask |= SPEC_CTRL_STIBP; + + if (mask != x86_spec_ctrl_base) { + pr_info("Spectre v2 cross-process SMT mitigation: %s STIBP\n", + mask & SPEC_CTRL_STIBP ? "Enabling" : "Disabling"); + x86_spec_ctrl_base = mask; + on_each_cpu(update_stibp_msr, NULL, 1); + } + mutex_unlock(&spec_ctrl_mutex); +} + #undef pr_fmt #define pr_fmt(fmt) "Speculative Store Bypass: " fmt -- GitLab From caa118cf4c7997a35f6a18f5a16c38c7a9c71e3f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 25 Nov 2018 19:33:42 +0100 Subject: [PATCH 1882/2269] x86/speculation: Mark string arrays const correctly commit 8770709f411763884535662744a3786a1806afd3 upstream checkpatch.pl muttered when reshuffling the code: WARNING: static const char * array should probably be static const char * const Fix up all the string arrays. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Tim Chen Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181125185004.800018931@linutronix.de Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 9fea8ff20c1f9..45e08aacefe39 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -236,7 +236,7 @@ enum spectre_v2_mitigation_cmd { SPECTRE_V2_CMD_RETPOLINE_AMD, }; -static const char *spectre_v2_strings[] = { +static const char * const spectre_v2_strings[] = { [SPECTRE_V2_NONE] = "Vulnerable", [SPECTRE_V2_RETPOLINE_GENERIC] = "Mitigation: Full generic retpoline", [SPECTRE_V2_RETPOLINE_AMD] = "Mitigation: Full AMD retpoline", @@ -473,7 +473,7 @@ enum ssb_mitigation_cmd { SPEC_STORE_BYPASS_CMD_SECCOMP, }; -static const char *ssb_strings[] = { +static const char * const ssb_strings[] = { [SPEC_STORE_BYPASS_NONE] = "Vulnerable", [SPEC_STORE_BYPASS_DISABLE] = "Mitigation: Speculative Store Bypass disabled", [SPEC_STORE_BYPASS_PRCTL] = "Mitigation: Speculative Store Bypass disabled via prctl", @@ -813,7 +813,7 @@ early_param("l1tf", l1tf_cmdline); #define L1TF_DEFAULT_MSG "Mitigation: PTE Inversion" #if IS_ENABLED(CONFIG_KVM_INTEL) -static const char *l1tf_vmx_states[] = { +static const char * const l1tf_vmx_states[] = { [VMENTER_L1D_FLUSH_AUTO] = "auto", [VMENTER_L1D_FLUSH_NEVER] = "vulnerable", [VMENTER_L1D_FLUSH_COND] = "conditional cache flushes", -- GitLab From 95d41f1332f78fa6ad19459d1b98ad70407dd88b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 25 Nov 2018 19:33:43 +0100 Subject: [PATCH 1883/2269] x86/speculataion: Mark command line parser data __initdata commit 30ba72a990f5096ae08f284de17986461efcc408 upstream No point to keep that around. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Tim Chen Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181125185004.893886356@linutronix.de Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 45e08aacefe39..b11c7dfcae0e3 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -247,7 +247,7 @@ static const struct { const char *option; enum spectre_v2_mitigation_cmd cmd; bool secure; -} mitigation_options[] = { +} mitigation_options[] __initdata = { { "off", SPECTRE_V2_CMD_NONE, false }, { "on", SPECTRE_V2_CMD_FORCE, true }, { "retpoline", SPECTRE_V2_CMD_RETPOLINE, false }, @@ -483,7 +483,7 @@ static const char * const ssb_strings[] = { static const struct { const char *option; enum ssb_mitigation_cmd cmd; -} ssb_mitigation_options[] = { +} ssb_mitigation_options[] __initdata = { { "auto", SPEC_STORE_BYPASS_CMD_AUTO }, /* Platform decides */ { "on", SPEC_STORE_BYPASS_CMD_ON }, /* Disable Speculative Store Bypass */ { "off", SPEC_STORE_BYPASS_CMD_NONE }, /* Don't touch Speculative Store Bypass */ -- GitLab From ebd473909994d4ecce8e973f048f06d999f84845 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 25 Nov 2018 19:33:44 +0100 Subject: [PATCH 1884/2269] x86/speculation: Unify conditional spectre v2 print functions commit 495d470e9828500e0155027f230449ac5e29c025 upstream There is no point in having two functions and a conditional at the call site. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Tim Chen Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181125185004.986890749@linutronix.de Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index b11c7dfcae0e3..e3b444dd0039f 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -256,15 +256,9 @@ static const struct { { "auto", SPECTRE_V2_CMD_AUTO, false }, }; -static void __init spec2_print_if_insecure(const char *reason) +static void __init spec_v2_print_cond(const char *reason, bool secure) { - if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) - pr_info("%s selected on command line.\n", reason); -} - -static void __init spec2_print_if_secure(const char *reason) -{ - if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) + if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2) != secure) pr_info("%s selected on command line.\n", reason); } @@ -307,11 +301,8 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) return SPECTRE_V2_CMD_AUTO; } - if (mitigation_options[i].secure) - spec2_print_if_secure(mitigation_options[i].option); - else - spec2_print_if_insecure(mitigation_options[i].option); - + spec_v2_print_cond(mitigation_options[i].option, + mitigation_options[i].secure); return cmd; } -- GitLab From 90f293cc49fa44e24c07e74c89f3d87bc97cf41e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 25 Nov 2018 19:33:45 +0100 Subject: [PATCH 1885/2269] x86/speculation: Add command line control for indirect branch speculation commit fa1202ef224391b6f5b26cdd44cc50495e8fab54 upstream Add command line control for user space indirect branch speculation mitigations. The new option is: spectre_v2_user= The initial options are: - on: Unconditionally enabled - off: Unconditionally disabled -auto: Kernel selects mitigation (default off for now) When the spectre_v2= command line argument is either 'on' or 'off' this implies that the application to application control follows that state even if a contradicting spectre_v2_user= argument is supplied. Originally-by: Tim Chen Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181125185005.082720373@linutronix.de Signed-off-by: Greg Kroah-Hartman --- .../admin-guide/kernel-parameters.txt | 32 ++++- arch/x86/include/asm/nospec-branch.h | 10 ++ arch/x86/kernel/cpu/bugs.c | 133 +++++++++++++++--- 3 files changed, 156 insertions(+), 19 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 99a08722124d6..f0c19f61db807 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -3994,9 +3994,13 @@ spectre_v2= [X86] Control mitigation of Spectre variant 2 (indirect branch speculation) vulnerability. + The default operation protects the kernel from + user space attacks. - on - unconditionally enable - off - unconditionally disable + on - unconditionally enable, implies + spectre_v2_user=on + off - unconditionally disable, implies + spectre_v2_user=off auto - kernel detects whether your CPU model is vulnerable @@ -4006,6 +4010,12 @@ CONFIG_RETPOLINE configuration option, and the compiler with which the kernel was built. + Selecting 'on' will also enable the mitigation + against user space to user space task attacks. + + Selecting 'off' will disable both the kernel and + the user space protections. + Specific mitigations can also be selected manually: retpoline - replace indirect branches @@ -4015,6 +4025,24 @@ Not specifying this option is equivalent to spectre_v2=auto. + spectre_v2_user= + [X86] Control mitigation of Spectre variant 2 + (indirect branch speculation) vulnerability between + user space tasks + + on - Unconditionally enable mitigations. Is + enforced by spectre_v2=on + + off - Unconditionally disable mitigations. Is + enforced by spectre_v2=off + + auto - Kernel selects the mitigation depending on + the available CPU features and vulnerability. + Default is off. + + Not specifying this option is equivalent to + spectre_v2_user=auto. + spec_store_bypass_disable= [HW] Control Speculative Store Bypass (SSB) Disable mitigation (Speculative Store Bypass vulnerability) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 20d016b4e986f..6057086fde1c8 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -3,6 +3,8 @@ #ifndef _ASM_X86_NOSPEC_BRANCH_H_ #define _ASM_X86_NOSPEC_BRANCH_H_ +#include + #include #include #include @@ -226,6 +228,12 @@ enum spectre_v2_mitigation { SPECTRE_V2_IBRS_ENHANCED, }; +/* The indirect branch speculation control variants */ +enum spectre_v2_user_mitigation { + SPECTRE_V2_USER_NONE, + SPECTRE_V2_USER_STRICT, +}; + /* The Speculative Store Bypass disable variants */ enum ssb_mitigation { SPEC_STORE_BYPASS_NONE, @@ -303,6 +311,8 @@ do { \ preempt_enable(); \ } while (0) +DECLARE_STATIC_KEY_FALSE(switch_to_cond_stibp); + #endif /* __ASSEMBLY__ */ /* diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index e3b444dd0039f..bf5c3c4b9c194 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -53,6 +53,9 @@ static u64 __ro_after_init x86_spec_ctrl_mask = SPEC_CTRL_IBRS; u64 __ro_after_init x86_amd_ls_cfg_base; u64 __ro_after_init x86_amd_ls_cfg_ssbd_mask; +/* Control conditional STIPB in switch_to() */ +DEFINE_STATIC_KEY_FALSE(switch_to_cond_stibp); + void __init check_bugs(void) { identify_boot_cpu(); @@ -198,6 +201,9 @@ static void x86_amd_ssb_disable(void) static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init = SPECTRE_V2_NONE; +static enum spectre_v2_user_mitigation spectre_v2_user __ro_after_init = + SPECTRE_V2_USER_NONE; + #ifdef RETPOLINE static bool spectre_v2_bad_module; @@ -236,6 +242,104 @@ enum spectre_v2_mitigation_cmd { SPECTRE_V2_CMD_RETPOLINE_AMD, }; +enum spectre_v2_user_cmd { + SPECTRE_V2_USER_CMD_NONE, + SPECTRE_V2_USER_CMD_AUTO, + SPECTRE_V2_USER_CMD_FORCE, +}; + +static const char * const spectre_v2_user_strings[] = { + [SPECTRE_V2_USER_NONE] = "User space: Vulnerable", + [SPECTRE_V2_USER_STRICT] = "User space: Mitigation: STIBP protection", +}; + +static const struct { + const char *option; + enum spectre_v2_user_cmd cmd; + bool secure; +} v2_user_options[] __initdata = { + { "auto", SPECTRE_V2_USER_CMD_AUTO, false }, + { "off", SPECTRE_V2_USER_CMD_NONE, false }, + { "on", SPECTRE_V2_USER_CMD_FORCE, true }, +}; + +static void __init spec_v2_user_print_cond(const char *reason, bool secure) +{ + if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2) != secure) + pr_info("spectre_v2_user=%s forced on command line.\n", reason); +} + +static enum spectre_v2_user_cmd __init +spectre_v2_parse_user_cmdline(enum spectre_v2_mitigation_cmd v2_cmd) +{ + char arg[20]; + int ret, i; + + switch (v2_cmd) { + case SPECTRE_V2_CMD_NONE: + return SPECTRE_V2_USER_CMD_NONE; + case SPECTRE_V2_CMD_FORCE: + return SPECTRE_V2_USER_CMD_FORCE; + default: + break; + } + + ret = cmdline_find_option(boot_command_line, "spectre_v2_user", + arg, sizeof(arg)); + if (ret < 0) + return SPECTRE_V2_USER_CMD_AUTO; + + for (i = 0; i < ARRAY_SIZE(v2_user_options); i++) { + if (match_option(arg, ret, v2_user_options[i].option)) { + spec_v2_user_print_cond(v2_user_options[i].option, + v2_user_options[i].secure); + return v2_user_options[i].cmd; + } + } + + pr_err("Unknown user space protection option (%s). Switching to AUTO select\n", arg); + return SPECTRE_V2_USER_CMD_AUTO; +} + +static void __init +spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) +{ + enum spectre_v2_user_mitigation mode = SPECTRE_V2_USER_NONE; + bool smt_possible = IS_ENABLED(CONFIG_SMP); + + if (!boot_cpu_has(X86_FEATURE_IBPB) && !boot_cpu_has(X86_FEATURE_STIBP)) + return; + + if (cpu_smt_control == CPU_SMT_FORCE_DISABLED || + cpu_smt_control == CPU_SMT_NOT_SUPPORTED) + smt_possible = false; + + switch (spectre_v2_parse_user_cmdline(v2_cmd)) { + case SPECTRE_V2_USER_CMD_AUTO: + case SPECTRE_V2_USER_CMD_NONE: + goto set_mode; + case SPECTRE_V2_USER_CMD_FORCE: + mode = SPECTRE_V2_USER_STRICT; + break; + } + + /* Initialize Indirect Branch Prediction Barrier */ + if (boot_cpu_has(X86_FEATURE_IBPB)) { + setup_force_cpu_cap(X86_FEATURE_USE_IBPB); + pr_info("Spectre v2 mitigation: Enabling Indirect Branch Prediction Barrier\n"); + } + + /* If enhanced IBRS is enabled no STIPB required */ + if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) + return; + +set_mode: + spectre_v2_user = mode; + /* Only print the STIBP mode when SMT possible */ + if (smt_possible) + pr_info("%s\n", spectre_v2_user_strings[mode]); +} + static const char * const spectre_v2_strings[] = { [SPECTRE_V2_NONE] = "Vulnerable", [SPECTRE_V2_RETPOLINE_GENERIC] = "Mitigation: Full generic retpoline", @@ -382,12 +486,6 @@ specv2_set_mode: setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW); pr_info("Spectre v2 / SpectreRSB mitigation: Filling RSB on context switch\n"); - /* Initialize Indirect Branch Prediction Barrier if supported */ - if (boot_cpu_has(X86_FEATURE_IBPB)) { - setup_force_cpu_cap(X86_FEATURE_USE_IBPB); - pr_info("Spectre v2 mitigation: Enabling Indirect Branch Prediction Barrier\n"); - } - /* * Retpoline means the kernel is safe because it has no indirect * branches. Enhanced IBRS protects firmware too, so, enable restricted @@ -404,23 +502,21 @@ specv2_set_mode: pr_info("Enabling Restricted Speculation for firmware calls\n"); } + /* Set up IBPB and STIBP depending on the general spectre V2 command */ + spectre_v2_user_select_mitigation(cmd); + /* Enable STIBP if appropriate */ arch_smt_update(); } static bool stibp_needed(void) { - if (spectre_v2_enabled == SPECTRE_V2_NONE) - return false; - /* Enhanced IBRS makes using STIBP unnecessary. */ if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) return false; - if (!boot_cpu_has(X86_FEATURE_STIBP)) - return false; - - return true; + /* Check for strict user mitigation mode */ + return spectre_v2_user == SPECTRE_V2_USER_STRICT; } static void update_stibp_msr(void *info) @@ -841,10 +937,13 @@ static char *stibp_state(void) if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) return ""; - if (x86_spec_ctrl_base & SPEC_CTRL_STIBP) - return ", STIBP"; - else - return ""; + switch (spectre_v2_user) { + case SPECTRE_V2_USER_NONE: + return ", STIBP: disabled"; + case SPECTRE_V2_USER_STRICT: + return ", STIBP: forced"; + } + return ""; } static char *ibpb_state(void) -- GitLab From 1fe4e69a5ce7bf36fbd303e64cb63adb14708c0b Mon Sep 17 00:00:00 2001 From: Tim Chen Date: Sun, 25 Nov 2018 19:33:46 +0100 Subject: [PATCH 1886/2269] x86/speculation: Prepare for per task indirect branch speculation control commit 5bfbe3ad5840d941b89bcac54b821ba14f50a0ba upstream To avoid the overhead of STIBP always on, it's necessary to allow per task control of STIBP. Add a new task flag TIF_SPEC_IB and evaluate it during context switch if SMT is active and flag evaluation is enabled by the speculation control code. Add the conditional evaluation to x86_virt_spec_ctrl() as well so the guest/host switch works properly. This has no effect because TIF_SPEC_IB cannot be set yet and the static key which controls evaluation is off. Preparatory patch for adding the control code. [ tglx: Simplify the context switch logic and make the TIF evaluation depend on SMP=y and on the static key controlling the conditional update. Rename it to TIF_SPEC_IB because it controls both STIBP and IBPB ] Signed-off-by: Tim Chen Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181125185005.176917199@linutronix.de Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/msr-index.h | 5 +++-- arch/x86/include/asm/spec-ctrl.h | 12 ++++++++++++ arch/x86/include/asm/thread_info.h | 5 ++++- arch/x86/kernel/cpu/bugs.c | 4 ++++ arch/x86/kernel/process.c | 20 ++++++++++++++++++-- 5 files changed, 41 insertions(+), 5 deletions(-) diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index ef7eec669a1bc..62c62d3eb0ff8 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -41,9 +41,10 @@ #define MSR_IA32_SPEC_CTRL 0x00000048 /* Speculation Control */ #define SPEC_CTRL_IBRS (1 << 0) /* Indirect Branch Restricted Speculation */ -#define SPEC_CTRL_STIBP (1 << 1) /* Single Thread Indirect Branch Predictors */ +#define SPEC_CTRL_STIBP_SHIFT 1 /* Single Thread Indirect Branch Predictor (STIBP) bit */ +#define SPEC_CTRL_STIBP (1 << SPEC_CTRL_STIBP_SHIFT) /* STIBP mask */ #define SPEC_CTRL_SSBD_SHIFT 2 /* Speculative Store Bypass Disable bit */ -#define SPEC_CTRL_SSBD (1 << SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */ +#define SPEC_CTRL_SSBD (1 << SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */ #define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */ #define PRED_CMD_IBPB (1 << 0) /* Indirect Branch Prediction Barrier */ diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h index 8e2f8411c7a76..27b0bce3933ba 100644 --- a/arch/x86/include/asm/spec-ctrl.h +++ b/arch/x86/include/asm/spec-ctrl.h @@ -53,12 +53,24 @@ static inline u64 ssbd_tif_to_spec_ctrl(u64 tifn) return (tifn & _TIF_SSBD) >> (TIF_SSBD - SPEC_CTRL_SSBD_SHIFT); } +static inline u64 stibp_tif_to_spec_ctrl(u64 tifn) +{ + BUILD_BUG_ON(TIF_SPEC_IB < SPEC_CTRL_STIBP_SHIFT); + return (tifn & _TIF_SPEC_IB) >> (TIF_SPEC_IB - SPEC_CTRL_STIBP_SHIFT); +} + static inline unsigned long ssbd_spec_ctrl_to_tif(u64 spec_ctrl) { BUILD_BUG_ON(TIF_SSBD < SPEC_CTRL_SSBD_SHIFT); return (spec_ctrl & SPEC_CTRL_SSBD) << (TIF_SSBD - SPEC_CTRL_SSBD_SHIFT); } +static inline unsigned long stibp_spec_ctrl_to_tif(u64 spec_ctrl) +{ + BUILD_BUG_ON(TIF_SPEC_IB < SPEC_CTRL_STIBP_SHIFT); + return (spec_ctrl & SPEC_CTRL_STIBP) << (TIF_SPEC_IB - SPEC_CTRL_STIBP_SHIFT); +} + static inline u64 ssbd_tif_to_amd_ls_cfg(u64 tifn) { return (tifn & _TIF_SSBD) ? x86_amd_ls_cfg_ssbd_mask : 0ULL; diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 2976312b826a8..78ebda74b225f 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -85,6 +85,7 @@ struct thread_info { #define TIF_SYSCALL_EMU 6 /* syscall emulation active */ #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ #define TIF_SECCOMP 8 /* secure computing */ +#define TIF_SPEC_IB 9 /* Indirect branch speculation mitigation */ #define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */ #define TIF_UPROBE 12 /* breakpointed or singlestepping */ #define TIF_PATCH_PENDING 13 /* pending live patching update */ @@ -112,6 +113,7 @@ struct thread_info { #define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU) #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) #define _TIF_SECCOMP (1 << TIF_SECCOMP) +#define _TIF_SPEC_IB (1 << TIF_SPEC_IB) #define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY) #define _TIF_UPROBE (1 << TIF_UPROBE) #define _TIF_PATCH_PENDING (1 << TIF_PATCH_PENDING) @@ -148,7 +150,8 @@ struct thread_info { /* flags to check in __switch_to() */ #define _TIF_WORK_CTXSW \ - (_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP|_TIF_SSBD) + (_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP| \ + _TIF_SSBD|_TIF_SPEC_IB) #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY) #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index bf5c3c4b9c194..5bba757fead18 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -147,6 +147,10 @@ x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest) static_cpu_has(X86_FEATURE_AMD_SSBD)) hostval |= ssbd_tif_to_spec_ctrl(ti->flags); + /* Conditional STIBP enabled? */ + if (static_branch_unlikely(&switch_to_cond_stibp)) + hostval |= stibp_tif_to_spec_ctrl(ti->flags); + if (hostval != guestval) { msrval = setguest ? guestval : hostval; wrmsrl(MSR_IA32_SPEC_CTRL, msrval); diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 65c9edd08c27b..2baa10f05052e 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -407,11 +407,17 @@ static __always_inline void amd_set_ssb_virt_state(unsigned long tifn) static __always_inline void __speculation_ctrl_update(unsigned long tifp, unsigned long tifn) { + unsigned long tif_diff = tifp ^ tifn; u64 msr = x86_spec_ctrl_base; bool updmsr = false; - /* If TIF_SSBD is different, select the proper mitigation method */ - if ((tifp ^ tifn) & _TIF_SSBD) { + /* + * If TIF_SSBD is different, select the proper mitigation + * method. Note that if SSBD mitigation is disabled or permanentely + * enabled this branch can't be taken because nothing can set + * TIF_SSBD. + */ + if (tif_diff & _TIF_SSBD) { if (static_cpu_has(X86_FEATURE_VIRT_SSBD)) { amd_set_ssb_virt_state(tifn); } else if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD)) { @@ -423,6 +429,16 @@ static __always_inline void __speculation_ctrl_update(unsigned long tifp, } } + /* + * Only evaluate TIF_SPEC_IB if conditional STIBP is enabled, + * otherwise avoid the MSR write. + */ + if (IS_ENABLED(CONFIG_SMP) && + static_branch_unlikely(&switch_to_cond_stibp)) { + updmsr |= !!(tif_diff & _TIF_SPEC_IB); + msr |= stibp_tif_to_spec_ctrl(tifn); + } + if (updmsr) wrmsrl(MSR_IA32_SPEC_CTRL, msr); } -- GitLab From 7fe6a4baff26dc90fa8ebcbb8f844cc8d3fb256f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 25 Nov 2018 19:33:47 +0100 Subject: [PATCH 1887/2269] x86/process: Consolidate and simplify switch_to_xtra() code commit ff16701a29cba3aafa0bd1656d766813b2d0a811 upstream Move the conditional invocation of __switch_to_xtra() into an inline function so the logic can be shared between 32 and 64 bit. Remove the handthrough of the TSS pointer and retrieve the pointer directly in the bitmap handling function. Use this_cpu_ptr() instead of the per_cpu() indirection. This is a preparatory change so integration of conditional indirect branch speculation optimization happens only in one place. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Tim Chen Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181125185005.280855518@linutronix.de Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/switch_to.h | 3 --- arch/x86/kernel/process.c | 12 +++++++----- arch/x86/kernel/process.h | 24 ++++++++++++++++++++++++ arch/x86/kernel/process_32.c | 10 +++------- arch/x86/kernel/process_64.c | 10 +++------- 5 files changed, 37 insertions(+), 22 deletions(-) create mode 100644 arch/x86/kernel/process.h diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h index 9b6df68d8fd1e..12ef2b49d11ba 100644 --- a/arch/x86/include/asm/switch_to.h +++ b/arch/x86/include/asm/switch_to.h @@ -11,9 +11,6 @@ struct task_struct *__switch_to_asm(struct task_struct *prev, __visible struct task_struct *__switch_to(struct task_struct *prev, struct task_struct *next); -struct tss_struct; -void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, - struct tss_struct *tss); /* This runs runs on the previous thread's stack. */ static inline void prepare_switch_to(struct task_struct *prev, diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 2baa10f05052e..dd1157b4bd154 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -41,6 +41,8 @@ #include #include +#include "process.h" + /* * per-CPU TSS segments. Threads are completely 'soft' on Linux, * no more per-task TSS's. The TSS size is kept cacheline-aligned @@ -255,11 +257,12 @@ void arch_setup_new_exec(void) enable_cpuid(); } -static inline void switch_to_bitmap(struct tss_struct *tss, - struct thread_struct *prev, +static inline void switch_to_bitmap(struct thread_struct *prev, struct thread_struct *next, unsigned long tifp, unsigned long tifn) { + struct tss_struct *tss = this_cpu_ptr(&cpu_tss_rw); + if (tifn & _TIF_IO_BITMAP) { /* * Copy the relevant range of the IO bitmap. @@ -451,8 +454,7 @@ void speculation_ctrl_update(unsigned long tif) preempt_enable(); } -void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, - struct tss_struct *tss) +void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p) { struct thread_struct *prev, *next; unsigned long tifp, tifn; @@ -462,7 +464,7 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, tifn = READ_ONCE(task_thread_info(next_p)->flags); tifp = READ_ONCE(task_thread_info(prev_p)->flags); - switch_to_bitmap(tss, prev, next, tifp, tifn); + switch_to_bitmap(prev, next, tifp, tifn); propagate_user_return_notify(prev_p, next_p); diff --git a/arch/x86/kernel/process.h b/arch/x86/kernel/process.h new file mode 100644 index 0000000000000..020fbfac3a27a --- /dev/null +++ b/arch/x86/kernel/process.h @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Code shared between 32 and 64 bit + +void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p); + +/* + * This needs to be inline to optimize for the common case where no extra + * work needs to be done. + */ +static inline void switch_to_extra(struct task_struct *prev, + struct task_struct *next) +{ + unsigned long next_tif = task_thread_info(next)->flags; + unsigned long prev_tif = task_thread_info(prev)->flags; + + /* + * __switch_to_xtra() handles debug registers, i/o bitmaps, + * speculation mitigations etc. + */ + if (unlikely(next_tif & _TIF_WORK_CTXSW_NEXT || + prev_tif & _TIF_WORK_CTXSW_PREV)) + __switch_to_xtra(prev, next); +} diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 5224c60991841..c2df91eab5733 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -59,6 +59,8 @@ #include #include +#include "process.h" + void __show_regs(struct pt_regs *regs, int all) { unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L; @@ -234,7 +236,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) struct fpu *prev_fpu = &prev->fpu; struct fpu *next_fpu = &next->fpu; int cpu = smp_processor_id(); - struct tss_struct *tss = &per_cpu(cpu_tss_rw, cpu); /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */ @@ -266,12 +267,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) if (get_kernel_rpl() && unlikely(prev->iopl != next->iopl)) set_iopl_mask(next->iopl); - /* - * Now maybe handle debug registers and/or IO bitmaps - */ - if (unlikely(task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV || - task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT)) - __switch_to_xtra(prev_p, next_p, tss); + switch_to_extra(prev_p, next_p); /* * Leave lazy mode, flushing any hypercalls made here. diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index cbeecfcc66d68..ec63d6be5e022 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -59,6 +59,8 @@ #include #endif +#include "process.h" + __visible DEFINE_PER_CPU(unsigned long, rsp_scratch); /* Prints also some state that isn't saved in the pt_regs */ @@ -400,7 +402,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) struct fpu *prev_fpu = &prev->fpu; struct fpu *next_fpu = &next->fpu; int cpu = smp_processor_id(); - struct tss_struct *tss = &per_cpu(cpu_tss_rw, cpu); WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) && this_cpu_read(irq_count) != -1); @@ -467,12 +468,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) /* Reload sp0. */ update_sp0(next_p); - /* - * Now maybe reload the debug registers and handle I/O bitmaps - */ - if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT || - task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV)) - __switch_to_xtra(prev_p, next_p, tss); + __switch_to_xtra(prev_p, next_p); #ifdef CONFIG_XEN_PV /* -- GitLab From ba523588b1bc0b9601e7ef102b56dc246a91d026 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 25 Nov 2018 19:33:48 +0100 Subject: [PATCH 1888/2269] x86/speculation: Avoid __switch_to_xtra() calls commit 5635d99953f04b550738f6f4c1c532667c3fd872 upstream The TIF_SPEC_IB bit does not need to be evaluated in the decision to invoke __switch_to_xtra() when: - CONFIG_SMP is disabled - The conditional STIPB mode is disabled The TIF_SPEC_IB bit still controls IBPB in both cases so the TIF work mask checks might invoke __switch_to_xtra() for nothing if TIF_SPEC_IB is the only set bit in the work masks. Optimize it out by masking the bit at compile time for CONFIG_SMP=n and at run time when the static key controlling the conditional STIBP mode is disabled. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Tim Chen Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181125185005.374062201@linutronix.de Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/thread_info.h | 13 +++++++++++-- arch/x86/kernel/process.h | 15 +++++++++++++++ 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 78ebda74b225f..7b30338ca55b5 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -149,9 +149,18 @@ struct thread_info { _TIF_FSCHECK) /* flags to check in __switch_to() */ -#define _TIF_WORK_CTXSW \ +#define _TIF_WORK_CTXSW_BASE \ (_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP| \ - _TIF_SSBD|_TIF_SPEC_IB) + _TIF_SSBD) + +/* + * Avoid calls to __switch_to_xtra() on UP as STIBP is not evaluated. + */ +#ifdef CONFIG_SMP +# define _TIF_WORK_CTXSW (_TIF_WORK_CTXSW_BASE | _TIF_SPEC_IB) +#else +# define _TIF_WORK_CTXSW (_TIF_WORK_CTXSW_BASE) +#endif #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY) #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW) diff --git a/arch/x86/kernel/process.h b/arch/x86/kernel/process.h index 020fbfac3a27a..898e97cf6629d 100644 --- a/arch/x86/kernel/process.h +++ b/arch/x86/kernel/process.h @@ -2,6 +2,8 @@ // // Code shared between 32 and 64 bit +#include + void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p); /* @@ -14,6 +16,19 @@ static inline void switch_to_extra(struct task_struct *prev, unsigned long next_tif = task_thread_info(next)->flags; unsigned long prev_tif = task_thread_info(prev)->flags; + if (IS_ENABLED(CONFIG_SMP)) { + /* + * Avoid __switch_to_xtra() invocation when conditional + * STIPB is disabled and the only different bit is + * TIF_SPEC_IB. For CONFIG_SMP=n TIF_SPEC_IB is not + * in the TIF_WORK_CTXSW masks. + */ + if (!static_branch_likely(&switch_to_cond_stibp)) { + prev_tif &= ~_TIF_SPEC_IB; + next_tif &= ~_TIF_SPEC_IB; + } + } + /* * __switch_to_xtra() handles debug registers, i/o bitmaps, * speculation mitigations etc. -- GitLab From cbca99b96f06f540669f07f768e228014e38b3b6 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 25 Nov 2018 19:33:49 +0100 Subject: [PATCH 1889/2269] x86/speculation: Prepare for conditional IBPB in switch_mm() commit 4c71a2b6fd7e42814aa68a6dec88abf3b42ea573 upstream The IBPB speculation barrier is issued from switch_mm() when the kernel switches to a user space task with a different mm than the user space task which ran last on the same CPU. An additional optimization is to avoid IBPB when the incoming task can be ptraced by the outgoing task. This optimization only works when switching directly between two user space tasks. When switching from a kernel task to a user space task the optimization fails because the previous task cannot be accessed anymore. So for quite some scenarios the optimization is just adding overhead. The upcoming conditional IBPB support will issue IBPB only for user space tasks which have the TIF_SPEC_IB bit set. This requires to handle the following cases: 1) Switch from a user space task (potential attacker) which has TIF_SPEC_IB set to a user space task (potential victim) which has TIF_SPEC_IB not set. 2) Switch from a user space task (potential attacker) which has TIF_SPEC_IB not set to a user space task (potential victim) which has TIF_SPEC_IB set. This needs to be optimized for the case where the IBPB can be avoided when only kernel threads ran in between user space tasks which belong to the same process. The current check whether two tasks belong to the same context is using the tasks context id. While correct, it's simpler to use the mm pointer because it allows to mangle the TIF_SPEC_IB bit into it. The context id based mechanism requires extra storage, which creates worse code. When a task is scheduled out its TIF_SPEC_IB bit is mangled as bit 0 into the per CPU storage which is used to track the last user space mm which was running on a CPU. This bit can be used together with the TIF_SPEC_IB bit of the incoming task to make the decision whether IBPB needs to be issued or not to cover the two cases above. As conditional IBPB is going to be the default, remove the dubious ptrace check for the IBPB always case and simply issue IBPB always when the process changes. Move the storage to a different place in the struct as the original one created a hole. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Tim Chen Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181125185005.466447057@linutronix.de Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/nospec-branch.h | 2 + arch/x86/include/asm/tlbflush.h | 8 +- arch/x86/kernel/cpu/bugs.c | 29 +++++-- arch/x86/mm/tlb.c | 114 ++++++++++++++++++++------- 4 files changed, 118 insertions(+), 35 deletions(-) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 6057086fde1c8..2de273c475217 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -312,6 +312,8 @@ do { \ } while (0) DECLARE_STATIC_KEY_FALSE(switch_to_cond_stibp); +DECLARE_STATIC_KEY_FALSE(switch_mm_cond_ibpb); +DECLARE_STATIC_KEY_FALSE(switch_mm_always_ibpb); #endif /* __ASSEMBLY__ */ diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 2501be609b821..e31040333f0cc 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -185,10 +185,14 @@ struct tlb_state { #define LOADED_MM_SWITCHING ((struct mm_struct *)1) + /* Last user mm for optimizing IBPB */ + union { + struct mm_struct *last_user_mm; + unsigned long last_user_mm_ibpb; + }; + u16 loaded_mm_asid; u16 next_asid; - /* last user mm's ctx id */ - u64 last_ctx_id; /* * We can be in one of several states: diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 5bba757fead18..f4bcdae79907e 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -55,6 +55,10 @@ u64 __ro_after_init x86_amd_ls_cfg_ssbd_mask; /* Control conditional STIPB in switch_to() */ DEFINE_STATIC_KEY_FALSE(switch_to_cond_stibp); +/* Control conditional IBPB in switch_mm() */ +DEFINE_STATIC_KEY_FALSE(switch_mm_cond_ibpb); +/* Control unconditional IBPB in switch_mm() */ +DEFINE_STATIC_KEY_FALSE(switch_mm_always_ibpb); void __init check_bugs(void) { @@ -330,7 +334,17 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) /* Initialize Indirect Branch Prediction Barrier */ if (boot_cpu_has(X86_FEATURE_IBPB)) { setup_force_cpu_cap(X86_FEATURE_USE_IBPB); - pr_info("Spectre v2 mitigation: Enabling Indirect Branch Prediction Barrier\n"); + + switch (mode) { + case SPECTRE_V2_USER_STRICT: + static_branch_enable(&switch_mm_always_ibpb); + break; + default: + break; + } + + pr_info("mitigation: Enabling %s Indirect Branch Prediction Barrier\n", + mode == SPECTRE_V2_USER_STRICT ? "always-on" : "conditional"); } /* If enhanced IBRS is enabled no STIPB required */ @@ -952,10 +966,15 @@ static char *stibp_state(void) static char *ibpb_state(void) { - if (boot_cpu_has(X86_FEATURE_USE_IBPB)) - return ", IBPB"; - else - return ""; + if (boot_cpu_has(X86_FEATURE_IBPB)) { + switch (spectre_v2_user) { + case SPECTRE_V2_USER_NONE: + return ", IBPB: disabled"; + case SPECTRE_V2_USER_STRICT: + return ", IBPB: always-on"; + } + } + return ""; } static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr, diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 9e943de9d6287..5400a24e1a8c0 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -7,7 +7,6 @@ #include #include #include -#include #include #include @@ -30,6 +29,12 @@ * Implement flush IPI by CALL_FUNCTION_VECTOR, Alex Shi */ +/* + * Use bit 0 to mangle the TIF_SPEC_IB state into the mm pointer which is + * stored in cpu_tlb_state.last_user_mm_ibpb. + */ +#define LAST_USER_MM_IBPB 0x1UL + /* * We get here when we do something requiring a TLB invalidation * but could not go invalidate all of the contexts. We do the @@ -181,17 +186,87 @@ static void sync_current_stack_to_mm(struct mm_struct *mm) } } -static bool ibpb_needed(struct task_struct *tsk, u64 last_ctx_id) +static inline unsigned long mm_mangle_tif_spec_ib(struct task_struct *next) +{ + unsigned long next_tif = task_thread_info(next)->flags; + unsigned long ibpb = (next_tif >> TIF_SPEC_IB) & LAST_USER_MM_IBPB; + + return (unsigned long)next->mm | ibpb; +} + +static void cond_ibpb(struct task_struct *next) { + if (!next || !next->mm) + return; + /* - * Check if the current (previous) task has access to the memory - * of the @tsk (next) task. If access is denied, make sure to - * issue a IBPB to stop user->user Spectre-v2 attacks. - * - * Note: __ptrace_may_access() returns 0 or -ERRNO. + * Both, the conditional and the always IBPB mode use the mm + * pointer to avoid the IBPB when switching between tasks of the + * same process. Using the mm pointer instead of mm->context.ctx_id + * opens a hypothetical hole vs. mm_struct reuse, which is more or + * less impossible to control by an attacker. Aside of that it + * would only affect the first schedule so the theoretically + * exposed data is not really interesting. */ - return (tsk && tsk->mm && tsk->mm->context.ctx_id != last_ctx_id && - ptrace_may_access_sched(tsk, PTRACE_MODE_SPEC_IBPB)); + if (static_branch_likely(&switch_mm_cond_ibpb)) { + unsigned long prev_mm, next_mm; + + /* + * This is a bit more complex than the always mode because + * it has to handle two cases: + * + * 1) Switch from a user space task (potential attacker) + * which has TIF_SPEC_IB set to a user space task + * (potential victim) which has TIF_SPEC_IB not set. + * + * 2) Switch from a user space task (potential attacker) + * which has TIF_SPEC_IB not set to a user space task + * (potential victim) which has TIF_SPEC_IB set. + * + * This could be done by unconditionally issuing IBPB when + * a task which has TIF_SPEC_IB set is either scheduled in + * or out. Though that results in two flushes when: + * + * - the same user space task is scheduled out and later + * scheduled in again and only a kernel thread ran in + * between. + * + * - a user space task belonging to the same process is + * scheduled in after a kernel thread ran in between + * + * - a user space task belonging to the same process is + * scheduled in immediately. + * + * Optimize this with reasonably small overhead for the + * above cases. Mangle the TIF_SPEC_IB bit into the mm + * pointer of the incoming task which is stored in + * cpu_tlbstate.last_user_mm_ibpb for comparison. + */ + next_mm = mm_mangle_tif_spec_ib(next); + prev_mm = this_cpu_read(cpu_tlbstate.last_user_mm_ibpb); + + /* + * Issue IBPB only if the mm's are different and one or + * both have the IBPB bit set. + */ + if (next_mm != prev_mm && + (next_mm | prev_mm) & LAST_USER_MM_IBPB) + indirect_branch_prediction_barrier(); + + this_cpu_write(cpu_tlbstate.last_user_mm_ibpb, next_mm); + } + + if (static_branch_unlikely(&switch_mm_always_ibpb)) { + /* + * Only flush when switching to a user space task with a + * different context than the user space task which ran + * last on this CPU. + */ + if (this_cpu_read(cpu_tlbstate.last_user_mm) != next->mm) { + indirect_branch_prediction_barrier(); + this_cpu_write(cpu_tlbstate.last_user_mm, next->mm); + } + } } void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, @@ -262,22 +337,13 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, } else { u16 new_asid; bool need_flush; - u64 last_ctx_id = this_cpu_read(cpu_tlbstate.last_ctx_id); /* * Avoid user/user BTB poisoning by flushing the branch * predictor when switching between processes. This stops * one process from doing Spectre-v2 attacks on another. - * - * As an optimization, flush indirect branches only when - * switching into a processes that can't be ptrace by the - * current one (as in such case, attacker has much more - * convenient way how to tamper with the next process than - * branch buffer poisoning). */ - if (static_cpu_has(X86_FEATURE_USE_IBPB) && - ibpb_needed(tsk, last_ctx_id)) - indirect_branch_prediction_barrier(); + cond_ibpb(tsk); if (IS_ENABLED(CONFIG_VMAP_STACK)) { /* @@ -327,14 +393,6 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, 0); } - /* - * Record last user mm's context id, so we can avoid - * flushing branch buffer with IBPB if we switch back - * to the same user. - */ - if (next != &init_mm) - this_cpu_write(cpu_tlbstate.last_ctx_id, next->context.ctx_id); - /* Make sure we write CR3 before loaded_mm. */ barrier(); @@ -415,7 +473,7 @@ void initialize_tlbstate_and_flush(void) write_cr3(build_cr3(mm->pgd, 0)); /* Reinitialize tlbstate. */ - this_cpu_write(cpu_tlbstate.last_ctx_id, mm->context.ctx_id); + this_cpu_write(cpu_tlbstate.last_user_mm_ibpb, LAST_USER_MM_IBPB); this_cpu_write(cpu_tlbstate.loaded_mm_asid, 0); this_cpu_write(cpu_tlbstate.next_asid, 1); this_cpu_write(cpu_tlbstate.ctxs[0].ctx_id, mm->context.ctx_id); -- GitLab From dae4d59096700703266190f8dff92f9edb82b1b5 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 25 Nov 2018 19:33:50 +0100 Subject: [PATCH 1890/2269] ptrace: Remove unused ptrace_may_access_sched() and MODE_IBRS commit 46f7ecb1e7359f183f5bbd1e08b90e10e52164f9 upstream The IBPB control code in x86 removed the usage. Remove the functionality which was introduced for this. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Tim Chen Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181125185005.559149393@linutronix.de Signed-off-by: Greg Kroah-Hartman --- include/linux/ptrace.h | 17 ----------------- kernel/ptrace.c | 10 ---------- 2 files changed, 27 deletions(-) diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index b1d43b80abfa8..38342e88b3f33 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -64,15 +64,12 @@ extern void exit_ptrace(struct task_struct *tracer, struct list_head *dead); #define PTRACE_MODE_NOAUDIT 0x04 #define PTRACE_MODE_FSCREDS 0x08 #define PTRACE_MODE_REALCREDS 0x10 -#define PTRACE_MODE_SCHED 0x20 -#define PTRACE_MODE_IBPB 0x40 /* shorthands for READ/ATTACH and FSCREDS/REALCREDS combinations */ #define PTRACE_MODE_READ_FSCREDS (PTRACE_MODE_READ | PTRACE_MODE_FSCREDS) #define PTRACE_MODE_READ_REALCREDS (PTRACE_MODE_READ | PTRACE_MODE_REALCREDS) #define PTRACE_MODE_ATTACH_FSCREDS (PTRACE_MODE_ATTACH | PTRACE_MODE_FSCREDS) #define PTRACE_MODE_ATTACH_REALCREDS (PTRACE_MODE_ATTACH | PTRACE_MODE_REALCREDS) -#define PTRACE_MODE_SPEC_IBPB (PTRACE_MODE_ATTACH_REALCREDS | PTRACE_MODE_IBPB) /** * ptrace_may_access - check whether the caller is permitted to access @@ -90,20 +87,6 @@ extern void exit_ptrace(struct task_struct *tracer, struct list_head *dead); */ extern bool ptrace_may_access(struct task_struct *task, unsigned int mode); -/** - * ptrace_may_access - check whether the caller is permitted to access - * a target task. - * @task: target task - * @mode: selects type of access and caller credentials - * - * Returns true on success, false on denial. - * - * Similar to ptrace_may_access(). Only to be called from context switch - * code. Does not call into audit and the regular LSM hooks due to locking - * constraints. - */ -extern bool ptrace_may_access_sched(struct task_struct *task, unsigned int mode); - static inline int ptrace_reparented(struct task_struct *child) { return !same_thread_group(child->real_parent, child->parent); diff --git a/kernel/ptrace.c b/kernel/ptrace.c index d7be2159ac09a..84b1367935e41 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -261,9 +261,6 @@ static int ptrace_check_attach(struct task_struct *child, bool ignore_state) static int ptrace_has_cap(struct user_namespace *ns, unsigned int mode) { - if (mode & PTRACE_MODE_SCHED) - return false; - if (mode & PTRACE_MODE_NOAUDIT) return has_ns_capability_noaudit(current, ns, CAP_SYS_PTRACE); else @@ -331,16 +328,9 @@ ok: !ptrace_has_cap(mm->user_ns, mode))) return -EPERM; - if (mode & PTRACE_MODE_SCHED) - return 0; return security_ptrace_access_check(task, mode); } -bool ptrace_may_access_sched(struct task_struct *task, unsigned int mode) -{ - return __ptrace_may_access(task, mode | PTRACE_MODE_SCHED); -} - bool ptrace_may_access(struct task_struct *task, unsigned int mode) { int err; -- GitLab From dcb4ac34f9373b1f4365b24ddfac2bcdefd59f6e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 25 Nov 2018 19:33:51 +0100 Subject: [PATCH 1891/2269] x86/speculation: Split out TIF update commit e6da8bb6f9abb2628381904b24163c770e630bac upstream The update of the TIF_SSBD flag and the conditional speculation control MSR update is done in the ssb_prctl_set() function directly. The upcoming prctl support for controlling indirect branch speculation via STIBP needs the same mechanism. Split the code out and make it reusable. Reword the comment about updates for other tasks. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Tim Chen Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181125185005.652305076@linutronix.de Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 35 +++++++++++++++++++++++------------ 1 file changed, 23 insertions(+), 12 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index f4bcdae79907e..68da43cacead7 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -699,10 +699,29 @@ static void ssb_select_mitigation(void) #undef pr_fmt #define pr_fmt(fmt) "Speculation prctl: " fmt -static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl) +static void task_update_spec_tif(struct task_struct *tsk, int tifbit, bool on) { bool update; + if (on) + update = !test_and_set_tsk_thread_flag(tsk, tifbit); + else + update = test_and_clear_tsk_thread_flag(tsk, tifbit); + + /* + * Immediately update the speculation control MSRs for the current + * task, but for a non-current task delay setting the CPU + * mitigation until it is scheduled next. + * + * This can only happen for SECCOMP mitigation. For PRCTL it's + * always the current task. + */ + if (tsk == current && update) + speculation_ctrl_update_current(); +} + +static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl) +{ if (ssb_mode != SPEC_STORE_BYPASS_PRCTL && ssb_mode != SPEC_STORE_BYPASS_SECCOMP) return -ENXIO; @@ -713,28 +732,20 @@ static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl) if (task_spec_ssb_force_disable(task)) return -EPERM; task_clear_spec_ssb_disable(task); - update = test_and_clear_tsk_thread_flag(task, TIF_SSBD); + task_update_spec_tif(task, TIF_SSBD, false); break; case PR_SPEC_DISABLE: task_set_spec_ssb_disable(task); - update = !test_and_set_tsk_thread_flag(task, TIF_SSBD); + task_update_spec_tif(task, TIF_SSBD, true); break; case PR_SPEC_FORCE_DISABLE: task_set_spec_ssb_disable(task); task_set_spec_ssb_force_disable(task); - update = !test_and_set_tsk_thread_flag(task, TIF_SSBD); + task_update_spec_tif(task, TIF_SSBD, true); break; default: return -ERANGE; } - - /* - * If being set on non-current task, delay setting the CPU - * mitigation until it is next scheduled. - */ - if (task == current && update) - speculation_ctrl_update_current(); - return 0; } -- GitLab From e3f822b628a0d715a46d233e81881f6c2885247b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 28 Nov 2018 10:56:57 +0100 Subject: [PATCH 1892/2269] x86/speculation: Prevent stale SPEC_CTRL msr content commit 6d991ba509ebcfcc908e009d1db51972a4f7a064 upstream The seccomp speculation control operates on all tasks of a process, but only the current task of a process can update the MSR immediately. For the other threads the update is deferred to the next context switch. This creates the following situation with Process A and B: Process A task 2 and Process B task 1 are pinned on CPU1. Process A task 2 does not have the speculation control TIF bit set. Process B task 1 has the speculation control TIF bit set. CPU0 CPU1 MSR bit is set ProcB.T1 schedules out ProcA.T2 schedules in MSR bit is cleared ProcA.T1 seccomp_update() set TIF bit on ProcA.T2 ProcB.T1 schedules in MSR is not updated <-- FAIL This happens because the context switch code tries to avoid the MSR update if the speculation control TIF bits of the incoming and the outgoing task are the same. In the worst case ProcB.T1 and ProcA.T2 are the only tasks scheduling back and forth on CPU1, which keeps the MSR stale forever. In theory this could be remedied by IPIs, but chasing the remote task which could be migrated is complex and full of races. The straight forward solution is to avoid the asychronous update of the TIF bit and defer it to the next context switch. The speculation control state is stored in task_struct::atomic_flags by the prctl and seccomp updates already. Add a new TIF_SPEC_FORCE_UPDATE bit and set this after updating the atomic_flags. Check the bit on context switch and force a synchronous update of the speculation control if set. Use the same mechanism for updating the current task. Reported-by: Tim Chen Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Tim Chen Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/alpine.DEB.2.21.1811272247140.1875@nanos.tec.linutronix.de Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/spec-ctrl.h | 6 +----- arch/x86/include/asm/thread_info.h | 4 +++- arch/x86/kernel/cpu/bugs.c | 18 +++++++----------- arch/x86/kernel/process.c | 30 +++++++++++++++++++++++++++++- 4 files changed, 40 insertions(+), 18 deletions(-) diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h index 27b0bce3933ba..5393babc05989 100644 --- a/arch/x86/include/asm/spec-ctrl.h +++ b/arch/x86/include/asm/spec-ctrl.h @@ -83,10 +83,6 @@ static inline void speculative_store_bypass_ht_init(void) { } #endif extern void speculation_ctrl_update(unsigned long tif); - -static inline void speculation_ctrl_update_current(void) -{ - speculation_ctrl_update(current_thread_info()->flags); -} +extern void speculation_ctrl_update_current(void); #endif diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 7b30338ca55b5..bf9175d878442 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -86,6 +86,7 @@ struct thread_info { #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ #define TIF_SECCOMP 8 /* secure computing */ #define TIF_SPEC_IB 9 /* Indirect branch speculation mitigation */ +#define TIF_SPEC_FORCE_UPDATE 10 /* Force speculation MSR update in context switch */ #define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */ #define TIF_UPROBE 12 /* breakpointed or singlestepping */ #define TIF_PATCH_PENDING 13 /* pending live patching update */ @@ -114,6 +115,7 @@ struct thread_info { #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) #define _TIF_SECCOMP (1 << TIF_SECCOMP) #define _TIF_SPEC_IB (1 << TIF_SPEC_IB) +#define _TIF_SPEC_FORCE_UPDATE (1 << TIF_SPEC_FORCE_UPDATE) #define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY) #define _TIF_UPROBE (1 << TIF_UPROBE) #define _TIF_PATCH_PENDING (1 << TIF_PATCH_PENDING) @@ -151,7 +153,7 @@ struct thread_info { /* flags to check in __switch_to() */ #define _TIF_WORK_CTXSW_BASE \ (_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP| \ - _TIF_SSBD) + _TIF_SSBD | _TIF_SPEC_FORCE_UPDATE) /* * Avoid calls to __switch_to_xtra() on UP as STIBP is not evaluated. diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 68da43cacead7..9f9b03094628f 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -699,14 +699,10 @@ static void ssb_select_mitigation(void) #undef pr_fmt #define pr_fmt(fmt) "Speculation prctl: " fmt -static void task_update_spec_tif(struct task_struct *tsk, int tifbit, bool on) +static void task_update_spec_tif(struct task_struct *tsk) { - bool update; - - if (on) - update = !test_and_set_tsk_thread_flag(tsk, tifbit); - else - update = test_and_clear_tsk_thread_flag(tsk, tifbit); + /* Force the update of the real TIF bits */ + set_tsk_thread_flag(tsk, TIF_SPEC_FORCE_UPDATE); /* * Immediately update the speculation control MSRs for the current @@ -716,7 +712,7 @@ static void task_update_spec_tif(struct task_struct *tsk, int tifbit, bool on) * This can only happen for SECCOMP mitigation. For PRCTL it's * always the current task. */ - if (tsk == current && update) + if (tsk == current) speculation_ctrl_update_current(); } @@ -732,16 +728,16 @@ static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl) if (task_spec_ssb_force_disable(task)) return -EPERM; task_clear_spec_ssb_disable(task); - task_update_spec_tif(task, TIF_SSBD, false); + task_update_spec_tif(task); break; case PR_SPEC_DISABLE: task_set_spec_ssb_disable(task); - task_update_spec_tif(task, TIF_SSBD, true); + task_update_spec_tif(task); break; case PR_SPEC_FORCE_DISABLE: task_set_spec_ssb_disable(task); task_set_spec_ssb_force_disable(task); - task_update_spec_tif(task, TIF_SSBD, true); + task_update_spec_tif(task); break; default: return -ERANGE; diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index dd1157b4bd154..0b7be5f31fb40 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -446,6 +446,18 @@ static __always_inline void __speculation_ctrl_update(unsigned long tifp, wrmsrl(MSR_IA32_SPEC_CTRL, msr); } +static unsigned long speculation_ctrl_update_tif(struct task_struct *tsk) +{ + if (test_and_clear_tsk_thread_flag(tsk, TIF_SPEC_FORCE_UPDATE)) { + if (task_spec_ssb_disable(tsk)) + set_tsk_thread_flag(tsk, TIF_SSBD); + else + clear_tsk_thread_flag(tsk, TIF_SSBD); + } + /* Return the updated threadinfo flags*/ + return task_thread_info(tsk)->flags; +} + void speculation_ctrl_update(unsigned long tif) { /* Forced update. Make sure all relevant TIF flags are different */ @@ -454,6 +466,14 @@ void speculation_ctrl_update(unsigned long tif) preempt_enable(); } +/* Called from seccomp/prctl update */ +void speculation_ctrl_update_current(void) +{ + preempt_disable(); + speculation_ctrl_update(speculation_ctrl_update_tif(current)); + preempt_enable(); +} + void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p) { struct thread_struct *prev, *next; @@ -485,7 +505,15 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p) if ((tifp ^ tifn) & _TIF_NOCPUID) set_cpuid_faulting(!!(tifn & _TIF_NOCPUID)); - __speculation_ctrl_update(tifp, tifn); + if (likely(!((tifp | tifn) & _TIF_SPEC_FORCE_UPDATE))) { + __speculation_ctrl_update(tifp, tifn); + } else { + speculation_ctrl_update_tif(prev_p); + tifn = speculation_ctrl_update_tif(next_p); + + /* Enforce MSR update to ensure consistent state */ + __speculation_ctrl_update(~tifn, tifn); + } } /* -- GitLab From 99f1cb80daab80107c4b7f7f9a9ddf041a8b2137 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 25 Nov 2018 19:33:52 +0100 Subject: [PATCH 1893/2269] x86/speculation: Prepare arch_smt_update() for PRCTL mode commit 6893a959d7fdebbab5f5aa112c277d5a44435ba1 upstream The upcoming fine grained per task STIBP control needs to be updated on CPU hotplug as well. Split out the code which controls the strict mode so the prctl control code can be added later. Mark the SMP function call argument __unused while at it. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Tim Chen Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181125185005.759457117@linutronix.de Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 46 +++++++++++++++++++++----------------- 1 file changed, 25 insertions(+), 21 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 9f9b03094628f..2e3da643d3fd8 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -527,40 +527,44 @@ specv2_set_mode: arch_smt_update(); } -static bool stibp_needed(void) +static void update_stibp_msr(void * __unused) { - /* Enhanced IBRS makes using STIBP unnecessary. */ - if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) - return false; - - /* Check for strict user mitigation mode */ - return spectre_v2_user == SPECTRE_V2_USER_STRICT; + wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); } -static void update_stibp_msr(void *info) +/* Update x86_spec_ctrl_base in case SMT state changed. */ +static void update_stibp_strict(void) { - wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); + u64 mask = x86_spec_ctrl_base & ~SPEC_CTRL_STIBP; + + if (sched_smt_active()) + mask |= SPEC_CTRL_STIBP; + + if (mask == x86_spec_ctrl_base) + return; + + pr_info("Update user space SMT mitigation: STIBP %s\n", + mask & SPEC_CTRL_STIBP ? "always-on" : "off"); + x86_spec_ctrl_base = mask; + on_each_cpu(update_stibp_msr, NULL, 1); } void arch_smt_update(void) { - u64 mask; - - if (!stibp_needed()) + /* Enhanced IBRS implies STIBP. No update required. */ + if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) return; mutex_lock(&spec_ctrl_mutex); - mask = x86_spec_ctrl_base & ~SPEC_CTRL_STIBP; - if (sched_smt_active()) - mask |= SPEC_CTRL_STIBP; - - if (mask != x86_spec_ctrl_base) { - pr_info("Spectre v2 cross-process SMT mitigation: %s STIBP\n", - mask & SPEC_CTRL_STIBP ? "Enabling" : "Disabling"); - x86_spec_ctrl_base = mask; - on_each_cpu(update_stibp_msr, NULL, 1); + switch (spectre_v2_user) { + case SPECTRE_V2_USER_NONE: + break; + case SPECTRE_V2_USER_STRICT: + update_stibp_strict(); + break; } + mutex_unlock(&spec_ctrl_mutex); } -- GitLab From 6a847a6057721011b9534e709e5286c51ea31b9f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 25 Nov 2018 19:33:53 +0100 Subject: [PATCH 1894/2269] x86/speculation: Add prctl() control for indirect branch speculation commit 9137bb27e60e554dab694eafa4cca241fa3a694f upstream Add the PR_SPEC_INDIRECT_BRANCH option for the PR_GET_SPECULATION_CTRL and PR_SET_SPECULATION_CTRL prctls to allow fine grained per task control of indirect branch speculation via STIBP and IBPB. Invocations: Check indirect branch speculation status with - prctl(PR_GET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, 0, 0, 0); Enable indirect branch speculation with - prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, PR_SPEC_ENABLE, 0, 0); Disable indirect branch speculation with - prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, PR_SPEC_DISABLE, 0, 0); Force disable indirect branch speculation with - prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, PR_SPEC_FORCE_DISABLE, 0, 0); See Documentation/userspace-api/spec_ctrl.rst. Signed-off-by: Tim Chen Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181125185005.866780996@linutronix.de Signed-off-by: Greg Kroah-Hartman --- Documentation/userspace-api/spec_ctrl.rst | 9 +++ arch/x86/include/asm/nospec-branch.h | 1 + arch/x86/kernel/cpu/bugs.c | 67 +++++++++++++++++++++++ arch/x86/kernel/process.c | 5 ++ include/linux/sched.h | 9 +++ include/uapi/linux/prctl.h | 1 + 6 files changed, 92 insertions(+) diff --git a/Documentation/userspace-api/spec_ctrl.rst b/Documentation/userspace-api/spec_ctrl.rst index 32f3d55c54b75..c4dbe6f7cdae8 100644 --- a/Documentation/userspace-api/spec_ctrl.rst +++ b/Documentation/userspace-api/spec_ctrl.rst @@ -92,3 +92,12 @@ Speculation misfeature controls * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_ENABLE, 0, 0); * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_DISABLE, 0, 0); * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_FORCE_DISABLE, 0, 0); + +- PR_SPEC_INDIR_BRANCH: Indirect Branch Speculation in User Processes + (Mitigate Spectre V2 style attacks against user processes) + + Invocations: + * prctl(PR_GET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, 0, 0, 0); + * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, PR_SPEC_ENABLE, 0, 0); + * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, PR_SPEC_DISABLE, 0, 0); + * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, PR_SPEC_FORCE_DISABLE, 0, 0); diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 2de273c475217..35ef92c5cc663 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -232,6 +232,7 @@ enum spectre_v2_mitigation { enum spectre_v2_user_mitigation { SPECTRE_V2_USER_NONE, SPECTRE_V2_USER_STRICT, + SPECTRE_V2_USER_PRCTL, }; /* The Speculative Store Bypass disable variants */ diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 2e3da643d3fd8..bcd39b928fea6 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -563,6 +563,8 @@ void arch_smt_update(void) case SPECTRE_V2_USER_STRICT: update_stibp_strict(); break; + case SPECTRE_V2_USER_PRCTL: + break; } mutex_unlock(&spec_ctrl_mutex); @@ -749,12 +751,50 @@ static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl) return 0; } +static int ib_prctl_set(struct task_struct *task, unsigned long ctrl) +{ + switch (ctrl) { + case PR_SPEC_ENABLE: + if (spectre_v2_user == SPECTRE_V2_USER_NONE) + return 0; + /* + * Indirect branch speculation is always disabled in strict + * mode. + */ + if (spectre_v2_user == SPECTRE_V2_USER_STRICT) + return -EPERM; + task_clear_spec_ib_disable(task); + task_update_spec_tif(task); + break; + case PR_SPEC_DISABLE: + case PR_SPEC_FORCE_DISABLE: + /* + * Indirect branch speculation is always allowed when + * mitigation is force disabled. + */ + if (spectre_v2_user == SPECTRE_V2_USER_NONE) + return -EPERM; + if (spectre_v2_user == SPECTRE_V2_USER_STRICT) + return 0; + task_set_spec_ib_disable(task); + if (ctrl == PR_SPEC_FORCE_DISABLE) + task_set_spec_ib_force_disable(task); + task_update_spec_tif(task); + break; + default: + return -ERANGE; + } + return 0; +} + int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which, unsigned long ctrl) { switch (which) { case PR_SPEC_STORE_BYPASS: return ssb_prctl_set(task, ctrl); + case PR_SPEC_INDIRECT_BRANCH: + return ib_prctl_set(task, ctrl); default: return -ENODEV; } @@ -787,11 +827,34 @@ static int ssb_prctl_get(struct task_struct *task) } } +static int ib_prctl_get(struct task_struct *task) +{ + if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) + return PR_SPEC_NOT_AFFECTED; + + switch (spectre_v2_user) { + case SPECTRE_V2_USER_NONE: + return PR_SPEC_ENABLE; + case SPECTRE_V2_USER_PRCTL: + if (task_spec_ib_force_disable(task)) + return PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE; + if (task_spec_ib_disable(task)) + return PR_SPEC_PRCTL | PR_SPEC_DISABLE; + return PR_SPEC_PRCTL | PR_SPEC_ENABLE; + case SPECTRE_V2_USER_STRICT: + return PR_SPEC_DISABLE; + default: + return PR_SPEC_NOT_AFFECTED; + } +} + int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which) { switch (which) { case PR_SPEC_STORE_BYPASS: return ssb_prctl_get(task); + case PR_SPEC_INDIRECT_BRANCH: + return ib_prctl_get(task); default: return -ENODEV; } @@ -971,6 +1034,8 @@ static char *stibp_state(void) return ", STIBP: disabled"; case SPECTRE_V2_USER_STRICT: return ", STIBP: forced"; + case SPECTRE_V2_USER_PRCTL: + return ""; } return ""; } @@ -983,6 +1048,8 @@ static char *ibpb_state(void) return ", IBPB: disabled"; case SPECTRE_V2_USER_STRICT: return ", IBPB: always-on"; + case SPECTRE_V2_USER_PRCTL: + return ""; } } return ""; diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 0b7be5f31fb40..a98d1cdd6299a 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -453,6 +453,11 @@ static unsigned long speculation_ctrl_update_tif(struct task_struct *tsk) set_tsk_thread_flag(tsk, TIF_SSBD); else clear_tsk_thread_flag(tsk, TIF_SSBD); + + if (task_spec_ib_disable(tsk)) + set_tsk_thread_flag(tsk, TIF_SPEC_IB); + else + clear_tsk_thread_flag(tsk, TIF_SPEC_IB); } /* Return the updated threadinfo flags*/ return task_thread_info(tsk)->flags; diff --git a/include/linux/sched.h b/include/linux/sched.h index e04919aa82019..866439c361a95 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1405,6 +1405,8 @@ static inline bool is_percpu_thread(void) #define PFA_SPREAD_SLAB 2 /* Spread some slab caches over cpuset */ #define PFA_SPEC_SSB_DISABLE 3 /* Speculative Store Bypass disabled */ #define PFA_SPEC_SSB_FORCE_DISABLE 4 /* Speculative Store Bypass force disabled*/ +#define PFA_SPEC_IB_DISABLE 5 /* Indirect branch speculation restricted */ +#define PFA_SPEC_IB_FORCE_DISABLE 6 /* Indirect branch speculation permanently restricted */ #define TASK_PFA_TEST(name, func) \ static inline bool task_##func(struct task_struct *p) \ @@ -1436,6 +1438,13 @@ TASK_PFA_CLEAR(SPEC_SSB_DISABLE, spec_ssb_disable) TASK_PFA_TEST(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable) TASK_PFA_SET(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable) +TASK_PFA_TEST(SPEC_IB_DISABLE, spec_ib_disable) +TASK_PFA_SET(SPEC_IB_DISABLE, spec_ib_disable) +TASK_PFA_CLEAR(SPEC_IB_DISABLE, spec_ib_disable) + +TASK_PFA_TEST(SPEC_IB_FORCE_DISABLE, spec_ib_force_disable) +TASK_PFA_SET(SPEC_IB_FORCE_DISABLE, spec_ib_force_disable) + static inline void current_restore_flags(unsigned long orig_flags, unsigned long flags) { diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h index 3027f943f4b33..214102fab940f 100644 --- a/include/uapi/linux/prctl.h +++ b/include/uapi/linux/prctl.h @@ -203,6 +203,7 @@ struct prctl_mm_map { #define PR_SET_SPECULATION_CTRL 53 /* Speculation control variants */ # define PR_SPEC_STORE_BYPASS 0 +# define PR_SPEC_INDIRECT_BRANCH 1 /* Return and control values for PR_SET/GET_SPECULATION_CTRL */ # define PR_SPEC_NOT_AFFECTED 0 # define PR_SPEC_PRCTL (1UL << 0) -- GitLab From 605b2828ff55471d8b2ab0b405991895a2216e06 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 25 Nov 2018 19:33:54 +0100 Subject: [PATCH 1895/2269] x86/speculation: Enable prctl mode for spectre_v2_user commit 7cc765a67d8e04ef7d772425ca5a2a1e2b894c15 upstream Now that all prerequisites are in place: - Add the prctl command line option - Default the 'auto' mode to 'prctl' - When SMT state changes, update the static key which controls the conditional STIBP evaluation on context switch. - At init update the static key which controls the conditional IBPB evaluation on context switch. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Tim Chen Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181125185005.958421388@linutronix.de Signed-off-by: Greg Kroah-Hartman --- .../admin-guide/kernel-parameters.txt | 7 +++- arch/x86/kernel/cpu/bugs.c | 41 +++++++++++++++---- 2 files changed, 38 insertions(+), 10 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index f0c19f61db807..dd9783e43443a 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -4036,9 +4036,14 @@ off - Unconditionally disable mitigations. Is enforced by spectre_v2=off + prctl - Indirect branch speculation is enabled, + but mitigation can be enabled via prctl + per thread. The mitigation control state + is inherited on fork. + auto - Kernel selects the mitigation depending on the available CPU features and vulnerability. - Default is off. + Default is prctl. Not specifying this option is equivalent to spectre_v2_user=auto. diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index bcd39b928fea6..d87a877b1b383 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -254,11 +254,13 @@ enum spectre_v2_user_cmd { SPECTRE_V2_USER_CMD_NONE, SPECTRE_V2_USER_CMD_AUTO, SPECTRE_V2_USER_CMD_FORCE, + SPECTRE_V2_USER_CMD_PRCTL, }; static const char * const spectre_v2_user_strings[] = { [SPECTRE_V2_USER_NONE] = "User space: Vulnerable", [SPECTRE_V2_USER_STRICT] = "User space: Mitigation: STIBP protection", + [SPECTRE_V2_USER_PRCTL] = "User space: Mitigation: STIBP via prctl", }; static const struct { @@ -269,6 +271,7 @@ static const struct { { "auto", SPECTRE_V2_USER_CMD_AUTO, false }, { "off", SPECTRE_V2_USER_CMD_NONE, false }, { "on", SPECTRE_V2_USER_CMD_FORCE, true }, + { "prctl", SPECTRE_V2_USER_CMD_PRCTL, false }, }; static void __init spec_v2_user_print_cond(const char *reason, bool secure) @@ -323,12 +326,15 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) smt_possible = false; switch (spectre_v2_parse_user_cmdline(v2_cmd)) { - case SPECTRE_V2_USER_CMD_AUTO: case SPECTRE_V2_USER_CMD_NONE: goto set_mode; case SPECTRE_V2_USER_CMD_FORCE: mode = SPECTRE_V2_USER_STRICT; break; + case SPECTRE_V2_USER_CMD_AUTO: + case SPECTRE_V2_USER_CMD_PRCTL: + mode = SPECTRE_V2_USER_PRCTL; + break; } /* Initialize Indirect Branch Prediction Barrier */ @@ -339,6 +345,9 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) case SPECTRE_V2_USER_STRICT: static_branch_enable(&switch_mm_always_ibpb); break; + case SPECTRE_V2_USER_PRCTL: + static_branch_enable(&switch_mm_cond_ibpb); + break; default: break; } @@ -351,6 +360,12 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) return; + /* + * If SMT is not possible or STIBP is not available clear the STIPB + * mode. + */ + if (!smt_possible || !boot_cpu_has(X86_FEATURE_STIBP)) + mode = SPECTRE_V2_USER_NONE; set_mode: spectre_v2_user = mode; /* Only print the STIBP mode when SMT possible */ @@ -549,6 +564,15 @@ static void update_stibp_strict(void) on_each_cpu(update_stibp_msr, NULL, 1); } +/* Update the static key controlling the evaluation of TIF_SPEC_IB */ +static void update_indir_branch_cond(void) +{ + if (sched_smt_active()) + static_branch_enable(&switch_to_cond_stibp); + else + static_branch_disable(&switch_to_cond_stibp); +} + void arch_smt_update(void) { /* Enhanced IBRS implies STIBP. No update required. */ @@ -564,6 +588,7 @@ void arch_smt_update(void) update_stibp_strict(); break; case SPECTRE_V2_USER_PRCTL: + update_indir_branch_cond(); break; } @@ -1035,7 +1060,8 @@ static char *stibp_state(void) case SPECTRE_V2_USER_STRICT: return ", STIBP: forced"; case SPECTRE_V2_USER_PRCTL: - return ""; + if (static_key_enabled(&switch_to_cond_stibp)) + return ", STIBP: conditional"; } return ""; } @@ -1043,14 +1069,11 @@ static char *stibp_state(void) static char *ibpb_state(void) { if (boot_cpu_has(X86_FEATURE_IBPB)) { - switch (spectre_v2_user) { - case SPECTRE_V2_USER_NONE: - return ", IBPB: disabled"; - case SPECTRE_V2_USER_STRICT: + if (static_key_enabled(&switch_mm_always_ibpb)) return ", IBPB: always-on"; - case SPECTRE_V2_USER_PRCTL: - return ""; - } + if (static_key_enabled(&switch_mm_cond_ibpb)) + return ", IBPB: conditional"; + return ", IBPB: disabled"; } return ""; } -- GitLab From ca97dd0009e688a67f6e22ce3fce0b4523741243 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 25 Nov 2018 19:33:55 +0100 Subject: [PATCH 1896/2269] x86/speculation: Add seccomp Spectre v2 user space protection mode commit 6b3e64c237c072797a9ec918654a60e3a46488e2 upstream If 'prctl' mode of user space protection from spectre v2 is selected on the kernel command-line, STIBP and IBPB are applied on tasks which restrict their indirect branch speculation via prctl. SECCOMP enables the SSBD mitigation for sandboxed tasks already, so it makes sense to prevent spectre v2 user space to user space attacks as well. The Intel mitigation guide documents how STIPB works: Setting bit 1 (STIBP) of the IA32_SPEC_CTRL MSR on a logical processor prevents the predicted targets of indirect branches on any logical processor of that core from being controlled by software that executes (or executed previously) on another logical processor of the same core. Ergo setting STIBP protects the task itself from being attacked from a task running on a different hyper-thread and protects the tasks running on different hyper-threads from being attacked. While the document suggests that the branch predictors are shielded between the logical processors, the observed performance regressions suggest that STIBP simply disables the branch predictor more or less completely. Of course the document wording is vague, but the fact that there is also no requirement for issuing IBPB when STIBP is used points clearly in that direction. The kernel still issues IBPB even when STIBP is used until Intel clarifies the whole mechanism. IBPB is issued when the task switches out, so malicious sandbox code cannot mistrain the branch predictor for the next user space task on the same logical processor. Signed-off-by: Jiri Kosina Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Tim Chen Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181125185006.051663132@linutronix.de Signed-off-by: Greg Kroah-Hartman --- Documentation/admin-guide/kernel-parameters.txt | 9 ++++++++- arch/x86/include/asm/nospec-branch.h | 1 + arch/x86/kernel/cpu/bugs.c | 17 ++++++++++++++++- 3 files changed, 25 insertions(+), 2 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index dd9783e43443a..10a28f7b1ec63 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -4041,9 +4041,16 @@ per thread. The mitigation control state is inherited on fork. + seccomp + - Same as "prctl" above, but all seccomp + threads will enable the mitigation unless + they explicitly opt out. + auto - Kernel selects the mitigation depending on the available CPU features and vulnerability. - Default is prctl. + + Default mitigation: + If CONFIG_SECCOMP=y then "seccomp", otherwise "prctl" Not specifying this option is equivalent to spectre_v2_user=auto. diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 35ef92c5cc663..a633767419f29 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -233,6 +233,7 @@ enum spectre_v2_user_mitigation { SPECTRE_V2_USER_NONE, SPECTRE_V2_USER_STRICT, SPECTRE_V2_USER_PRCTL, + SPECTRE_V2_USER_SECCOMP, }; /* The Speculative Store Bypass disable variants */ diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index d87a877b1b383..5a03082ee189c 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -255,12 +255,14 @@ enum spectre_v2_user_cmd { SPECTRE_V2_USER_CMD_AUTO, SPECTRE_V2_USER_CMD_FORCE, SPECTRE_V2_USER_CMD_PRCTL, + SPECTRE_V2_USER_CMD_SECCOMP, }; static const char * const spectre_v2_user_strings[] = { [SPECTRE_V2_USER_NONE] = "User space: Vulnerable", [SPECTRE_V2_USER_STRICT] = "User space: Mitigation: STIBP protection", [SPECTRE_V2_USER_PRCTL] = "User space: Mitigation: STIBP via prctl", + [SPECTRE_V2_USER_SECCOMP] = "User space: Mitigation: STIBP via seccomp and prctl", }; static const struct { @@ -272,6 +274,7 @@ static const struct { { "off", SPECTRE_V2_USER_CMD_NONE, false }, { "on", SPECTRE_V2_USER_CMD_FORCE, true }, { "prctl", SPECTRE_V2_USER_CMD_PRCTL, false }, + { "seccomp", SPECTRE_V2_USER_CMD_SECCOMP, false }, }; static void __init spec_v2_user_print_cond(const char *reason, bool secure) @@ -331,10 +334,16 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) case SPECTRE_V2_USER_CMD_FORCE: mode = SPECTRE_V2_USER_STRICT; break; - case SPECTRE_V2_USER_CMD_AUTO: case SPECTRE_V2_USER_CMD_PRCTL: mode = SPECTRE_V2_USER_PRCTL; break; + case SPECTRE_V2_USER_CMD_AUTO: + case SPECTRE_V2_USER_CMD_SECCOMP: + if (IS_ENABLED(CONFIG_SECCOMP)) + mode = SPECTRE_V2_USER_SECCOMP; + else + mode = SPECTRE_V2_USER_PRCTL; + break; } /* Initialize Indirect Branch Prediction Barrier */ @@ -346,6 +355,7 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) static_branch_enable(&switch_mm_always_ibpb); break; case SPECTRE_V2_USER_PRCTL: + case SPECTRE_V2_USER_SECCOMP: static_branch_enable(&switch_mm_cond_ibpb); break; default: @@ -588,6 +598,7 @@ void arch_smt_update(void) update_stibp_strict(); break; case SPECTRE_V2_USER_PRCTL: + case SPECTRE_V2_USER_SECCOMP: update_indir_branch_cond(); break; } @@ -830,6 +841,8 @@ void arch_seccomp_spec_mitigate(struct task_struct *task) { if (ssb_mode == SPEC_STORE_BYPASS_SECCOMP) ssb_prctl_set(task, PR_SPEC_FORCE_DISABLE); + if (spectre_v2_user == SPECTRE_V2_USER_SECCOMP) + ib_prctl_set(task, PR_SPEC_FORCE_DISABLE); } #endif @@ -861,6 +874,7 @@ static int ib_prctl_get(struct task_struct *task) case SPECTRE_V2_USER_NONE: return PR_SPEC_ENABLE; case SPECTRE_V2_USER_PRCTL: + case SPECTRE_V2_USER_SECCOMP: if (task_spec_ib_force_disable(task)) return PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE; if (task_spec_ib_disable(task)) @@ -1060,6 +1074,7 @@ static char *stibp_state(void) case SPECTRE_V2_USER_STRICT: return ", STIBP: forced"; case SPECTRE_V2_USER_PRCTL: + case SPECTRE_V2_USER_SECCOMP: if (static_key_enabled(&switch_to_cond_stibp)) return ", STIBP: conditional"; } -- GitLab From 78085d7e3816606d6aa211245ab2cde3463d7795 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 25 Nov 2018 19:33:56 +0100 Subject: [PATCH 1897/2269] x86/speculation: Provide IBPB always command line options commit 55a974021ec952ee460dc31ca08722158639de72 upstream Provide the possibility to enable IBPB always in combination with 'prctl' and 'seccomp'. Add the extra command line options and rework the IBPB selection to evaluate the command instead of the mode selected by the STIPB switch case. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Tim Chen Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181125185006.144047038@linutronix.de Signed-off-by: Greg Kroah-Hartman --- .../admin-guide/kernel-parameters.txt | 12 +++++++ arch/x86/kernel/cpu/bugs.c | 34 +++++++++++++------ 2 files changed, 35 insertions(+), 11 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 10a28f7b1ec63..5f3d581426004 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -4041,11 +4041,23 @@ per thread. The mitigation control state is inherited on fork. + prctl,ibpb + - Like "prctl" above, but only STIBP is + controlled per thread. IBPB is issued + always when switching between different user + space processes. + seccomp - Same as "prctl" above, but all seccomp threads will enable the mitigation unless they explicitly opt out. + seccomp,ibpb + - Like "seccomp" above, but only STIBP is + controlled per thread. IBPB is issued + always when switching between different + user space processes. + auto - Kernel selects the mitigation depending on the available CPU features and vulnerability. diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 5a03082ee189c..f7a6d6203e13b 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -255,7 +255,9 @@ enum spectre_v2_user_cmd { SPECTRE_V2_USER_CMD_AUTO, SPECTRE_V2_USER_CMD_FORCE, SPECTRE_V2_USER_CMD_PRCTL, + SPECTRE_V2_USER_CMD_PRCTL_IBPB, SPECTRE_V2_USER_CMD_SECCOMP, + SPECTRE_V2_USER_CMD_SECCOMP_IBPB, }; static const char * const spectre_v2_user_strings[] = { @@ -270,11 +272,13 @@ static const struct { enum spectre_v2_user_cmd cmd; bool secure; } v2_user_options[] __initdata = { - { "auto", SPECTRE_V2_USER_CMD_AUTO, false }, - { "off", SPECTRE_V2_USER_CMD_NONE, false }, - { "on", SPECTRE_V2_USER_CMD_FORCE, true }, - { "prctl", SPECTRE_V2_USER_CMD_PRCTL, false }, - { "seccomp", SPECTRE_V2_USER_CMD_SECCOMP, false }, + { "auto", SPECTRE_V2_USER_CMD_AUTO, false }, + { "off", SPECTRE_V2_USER_CMD_NONE, false }, + { "on", SPECTRE_V2_USER_CMD_FORCE, true }, + { "prctl", SPECTRE_V2_USER_CMD_PRCTL, false }, + { "prctl,ibpb", SPECTRE_V2_USER_CMD_PRCTL_IBPB, false }, + { "seccomp", SPECTRE_V2_USER_CMD_SECCOMP, false }, + { "seccomp,ibpb", SPECTRE_V2_USER_CMD_SECCOMP_IBPB, false }, }; static void __init spec_v2_user_print_cond(const char *reason, bool secure) @@ -320,6 +324,7 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) { enum spectre_v2_user_mitigation mode = SPECTRE_V2_USER_NONE; bool smt_possible = IS_ENABLED(CONFIG_SMP); + enum spectre_v2_user_cmd cmd; if (!boot_cpu_has(X86_FEATURE_IBPB) && !boot_cpu_has(X86_FEATURE_STIBP)) return; @@ -328,17 +333,20 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) cpu_smt_control == CPU_SMT_NOT_SUPPORTED) smt_possible = false; - switch (spectre_v2_parse_user_cmdline(v2_cmd)) { + cmd = spectre_v2_parse_user_cmdline(v2_cmd); + switch (cmd) { case SPECTRE_V2_USER_CMD_NONE: goto set_mode; case SPECTRE_V2_USER_CMD_FORCE: mode = SPECTRE_V2_USER_STRICT; break; case SPECTRE_V2_USER_CMD_PRCTL: + case SPECTRE_V2_USER_CMD_PRCTL_IBPB: mode = SPECTRE_V2_USER_PRCTL; break; case SPECTRE_V2_USER_CMD_AUTO: case SPECTRE_V2_USER_CMD_SECCOMP: + case SPECTRE_V2_USER_CMD_SECCOMP_IBPB: if (IS_ENABLED(CONFIG_SECCOMP)) mode = SPECTRE_V2_USER_SECCOMP; else @@ -350,12 +358,15 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) if (boot_cpu_has(X86_FEATURE_IBPB)) { setup_force_cpu_cap(X86_FEATURE_USE_IBPB); - switch (mode) { - case SPECTRE_V2_USER_STRICT: + switch (cmd) { + case SPECTRE_V2_USER_CMD_FORCE: + case SPECTRE_V2_USER_CMD_PRCTL_IBPB: + case SPECTRE_V2_USER_CMD_SECCOMP_IBPB: static_branch_enable(&switch_mm_always_ibpb); break; - case SPECTRE_V2_USER_PRCTL: - case SPECTRE_V2_USER_SECCOMP: + case SPECTRE_V2_USER_CMD_PRCTL: + case SPECTRE_V2_USER_CMD_AUTO: + case SPECTRE_V2_USER_CMD_SECCOMP: static_branch_enable(&switch_mm_cond_ibpb); break; default: @@ -363,7 +374,8 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) } pr_info("mitigation: Enabling %s Indirect Branch Prediction Barrier\n", - mode == SPECTRE_V2_USER_STRICT ? "always-on" : "conditional"); + static_key_enabled(&switch_mm_always_ibpb) ? + "always-on" : "conditional"); } /* If enhanced IBRS is enabled no STIPB required */ -- GitLab From a06361526cc71cb0ebef6ab7aa698ec63465b562 Mon Sep 17 00:00:00 2001 From: Junaid Shahid Date: Wed, 31 Oct 2018 14:53:57 -0700 Subject: [PATCH 1898/2269] kvm: mmu: Fix race in emulated page table writes commit 0e0fee5c539b61fdd098332e0e2cc375d9073706 upstream. When a guest page table is updated via an emulated write, kvm_mmu_pte_write() is called to update the shadow PTE using the just written guest PTE value. But if two emulated guest PTE writes happened concurrently, it is possible that the guest PTE and the shadow PTE end up being out of sync. Emulated writes do not mark the shadow page as unsync-ed, so this inconsistency will not be resolved even by a guest TLB flush (unless the page was marked as unsync-ed at some other point). This is fixed by re-reading the current value of the guest PTE after the MMU lock has been acquired instead of just using the value that was written prior to calling kvm_mmu_pte_write(). Signed-off-by: Junaid Shahid Reviewed-by: Wanpeng Li Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/mmu.c | 27 +++++++++------------------ 1 file changed, 9 insertions(+), 18 deletions(-) diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index d755e0d44ac1c..364d9895dd561 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -4734,9 +4734,9 @@ static bool need_remote_flush(u64 old, u64 new) } static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa, - const u8 *new, int *bytes) + int *bytes) { - u64 gentry; + u64 gentry = 0; int r; /* @@ -4748,22 +4748,12 @@ static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa, /* Handle a 32-bit guest writing two halves of a 64-bit gpte */ *gpa &= ~(gpa_t)7; *bytes = 8; - r = kvm_vcpu_read_guest(vcpu, *gpa, &gentry, 8); - if (r) - gentry = 0; - new = (const u8 *)&gentry; } - switch (*bytes) { - case 4: - gentry = *(const u32 *)new; - break; - case 8: - gentry = *(const u64 *)new; - break; - default: - gentry = 0; - break; + if (*bytes == 4 || *bytes == 8) { + r = kvm_vcpu_read_guest_atomic(vcpu, *gpa, &gentry, *bytes); + if (r) + gentry = 0; } return gentry; @@ -4876,8 +4866,6 @@ static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes); - gentry = mmu_pte_write_fetch_gpte(vcpu, &gpa, new, &bytes); - /* * No need to care whether allocation memory is successful * or not since pte prefetch is skiped if it does not have @@ -4886,6 +4874,9 @@ static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, mmu_topup_memory_caches(vcpu); spin_lock(&vcpu->kvm->mmu_lock); + + gentry = mmu_pte_write_fetch_gpte(vcpu, &gpa, &bytes); + ++vcpu->kvm->stat.mmu_pte_write; kvm_mmu_audit(vcpu, AUDIT_PRE_PTE_WRITE); -- GitLab From 57e972ecad4fdee027a690b64c7d619dae489015 Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Tue, 22 May 2018 09:54:20 -0700 Subject: [PATCH 1899/2269] kvm: svm: Ensure an IBPB on all affected CPUs when freeing a vmcb commit fd65d3142f734bc4376053c8d75670041903134d upstream. Previously, we only called indirect_branch_prediction_barrier on the logical CPU that freed a vmcb. This function should be called on all logical CPUs that last loaded the vmcb in question. Fixes: 15d45071523d ("KVM/x86: Add IBPB support") Reported-by: Neel Natu Signed-off-by: Jim Mattson Reviewed-by: Konrad Rzeszutek Wilk Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/svm.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index e9dbd62bb46e1..17f08db345479 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1733,21 +1733,31 @@ out: return ERR_PTR(err); } +static void svm_clear_current_vmcb(struct vmcb *vmcb) +{ + int i; + + for_each_online_cpu(i) + cmpxchg(&per_cpu(svm_data, i)->current_vmcb, vmcb, NULL); +} + static void svm_free_vcpu(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); + /* + * The vmcb page can be recycled, causing a false negative in + * svm_vcpu_load(). So, ensure that no logical CPU has this + * vmcb page recorded as its current vmcb. + */ + svm_clear_current_vmcb(svm->vmcb); + __free_page(pfn_to_page(__sme_clr(svm->vmcb_pa) >> PAGE_SHIFT)); __free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER); __free_page(virt_to_page(svm->nested.hsave)); __free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER); kvm_vcpu_uninit(vcpu); kmem_cache_free(kvm_vcpu_cache, svm); - /* - * The vmcb page can be recycled, causing a false negative in - * svm_vcpu_load(). So do a full IBPB now. - */ - indirect_branch_prediction_barrier(); } static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) -- GitLab From 08b9a96720a1e67dfed4b22a5192892d556fc5fc Mon Sep 17 00:00:00 2001 From: Liran Alon Date: Thu, 8 Nov 2018 00:43:06 +0200 Subject: [PATCH 1900/2269] KVM: x86: Fix kernel info-leak in KVM_HC_CLOCK_PAIRING hypercall commit bcbfbd8ec21096027f1ee13ce6c185e8175166f6 upstream. kvm_pv_clock_pairing() allocates local var "struct kvm_clock_pairing clock_pairing" on stack and initializes all it's fields besides padding (clock_pairing.pad[]). Because clock_pairing var is written completely (including padding) to guest memory, failure to init struct padding results in kernel info-leak. Fix the issue by making sure to also init the padding with zeroes. Fixes: 55dd00a73a51 ("KVM: x86: add KVM_HC_CLOCK_PAIRING hypercall") Reported-by: syzbot+a8ef68d71211ba264f56@syzkaller.appspotmail.com Reviewed-by: Mark Kanda Signed-off-by: Liran Alon Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/x86.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 8d688b213504c..7c4d02dba1101 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6378,6 +6378,7 @@ static int kvm_pv_clock_pairing(struct kvm_vcpu *vcpu, gpa_t paddr, clock_pairing.nsec = ts.tv_nsec; clock_pairing.tsc = kvm_read_l1_tsc(vcpu, cycle); clock_pairing.flags = 0; + memset(&clock_pairing.pad, 0, sizeof(clock_pairing.pad)); ret = 0; if (kvm_write_guest(vcpu->kvm, paddr, &clock_pairing, -- GitLab From 83f00ab9a7c03e9f1410727d985b7fe9473002e1 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Tue, 20 Nov 2018 16:34:18 +0800 Subject: [PATCH 1901/2269] KVM: X86: Fix scan ioapic use-before-initialization MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit e97f852fd4561e77721bb9a4e0ea9d98305b1e93 upstream. Reported by syzkaller: BUG: unable to handle kernel NULL pointer dereference at 00000000000001c8 PGD 80000003ec4da067 P4D 80000003ec4da067 PUD 3f7bfa067 PMD 0 Oops: 0000 [#1] PREEMPT SMP PTI CPU: 7 PID: 5059 Comm: debug Tainted: G OE 4.19.0-rc5 #16 RIP: 0010:__lock_acquire+0x1a6/0x1990 Call Trace: lock_acquire+0xdb/0x210 _raw_spin_lock+0x38/0x70 kvm_ioapic_scan_entry+0x3e/0x110 [kvm] vcpu_enter_guest+0x167e/0x1910 [kvm] kvm_arch_vcpu_ioctl_run+0x35c/0x610 [kvm] kvm_vcpu_ioctl+0x3e9/0x6d0 [kvm] do_vfs_ioctl+0xa5/0x690 ksys_ioctl+0x6d/0x80 __x64_sys_ioctl+0x1a/0x20 do_syscall_64+0x83/0x6e0 entry_SYSCALL_64_after_hwframe+0x49/0xbe The reason is that the testcase writes hyperv synic HV_X64_MSR_SINT6 msr and triggers scan ioapic logic to load synic vectors into EOI exit bitmap. However, irqchip is not initialized by this simple testcase, ioapic/apic objects should not be accessed. This can be triggered by the following program: #define _GNU_SOURCE #include #include #include #include #include #include #include #include uint64_t r[3] = {0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff}; int main(void) { syscall(__NR_mmap, 0x20000000, 0x1000000, 3, 0x32, -1, 0); long res = 0; memcpy((void*)0x20000040, "/dev/kvm", 9); res = syscall(__NR_openat, 0xffffffffffffff9c, 0x20000040, 0, 0); if (res != -1) r[0] = res; res = syscall(__NR_ioctl, r[0], 0xae01, 0); if (res != -1) r[1] = res; res = syscall(__NR_ioctl, r[1], 0xae41, 0); if (res != -1) r[2] = res; memcpy( (void*)0x20000080, "\x01\x00\x00\x00\x00\x5b\x61\xbb\x96\x00\x00\x40\x00\x00\x00\x00\x01\x00" "\x08\x00\x00\x00\x00\x00\x0b\x77\xd1\x78\x4d\xd8\x3a\xed\xb1\x5c\x2e\x43" "\xaa\x43\x39\xd6\xff\xf5\xf0\xa8\x98\xf2\x3e\x37\x29\x89\xde\x88\xc6\x33" "\xfc\x2a\xdb\xb7\xe1\x4c\xac\x28\x61\x7b\x9c\xa9\xbc\x0d\xa0\x63\xfe\xfe" "\xe8\x75\xde\xdd\x19\x38\xdc\x34\xf5\xec\x05\xfd\xeb\x5d\xed\x2e\xaf\x22" "\xfa\xab\xb7\xe4\x42\x67\xd0\xaf\x06\x1c\x6a\x35\x67\x10\x55\xcb", 106); syscall(__NR_ioctl, r[2], 0x4008ae89, 0x20000080); syscall(__NR_ioctl, r[2], 0xae80, 0); return 0; } This patch fixes it by bailing out scan ioapic if ioapic is not initialized in kernel. Reported-by: Wei Wu Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Wei Wu Signed-off-by: Wanpeng Li Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/x86.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 7c4d02dba1101..f24329659bea8 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6885,7 +6885,8 @@ static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu) else { if (kvm_x86_ops->sync_pir_to_irr && vcpu->arch.apicv_active) kvm_x86_ops->sync_pir_to_irr(vcpu); - kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors); + if (ioapic_in_kernel(vcpu->kvm)) + kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors); } bitmap_or((ulong *)eoi_exit_bitmap, vcpu->arch.ioapic_handled_vectors, vcpu_to_synic(vcpu)->vec_bitmap, 256); -- GitLab From d039837ab8cb7fb7095ec9746fb04a854987179e Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Mon, 26 Nov 2018 13:29:41 -0800 Subject: [PATCH 1902/2269] xtensa: enable coprocessors that are being flushed commit 2958b66694e018c552be0b60521fec27e8d12988 upstream. coprocessor_flush_all may be called from a context of a thread that is different from the thread being flushed. In that case contents of the cpenable special register may not match ti->cpenable of the target thread, resulting in unhandled coprocessor exception in the kernel context. Set cpenable special register to the ti->cpenable of the target register for the duration of the flush and restore it afterwards. This fixes the following crash caused by coprocessor register inspection in native gdb: (gdb) p/x $w0 Illegal instruction in kernel: sig: 9 [#1] PREEMPT Call Trace: ___might_sleep+0x184/0x1a4 __might_sleep+0x41/0xac exit_signals+0x14/0x218 do_exit+0xc9/0x8b8 die+0x99/0xa0 do_illegal_instruction+0x18/0x6c common_exception+0x77/0x77 coprocessor_flush+0x16/0x3c arch_ptrace+0x46c/0x674 sys_ptrace+0x2ce/0x3b4 system_call+0x54/0x80 common_exception+0x77/0x77 note: gdb[100] exited with preempt_count 1 Killed Cc: stable@vger.kernel.org Signed-off-by: Max Filippov Signed-off-by: Greg Kroah-Hartman --- arch/xtensa/kernel/process.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/xtensa/kernel/process.c b/arch/xtensa/kernel/process.c index ff4f0ecb03dd1..f1c46bc5d4659 100644 --- a/arch/xtensa/kernel/process.c +++ b/arch/xtensa/kernel/process.c @@ -88,18 +88,21 @@ void coprocessor_release_all(struct thread_info *ti) void coprocessor_flush_all(struct thread_info *ti) { - unsigned long cpenable; + unsigned long cpenable, old_cpenable; int i; preempt_disable(); + RSR_CPENABLE(old_cpenable); cpenable = ti->cpenable; + WSR_CPENABLE(cpenable); for (i = 0; i < XCHAL_CP_MAX; i++) { if ((cpenable & 1) != 0 && coprocessor_owner[i] == ti) coprocessor_flush(ti, i); cpenable >>= 1; } + WSR_CPENABLE(old_cpenable); preempt_enable(); } -- GitLab From b35182226166c03713a10ffebcf2838569c95d57 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Mon, 26 Nov 2018 15:18:26 -0800 Subject: [PATCH 1903/2269] xtensa: fix coprocessor context offset definitions commit 03bc996af0cc71c7f30c384d8ce7260172423b34 upstream. Coprocessor context offsets are used by the assembly code that moves coprocessor context between the individual fields of the thread_info::xtregs_cp structure and coprocessor registers. This fixes coprocessor context clobbering on flushing and reloading during normal user code execution and user process debugging in the presence of more than one coprocessor in the core configuration. Cc: stable@vger.kernel.org Signed-off-by: Max Filippov Signed-off-by: Greg Kroah-Hartman --- arch/xtensa/kernel/asm-offsets.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/xtensa/kernel/asm-offsets.c b/arch/xtensa/kernel/asm-offsets.c index bcb5beb81177e..7df02fc934a93 100644 --- a/arch/xtensa/kernel/asm-offsets.c +++ b/arch/xtensa/kernel/asm-offsets.c @@ -91,14 +91,14 @@ int main(void) DEFINE(THREAD_SP, offsetof (struct task_struct, thread.sp)); DEFINE(THREAD_CPENABLE, offsetof (struct thread_info, cpenable)); #if XTENSA_HAVE_COPROCESSORS - DEFINE(THREAD_XTREGS_CP0, offsetof (struct thread_info, xtregs_cp)); - DEFINE(THREAD_XTREGS_CP1, offsetof (struct thread_info, xtregs_cp)); - DEFINE(THREAD_XTREGS_CP2, offsetof (struct thread_info, xtregs_cp)); - DEFINE(THREAD_XTREGS_CP3, offsetof (struct thread_info, xtregs_cp)); - DEFINE(THREAD_XTREGS_CP4, offsetof (struct thread_info, xtregs_cp)); - DEFINE(THREAD_XTREGS_CP5, offsetof (struct thread_info, xtregs_cp)); - DEFINE(THREAD_XTREGS_CP6, offsetof (struct thread_info, xtregs_cp)); - DEFINE(THREAD_XTREGS_CP7, offsetof (struct thread_info, xtregs_cp)); + DEFINE(THREAD_XTREGS_CP0, offsetof(struct thread_info, xtregs_cp.cp0)); + DEFINE(THREAD_XTREGS_CP1, offsetof(struct thread_info, xtregs_cp.cp1)); + DEFINE(THREAD_XTREGS_CP2, offsetof(struct thread_info, xtregs_cp.cp2)); + DEFINE(THREAD_XTREGS_CP3, offsetof(struct thread_info, xtregs_cp.cp3)); + DEFINE(THREAD_XTREGS_CP4, offsetof(struct thread_info, xtregs_cp.cp4)); + DEFINE(THREAD_XTREGS_CP5, offsetof(struct thread_info, xtregs_cp.cp5)); + DEFINE(THREAD_XTREGS_CP6, offsetof(struct thread_info, xtregs_cp.cp6)); + DEFINE(THREAD_XTREGS_CP7, offsetof(struct thread_info, xtregs_cp.cp7)); #endif DEFINE(THREAD_XTREGS_USER, offsetof (struct thread_info, xtregs_user)); DEFINE(XTREGS_USER_SIZE, sizeof(xtregs_user_t)); -- GitLab From d36e4ee3b3ce4cf93e8fa145841287bbdb140fec Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Mon, 26 Nov 2018 18:06:01 -0800 Subject: [PATCH 1904/2269] xtensa: fix coprocessor part of ptrace_{get,set}xregs commit 38a35a78c5e270cbe53c4fef6b0d3c2da90dd849 upstream. Layout of coprocessor registers in the elf_xtregs_t and xtregs_coprocessor_t may be different due to alignment. Thus it is not always possible to copy data between the xtregs_coprocessor_t structure and the elf_xtregs_t and get correct values for all registers. Use a table of offsets and sizes of individual coprocessor register groups to do coprocessor context copying in the ptrace_getxregs and ptrace_setxregs. This fixes incorrect coprocessor register values reading from the user process by the native gdb on an xtensa core with multiple coprocessors and registers with high alignment requirements. Cc: stable@vger.kernel.org Signed-off-by: Max Filippov Signed-off-by: Greg Kroah-Hartman --- arch/xtensa/kernel/ptrace.c | 42 +++++++++++++++++++++++++++++++++---- 1 file changed, 38 insertions(+), 4 deletions(-) diff --git a/arch/xtensa/kernel/ptrace.c b/arch/xtensa/kernel/ptrace.c index e2461968efb24..7c3ed7d780754 100644 --- a/arch/xtensa/kernel/ptrace.c +++ b/arch/xtensa/kernel/ptrace.c @@ -127,12 +127,37 @@ static int ptrace_setregs(struct task_struct *child, void __user *uregs) } +#if XTENSA_HAVE_COPROCESSORS +#define CP_OFFSETS(cp) \ + { \ + .elf_xtregs_offset = offsetof(elf_xtregs_t, cp), \ + .ti_offset = offsetof(struct thread_info, xtregs_cp.cp), \ + .sz = sizeof(xtregs_ ## cp ## _t), \ + } + +static const struct { + size_t elf_xtregs_offset; + size_t ti_offset; + size_t sz; +} cp_offsets[] = { + CP_OFFSETS(cp0), + CP_OFFSETS(cp1), + CP_OFFSETS(cp2), + CP_OFFSETS(cp3), + CP_OFFSETS(cp4), + CP_OFFSETS(cp5), + CP_OFFSETS(cp6), + CP_OFFSETS(cp7), +}; +#endif + static int ptrace_getxregs(struct task_struct *child, void __user *uregs) { struct pt_regs *regs = task_pt_regs(child); struct thread_info *ti = task_thread_info(child); elf_xtregs_t __user *xtregs = uregs; int ret = 0; + int i __maybe_unused; if (!access_ok(VERIFY_WRITE, uregs, sizeof(elf_xtregs_t))) return -EIO; @@ -140,8 +165,13 @@ static int ptrace_getxregs(struct task_struct *child, void __user *uregs) #if XTENSA_HAVE_COPROCESSORS /* Flush all coprocessor registers to memory. */ coprocessor_flush_all(ti); - ret |= __copy_to_user(&xtregs->cp0, &ti->xtregs_cp, - sizeof(xtregs_coprocessor_t)); + + for (i = 0; i < ARRAY_SIZE(cp_offsets); ++i) + ret |= __copy_to_user((char __user *)xtregs + + cp_offsets[i].elf_xtregs_offset, + (const char *)ti + + cp_offsets[i].ti_offset, + cp_offsets[i].sz); #endif ret |= __copy_to_user(&xtregs->opt, ®s->xtregs_opt, sizeof(xtregs->opt)); @@ -157,6 +187,7 @@ static int ptrace_setxregs(struct task_struct *child, void __user *uregs) struct pt_regs *regs = task_pt_regs(child); elf_xtregs_t *xtregs = uregs; int ret = 0; + int i __maybe_unused; if (!access_ok(VERIFY_READ, uregs, sizeof(elf_xtregs_t))) return -EFAULT; @@ -166,8 +197,11 @@ static int ptrace_setxregs(struct task_struct *child, void __user *uregs) coprocessor_flush_all(ti); coprocessor_release_all(ti); - ret |= __copy_from_user(&ti->xtregs_cp, &xtregs->cp0, - sizeof(xtregs_coprocessor_t)); + for (i = 0; i < ARRAY_SIZE(cp_offsets); ++i) + ret |= __copy_from_user((char *)ti + cp_offsets[i].ti_offset, + (const char __user *)xtregs + + cp_offsets[i].elf_xtregs_offset, + cp_offsets[i].sz); #endif ret |= __copy_from_user(®s->xtregs_opt, &xtregs->opt, sizeof(xtregs->opt)); -- GitLab From 52fa8eaac8149594d57bd4fe0cfeaa329037d400 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 14 Nov 2018 11:35:24 +0000 Subject: [PATCH 1905/2269] Btrfs: ensure path name is null terminated at btrfs_control_ioctl commit f505754fd6599230371cb01b9332754ddc104be1 upstream. We were using the path name received from user space without checking that it is null terminated. While btrfs-progs is well behaved and does proper validation and null termination, someone could call the ioctl and pass a non-null terminated patch, leading to buffer overrun problems in the kernel. The ioctl is protected by CAP_SYS_ADMIN. So just set the last byte of the path to a null character, similar to what we do in other ioctls (add/remove/resize device, snapshot creation, etc). CC: stable@vger.kernel.org # 4.4+ Reviewed-by: Anand Jain Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/super.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index fe960d5e89137..49a02bf091aea 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -2176,6 +2176,7 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd, vol = memdup_user((void __user *)arg, sizeof(*vol)); if (IS_ERR(vol)) return PTR_ERR(vol); + vol->name[BTRFS_PATH_NAME_MAX] = '\0'; switch (cmd) { case BTRFS_IOC_SCAN_DEV: -- GitLab From e380f318e63d851607cfc8b288281668aad11d53 Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Fri, 23 Nov 2018 18:10:15 +0800 Subject: [PATCH 1906/2269] btrfs: relocation: set trans to be NULL after ending transaction commit 42a657f57628402c73237547f0134e083e2f6764 upstream. The function relocate_block_group calls btrfs_end_transaction to release trans when update_backref_cache returns 1, and then continues the loop body. If btrfs_block_rsv_refill fails this time, it will jump out the loop and the freed trans will be accessed. This may result in a use-after-free bug. The patch assigns NULL to trans after trans is released so that it will not be accessed. Fixes: 0647bf564f1 ("Btrfs: improve forever loop when doing balance relocation") CC: stable@vger.kernel.org # 4.4+ Reviewed-by: Qu Wenruo Signed-off-by: Pan Bian Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/relocation.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index eeae2c3ab17e5..5feb8b03ffe86 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -4048,6 +4048,7 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc) restart: if (update_backref_cache(trans, &rc->backref_cache)) { btrfs_end_transaction(trans); + trans = NULL; continue; } -- GitLab From ee48a9df170c233b406d49aa8840625a51ee9c0c Mon Sep 17 00:00:00 2001 From: Hou Zhiqiang Date: Wed, 7 Nov 2018 05:16:49 +0000 Subject: [PATCH 1907/2269] PCI: layerscape: Fix wrong invocation of outbound window disable accessor commit c6fd6fe9dea44732cdcd970f1130b8cc50ad685a upstream. The order of parameters is not correct when invoking the outbound window disable routine. Fix it. Fixes: 4a2745d760fa ("PCI: layerscape: Disable outbound windows configured by bootloader") Signed-off-by: Hou Zhiqiang [lorenzo.pieralisi@arm.com: commit log] Signed-off-by: Lorenzo Pieralisi Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/pci/dwc/pci-layerscape.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/dwc/pci-layerscape.c b/drivers/pci/dwc/pci-layerscape.c index 87fa486bee2cd..1ede4b60aac30 100644 --- a/drivers/pci/dwc/pci-layerscape.c +++ b/drivers/pci/dwc/pci-layerscape.c @@ -89,7 +89,7 @@ static void ls_pcie_disable_outbound_atus(struct ls_pcie *pcie) int i; for (i = 0; i < PCIE_IATU_NUM; i++) - dw_pcie_disable_atu(pcie->pci, DW_PCIE_REGION_OUTBOUND, i); + dw_pcie_disable_atu(pcie->pci, i, DW_PCIE_REGION_OUTBOUND); } static int ls1021_pcie_link_up(struct dw_pcie *pci) -- GitLab From ad953dfdfa9543d224abd18fbf12bcac6454c376 Mon Sep 17 00:00:00 2001 From: Christoph Muellner Date: Tue, 13 Nov 2018 11:25:35 +0100 Subject: [PATCH 1908/2269] arm64: dts: rockchip: Fix PCIe reset polarity for rk3399-puma-haikou. commit c1d91f86a1b4c9c05854d59c6a0abd5d0f75b849 upstream. This patch fixes the wrong polarity setting for the PCIe host driver's pre-reset pin for rk3399-puma-haikou. Without this patch link training will most likely fail. Fixes: 60fd9f72ce8a ("arm64: dts: rockchip: add Haikou baseboard with RK3399-Q7 SoM") Cc: stable@vger.kernel.org Signed-off-by: Christoph Muellner Signed-off-by: Heiko Stuebner Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts b/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts index 9a7486058455f..eea7f8f070cf8 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts @@ -130,7 +130,7 @@ }; &pcie0 { - ep-gpios = <&gpio4 RK_PC6 GPIO_ACTIVE_LOW>; + ep-gpios = <&gpio4 RK_PC6 GPIO_ACTIVE_HIGH>; num-lanes = <4>; pinctrl-names = "default"; pinctrl-0 = <&pcie_clkreqn_cpm>; -- GitLab From 855eefd9124a5a963625c046faca862c9369b37c Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 27 Nov 2018 14:41:37 +0100 Subject: [PATCH 1909/2269] x86/MCE/AMD: Fix the thresholding machinery initialization order MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 60c8144afc287ef09ce8c1230c6aa972659ba1bb upstream. Currently, the code sets up the thresholding interrupt vector and only then goes about initializing the thresholding banks. Which is wrong, because an early thresholding interrupt would cause a NULL pointer dereference when accessing those banks and prevent the machine from booting. Therefore, set the thresholding interrupt vector only *after* having initialized the banks successfully. Fixes: 18807ddb7f88 ("x86/mce/AMD: Reset Threshold Limit after logging error") Reported-by: Rafał Miłecki Reported-by: John Clemens Signed-off-by: Borislav Petkov Tested-by: Rafał Miłecki Tested-by: John Clemens Cc: Aravind Gopalakrishnan Cc: linux-edac@vger.kernel.org Cc: stable@vger.kernel.org Cc: Tony Luck Cc: x86@kernel.org Cc: Yazen Ghannam Link: https://lkml.kernel.org/r/20181127101700.2964-1-zajec5@gmail.com Link: https://bugzilla.kernel.org/show_bug.cgi?id=201291 Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/mcheck/mce_amd.c | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index dbcb010067496..beec0daecbc5c 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -56,7 +56,7 @@ /* Threshold LVT offset is at MSR0xC0000410[15:12] */ #define SMCA_THR_LVT_OFF 0xF000 -static bool thresholding_en; +static bool thresholding_irq_en; static const char * const th_names[] = { "load_store", @@ -533,9 +533,8 @@ prepare_threshold_block(unsigned int bank, unsigned int block, u32 addr, set_offset: offset = setup_APIC_mce_threshold(offset, new); - - if ((offset == new) && (mce_threshold_vector != amd_threshold_interrupt)) - mce_threshold_vector = amd_threshold_interrupt; + if (offset == new) + thresholding_irq_en = true; done: mce_threshold_block_init(&b, offset); @@ -1356,9 +1355,6 @@ int mce_threshold_remove_device(unsigned int cpu) { unsigned int bank; - if (!thresholding_en) - return 0; - for (bank = 0; bank < mca_cfg.banks; ++bank) { if (!(per_cpu(bank_map, cpu) & (1 << bank))) continue; @@ -1376,9 +1372,6 @@ int mce_threshold_create_device(unsigned int cpu) struct threshold_bank **bp; int err = 0; - if (!thresholding_en) - return 0; - bp = per_cpu(threshold_banks, cpu); if (bp) return 0; @@ -1407,9 +1400,6 @@ static __init int threshold_init_device(void) { unsigned lcpu = 0; - if (mce_threshold_vector == amd_threshold_interrupt) - thresholding_en = true; - /* to hit CPUs online before the notifier is up */ for_each_online_cpu(lcpu) { int err = mce_threshold_create_device(lcpu); @@ -1418,6 +1408,9 @@ static __init int threshold_init_device(void) return err; } + if (thresholding_irq_en) + mce_threshold_vector = amd_threshold_interrupt; + return 0; } /* -- GitLab From e8499ab5b93aca271c343786084c000c6f7839b5 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 20 Nov 2018 11:26:35 +0100 Subject: [PATCH 1910/2269] x86/fpu: Disable bottom halves while loading FPU registers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 68239654acafe6aad5a3c1dc7237e60accfebc03 upstream. The sequence fpu->initialized = 1; /* step A */ preempt_disable(); /* step B */ fpu__restore(fpu); preempt_enable(); in __fpu__restore_sig() is racy in regard to a context switch. For 32bit frames, __fpu__restore_sig() prepares the FPU state within fpu->state. To ensure that a context switch (switch_fpu_prepare() in particular) does not modify fpu->state it uses fpu__drop() which sets fpu->initialized to 0. After fpu->initialized is cleared, the CPU's FPU state is not saved to fpu->state during a context switch. The new state is loaded via fpu__restore(). It gets loaded into fpu->state from userland and ensured it is sane. fpu->initialized is then set to 1 in order to avoid fpu__initialize() doing anything (overwrite the new state) which is part of fpu__restore(). A context switch between step A and B above would save CPU's current FPU registers to fpu->state and overwrite the newly prepared state. This looks like a tiny race window but the Kernel Test Robot reported this back in 2016 while we had lazy FPU support. Borislav Petkov made the link between that report and another patch that has been posted. Since the removal of the lazy FPU support, this race goes unnoticed because the warning has been removed. Disable bottom halves around the restore sequence to avoid the race. BH need to be disabled because BH is allowed to run (even with preemption disabled) and might invoke kernel_fpu_begin() by doing IPsec. [ bp: massage commit message a bit. ] Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Borislav Petkov Acked-by: Ingo Molnar Acked-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Dave Hansen Cc: "H. Peter Anvin" Cc: "Jason A. Donenfeld" Cc: kvm ML Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Rik van Riel Cc: stable@vger.kernel.org Cc: x86-ml Link: http://lkml.kernel.org/r/20181120102635.ddv3fvavxajjlfqk@linutronix.de Link: https://lkml.kernel.org/r/20160226074940.GA28911@pd.tnic Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/fpu/signal.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index 61a949d84dfa5..d99a8ee9e185e 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -344,10 +344,10 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) sanitize_restored_xstate(tsk, &env, xfeatures, fx_only); } + local_bh_disable(); fpu->initialized = 1; - preempt_disable(); fpu__restore(fpu); - preempt_enable(); + local_bh_enable(); return err; } else { -- GitLab From fae1bec5c2b29e809bd81b2ce8b5ba979ad23a92 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 21 Nov 2018 11:16:10 +0100 Subject: [PATCH 1911/2269] perf/x86/intel: Move branch tracing setup to the Intel-specific source file commit ed6101bbf6266ee83e620b19faa7c6ad56bb41ab upstream. Moving branch tracing setup to Intel core object into separate intel_pmu_bts_config function, because it's Intel specific. Suggested-by: Peter Zijlstra Signed-off-by: Jiri Olsa Acked-by: Peter Zijlstra Cc: Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Link: http://lkml.kernel.org/r/20181121101612.16272-1-jolsa@kernel.org Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/events/core.c | 20 ------------------ arch/x86/events/intel/core.c | 41 +++++++++++++++++++++++++++++++++++- 2 files changed, 40 insertions(+), 21 deletions(-) diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index e5097dc85a06c..7d12b0d1f3591 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -438,26 +438,6 @@ int x86_setup_perfctr(struct perf_event *event) if (config == -1LL) return -EINVAL; - /* - * Branch tracing: - */ - if (attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS && - !attr->freq && hwc->sample_period == 1) { - /* BTS is not supported by this architecture. */ - if (!x86_pmu.bts_active) - return -EOPNOTSUPP; - - /* BTS is currently only allowed for user-mode. */ - if (!attr->exclude_kernel) - return -EOPNOTSUPP; - - /* disallow bts if conflicting events are present */ - if (x86_add_exclusive(x86_lbr_exclusive_lbr)) - return -EBUSY; - - event->destroy = hw_perf_lbr_event_destroy; - } - hwc->config |= config; return 0; diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 228732654cfe1..20501336fd943 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -2973,10 +2973,49 @@ static unsigned long intel_pmu_free_running_flags(struct perf_event *event) return flags; } +static int intel_pmu_bts_config(struct perf_event *event) +{ + struct perf_event_attr *attr = &event->attr; + struct hw_perf_event *hwc = &event->hw; + + if (attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS && + !attr->freq && hwc->sample_period == 1) { + /* BTS is not supported by this architecture. */ + if (!x86_pmu.bts_active) + return -EOPNOTSUPP; + + /* BTS is currently only allowed for user-mode. */ + if (!attr->exclude_kernel) + return -EOPNOTSUPP; + + /* disallow bts if conflicting events are present */ + if (x86_add_exclusive(x86_lbr_exclusive_lbr)) + return -EBUSY; + + event->destroy = hw_perf_lbr_event_destroy; + } + + return 0; +} + +static int core_pmu_hw_config(struct perf_event *event) +{ + int ret = x86_pmu_hw_config(event); + + if (ret) + return ret; + + return intel_pmu_bts_config(event); +} + static int intel_pmu_hw_config(struct perf_event *event) { int ret = x86_pmu_hw_config(event); + if (ret) + return ret; + + ret = intel_pmu_bts_config(event); if (ret) return ret; @@ -3462,7 +3501,7 @@ static __initconst const struct x86_pmu core_pmu = { .enable_all = core_pmu_enable_all, .enable = core_pmu_enable_event, .disable = x86_pmu_disable_event, - .hw_config = x86_pmu_hw_config, + .hw_config = core_pmu_hw_config, .schedule_events = x86_schedule_events, .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, .perfctr = MSR_ARCH_PERFMON_PERFCTR0, -- GitLab From ecef7c1ad46bfdc64c99574488252f4ba029afbc Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 21 Nov 2018 11:16:11 +0100 Subject: [PATCH 1912/2269] perf/x86/intel: Add generic branch tracing check to intel_pmu_has_bts() commit 67266c1080ad56c31af72b9c18355fde8ccc124a upstream. Currently we check the branch tracing only by checking for the PERF_COUNT_HW_BRANCH_INSTRUCTIONS event of PERF_TYPE_HARDWARE type. But we can define the same event with the PERF_TYPE_RAW type. Changing the intel_pmu_has_bts() code to check on event's final hw config value, so both HW types are covered. Adding unlikely to intel_pmu_has_bts() condition calls, because it was used in the original code in intel_bts_constraints. Signed-off-by: Jiri Olsa Acked-by: Peter Zijlstra Cc: Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Link: http://lkml.kernel.org/r/20181121101612.16272-2-jolsa@kernel.org Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/events/intel/core.c | 17 +++-------------- arch/x86/events/perf_event.h | 13 +++++++++---- 2 files changed, 12 insertions(+), 18 deletions(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 20501336fd943..7bb80151bfff9 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -2345,16 +2345,7 @@ done: static struct event_constraint * intel_bts_constraints(struct perf_event *event) { - struct hw_perf_event *hwc = &event->hw; - unsigned int hw_event, bts_event; - - if (event->attr.freq) - return NULL; - - hw_event = hwc->config & INTEL_ARCH_EVENT_MASK; - bts_event = x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS); - - if (unlikely(hw_event == bts_event && hwc->sample_period == 1)) + if (unlikely(intel_pmu_has_bts(event))) return &bts_constraint; return NULL; @@ -2976,10 +2967,8 @@ static unsigned long intel_pmu_free_running_flags(struct perf_event *event) static int intel_pmu_bts_config(struct perf_event *event) { struct perf_event_attr *attr = &event->attr; - struct hw_perf_event *hwc = &event->hw; - if (attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS && - !attr->freq && hwc->sample_period == 1) { + if (unlikely(intel_pmu_has_bts(event))) { /* BTS is not supported by this architecture. */ if (!x86_pmu.bts_active) return -EOPNOTSUPP; @@ -3038,7 +3027,7 @@ static int intel_pmu_hw_config(struct perf_event *event) /* * BTS is set up earlier in this path, so don't account twice */ - if (!intel_pmu_has_bts(event)) { + if (!unlikely(intel_pmu_has_bts(event))) { /* disallow lbr if conflicting events are present */ if (x86_add_exclusive(x86_lbr_exclusive_lbr)) return -EBUSY; diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index c6698c63c047b..3c51fcaf1e34e 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -850,11 +850,16 @@ static inline int amd_pmu_init(void) static inline bool intel_pmu_has_bts(struct perf_event *event) { - if (event->attr.config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS && - !event->attr.freq && event->hw.sample_period == 1) - return true; + struct hw_perf_event *hwc = &event->hw; + unsigned int hw_event, bts_event; + + if (event->attr.freq) + return false; + + hw_event = hwc->config & INTEL_ARCH_EVENT_MASK; + bts_event = x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS); - return false; + return hw_event == bts_event && hwc->sample_period == 1; } int intel_pmu_save_and_restart(struct perf_event *event); -- GitLab From b7c769ebd9db6e67a6e01c962f7c92b706fa6acf Mon Sep 17 00:00:00 2001 From: Maximilian Heyne Date: Fri, 30 Nov 2018 08:35:14 -0700 Subject: [PATCH 1913/2269] fs: fix lost error code in dio_complete commit 41e817bca3acd3980efe5dd7d28af0e6f4ab9247 upstream. commit e259221763a40403d5bb232209998e8c45804ab8 ("fs: simplify the generic_write_sync prototype") reworked callers of generic_write_sync(), and ended up dropping the error return for the directio path. Prior to that commit, in dio_complete(), an error would be bubbled up the stack, but after that commit, errors passed on to dio_complete were eaten up. This was reported on the list earlier, and a fix was proposed in https://lore.kernel.org/lkml/20160921141539.GA17898@infradead.org/, but never followed up with. We recently hit this bug in our testing where fencing io errors, which were previously erroring out with EIO, were being returned as success operations after this commit. The fix proposed on the list earlier was a little short -- it would have still called generic_write_sync() in case `ret` already contained an error. This fix ensures generic_write_sync() is only called when there's no pending error in the write. Additionally, transferred is replaced with ret to bring this code in line with other callers. Fixes: e259221763a4 ("fs: simplify the generic_write_sync prototype") Reported-by: Ravi Nankani Signed-off-by: Maximilian Heyne Reviewed-by: Christoph Hellwig CC: Torsten Mehlan CC: Uwe Dannowski CC: Amit Shah CC: David Woodhouse CC: stable@vger.kernel.org Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- fs/direct-io.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/direct-io.c b/fs/direct-io.c index 625a84aa6484f..40567501015f2 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -304,8 +304,8 @@ static ssize_t dio_complete(struct dio *dio, ssize_t ret, unsigned int flags) */ dio->iocb->ki_pos += transferred; - if (dio->op == REQ_OP_WRITE) - ret = generic_write_sync(dio->iocb, transferred); + if (ret > 0 && dio->op == REQ_OP_WRITE) + ret = generic_write_sync(dio->iocb, ret); dio->iocb->ki_complete(dio->iocb, ret, 0); } -- GitLab From 0d542d58b6b5fe01095484c2628e46afe88312a9 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 23 Nov 2018 18:16:33 +0100 Subject: [PATCH 1914/2269] ALSA: wss: Fix invalid snd_free_pages() at error path commit 7b69154171b407844c273ab4c10b5f0ddcd6aa29 upstream. Some spurious calls of snd_free_pages() have been overlooked and remain in the error paths of wss driver code. Since runtime->dma_area is managed by the PCM core helper, we shouldn't release manually. Drop the superfluous calls. Reviewed-by: Takashi Sakamoto Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/isa/wss/wss_lib.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/sound/isa/wss/wss_lib.c b/sound/isa/wss/wss_lib.c index 8a852042a066a..91cd305cabd7c 100644 --- a/sound/isa/wss/wss_lib.c +++ b/sound/isa/wss/wss_lib.c @@ -1531,7 +1531,6 @@ static int snd_wss_playback_open(struct snd_pcm_substream *substream) if (err < 0) { if (chip->release_dma) chip->release_dma(chip, chip->dma_private_data, chip->dma1); - snd_free_pages(runtime->dma_area, runtime->dma_bytes); return err; } chip->playback_substream = substream; @@ -1572,7 +1571,6 @@ static int snd_wss_capture_open(struct snd_pcm_substream *substream) if (err < 0) { if (chip->release_dma) chip->release_dma(chip, chip->dma_private_data, chip->dma2); - snd_free_pages(runtime->dma_area, runtime->dma_bytes); return err; } chip->capture_substream = substream; -- GitLab From ef8944bf5a97557bf0cb7b0da78bc60eb78bff33 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 23 Nov 2018 15:44:00 +0100 Subject: [PATCH 1915/2269] ALSA: ac97: Fix incorrect bit shift at AC97-SPSA control write commit 7194eda1ba0872d917faf3b322540b4f57f11ba5 upstream. The function snd_ac97_put_spsa() gets the bit shift value from the associated private_value, but it extracts too much; the current code extracts 8 bit values in bits 8-15, but this is a combination of two nibbles (bits 8-11 and bits 12-15) for left and right shifts. Due to the incorrect bits extraction, the actual shift may go beyond the 32bit value, as spotted recently by UBSAN check: UBSAN: Undefined behaviour in sound/pci/ac97/ac97_codec.c:836:7 shift exponent 68 is too large for 32-bit type 'int' This patch fixes the shift value extraction by masking the properly with 0x0f instead of 0xff. Reported-and-tested-by: Meelis Roos Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/ac97/ac97_codec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pci/ac97/ac97_codec.c b/sound/pci/ac97/ac97_codec.c index 1ef7cdf1d3e81..38f355ae1863a 100644 --- a/sound/pci/ac97/ac97_codec.c +++ b/sound/pci/ac97/ac97_codec.c @@ -824,7 +824,7 @@ static int snd_ac97_put_spsa(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_ { struct snd_ac97 *ac97 = snd_kcontrol_chip(kcontrol); int reg = kcontrol->private_value & 0xff; - int shift = (kcontrol->private_value >> 8) & 0xff; + int shift = (kcontrol->private_value >> 8) & 0x0f; int mask = (kcontrol->private_value >> 16) & 0xff; // int invert = (kcontrol->private_value >> 24) & 0xff; unsigned short value, old, new; -- GitLab From 73ce314c172d4f7340b217c08861863665888972 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 22 Nov 2018 14:36:17 +0100 Subject: [PATCH 1916/2269] ALSA: control: Fix race between adding and removing a user element commit e1a7bfe3807974e66f971f2589d4e0197ec0fced upstream. The procedure for adding a user control element has some window opened for race against the concurrent removal of a user element. This was caught by syzkaller, hitting a KASAN use-after-free error. This patch addresses the bug by wrapping the whole procedure to add a user control element with the card->controls_rwsem, instead of only around the increment of card->user_ctl_count. This required a slight code refactoring, too. The function snd_ctl_add() is split to two parts: a core function to add the control element and a part calling it. The former is called from the function for adding a user control element inside the controls_rwsem. One change to be noted is that snd_ctl_notify() for adding a control element gets called inside the controls_rwsem as well while it was called outside the rwsem. But this should be OK, as snd_ctl_notify() takes another (finer) rwlock instead of rwsem, and the call of snd_ctl_notify() inside rwsem is already done in another code path. Reported-by: syzbot+dc09047bce3820621ba2@syzkaller.appspotmail.com Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/core/control.c | 80 +++++++++++++++++++++++++------------------- 1 file changed, 45 insertions(+), 35 deletions(-) diff --git a/sound/core/control.c b/sound/core/control.c index af7e6165e21ed..36571cd49be33 100644 --- a/sound/core/control.c +++ b/sound/core/control.c @@ -347,6 +347,40 @@ static int snd_ctl_find_hole(struct snd_card *card, unsigned int count) return 0; } +/* add a new kcontrol object; call with card->controls_rwsem locked */ +static int __snd_ctl_add(struct snd_card *card, struct snd_kcontrol *kcontrol) +{ + struct snd_ctl_elem_id id; + unsigned int idx; + unsigned int count; + + id = kcontrol->id; + if (id.index > UINT_MAX - kcontrol->count) + return -EINVAL; + + if (snd_ctl_find_id(card, &id)) { + dev_err(card->dev, + "control %i:%i:%i:%s:%i is already present\n", + id.iface, id.device, id.subdevice, id.name, id.index); + return -EBUSY; + } + + if (snd_ctl_find_hole(card, kcontrol->count) < 0) + return -ENOMEM; + + list_add_tail(&kcontrol->list, &card->controls); + card->controls_count += kcontrol->count; + kcontrol->id.numid = card->last_numid + 1; + card->last_numid += kcontrol->count; + + id = kcontrol->id; + count = kcontrol->count; + for (idx = 0; idx < count; idx++, id.index++, id.numid++) + snd_ctl_notify(card, SNDRV_CTL_EVENT_MASK_ADD, &id); + + return 0; +} + /** * snd_ctl_add - add the control instance to the card * @card: the card instance @@ -363,45 +397,18 @@ static int snd_ctl_find_hole(struct snd_card *card, unsigned int count) */ int snd_ctl_add(struct snd_card *card, struct snd_kcontrol *kcontrol) { - struct snd_ctl_elem_id id; - unsigned int idx; - unsigned int count; int err = -EINVAL; if (! kcontrol) return err; if (snd_BUG_ON(!card || !kcontrol->info)) goto error; - id = kcontrol->id; - if (id.index > UINT_MAX - kcontrol->count) - goto error; down_write(&card->controls_rwsem); - if (snd_ctl_find_id(card, &id)) { - up_write(&card->controls_rwsem); - dev_err(card->dev, "control %i:%i:%i:%s:%i is already present\n", - id.iface, - id.device, - id.subdevice, - id.name, - id.index); - err = -EBUSY; - goto error; - } - if (snd_ctl_find_hole(card, kcontrol->count) < 0) { - up_write(&card->controls_rwsem); - err = -ENOMEM; - goto error; - } - list_add_tail(&kcontrol->list, &card->controls); - card->controls_count += kcontrol->count; - kcontrol->id.numid = card->last_numid + 1; - card->last_numid += kcontrol->count; - id = kcontrol->id; - count = kcontrol->count; + err = __snd_ctl_add(card, kcontrol); up_write(&card->controls_rwsem); - for (idx = 0; idx < count; idx++, id.index++, id.numid++) - snd_ctl_notify(card, SNDRV_CTL_EVENT_MASK_ADD, &id); + if (err < 0) + goto error; return 0; error: @@ -1360,9 +1367,12 @@ static int snd_ctl_elem_add(struct snd_ctl_file *file, kctl->tlv.c = snd_ctl_elem_user_tlv; /* This function manage to free the instance on failure. */ - err = snd_ctl_add(card, kctl); - if (err < 0) - return err; + down_write(&card->controls_rwsem); + err = __snd_ctl_add(card, kctl); + if (err < 0) { + snd_ctl_free_one(kctl); + goto unlock; + } offset = snd_ctl_get_ioff(kctl, &info->id); snd_ctl_build_ioff(&info->id, kctl, offset); /* @@ -1373,10 +1383,10 @@ static int snd_ctl_elem_add(struct snd_ctl_file *file, * which locks the element. */ - down_write(&card->controls_rwsem); card->user_ctl_count++; - up_write(&card->controls_rwsem); + unlock: + up_write(&card->controls_rwsem); return 0; } -- GitLab From 46663071adbc33e7b2a299b8e15bf36166101bcf Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 23 Nov 2018 18:18:30 +0100 Subject: [PATCH 1917/2269] ALSA: sparc: Fix invalid snd_free_pages() at error path commit 9a20332ab373b1f8f947e0a9c923652b32dab031 upstream. Some spurious calls of snd_free_pages() have been overlooked and remain in the error paths of sparc cs4231 driver code. Since runtime->dma_area is managed by the PCM core helper, we shouldn't release manually. Drop the superfluous calls. Reviewed-by: Takashi Sakamoto Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/sparc/cs4231.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/sound/sparc/cs4231.c b/sound/sparc/cs4231.c index e73c962590eb6..079063d8038d9 100644 --- a/sound/sparc/cs4231.c +++ b/sound/sparc/cs4231.c @@ -1146,10 +1146,8 @@ static int snd_cs4231_playback_open(struct snd_pcm_substream *substream) runtime->hw = snd_cs4231_playback; err = snd_cs4231_open(chip, CS4231_MODE_PLAY); - if (err < 0) { - snd_free_pages(runtime->dma_area, runtime->dma_bytes); + if (err < 0) return err; - } chip->playback_substream = substream; chip->p_periods_sent = 0; snd_pcm_set_sync(substream); @@ -1167,10 +1165,8 @@ static int snd_cs4231_capture_open(struct snd_pcm_substream *substream) runtime->hw = snd_cs4231_capture; err = snd_cs4231_open(chip, CS4231_MODE_RECORD); - if (err < 0) { - snd_free_pages(runtime->dma_area, runtime->dma_bytes); + if (err < 0) return err; - } chip->capture_substream = substream; chip->c_periods_sent = 0; snd_pcm_set_sync(substream); -- GitLab From e50476e9da270e1dfc742187aa1520ac4edb3a74 Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Thu, 8 Nov 2018 16:36:15 +0800 Subject: [PATCH 1918/2269] ALSA: hda/realtek - Support ALC300 commit 1078bef0cd9291355a20369b21cd823026ab8eaa upstream. This patch will enable ALC300. [ It's almost equivalent with other ALC269-compatible ones, and apparently has no loopback mixer -- tiwai ] Signed-off-by: Kailang Yang Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index eb8807de3ebc0..31c71ba3248e7 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -343,6 +343,7 @@ static void alc_fill_eapd_coef(struct hda_codec *codec) case 0x10ec0285: case 0x10ec0298: case 0x10ec0289: + case 0x10ec0300: alc_update_coef_idx(codec, 0x10, 1<<9, 0); break; case 0x10ec0275: @@ -2758,6 +2759,7 @@ enum { ALC269_TYPE_ALC215, ALC269_TYPE_ALC225, ALC269_TYPE_ALC294, + ALC269_TYPE_ALC300, ALC269_TYPE_ALC700, }; @@ -2792,6 +2794,7 @@ static int alc269_parse_auto_config(struct hda_codec *codec) case ALC269_TYPE_ALC215: case ALC269_TYPE_ALC225: case ALC269_TYPE_ALC294: + case ALC269_TYPE_ALC300: case ALC269_TYPE_ALC700: ssids = alc269_ssids; break; @@ -7089,6 +7092,10 @@ static int patch_alc269(struct hda_codec *codec) spec->gen.mixer_nid = 0; /* ALC2x4 does not have any loopback mixer path */ alc_update_coef_idx(codec, 0x6b, 0x0018, (1<<4) | (1<<3)); /* UAJ MIC Vref control by verb */ break; + case 0x10ec0300: + spec->codec_variant = ALC269_TYPE_ALC300; + spec->gen.mixer_nid = 0; /* no loopback on ALC300 */ + break; case 0x10ec0700: case 0x10ec0701: case 0x10ec0703: @@ -8160,6 +8167,7 @@ static const struct hda_device_id snd_hda_id_realtek[] = { HDA_CODEC_ENTRY(0x10ec0295, "ALC295", patch_alc269), HDA_CODEC_ENTRY(0x10ec0298, "ALC298", patch_alc269), HDA_CODEC_ENTRY(0x10ec0299, "ALC299", patch_alc269), + HDA_CODEC_ENTRY(0x10ec0300, "ALC300", patch_alc269), HDA_CODEC_REV_ENTRY(0x10ec0861, 0x100340, "ALC660", patch_alc861), HDA_CODEC_ENTRY(0x10ec0660, "ALC660-VD", patch_alc861vd), HDA_CODEC_ENTRY(0x10ec0861, "ALC861", patch_alc861), -- GitLab From e2ec0cb4d09277730decd5e1a9c75fb18ccb7d7a Mon Sep 17 00:00:00 2001 From: Anisse Astier Date: Fri, 23 Nov 2018 17:59:11 +0100 Subject: [PATCH 1919/2269] ALSA: hda/realtek - fix headset mic detection for MSI MS-B171 commit 8cd65271f8e545ddeed10ecc2e417936bdff168e upstream. MSI Cubi N 8GL (MS-B171) needs the same fixup as its older model, the MS-B120, in order for the headset mic to be properly detected. They both use a single 3-way jack for both mic and headset with an ALC283 codec, with the same pins used. Cc: stable@vger.kernel.org Signed-off-by: Anisse Astier Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 31c71ba3248e7..66b0a124beaea 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6411,6 +6411,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x144d, 0xc740, "Samsung Ativ book 8 (NP870Z5G)", ALC269_FIXUP_ATIV_BOOK_8), SND_PCI_QUIRK(0x1458, 0xfa53, "Gigabyte BXBT-2807", ALC283_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x1462, 0xb120, "MSI Cubi MS-B120", ALC283_FIXUP_HEADSET_MIC), + SND_PCI_QUIRK(0x1462, 0xb171, "Cubi N 8GL (MS-B171)", ALC283_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x17aa, 0x1036, "Lenovo P520", ALC233_FIXUP_LENOVO_MULTI_CODECS), SND_PCI_QUIRK(0x17aa, 0x20f2, "Thinkpad SL410/510", ALC269_FIXUP_SKU_IGNORE), SND_PCI_QUIRK(0x17aa, 0x215e, "Thinkpad L512", ALC269_FIXUP_SKU_IGNORE), -- GitLab From 086d1f60f874829124ebeaf7d2223a414221ca3b Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Sun, 25 Nov 2018 08:58:02 +0800 Subject: [PATCH 1920/2269] ext2: fix potential use after free commit ecebf55d27a11538ea84aee0be643dd953f830d5 upstream. The function ext2_xattr_set calls brelse(bh) to drop the reference count of bh. After that, bh may be freed. However, following brelse(bh), it reads bh->b_data via macro HDR(bh). This may result in a use-after-free bug. This patch moves brelse(bh) after reading field. CC: stable@vger.kernel.org Signed-off-by: Pan Bian Signed-off-by: Jan Kara Signed-off-by: Greg Kroah-Hartman --- fs/ext2/xattr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c index 62d9a659a8ff4..dd8f10db82e99 100644 --- a/fs/ext2/xattr.c +++ b/fs/ext2/xattr.c @@ -612,9 +612,9 @@ skip_replace: } cleanup: - brelse(bh); if (!(bh && header == HDR(bh))) kfree(header); + brelse(bh); up_write(&EXT2_I(inode)->xattr_sem); return error; -- GitLab From 2a9443a9358031c6192c876f3e51e1fa00c4d497 Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Sun, 18 Nov 2018 20:03:02 +0100 Subject: [PATCH 1921/2269] ARM: dts: rockchip: Remove @0 from the veyron memory node commit 672e60b72bbe7aace88721db55b380b6a51fb8f9 upstream. The Coreboot version on veyron ChromeOS devices seems to ignore memory@0 nodes when updating the available memory and instead inserts another memory node without the address. This leads to 4GB systems only ever be using 2GB as the memory@0 node takes precedence. So remove the @0 for veyron devices. Fixes: 0b639b815f15 ("ARM: dts: rockchip: Add missing unit name to memory nodes in rk3288 boards") Cc: stable@vger.kernel.org Reported-by: Heikki Lindholm Signed-off-by: Heiko Stuebner Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/rk3288-veyron.dtsi | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/rk3288-veyron.dtsi b/arch/arm/boot/dts/rk3288-veyron.dtsi index 6e5bd8974f22d..679b839bb2eb1 100644 --- a/arch/arm/boot/dts/rk3288-veyron.dtsi +++ b/arch/arm/boot/dts/rk3288-veyron.dtsi @@ -47,7 +47,11 @@ #include "rk3288.dtsi" / { - memory@0 { + /* + * The default coreboot on veyron devices ignores memory@0 nodes + * and would instead create another memory node. + */ + memory { device_type = "memory"; reg = <0x0 0x0 0x0 0x80000000>; }; -- GitLab From 7e572222d3c9d67581f5409d7dbaf1e0e7b8de19 Mon Sep 17 00:00:00 2001 From: Richard Genoud Date: Tue, 27 Nov 2018 17:06:34 +0100 Subject: [PATCH 1922/2269] dmaengine: at_hdmac: fix memory leak in at_dma_xlate() commit 98f5f932254b88ce828bc8e4d1642d14e5854caa upstream. The leak was found when opening/closing a serial port a great number of time, increasing kmalloc-32 in slabinfo. Each time the port was opened, dma_request_slave_channel() was called. Then, in at_dma_xlate(), atslave was allocated with devm_kzalloc() and never freed. (Well, it was free at module unload, but that's not what we want). So, here, kzalloc is more suited for the job since it has to be freed in atc_free_chan_resources(). Cc: stable@vger.kernel.org Fixes: bbe89c8e3d59 ("at_hdmac: move to generic DMA binding") Reported-by: Mario Forner Suggested-by: Alexandre Belloni Acked-by: Alexandre Belloni Acked-by: Ludovic Desroches Signed-off-by: Richard Genoud Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/at_hdmac.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index a861b5b4d4437..b87892a14ba97 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -1641,6 +1641,12 @@ static void atc_free_chan_resources(struct dma_chan *chan) atchan->descs_allocated = 0; atchan->status = 0; + /* + * Free atslave allocated in at_dma_xlate() + */ + kfree(chan->private); + chan->private = NULL; + dev_vdbg(chan2dev(chan), "free_chan_resources: done\n"); } @@ -1675,7 +1681,7 @@ static struct dma_chan *at_dma_xlate(struct of_phandle_args *dma_spec, dma_cap_zero(mask); dma_cap_set(DMA_SLAVE, mask); - atslave = devm_kzalloc(&dmac_pdev->dev, sizeof(*atslave), GFP_KERNEL); + atslave = kzalloc(sizeof(*atslave), GFP_KERNEL); if (!atslave) return NULL; -- GitLab From 39c49a757d7c3e27d3bfabd1e102e9544372e103 Mon Sep 17 00:00:00 2001 From: Richard Genoud Date: Tue, 27 Nov 2018 17:06:35 +0100 Subject: [PATCH 1923/2269] dmaengine: at_hdmac: fix module unloading commit 77e75fda94d2ebb86aa9d35fb1860f6395bf95de upstream. of_dma_controller_free() was not called on module onloading. This lead to a soft lockup: watchdog: BUG: soft lockup - CPU#0 stuck for 23s! Modules linked in: at_hdmac [last unloaded: at_hdmac] when of_dma_request_slave_channel() tried to call ofdma->of_dma_xlate(). Cc: stable@vger.kernel.org Fixes: bbe89c8e3d59 ("at_hdmac: move to generic DMA binding") Acked-by: Ludovic Desroches Signed-off-by: Richard Genoud Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/at_hdmac.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index b87892a14ba97..21ed0e20c5d91 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -2006,6 +2006,8 @@ static int at_dma_remove(struct platform_device *pdev) struct resource *io; at_dma_off(atdma); + if (pdev->dev.of_node) + of_dma_controller_free(pdev->dev.of_node); dma_async_device_unregister(&atdma->dma_common); dma_pool_destroy(atdma->memset_pool); -- GitLab From 85df1f9f8fb0c9c4801f5cc8ab1b25e77c1f06cd Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 3 Dec 2018 13:06:57 +0200 Subject: [PATCH 1924/2269] btrfs: release metadata before running delayed refs We want to release the unused reservation we have since it refills the delayed refs reserve, which will make everything go smoother when running the delayed refs if we're short on our reservation. CC: stable@vger.kernel.org # 4.4+ Reviewed-by: Omar Sandoval Reviewed-by: Liu Bo Reviewed-by: Nikolay Borisov Signed-off-by: Josef Bacik Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/transaction.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index f74005ca8f087..73c1fbca0c357 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1955,6 +1955,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) return ret; } + btrfs_trans_release_metadata(trans, fs_info); + trans->block_rsv = NULL; + /* make a pass through all the delayed refs we have so far * any runnings procs may add more while we are here */ @@ -1964,9 +1967,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) return ret; } - btrfs_trans_release_metadata(trans, fs_info); - trans->block_rsv = NULL; - cur_trans = trans->transaction; /* -- GitLab From ca0908ddcd303d5e91982c6e5a91ad3d99cc99b6 Mon Sep 17 00:00:00 2001 From: Ben Wolsieffer Date: Sat, 3 Nov 2018 19:32:20 -0400 Subject: [PATCH 1925/2269] staging: vchiq_arm: fix compat VCHIQ_IOC_AWAIT_COMPLETION commit 5a96b2d38dc054c0bbcbcd585b116566cbd877fe upstream. The compatibility ioctl wrapper for VCHIQ_IOC_AWAIT_COMPLETION assumes that the native ioctl always uses a message buffer and decrements msgbufcount. Certain message types do not use a message buffer and in this case msgbufcount is not decremented, and completion->header for the message is NULL. Because the wrapper unconditionally decrements msgbufcount, the calling process may assume that a message buffer has been used even when it has not. This results in a memory leak in the userspace code that interfaces with this driver. When msgbufcount is decremented, the userspace code assumes that the buffer can be freed though the reference in completion->header, which cannot happen when the reference is NULL. This patch causes the wrapper to only decrement msgbufcount when the native ioctl decrements it. Note that we cannot simply copy the native ioctl's value of msgbufcount, because the wrapper only retrieves messages from the native ioctl one at a time, while userspace may request multiple messages. See https://github.com/raspberrypi/linux/pull/2703 for more discussion of this patch. Fixes: 5569a1260933 ("staging: vchiq_arm: Add compatibility wrappers for ioctls") Signed-off-by: Ben Wolsieffer Acked-by: Stefan Wahren Cc: stable Signed-off-by: Greg Kroah-Hartman --- .../staging/vc04_services/interface/vchiq_arm/vchiq_arm.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c index 314ffac50bb83..f05e9af4fe810 100644 --- a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c +++ b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c @@ -1461,6 +1461,7 @@ vchiq_compat_ioctl_await_completion(struct file *file, struct vchiq_await_completion32 args32; struct vchiq_completion_data32 completion32; unsigned int *msgbufcount32; + unsigned int msgbufcount_native; compat_uptr_t msgbuf32; void *msgbuf; void **msgbufptr; @@ -1572,7 +1573,11 @@ vchiq_compat_ioctl_await_completion(struct file *file, sizeof(completion32))) return -EFAULT; - args32.msgbufcount--; + if (get_user(msgbufcount_native, &args->msgbufcount)) + return -EFAULT; + + if (!msgbufcount_native) + args32.msgbufcount--; msgbufcount32 = &((struct vchiq_await_completion32 __user *)arg)->msgbufcount; -- GitLab From 74abe400e12b45f871c98c99f0f65521cc0c8ca6 Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Thu, 8 Nov 2018 23:30:09 -0600 Subject: [PATCH 1926/2269] staging: rtl8723bs: Add missing return for cfg80211_rtw_get_station commit 8561fb31a1f9594e2807681f5c0721894e367f19 upstream. With Androidx86 8.1, wificond returns "failed to get nl80211_sta_info_tx_failed" and wificondControl returns "Invalid signal poll result from wificond". The fix is to OR sinfo->filled with BIT_ULL(NL80211_STA_INFO_TX_FAILED). This missing bit is apparently not needed with NetworkManager, but it does no harm in that case. Reported-and-Tested-by: youling257 Cc: linux-wireless@vger.kernel.org Cc: youling257 Signed-off-by: Larry Finger Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c b/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c index bd4352fe2de31..83852f323c5e2 100644 --- a/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c +++ b/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c @@ -1293,7 +1293,7 @@ static int cfg80211_rtw_get_station(struct wiphy *wiphy, sinfo->filled |= BIT(NL80211_STA_INFO_TX_PACKETS); sinfo->tx_packets = psta->sta_stats.tx_pkts; - + sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_FAILED); } /* for Ad-Hoc/AP mode */ -- GitLab From 403a2001bd795c43f59a5dd6695273fdfe0a3dcf Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Fri, 23 Nov 2018 08:42:19 +0000 Subject: [PATCH 1927/2269] USB: usb-storage: Add new IDs to ums-realtek commit a84a1bcc992f0545a51d2e120b8ca2ef20e2ea97 upstream. There are two new Realtek card readers require ums-realtek to work correctly. Add the new IDs to support them. Signed-off-by: Kai-Heng Feng Acked-by: Alan Stern Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/unusual_realtek.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/usb/storage/unusual_realtek.h b/drivers/usb/storage/unusual_realtek.h index 8fe624ad302ab..7ca7794936710 100644 --- a/drivers/usb/storage/unusual_realtek.h +++ b/drivers/usb/storage/unusual_realtek.h @@ -39,4 +39,14 @@ UNUSUAL_DEV(0x0bda, 0x0159, 0x0000, 0x9999, "USB Card Reader", USB_SC_DEVICE, USB_PR_DEVICE, init_realtek_cr, 0), +UNUSUAL_DEV(0x0bda, 0x0177, 0x0000, 0x9999, + "Realtek", + "USB Card Reader", + USB_SC_DEVICE, USB_PR_DEVICE, init_realtek_cr, 0), + +UNUSUAL_DEV(0x0bda, 0x0184, 0x0000, 0x9999, + "Realtek", + "USB Card Reader", + USB_SC_DEVICE, USB_PR_DEVICE, init_realtek_cr, 0), + #endif /* defined(CONFIG_USB_STORAGE_REALTEK) || ... */ -- GitLab From 2b7456f46ff957a3f04b3658a8d71b645d8cf172 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20Niew=C3=B6hner?= Date: Sun, 25 Nov 2018 17:57:33 +0100 Subject: [PATCH 1928/2269] usb: core: quirks: add RESET_RESUME quirk for Cherry G230 Stream series MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit effd14f66cc1ef6701a19c5a56e39c35f4d395a5 upstream. Cherry G230 Stream 2.0 (G85-231) and 3.0 (G85-232) need this quirk to function correctly. This fixes a but where double pressing numlock locks up the device completely with need to replug the keyboard. Signed-off-by: Michael Niewöhner Tested-by: Michael Niewöhner Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/quirks.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index 1e8f68960014b..808437c5ec49d 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -64,6 +64,9 @@ static const struct usb_device_id usb_quirk_list[] = { /* Microsoft LifeCam-VX700 v2.0 */ { USB_DEVICE(0x045e, 0x0770), .driver_info = USB_QUIRK_RESET_RESUME }, + /* Cherry Stream G230 2.0 (G85-231) and 3.0 (G85-232) */ + { USB_DEVICE(0x046a, 0x0023), .driver_info = USB_QUIRK_RESET_RESUME }, + /* Logitech HD Pro Webcams C920, C920-C, C925e and C930e */ { USB_DEVICE(0x046d, 0x082d), .driver_info = USB_QUIRK_DELAY_INIT }, { USB_DEVICE(0x046d, 0x0841), .driver_info = USB_QUIRK_DELAY_INIT }, -- GitLab From f1a4876f0435dc43fc55f42bd30763de5e5ef9ec Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Mon, 19 Nov 2018 08:34:04 +0200 Subject: [PATCH 1929/2269] Revert "usb: dwc3: gadget: skip Set/Clear Halt when invalid" commit 38317f5c0f2faae5110854f36edad810f841d62f upstream. This reverts commit ffb80fc672c3a7b6afd0cefcb1524fb99917b2f3. Turns out that commit is wrong. Host controllers are allowed to use Clear Feature HALT as means to sync data toggle between host and periperal. Cc: Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/gadget.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index ac8d619ff8876..b8704c0678f99 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -1511,9 +1511,6 @@ int __dwc3_gadget_ep_set_halt(struct dwc3_ep *dep, int value, int protocol) unsigned transfer_in_flight; unsigned started; - if (dep->flags & DWC3_EP_STALL) - return 0; - if (dep->number > 1) trb = dwc3_ep_prev_trb(dep, dep->trb_enqueue); else @@ -1535,8 +1532,6 @@ int __dwc3_gadget_ep_set_halt(struct dwc3_ep *dep, int value, int protocol) else dep->flags |= DWC3_EP_STALL; } else { - if (!(dep->flags & DWC3_EP_STALL)) - return 0; ret = dwc3_send_clear_stall_ep_cmd(dep); if (ret) -- GitLab From d08c62878fe2040e2cbc70554f86c54895a9f708 Mon Sep 17 00:00:00 2001 From: Martin Kelly Date: Sun, 28 Oct 2018 20:18:53 -0700 Subject: [PATCH 1930/2269] iio:st_magn: Fix enable device after trigger commit fe5192ac81ad0d4dfe1395d11f393f0513c15f7f upstream. Currently, we enable the device before we enable the device trigger. At high frequencies, this can cause interrupts that don't yet have a poll function associated with them and are thus treated as spurious. At high frequencies with level interrupts, this can even cause an interrupt storm of repeated spurious interrupts (~100,000 on my Beagleboard with the LSM9DS1 magnetometer). If these repeat too much, the interrupt will get disabled and the device will stop functioning. To prevent these problems, enable the device prior to enabling the device trigger, and disable the divec prior to disabling the trigger. This means there's no window of time during which the device creates interrupts but we have no trigger to answer them. Fixes: 90efe055629 ("iio: st_sensors: harden interrupt handling") Signed-off-by: Martin Kelly Tested-by: Denis Ciocca Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/magnetometer/st_magn_buffer.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/drivers/iio/magnetometer/st_magn_buffer.c b/drivers/iio/magnetometer/st_magn_buffer.c index 0a9e8fadfa9de..37ab305664649 100644 --- a/drivers/iio/magnetometer/st_magn_buffer.c +++ b/drivers/iio/magnetometer/st_magn_buffer.c @@ -30,11 +30,6 @@ int st_magn_trig_set_state(struct iio_trigger *trig, bool state) return st_sensors_set_dataready_irq(indio_dev, state); } -static int st_magn_buffer_preenable(struct iio_dev *indio_dev) -{ - return st_sensors_set_enable(indio_dev, true); -} - static int st_magn_buffer_postenable(struct iio_dev *indio_dev) { int err; @@ -50,7 +45,7 @@ static int st_magn_buffer_postenable(struct iio_dev *indio_dev) if (err < 0) goto st_magn_buffer_postenable_error; - return err; + return st_sensors_set_enable(indio_dev, true); st_magn_buffer_postenable_error: kfree(mdata->buffer_data); @@ -63,11 +58,11 @@ static int st_magn_buffer_predisable(struct iio_dev *indio_dev) int err; struct st_sensor_data *mdata = iio_priv(indio_dev); - err = iio_triggered_buffer_predisable(indio_dev); + err = st_sensors_set_enable(indio_dev, false); if (err < 0) goto st_magn_buffer_predisable_error; - err = st_sensors_set_enable(indio_dev, false); + err = iio_triggered_buffer_predisable(indio_dev); st_magn_buffer_predisable_error: kfree(mdata->buffer_data); @@ -75,7 +70,6 @@ st_magn_buffer_predisable_error: } static const struct iio_buffer_setup_ops st_magn_buffer_setup_ops = { - .preenable = &st_magn_buffer_preenable, .postenable = &st_magn_buffer_postenable, .predisable = &st_magn_buffer_predisable, }; -- GitLab From 397dbde6149057b4b84acb37012b57801fbc9221 Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Fri, 30 Nov 2018 14:09:21 -0800 Subject: [PATCH 1931/2269] lib/test_kmod.c: fix rmmod double free commit 5618cf031fecda63847cafd1091e7b8bd626cdb1 upstream. We free the misc device string twice on rmmod; fix this. Without this we cannot remove the module without crashing. Link: http://lkml.kernel.org/r/20181124050500.5257-1-mcgrof@kernel.org Signed-off-by: Luis Chamberlain Reported-by: Randy Dunlap Reviewed-by: Andrew Morton Cc: [4.12+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- lib/test_kmod.c | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/test_kmod.c b/lib/test_kmod.c index 96c304fd656ad..7abb59ce6613a 100644 --- a/lib/test_kmod.c +++ b/lib/test_kmod.c @@ -1221,7 +1221,6 @@ void unregister_test_dev_kmod(struct kmod_test_device *test_dev) dev_info(test_dev->dev, "removing interface\n"); misc_deregister(&test_dev->misc_dev); - kfree(&test_dev->misc_dev.name); mutex_unlock(&test_dev->config_mutex); mutex_unlock(&test_dev->trigger_mutex); -- GitLab From 7c1ba1a1bb0d1a5dcebc737343f0c45840cf181d Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Fri, 30 Nov 2018 14:09:03 -0800 Subject: [PATCH 1932/2269] mm: use swp_offset as key in shmem_replace_page() commit c1cb20d43728aa9b5393bd8d489bc85c142949b2 upstream. We changed the key of swap cache tree from swp_entry_t.val to swp_offset. We need to do so in shmem_replace_page() as well. Hugh said: "shmem_replace_page() has been wrong since the day I wrote it: good enough to work on swap "type" 0, which is all most people ever use (especially those few who need shmem_replace_page() at all), but broken once there are any non-0 swp_type bits set in the higher order bits" Link: http://lkml.kernel.org/r/20181121215442.138545-1-yuzhao@google.com Fixes: f6ab1f7f6b2d ("mm, swap: use offset of swap entry as key of swap cache") Signed-off-by: Yu Zhao Reviewed-by: Matthew Wilcox Acked-by: Hugh Dickins Cc: [4.9+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/shmem.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/mm/shmem.c b/mm/shmem.c index 177fef62cbbd8..ab7ff0aeae2d3 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1532,11 +1532,13 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp, { struct page *oldpage, *newpage; struct address_space *swap_mapping; + swp_entry_t entry; pgoff_t swap_index; int error; oldpage = *pagep; - swap_index = page_private(oldpage); + entry.val = page_private(oldpage); + swap_index = swp_offset(entry); swap_mapping = page_mapping(oldpage); /* @@ -1555,7 +1557,7 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp, __SetPageLocked(newpage); __SetPageSwapBacked(newpage); SetPageUptodate(newpage); - set_page_private(newpage, swap_index); + set_page_private(newpage, entry.val); SetPageSwapCache(newpage); /* -- GitLab From 3c44b197448dfb8bef4fa9cab92b7a73153a6f76 Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Mon, 26 Nov 2018 02:29:56 +0000 Subject: [PATCH 1933/2269] Drivers: hv: vmbus: check the creation_status in vmbus_establish_gpadl() commit eceb05965489784f24bbf4d61ba60e475a983016 upstream. This is a longstanding issue: if the vmbus upper-layer drivers try to consume too many GPADLs, the host may return with an error 0xC0000044 (STATUS_QUOTA_EXCEEDED), but currently we forget to check the creation_status, and hence we can pass an invalid GPADL handle into the OPEN_CHANNEL message, and get an error code 0xc0000225 in open_info->response.open_result.status, and finally we hang in vmbus_open() -> "goto error_free_info" -> vmbus_teardown_gpadl(). With this patch, we can exit gracefully on STATUS_QUOTA_EXCEEDED. Cc: Stephen Hemminger Cc: K. Y. Srinivasan Cc: Haiyang Zhang Cc: stable@vger.kernel.org Signed-off-by: Dexuan Cui Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- drivers/hv/channel.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c index d96b09fea8358..e05de5032f0c2 100644 --- a/drivers/hv/channel.c +++ b/drivers/hv/channel.c @@ -454,6 +454,14 @@ int vmbus_establish_gpadl(struct vmbus_channel *channel, void *kbuffer, } wait_for_completion(&msginfo->waitevent); + if (msginfo->response.gpadl_created.creation_status != 0) { + pr_err("Failed to establish GPADL: err = 0x%x\n", + msginfo->response.gpadl_created.creation_status); + + ret = -EDQUOT; + goto cleanup; + } + if (channel->rescind) { ret = -ENODEV; goto cleanup; -- GitLab From f7d33988bd923310934c8481a2e68b53f716df13 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Wed, 14 Nov 2018 01:57:03 +0000 Subject: [PATCH 1934/2269] misc: mic/scif: fix copy-paste error in scif_create_remote_lookup commit 6484a677294aa5d08c0210f2f387ebb9be646115 upstream. gcc '-Wunused-but-set-variable' warning: drivers/misc/mic/scif/scif_rma.c: In function 'scif_create_remote_lookup': drivers/misc/mic/scif/scif_rma.c:373:25: warning: variable 'vmalloc_num_pages' set but not used [-Wunused-but-set-variable] 'vmalloc_num_pages' should be used to determine if the address is within the vmalloc range. Fixes: ba612aa8b487 ("misc: mic: SCIF memory registration and unregistration") Signed-off-by: YueHaibing Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mic/scif/scif_rma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/mic/scif/scif_rma.c b/drivers/misc/mic/scif/scif_rma.c index 329727e00e970..95745dc4e0ecf 100644 --- a/drivers/misc/mic/scif/scif_rma.c +++ b/drivers/misc/mic/scif/scif_rma.c @@ -417,7 +417,7 @@ static int scif_create_remote_lookup(struct scif_dev *remote_dev, if (err) goto error_window; err = scif_map_page(&window->num_pages_lookup.lookup[j], - vmalloc_dma_phys ? + vmalloc_num_pages ? vmalloc_to_page(&window->num_pages[i]) : virt_to_page(&window->num_pages[i]), remote_dev); -- GitLab From fd6cc33d0775b0d902906d88dd05cc1a2a059f8d Mon Sep 17 00:00:00 2001 From: Todd Kjos Date: Tue, 6 Nov 2018 15:55:32 -0800 Subject: [PATCH 1935/2269] binder: fix race that allows malicious free of live buffer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 7bada55ab50697861eee6bb7d60b41e68a961a9c upstream. Malicious code can attempt to free buffers using the BC_FREE_BUFFER ioctl to binder. There are protections against a user freeing a buffer while in use by the kernel, however there was a window where BC_FREE_BUFFER could be used to free a recently allocated buffer that was not completely initialized. This resulted in a use-after-free detected by KASAN with a malicious test program. This window is closed by setting the buffer's allow_user_free attribute to 0 when the buffer is allocated or when the user has previously freed it instead of waiting for the caller to set it. The problem was that when the struct buffer was recycled, allow_user_free was stale and set to 1 allowing a free to go through. Signed-off-by: Todd Kjos Acked-by: Arve Hjønnevåg Cc: stable # 4.14 Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder.c | 21 ++++++++++++--------- drivers/android/binder_alloc.c | 14 ++++++-------- drivers/android/binder_alloc.h | 3 +-- 3 files changed, 19 insertions(+), 19 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index a86c27948fcab..96a0f940e54d9 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -2918,7 +2918,6 @@ static void binder_transaction(struct binder_proc *proc, t->buffer = NULL; goto err_binder_alloc_buf_failed; } - t->buffer->allow_user_free = 0; t->buffer->debug_id = t->debug_id; t->buffer->transaction = t; t->buffer->target_node = target_node; @@ -3407,14 +3406,18 @@ static int binder_thread_write(struct binder_proc *proc, buffer = binder_alloc_prepare_to_free(&proc->alloc, data_ptr); - if (buffer == NULL) { - binder_user_error("%d:%d BC_FREE_BUFFER u%016llx no match\n", - proc->pid, thread->pid, (u64)data_ptr); - break; - } - if (!buffer->allow_user_free) { - binder_user_error("%d:%d BC_FREE_BUFFER u%016llx matched unreturned buffer\n", - proc->pid, thread->pid, (u64)data_ptr); + if (IS_ERR_OR_NULL(buffer)) { + if (PTR_ERR(buffer) == -EPERM) { + binder_user_error( + "%d:%d BC_FREE_BUFFER u%016llx matched unreturned or currently freeing buffer\n", + proc->pid, thread->pid, + (u64)data_ptr); + } else { + binder_user_error( + "%d:%d BC_FREE_BUFFER u%016llx no match\n", + proc->pid, thread->pid, + (u64)data_ptr); + } break; } binder_debug(BINDER_DEBUG_FREE_BUFFER, diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c index 58e4658f9dd6c..b9281f2725a6a 100644 --- a/drivers/android/binder_alloc.c +++ b/drivers/android/binder_alloc.c @@ -149,14 +149,12 @@ static struct binder_buffer *binder_alloc_prepare_to_free_locked( else { /* * Guard against user threads attempting to - * free the buffer twice + * free the buffer when in use by kernel or + * after it's already been freed. */ - if (buffer->free_in_progress) { - pr_err("%d:%d FREE_BUFFER u%016llx user freed buffer twice\n", - alloc->pid, current->pid, (u64)user_ptr); - return NULL; - } - buffer->free_in_progress = 1; + if (!buffer->allow_user_free) + return ERR_PTR(-EPERM); + buffer->allow_user_free = 0; return buffer; } } @@ -486,7 +484,7 @@ struct binder_buffer *binder_alloc_new_buf_locked(struct binder_alloc *alloc, rb_erase(best_fit, &alloc->free_buffers); buffer->free = 0; - buffer->free_in_progress = 0; + buffer->allow_user_free = 0; binder_insert_allocated_buffer_locked(alloc, buffer); binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC, "%d: binder_alloc_buf size %zd got %pK\n", diff --git a/drivers/android/binder_alloc.h b/drivers/android/binder_alloc.h index 2dd33b6df1044..a3ad7683b6f23 100644 --- a/drivers/android/binder_alloc.h +++ b/drivers/android/binder_alloc.h @@ -50,8 +50,7 @@ struct binder_buffer { unsigned free:1; unsigned allow_user_free:1; unsigned async_transaction:1; - unsigned free_in_progress:1; - unsigned debug_id:28; + unsigned debug_id:29; struct binder_transaction *transaction; -- GitLab From 14118df4e7b4738815f222f5b20fceb3957ab206 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 27 Jul 2018 19:45:36 +0200 Subject: [PATCH 1936/2269] libceph: weaken sizeof check in ceph_x_verify_authorizer_reply() commit f1d10e04637924f2b00a0fecdd2ca4565f5cfc3f upstream. Allow for extending ceph_x_authorize_reply in the future. Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil Signed-off-by: Greg Kroah-Hartman --- net/ceph/auth_x.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c index 10eb759bbcb49..2bf9d9f7ddf30 100644 --- a/net/ceph/auth_x.c +++ b/net/ceph/auth_x.c @@ -737,8 +737,10 @@ static int ceph_x_verify_authorizer_reply(struct ceph_auth_client *ac, ret = ceph_x_decrypt(&au->session_key, &p, p + CEPHX_AU_ENC_BUF_LEN); if (ret < 0) return ret; - if (ret != sizeof(*reply)) - return -EPERM; + if (ret < sizeof(*reply)) { + pr_err("bad size %d for ceph_x_authorize_reply\n", ret); + return -EINVAL; + } if (au->nonce + 1 != le64_to_cpu(reply->nonce_plus_one)) ret = -EPERM; -- GitLab From 0c5f2e899241aceab46e6dcba01c6fb2223bdeb5 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 27 Jul 2018 19:40:30 +0200 Subject: [PATCH 1937/2269] libceph: check authorizer reply/challenge length before reading commit 130f52f2b203aa0aec179341916ffb2e905f3afd upstream. Avoid scribbling over memory if the received reply/challenge is larger than the buffer supplied with the authorizer. Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil Signed-off-by: Greg Kroah-Hartman --- net/ceph/messenger.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index ad33baa2008de..f864807284d44 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -1754,6 +1754,13 @@ static int read_partial_connect(struct ceph_connection *con) if (con->auth) { size = le32_to_cpu(con->in_reply.authorizer_len); + if (size > con->auth->authorizer_reply_buf_len) { + pr_err("authorizer reply too big: %d > %zu\n", size, + con->auth->authorizer_reply_buf_len); + ret = -EINVAL; + goto out; + } + end += size; ret = read_partial(con, end, size, con->auth->authorizer_reply_buf); -- GitLab From 862e7aaa90f63b1e3e3ab498dc3a3ac9a011b8d0 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 27 Sep 2018 22:15:31 -0700 Subject: [PATCH 1938/2269] f2fs: fix missing up_read commit 89d13c38501df730cbb2e02c4499da1b5187119d upstream. This patch fixes missing up_read call. Fixes: c9b60788fc76 ("f2fs: fix to do sanity check with block address in main area") Cc: # 4.19+ Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Greg Kroah-Hartman --- fs/f2fs/node.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 6adb6c60f017c..65de72d65562f 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1399,8 +1399,10 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted, } if (__is_valid_data_blkaddr(ni.blk_addr) && - !f2fs_is_valid_blkaddr(sbi, ni.blk_addr, DATA_GENERIC)) + !f2fs_is_valid_blkaddr(sbi, ni.blk_addr, DATA_GENERIC)) { + up_read(&sbi->node_write); goto redirty_out; + } if (atomic && !test_opt(sbi, NOBARRIER)) fio.op_flags |= REQ_PREFLUSH | REQ_FUA; -- GitLab From 7152401aeedd9685fcf1cb58138c868890d9164f Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 5 Dec 2018 19:41:27 +0100 Subject: [PATCH 1939/2269] Linux 4.14.86 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 58a2482640906..572bd98d23444 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 85 +SUBLEVEL = 86 EXTRAVERSION = NAME = Petit Gorille -- GitLab From ca9606f24f92e0eb43893dfb55be4e8e3c119244 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 22 Nov 2017 11:51:35 -0800 Subject: [PATCH 1940/2269] UPSTREAM: crypto: chacha20 - Fix unaligned access when loading constants The four 32-bit constants for the initial state of ChaCha20 were loaded from a char array which is not guaranteed to have the needed alignment. Fix it by just assigning the constants directly instead. Signed-off-by: Eric Biggers Acked-by: Ard Biesheuvel Signed-off-by: Herbert Xu (cherry picked from commit ecf3220d882ae84844909ed6323032aac47aff93) Bug: 112008522 Test: As series, see Ic61c13b53facfd2173065be715a7ee5f3af8760b Change-Id: I01a70ca3762e03a0f934bcafc815e5a574362981 Signed-off-by: Eric Biggers --- crypto/chacha20_generic.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/crypto/chacha20_generic.c b/crypto/chacha20_generic.c index 4a45fa4890c0e..ec84e7837aac0 100644 --- a/crypto/chacha20_generic.c +++ b/crypto/chacha20_generic.c @@ -41,12 +41,10 @@ static void chacha20_docrypt(u32 *state, u8 *dst, const u8 *src, void crypto_chacha20_init(u32 *state, struct chacha20_ctx *ctx, u8 *iv) { - static const char constant[16] = "expand 32-byte k"; - - state[0] = le32_to_cpuvp(constant + 0); - state[1] = le32_to_cpuvp(constant + 4); - state[2] = le32_to_cpuvp(constant + 8); - state[3] = le32_to_cpuvp(constant + 12); + state[0] = 0x61707865; /* "expa" */ + state[1] = 0x3320646e; /* "nd 3" */ + state[2] = 0x79622d32; /* "2-by" */ + state[3] = 0x6b206574; /* "te k" */ state[4] = ctx->key[0]; state[5] = ctx->key[1]; state[6] = ctx->key[2]; -- GitLab From 37e76dc766539e878b7b099eaae9b489c00f3fb8 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 22 Nov 2017 11:51:36 -0800 Subject: [PATCH 1941/2269] UPSTREAM: crypto: chacha20 - Use unaligned access macros when loading key and IV The generic ChaCha20 implementation has a cra_alignmask of 3, which ensures that the key passed into crypto_chacha20_setkey() and the IV passed into crypto_chacha20_init() are 4-byte aligned. However, these functions are also called from the ARM and ARM64 implementations of ChaCha20, which intentionally do not have a cra_alignmask set. This is broken because 32-bit words are being loaded from potentially-unaligned buffers without the unaligned access macros. Fix it by using the unaligned access macros when loading the key and IV. Signed-off-by: Eric Biggers Acked-by: Ard Biesheuvel Signed-off-by: Herbert Xu (cherry picked from commit dbd872a123fab81d0fa235a265c39e5ccdf735b3) Bug: 112008522 Test: As series, see Ic61c13b53facfd2173065be715a7ee5f3af8760b Change-Id: I1809f6ffe0b679a2554a660eaca360d0fa3b0dd1 Signed-off-by: Eric Biggers --- crypto/chacha20_generic.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/crypto/chacha20_generic.c b/crypto/chacha20_generic.c index ec84e7837aac0..b5a10ebf1b828 100644 --- a/crypto/chacha20_generic.c +++ b/crypto/chacha20_generic.c @@ -9,16 +9,12 @@ * (at your option) any later version. */ +#include #include #include #include #include -static inline u32 le32_to_cpuvp(const void *p) -{ - return le32_to_cpup(p); -} - static void chacha20_docrypt(u32 *state, u8 *dst, const u8 *src, unsigned int bytes) { @@ -53,10 +49,10 @@ void crypto_chacha20_init(u32 *state, struct chacha20_ctx *ctx, u8 *iv) state[9] = ctx->key[5]; state[10] = ctx->key[6]; state[11] = ctx->key[7]; - state[12] = le32_to_cpuvp(iv + 0); - state[13] = le32_to_cpuvp(iv + 4); - state[14] = le32_to_cpuvp(iv + 8); - state[15] = le32_to_cpuvp(iv + 12); + state[12] = get_unaligned_le32(iv + 0); + state[13] = get_unaligned_le32(iv + 4); + state[14] = get_unaligned_le32(iv + 8); + state[15] = get_unaligned_le32(iv + 12); } EXPORT_SYMBOL_GPL(crypto_chacha20_init); @@ -70,7 +66,7 @@ int crypto_chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key, return -EINVAL; for (i = 0; i < ARRAY_SIZE(ctx->key); i++) - ctx->key[i] = le32_to_cpuvp(key + i * sizeof(u32)); + ctx->key[i] = get_unaligned_le32(key + i * sizeof(u32)); return 0; } -- GitLab From f7dfecb73f768fb8e475cb0fd26e87aa89cb8a63 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 22 Nov 2017 11:51:37 -0800 Subject: [PATCH 1942/2269] UPSTREAM: crypto: chacha20 - Remove cra_alignmask Now that crypto_chacha20_setkey() and crypto_chacha20_init() use the unaligned access macros and crypto_xor() also accepts unaligned buffers, there is no need to have a cra_alignmask set for chacha20-generic. Signed-off-by: Eric Biggers Acked-by: Ard Biesheuvel Signed-off-by: Herbert Xu (cherry picked from commit a1c73383c032ea467c4e766d4471edf91cf384cb) Bug: 112008522 Test: As series, see Ic61c13b53facfd2173065be715a7ee5f3af8760b Change-Id: Ic0ba8d0472cece16a6ca7ca2de0d679bd8f3ee32 Signed-off-by: Eric Biggers --- crypto/chacha20_generic.c | 1 - 1 file changed, 1 deletion(-) diff --git a/crypto/chacha20_generic.c b/crypto/chacha20_generic.c index b5a10ebf1b828..bb4affbd591c3 100644 --- a/crypto/chacha20_generic.c +++ b/crypto/chacha20_generic.c @@ -105,7 +105,6 @@ static struct skcipher_alg alg = { .base.cra_priority = 100, .base.cra_blocksize = 1, .base.cra_ctxsize = sizeof(struct chacha20_ctx), - .base.cra_alignmask = sizeof(u32) - 1, .base.cra_module = THIS_MODULE, .min_keysize = CHACHA20_KEY_SIZE, -- GitLab From 6c6087bd8b1ff38e5403c27bce5518146231ae80 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 22 Nov 2017 11:51:39 -0800 Subject: [PATCH 1943/2269] UPSTREAM: crypto: chacha20 - Fix keystream alignment for chacha20_block() When chacha20_block() outputs the keystream block, it uses 'u32' stores directly. However, the callers (crypto/chacha20_generic.c and drivers/char/random.c) declare the keystream buffer as a 'u8' array, which is not guaranteed to have the needed alignment. Fix it by having both callers declare the keystream as a 'u32' array. For now this is preferable to switching over to the unaligned access macros because chacha20_block() is only being used in cases where we can easily control the alignment (stack buffers). Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu (cherry picked from commit 9f480faec58cd6197a007ea1dcac6b7c3daf1139) Bug: 112008522 Test: As series, see Ic61c13b53facfd2173065be715a7ee5f3af8760b Change-Id: I7557b2ca2ace0e19e97e997659857b3fa7a2b540 Signed-off-by: Eric Biggers --- crypto/chacha20_generic.c | 6 +++--- drivers/char/random.c | 24 ++++++++++++------------ include/crypto/chacha20.h | 3 ++- lib/chacha20.c | 2 +- 4 files changed, 18 insertions(+), 17 deletions(-) diff --git a/crypto/chacha20_generic.c b/crypto/chacha20_generic.c index bb4affbd591c3..e451c3cb6a56e 100644 --- a/crypto/chacha20_generic.c +++ b/crypto/chacha20_generic.c @@ -18,20 +18,20 @@ static void chacha20_docrypt(u32 *state, u8 *dst, const u8 *src, unsigned int bytes) { - u8 stream[CHACHA20_BLOCK_SIZE]; + u32 stream[CHACHA20_BLOCK_WORDS]; if (dst != src) memcpy(dst, src, bytes); while (bytes >= CHACHA20_BLOCK_SIZE) { chacha20_block(state, stream); - crypto_xor(dst, stream, CHACHA20_BLOCK_SIZE); + crypto_xor(dst, (const u8 *)stream, CHACHA20_BLOCK_SIZE); bytes -= CHACHA20_BLOCK_SIZE; dst += CHACHA20_BLOCK_SIZE; } if (bytes) { chacha20_block(state, stream); - crypto_xor(dst, stream, bytes); + crypto_xor(dst, (const u8 *)stream, bytes); } } diff --git a/drivers/char/random.c b/drivers/char/random.c index ea4dbfa306574..f04cadb7c92d2 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -433,9 +433,9 @@ static int crng_init_cnt = 0; static unsigned long crng_global_init_time = 0; #define CRNG_INIT_CNT_THRESH (2*CHACHA20_KEY_SIZE) static void _extract_crng(struct crng_state *crng, - __u8 out[CHACHA20_BLOCK_SIZE]); + __u32 out[CHACHA20_BLOCK_WORDS]); static void _crng_backtrack_protect(struct crng_state *crng, - __u8 tmp[CHACHA20_BLOCK_SIZE], int used); + __u32 tmp[CHACHA20_BLOCK_WORDS], int used); static void process_random_ready_list(void); static void _get_random_bytes(void *buf, int nbytes); @@ -911,7 +911,7 @@ static void crng_reseed(struct crng_state *crng, struct entropy_store *r) unsigned long flags; int i, num; union { - __u8 block[CHACHA20_BLOCK_SIZE]; + __u32 block[CHACHA20_BLOCK_WORDS]; __u32 key[8]; } buf; @@ -958,7 +958,7 @@ static void crng_reseed(struct crng_state *crng, struct entropy_store *r) } static void _extract_crng(struct crng_state *crng, - __u8 out[CHACHA20_BLOCK_SIZE]) + __u32 out[CHACHA20_BLOCK_WORDS]) { unsigned long v, flags; @@ -975,7 +975,7 @@ static void _extract_crng(struct crng_state *crng, spin_unlock_irqrestore(&crng->lock, flags); } -static void extract_crng(__u8 out[CHACHA20_BLOCK_SIZE]) +static void extract_crng(__u32 out[CHACHA20_BLOCK_WORDS]) { struct crng_state *crng = NULL; @@ -993,7 +993,7 @@ static void extract_crng(__u8 out[CHACHA20_BLOCK_SIZE]) * enough) to mutate the CRNG key to provide backtracking protection. */ static void _crng_backtrack_protect(struct crng_state *crng, - __u8 tmp[CHACHA20_BLOCK_SIZE], int used) + __u32 tmp[CHACHA20_BLOCK_WORDS], int used) { unsigned long flags; __u32 *s, *d; @@ -1005,14 +1005,14 @@ static void _crng_backtrack_protect(struct crng_state *crng, used = 0; } spin_lock_irqsave(&crng->lock, flags); - s = (__u32 *) &tmp[used]; + s = &tmp[used / sizeof(__u32)]; d = &crng->state[4]; for (i=0; i < 8; i++) *d++ ^= *s++; spin_unlock_irqrestore(&crng->lock, flags); } -static void crng_backtrack_protect(__u8 tmp[CHACHA20_BLOCK_SIZE], int used) +static void crng_backtrack_protect(__u32 tmp[CHACHA20_BLOCK_WORDS], int used) { struct crng_state *crng = NULL; @@ -1028,7 +1028,7 @@ static void crng_backtrack_protect(__u8 tmp[CHACHA20_BLOCK_SIZE], int used) static ssize_t extract_crng_user(void __user *buf, size_t nbytes) { ssize_t ret = 0, i = CHACHA20_BLOCK_SIZE; - __u8 tmp[CHACHA20_BLOCK_SIZE]; + __u32 tmp[CHACHA20_BLOCK_WORDS]; int large_request = (nbytes > 256); while (nbytes) { @@ -1614,7 +1614,7 @@ static void _warn_unseeded_randomness(const char *func_name, void *caller, */ static void _get_random_bytes(void *buf, int nbytes) { - __u8 tmp[CHACHA20_BLOCK_SIZE]; + __u32 tmp[CHACHA20_BLOCK_WORDS]; trace_get_random_bytes(nbytes, _RET_IP_); @@ -2224,7 +2224,7 @@ u64 get_random_u64(void) if (use_lock) read_lock_irqsave(&batched_entropy_reset_lock, flags); if (batch->position % ARRAY_SIZE(batch->entropy_u64) == 0) { - extract_crng((u8 *)batch->entropy_u64); + extract_crng((__u32 *)batch->entropy_u64); batch->position = 0; } ret = batch->entropy_u64[batch->position++]; @@ -2254,7 +2254,7 @@ u32 get_random_u32(void) if (use_lock) read_lock_irqsave(&batched_entropy_reset_lock, flags); if (batch->position % ARRAY_SIZE(batch->entropy_u32) == 0) { - extract_crng((u8 *)batch->entropy_u32); + extract_crng(batch->entropy_u32); batch->position = 0; } ret = batch->entropy_u32[batch->position++]; diff --git a/include/crypto/chacha20.h b/include/crypto/chacha20.h index caaa470389e0e..b83d66073db03 100644 --- a/include/crypto/chacha20.h +++ b/include/crypto/chacha20.h @@ -13,12 +13,13 @@ #define CHACHA20_IV_SIZE 16 #define CHACHA20_KEY_SIZE 32 #define CHACHA20_BLOCK_SIZE 64 +#define CHACHA20_BLOCK_WORDS (CHACHA20_BLOCK_SIZE / sizeof(u32)) struct chacha20_ctx { u32 key[8]; }; -void chacha20_block(u32 *state, void *stream); +void chacha20_block(u32 *state, u32 *stream); void crypto_chacha20_init(u32 *state, struct chacha20_ctx *ctx, u8 *iv); int crypto_chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key, unsigned int keysize); diff --git a/lib/chacha20.c b/lib/chacha20.c index 250ceed9ec9a8..29d3801dee24f 100644 --- a/lib/chacha20.c +++ b/lib/chacha20.c @@ -21,7 +21,7 @@ static inline u32 rotl32(u32 v, u8 n) return (v << n) | (v >> (sizeof(v) * 8 - n)); } -extern void chacha20_block(u32 *state, void *stream) +void chacha20_block(u32 *state, u32 *stream) { u32 x[16], *out = stream; int i; -- GitLab From ce1b79448b636a3e054942227a94c340fe762791 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 31 Dec 2017 18:02:45 -0600 Subject: [PATCH 1944/2269] UPSTREAM: crypto: chacha20 - use rol32() macro from bitops.h For chacha20_block(), use the existing 32-bit left-rotate function instead of defining one ourselves. Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu (cherry picked from commit 7660b1fb367eb3723b48d3980451fc4f25a05021) Bug: 112008522 Test: As series, see Ic61c13b53facfd2173065be715a7ee5f3af8760b Change-Id: I694106676f58f40c20de1f47d60f873d2a7f6536 Signed-off-by: Eric Biggers --- lib/chacha20.c | 69 +++++++++++++++++++++++--------------------------- 1 file changed, 32 insertions(+), 37 deletions(-) diff --git a/lib/chacha20.c b/lib/chacha20.c index 29d3801dee24f..c1cc50fb68c9f 100644 --- a/lib/chacha20.c +++ b/lib/chacha20.c @@ -16,11 +16,6 @@ #include #include -static inline u32 rotl32(u32 v, u8 n) -{ - return (v << n) | (v >> (sizeof(v) * 8 - n)); -} - void chacha20_block(u32 *state, u32 *stream) { u32 x[16], *out = stream; @@ -30,45 +25,45 @@ void chacha20_block(u32 *state, u32 *stream) x[i] = state[i]; for (i = 0; i < 20; i += 2) { - x[0] += x[4]; x[12] = rotl32(x[12] ^ x[0], 16); - x[1] += x[5]; x[13] = rotl32(x[13] ^ x[1], 16); - x[2] += x[6]; x[14] = rotl32(x[14] ^ x[2], 16); - x[3] += x[7]; x[15] = rotl32(x[15] ^ x[3], 16); + x[0] += x[4]; x[12] = rol32(x[12] ^ x[0], 16); + x[1] += x[5]; x[13] = rol32(x[13] ^ x[1], 16); + x[2] += x[6]; x[14] = rol32(x[14] ^ x[2], 16); + x[3] += x[7]; x[15] = rol32(x[15] ^ x[3], 16); - x[8] += x[12]; x[4] = rotl32(x[4] ^ x[8], 12); - x[9] += x[13]; x[5] = rotl32(x[5] ^ x[9], 12); - x[10] += x[14]; x[6] = rotl32(x[6] ^ x[10], 12); - x[11] += x[15]; x[7] = rotl32(x[7] ^ x[11], 12); + x[8] += x[12]; x[4] = rol32(x[4] ^ x[8], 12); + x[9] += x[13]; x[5] = rol32(x[5] ^ x[9], 12); + x[10] += x[14]; x[6] = rol32(x[6] ^ x[10], 12); + x[11] += x[15]; x[7] = rol32(x[7] ^ x[11], 12); - x[0] += x[4]; x[12] = rotl32(x[12] ^ x[0], 8); - x[1] += x[5]; x[13] = rotl32(x[13] ^ x[1], 8); - x[2] += x[6]; x[14] = rotl32(x[14] ^ x[2], 8); - x[3] += x[7]; x[15] = rotl32(x[15] ^ x[3], 8); + x[0] += x[4]; x[12] = rol32(x[12] ^ x[0], 8); + x[1] += x[5]; x[13] = rol32(x[13] ^ x[1], 8); + x[2] += x[6]; x[14] = rol32(x[14] ^ x[2], 8); + x[3] += x[7]; x[15] = rol32(x[15] ^ x[3], 8); - x[8] += x[12]; x[4] = rotl32(x[4] ^ x[8], 7); - x[9] += x[13]; x[5] = rotl32(x[5] ^ x[9], 7); - x[10] += x[14]; x[6] = rotl32(x[6] ^ x[10], 7); - x[11] += x[15]; x[7] = rotl32(x[7] ^ x[11], 7); + x[8] += x[12]; x[4] = rol32(x[4] ^ x[8], 7); + x[9] += x[13]; x[5] = rol32(x[5] ^ x[9], 7); + x[10] += x[14]; x[6] = rol32(x[6] ^ x[10], 7); + x[11] += x[15]; x[7] = rol32(x[7] ^ x[11], 7); - x[0] += x[5]; x[15] = rotl32(x[15] ^ x[0], 16); - x[1] += x[6]; x[12] = rotl32(x[12] ^ x[1], 16); - x[2] += x[7]; x[13] = rotl32(x[13] ^ x[2], 16); - x[3] += x[4]; x[14] = rotl32(x[14] ^ x[3], 16); + x[0] += x[5]; x[15] = rol32(x[15] ^ x[0], 16); + x[1] += x[6]; x[12] = rol32(x[12] ^ x[1], 16); + x[2] += x[7]; x[13] = rol32(x[13] ^ x[2], 16); + x[3] += x[4]; x[14] = rol32(x[14] ^ x[3], 16); - x[10] += x[15]; x[5] = rotl32(x[5] ^ x[10], 12); - x[11] += x[12]; x[6] = rotl32(x[6] ^ x[11], 12); - x[8] += x[13]; x[7] = rotl32(x[7] ^ x[8], 12); - x[9] += x[14]; x[4] = rotl32(x[4] ^ x[9], 12); + x[10] += x[15]; x[5] = rol32(x[5] ^ x[10], 12); + x[11] += x[12]; x[6] = rol32(x[6] ^ x[11], 12); + x[8] += x[13]; x[7] = rol32(x[7] ^ x[8], 12); + x[9] += x[14]; x[4] = rol32(x[4] ^ x[9], 12); - x[0] += x[5]; x[15] = rotl32(x[15] ^ x[0], 8); - x[1] += x[6]; x[12] = rotl32(x[12] ^ x[1], 8); - x[2] += x[7]; x[13] = rotl32(x[13] ^ x[2], 8); - x[3] += x[4]; x[14] = rotl32(x[14] ^ x[3], 8); + x[0] += x[5]; x[15] = rol32(x[15] ^ x[0], 8); + x[1] += x[6]; x[12] = rol32(x[12] ^ x[1], 8); + x[2] += x[7]; x[13] = rol32(x[13] ^ x[2], 8); + x[3] += x[4]; x[14] = rol32(x[14] ^ x[3], 8); - x[10] += x[15]; x[5] = rotl32(x[5] ^ x[10], 7); - x[11] += x[12]; x[6] = rotl32(x[6] ^ x[11], 7); - x[8] += x[13]; x[7] = rotl32(x[7] ^ x[8], 7); - x[9] += x[14]; x[4] = rotl32(x[4] ^ x[9], 7); + x[10] += x[15]; x[5] = rol32(x[5] ^ x[10], 7); + x[11] += x[12]; x[6] = rol32(x[6] ^ x[11], 7); + x[8] += x[13]; x[7] = rol32(x[7] ^ x[8], 7); + x[9] += x[14]; x[4] = rol32(x[4] ^ x[9], 7); } for (i = 0; i < ARRAY_SIZE(x); i++) -- GitLab From e72d2307f3131015411da56a3c35b6ef42f879bd Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 11 Sep 2018 20:05:10 -0700 Subject: [PATCH 1945/2269] UPSTREAM: crypto: chacha20 - Fix chacha20_block() keystream alignment (again) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In commit 9f480faec58c ("crypto: chacha20 - Fix keystream alignment for chacha20_block()"), I had missed that chacha20_block() can be called directly on the buffer passed to get_random_bytes(), which can have any alignment. So, while my commit didn't break anything, it didn't fully solve the alignment problems. Revert my solution and just update chacha20_block() to use put_unaligned_le32(), so the output buffer need not be aligned. This is simpler, and on many CPUs it's the same speed. But, I kept the 'tmp' buffers in extract_crng_user() and _get_random_bytes() 4-byte aligned, since that alignment is actually needed for _crng_backtrack_protect() too. Reported-by: Stephan Müller Cc: Theodore Ts'o Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu (cherry picked from commit a5e9f557098e54af44ade5d501379be18435bfbf) Bug: 112008522 Test: As series, see Ic61c13b53facfd2173065be715a7ee5f3af8760b Change-Id: Ic355d2416330ae2f4a50cb7064633810e35a93bf Signed-off-by: Eric Biggers --- crypto/chacha20_generic.c | 7 ++++--- drivers/char/random.c | 24 ++++++++++++------------ include/crypto/chacha20.h | 3 +-- lib/chacha20.c | 6 +++--- 4 files changed, 20 insertions(+), 20 deletions(-) diff --git a/crypto/chacha20_generic.c b/crypto/chacha20_generic.c index e451c3cb6a56e..3ae96587caf9a 100644 --- a/crypto/chacha20_generic.c +++ b/crypto/chacha20_generic.c @@ -18,20 +18,21 @@ static void chacha20_docrypt(u32 *state, u8 *dst, const u8 *src, unsigned int bytes) { - u32 stream[CHACHA20_BLOCK_WORDS]; + /* aligned to potentially speed up crypto_xor() */ + u8 stream[CHACHA20_BLOCK_SIZE] __aligned(sizeof(long)); if (dst != src) memcpy(dst, src, bytes); while (bytes >= CHACHA20_BLOCK_SIZE) { chacha20_block(state, stream); - crypto_xor(dst, (const u8 *)stream, CHACHA20_BLOCK_SIZE); + crypto_xor(dst, stream, CHACHA20_BLOCK_SIZE); bytes -= CHACHA20_BLOCK_SIZE; dst += CHACHA20_BLOCK_SIZE; } if (bytes) { chacha20_block(state, stream); - crypto_xor(dst, (const u8 *)stream, bytes); + crypto_xor(dst, stream, bytes); } } diff --git a/drivers/char/random.c b/drivers/char/random.c index f04cadb7c92d2..817a8227d1db4 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -433,9 +433,9 @@ static int crng_init_cnt = 0; static unsigned long crng_global_init_time = 0; #define CRNG_INIT_CNT_THRESH (2*CHACHA20_KEY_SIZE) static void _extract_crng(struct crng_state *crng, - __u32 out[CHACHA20_BLOCK_WORDS]); + __u8 out[CHACHA20_BLOCK_SIZE]); static void _crng_backtrack_protect(struct crng_state *crng, - __u32 tmp[CHACHA20_BLOCK_WORDS], int used); + __u8 tmp[CHACHA20_BLOCK_SIZE], int used); static void process_random_ready_list(void); static void _get_random_bytes(void *buf, int nbytes); @@ -911,7 +911,7 @@ static void crng_reseed(struct crng_state *crng, struct entropy_store *r) unsigned long flags; int i, num; union { - __u32 block[CHACHA20_BLOCK_WORDS]; + __u8 block[CHACHA20_BLOCK_SIZE]; __u32 key[8]; } buf; @@ -958,7 +958,7 @@ static void crng_reseed(struct crng_state *crng, struct entropy_store *r) } static void _extract_crng(struct crng_state *crng, - __u32 out[CHACHA20_BLOCK_WORDS]) + __u8 out[CHACHA20_BLOCK_SIZE]) { unsigned long v, flags; @@ -975,7 +975,7 @@ static void _extract_crng(struct crng_state *crng, spin_unlock_irqrestore(&crng->lock, flags); } -static void extract_crng(__u32 out[CHACHA20_BLOCK_WORDS]) +static void extract_crng(__u8 out[CHACHA20_BLOCK_SIZE]) { struct crng_state *crng = NULL; @@ -993,7 +993,7 @@ static void extract_crng(__u32 out[CHACHA20_BLOCK_WORDS]) * enough) to mutate the CRNG key to provide backtracking protection. */ static void _crng_backtrack_protect(struct crng_state *crng, - __u32 tmp[CHACHA20_BLOCK_WORDS], int used) + __u8 tmp[CHACHA20_BLOCK_SIZE], int used) { unsigned long flags; __u32 *s, *d; @@ -1005,14 +1005,14 @@ static void _crng_backtrack_protect(struct crng_state *crng, used = 0; } spin_lock_irqsave(&crng->lock, flags); - s = &tmp[used / sizeof(__u32)]; + s = (__u32 *) &tmp[used]; d = &crng->state[4]; for (i=0; i < 8; i++) *d++ ^= *s++; spin_unlock_irqrestore(&crng->lock, flags); } -static void crng_backtrack_protect(__u32 tmp[CHACHA20_BLOCK_WORDS], int used) +static void crng_backtrack_protect(__u8 tmp[CHACHA20_BLOCK_SIZE], int used) { struct crng_state *crng = NULL; @@ -1028,7 +1028,7 @@ static void crng_backtrack_protect(__u32 tmp[CHACHA20_BLOCK_WORDS], int used) static ssize_t extract_crng_user(void __user *buf, size_t nbytes) { ssize_t ret = 0, i = CHACHA20_BLOCK_SIZE; - __u32 tmp[CHACHA20_BLOCK_WORDS]; + __u8 tmp[CHACHA20_BLOCK_SIZE] __aligned(4); int large_request = (nbytes > 256); while (nbytes) { @@ -1614,7 +1614,7 @@ static void _warn_unseeded_randomness(const char *func_name, void *caller, */ static void _get_random_bytes(void *buf, int nbytes) { - __u32 tmp[CHACHA20_BLOCK_WORDS]; + __u8 tmp[CHACHA20_BLOCK_SIZE] __aligned(4); trace_get_random_bytes(nbytes, _RET_IP_); @@ -2224,7 +2224,7 @@ u64 get_random_u64(void) if (use_lock) read_lock_irqsave(&batched_entropy_reset_lock, flags); if (batch->position % ARRAY_SIZE(batch->entropy_u64) == 0) { - extract_crng((__u32 *)batch->entropy_u64); + extract_crng((u8 *)batch->entropy_u64); batch->position = 0; } ret = batch->entropy_u64[batch->position++]; @@ -2254,7 +2254,7 @@ u32 get_random_u32(void) if (use_lock) read_lock_irqsave(&batched_entropy_reset_lock, flags); if (batch->position % ARRAY_SIZE(batch->entropy_u32) == 0) { - extract_crng(batch->entropy_u32); + extract_crng((u8 *)batch->entropy_u32); batch->position = 0; } ret = batch->entropy_u32[batch->position++]; diff --git a/include/crypto/chacha20.h b/include/crypto/chacha20.h index b83d66073db03..f76302d99e2be 100644 --- a/include/crypto/chacha20.h +++ b/include/crypto/chacha20.h @@ -13,13 +13,12 @@ #define CHACHA20_IV_SIZE 16 #define CHACHA20_KEY_SIZE 32 #define CHACHA20_BLOCK_SIZE 64 -#define CHACHA20_BLOCK_WORDS (CHACHA20_BLOCK_SIZE / sizeof(u32)) struct chacha20_ctx { u32 key[8]; }; -void chacha20_block(u32 *state, u32 *stream); +void chacha20_block(u32 *state, u8 *stream); void crypto_chacha20_init(u32 *state, struct chacha20_ctx *ctx, u8 *iv); int crypto_chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key, unsigned int keysize); diff --git a/lib/chacha20.c b/lib/chacha20.c index c1cc50fb68c9f..d907fec6a9ed1 100644 --- a/lib/chacha20.c +++ b/lib/chacha20.c @@ -16,9 +16,9 @@ #include #include -void chacha20_block(u32 *state, u32 *stream) +void chacha20_block(u32 *state, u8 *stream) { - u32 x[16], *out = stream; + u32 x[16]; int i; for (i = 0; i < ARRAY_SIZE(x); i++) @@ -67,7 +67,7 @@ void chacha20_block(u32 *state, u32 *stream) } for (i = 0; i < ARRAY_SIZE(x); i++) - out[i] = cpu_to_le32(x[i] + state[i]); + put_unaligned_le32(x[i] + state[i], &stream[i * sizeof(u32)]); state[12]++; } -- GitLab From 4cf145fc1fdb5a0cd1d9761d60751ac79e4b5994 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 29 Dec 2017 10:10:24 -0600 Subject: [PATCH 1946/2269] UPSTREAM: crypto: poly1305 - use unaligned access macros to output digest Currently the only part of poly1305-generic which is assuming special alignment is the part where the final digest is written. Switch this over to the unaligned access macros so that we'll be able to remove the cra_alignmask. Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu (cherry picked from commit fcfbeedf79adc7abaea35b0f88ec23cf546d3b77) Bug: 112008522 Test: As series, see Ic61c13b53facfd2173065be715a7ee5f3af8760b Change-Id: Id6347cebf899e2aef6fab355f3af3d2773582127 Signed-off-by: Eric Biggers --- crypto/poly1305_generic.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/crypto/poly1305_generic.c b/crypto/poly1305_generic.c index ba39eb308c791..38a13ae0f2a34 100644 --- a/crypto/poly1305_generic.c +++ b/crypto/poly1305_generic.c @@ -204,7 +204,6 @@ EXPORT_SYMBOL_GPL(crypto_poly1305_update); int crypto_poly1305_final(struct shash_desc *desc, u8 *dst) { struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); - __le32 *mac = (__le32 *)dst; u32 h0, h1, h2, h3, h4; u32 g0, g1, g2, g3, g4; u32 mask; @@ -261,10 +260,10 @@ int crypto_poly1305_final(struct shash_desc *desc, u8 *dst) h3 = (h3 >> 18) | (h4 << 8); /* mac = (h + s) % (2^128) */ - f = (f >> 32) + h0 + dctx->s[0]; mac[0] = cpu_to_le32(f); - f = (f >> 32) + h1 + dctx->s[1]; mac[1] = cpu_to_le32(f); - f = (f >> 32) + h2 + dctx->s[2]; mac[2] = cpu_to_le32(f); - f = (f >> 32) + h3 + dctx->s[3]; mac[3] = cpu_to_le32(f); + f = (f >> 32) + h0 + dctx->s[0]; put_unaligned_le32(f, dst + 0); + f = (f >> 32) + h1 + dctx->s[1]; put_unaligned_le32(f, dst + 4); + f = (f >> 32) + h2 + dctx->s[2]; put_unaligned_le32(f, dst + 8); + f = (f >> 32) + h3 + dctx->s[3]; put_unaligned_le32(f, dst + 12); return 0; } -- GitLab From e5577b7a50f1c7c002a05082d2a650adf6baa8bf Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 24 Jul 2018 18:29:07 -0700 Subject: [PATCH 1947/2269] UPSTREAM: crypto: arm/chacha20 - always use vrev for 16-bit rotates The 4-way ChaCha20 NEON code implements 16-bit rotates with vrev32.16, but the one-way code (used on remainder blocks) implements it with vshl + vsri, which is slower. Switch the one-way code to vrev32.16 too. Signed-off-by: Eric Biggers Acked-by: Ard Biesheuvel Signed-off-by: Herbert Xu (cherry picked from commit 4e34e51f48ab7f77a4022aa810a786daa3eb3e22) Bug: 112008522 Test: As series, see Ic61c13b53facfd2173065be715a7ee5f3af8760b Change-Id: If6f8ea9545aa6ed0478e812d6e17400c186b003b Signed-off-by: Eric Biggers --- arch/arm/crypto/chacha20-neon-core.S | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/arch/arm/crypto/chacha20-neon-core.S b/arch/arm/crypto/chacha20-neon-core.S index 3fecb2124c35a..451a849ad5186 100644 --- a/arch/arm/crypto/chacha20-neon-core.S +++ b/arch/arm/crypto/chacha20-neon-core.S @@ -51,9 +51,8 @@ ENTRY(chacha20_block_xor_neon) .Ldoubleround: // x0 += x1, x3 = rotl32(x3 ^ x0, 16) vadd.i32 q0, q0, q1 - veor q4, q3, q0 - vshl.u32 q3, q4, #16 - vsri.u32 q3, q4, #16 + veor q3, q3, q0 + vrev32.16 q3, q3 // x2 += x3, x1 = rotl32(x1 ^ x2, 12) vadd.i32 q2, q2, q3 @@ -82,9 +81,8 @@ ENTRY(chacha20_block_xor_neon) // x0 += x1, x3 = rotl32(x3 ^ x0, 16) vadd.i32 q0, q0, q1 - veor q4, q3, q0 - vshl.u32 q3, q4, #16 - vsri.u32 q3, q4, #16 + veor q3, q3, q0 + vrev32.16 q3, q3 // x2 += x3, x1 = rotl32(x1 ^ x2, 12) vadd.i32 q2, q2, q3 -- GitLab From cead285da698a148cb4995c193d06ffdfc09e8cc Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 1 Sep 2018 00:17:07 -0700 Subject: [PATCH 1948/2269] UPSTREAM: crypto: arm/chacha20 - faster 8-bit rotations and other optimizations Optimize ChaCha20 NEON performance by: - Implementing the 8-bit rotations using the 'vtbl.8' instruction. - Streamlining the part that adds the original state and XORs the data. - Making some other small tweaks. On ARM Cortex-A7, these optimizations improve ChaCha20 performance from about 12.08 cycles per byte to about 11.37 -- a 5.9% improvement. There is a tradeoff involved with the 'vtbl.8' rotation method since there is at least one CPU (Cortex-A53) where it's not fastest. But it seems to be a better default; see the added comment. Overall, this patch reduces Cortex-A53 performance by less than 0.5%. Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu (cherry picked from commit a1b22a5f45fe884147a99e7c381bcc48d9b2acef) Bug: 112008522 Test: As series, see Ic61c13b53facfd2173065be715a7ee5f3af8760b Change-Id: Id7d26e6079cee50111f6d9616459547c60e6cb3e Signed-off-by: Eric Biggers --- arch/arm/crypto/chacha20-neon-core.S | 277 ++++++++++++++------------- 1 file changed, 143 insertions(+), 134 deletions(-) diff --git a/arch/arm/crypto/chacha20-neon-core.S b/arch/arm/crypto/chacha20-neon-core.S index 451a849ad5186..50e7b98968189 100644 --- a/arch/arm/crypto/chacha20-neon-core.S +++ b/arch/arm/crypto/chacha20-neon-core.S @@ -18,6 +18,34 @@ * (at your option) any later version. */ + /* + * NEON doesn't have a rotate instruction. The alternatives are, more or less: + * + * (a) vshl.u32 + vsri.u32 (needs temporary register) + * (b) vshl.u32 + vshr.u32 + vorr (needs temporary register) + * (c) vrev32.16 (16-bit rotations only) + * (d) vtbl.8 + vtbl.8 (multiple of 8 bits rotations only, + * needs index vector) + * + * ChaCha20 has 16, 12, 8, and 7-bit rotations. For the 12 and 7-bit + * rotations, the only choices are (a) and (b). We use (a) since it takes + * two-thirds the cycles of (b) on both Cortex-A7 and Cortex-A53. + * + * For the 16-bit rotation, we use vrev32.16 since it's consistently fastest + * and doesn't need a temporary register. + * + * For the 8-bit rotation, we use vtbl.8 + vtbl.8. On Cortex-A7, this sequence + * is twice as fast as (a), even when doing (a) on multiple registers + * simultaneously to eliminate the stall between vshl and vsri. Also, it + * parallelizes better when temporary registers are scarce. + * + * A disadvantage is that on Cortex-A53, the vtbl sequence is the same speed as + * (a), so the need to load the rotation table actually makes the vtbl method + * slightly slower overall on that CPU (~1.3% slower ChaCha20). Still, it + * seems to be a good compromise to get a more significant speed boost on some + * CPUs, e.g. ~4.8% faster ChaCha20 on Cortex-A7. + */ + #include .text @@ -46,7 +74,9 @@ ENTRY(chacha20_block_xor_neon) vmov q10, q2 vmov q11, q3 + adr ip, .Lrol8_table mov r3, #10 + vld1.8 {d10}, [ip, :64] .Ldoubleround: // x0 += x1, x3 = rotl32(x3 ^ x0, 16) @@ -62,9 +92,9 @@ ENTRY(chacha20_block_xor_neon) // x0 += x1, x3 = rotl32(x3 ^ x0, 8) vadd.i32 q0, q0, q1 - veor q4, q3, q0 - vshl.u32 q3, q4, #8 - vsri.u32 q3, q4, #24 + veor q3, q3, q0 + vtbl.8 d6, {d6}, d10 + vtbl.8 d7, {d7}, d10 // x2 += x3, x1 = rotl32(x1 ^ x2, 7) vadd.i32 q2, q2, q3 @@ -92,9 +122,9 @@ ENTRY(chacha20_block_xor_neon) // x0 += x1, x3 = rotl32(x3 ^ x0, 8) vadd.i32 q0, q0, q1 - veor q4, q3, q0 - vshl.u32 q3, q4, #8 - vsri.u32 q3, q4, #24 + veor q3, q3, q0 + vtbl.8 d6, {d6}, d10 + vtbl.8 d7, {d7}, d10 // x2 += x3, x1 = rotl32(x1 ^ x2, 7) vadd.i32 q2, q2, q3 @@ -139,13 +169,17 @@ ENTRY(chacha20_block_xor_neon) bx lr ENDPROC(chacha20_block_xor_neon) + .align 4 +.Lctrinc: .word 0, 1, 2, 3 +.Lrol8_table: .byte 3, 0, 1, 2, 7, 4, 5, 6 + .align 5 ENTRY(chacha20_4block_xor_neon) - push {r4-r6, lr} - mov ip, sp // preserve the stack pointer - sub r3, sp, #0x20 // allocate a 32 byte buffer - bic r3, r3, #0x1f // aligned to 32 bytes - mov sp, r3 + push {r4-r5} + mov r4, sp // preserve the stack pointer + sub ip, sp, #0x20 // allocate a 32 byte buffer + bic ip, ip, #0x1f // aligned to 32 bytes + mov sp, ip // r0: Input state matrix, s // r1: 4 data blocks output, o @@ -155,25 +189,24 @@ ENTRY(chacha20_4block_xor_neon) // This function encrypts four consecutive ChaCha20 blocks by loading // the state matrix in NEON registers four times. The algorithm performs // each operation on the corresponding word of each state matrix, hence - // requires no word shuffling. For final XORing step we transpose the - // matrix by interleaving 32- and then 64-bit words, which allows us to - // do XOR in NEON registers. + // requires no word shuffling. The words are re-interleaved before the + // final addition of the original state and the XORing step. // - // x0..15[0-3] = s0..3[0..3] - add r3, r0, #0x20 + // x0..15[0-3] = s0..15[0-3] + add ip, r0, #0x20 vld1.32 {q0-q1}, [r0] - vld1.32 {q2-q3}, [r3] + vld1.32 {q2-q3}, [ip] - adr r3, CTRINC + adr r5, .Lctrinc vdup.32 q15, d7[1] vdup.32 q14, d7[0] - vld1.32 {q11}, [r3, :128] + vld1.32 {q4}, [r5, :128] vdup.32 q13, d6[1] vdup.32 q12, d6[0] - vadd.i32 q12, q12, q11 // x12 += counter values 0-3 vdup.32 q11, d5[1] vdup.32 q10, d5[0] + vadd.u32 q12, q12, q4 // x12 += counter values 0-3 vdup.32 q9, d4[1] vdup.32 q8, d4[0] vdup.32 q7, d3[1] @@ -185,9 +218,13 @@ ENTRY(chacha20_4block_xor_neon) vdup.32 q1, d0[1] vdup.32 q0, d0[0] + adr ip, .Lrol8_table mov r3, #10 + b 1f .Ldoubleround4: + vld1.32 {q8-q9}, [sp, :256] +1: // x0 += x4, x12 = rotl32(x12 ^ x0, 16) // x1 += x5, x13 = rotl32(x13 ^ x1, 16) // x2 += x6, x14 = rotl32(x14 ^ x2, 16) @@ -236,24 +273,25 @@ ENTRY(chacha20_4block_xor_neon) // x1 += x5, x13 = rotl32(x13 ^ x1, 8) // x2 += x6, x14 = rotl32(x14 ^ x2, 8) // x3 += x7, x15 = rotl32(x15 ^ x3, 8) + vld1.8 {d16}, [ip, :64] vadd.i32 q0, q0, q4 vadd.i32 q1, q1, q5 vadd.i32 q2, q2, q6 vadd.i32 q3, q3, q7 - veor q8, q12, q0 - veor q9, q13, q1 - vshl.u32 q12, q8, #8 - vshl.u32 q13, q9, #8 - vsri.u32 q12, q8, #24 - vsri.u32 q13, q9, #24 + veor q12, q12, q0 + veor q13, q13, q1 + veor q14, q14, q2 + veor q15, q15, q3 - veor q8, q14, q2 - veor q9, q15, q3 - vshl.u32 q14, q8, #8 - vshl.u32 q15, q9, #8 - vsri.u32 q14, q8, #24 - vsri.u32 q15, q9, #24 + vtbl.8 d24, {d24}, d16 + vtbl.8 d25, {d25}, d16 + vtbl.8 d26, {d26}, d16 + vtbl.8 d27, {d27}, d16 + vtbl.8 d28, {d28}, d16 + vtbl.8 d29, {d29}, d16 + vtbl.8 d30, {d30}, d16 + vtbl.8 d31, {d31}, d16 vld1.32 {q8-q9}, [sp, :256] @@ -332,24 +370,25 @@ ENTRY(chacha20_4block_xor_neon) // x1 += x6, x12 = rotl32(x12 ^ x1, 8) // x2 += x7, x13 = rotl32(x13 ^ x2, 8) // x3 += x4, x14 = rotl32(x14 ^ x3, 8) + vld1.8 {d16}, [ip, :64] vadd.i32 q0, q0, q5 vadd.i32 q1, q1, q6 vadd.i32 q2, q2, q7 vadd.i32 q3, q3, q4 - veor q8, q15, q0 - veor q9, q12, q1 - vshl.u32 q15, q8, #8 - vshl.u32 q12, q9, #8 - vsri.u32 q15, q8, #24 - vsri.u32 q12, q9, #24 + veor q15, q15, q0 + veor q12, q12, q1 + veor q13, q13, q2 + veor q14, q14, q3 - veor q8, q13, q2 - veor q9, q14, q3 - vshl.u32 q13, q8, #8 - vshl.u32 q14, q9, #8 - vsri.u32 q13, q8, #24 - vsri.u32 q14, q9, #24 + vtbl.8 d30, {d30}, d16 + vtbl.8 d31, {d31}, d16 + vtbl.8 d24, {d24}, d16 + vtbl.8 d25, {d25}, d16 + vtbl.8 d26, {d26}, d16 + vtbl.8 d27, {d27}, d16 + vtbl.8 d28, {d28}, d16 + vtbl.8 d29, {d29}, d16 vld1.32 {q8-q9}, [sp, :256] @@ -379,104 +418,76 @@ ENTRY(chacha20_4block_xor_neon) vsri.u32 q6, q9, #25 subs r3, r3, #1 - beq 0f - - vld1.32 {q8-q9}, [sp, :256] - b .Ldoubleround4 - - // x0[0-3] += s0[0] - // x1[0-3] += s0[1] - // x2[0-3] += s0[2] - // x3[0-3] += s0[3] -0: ldmia r0!, {r3-r6} - vdup.32 q8, r3 - vdup.32 q9, r4 - vadd.i32 q0, q0, q8 - vadd.i32 q1, q1, q9 - vdup.32 q8, r5 - vdup.32 q9, r6 - vadd.i32 q2, q2, q8 - vadd.i32 q3, q3, q9 - - // x4[0-3] += s1[0] - // x5[0-3] += s1[1] - // x6[0-3] += s1[2] - // x7[0-3] += s1[3] - ldmia r0!, {r3-r6} - vdup.32 q8, r3 - vdup.32 q9, r4 - vadd.i32 q4, q4, q8 - vadd.i32 q5, q5, q9 - vdup.32 q8, r5 - vdup.32 q9, r6 - vadd.i32 q6, q6, q8 - vadd.i32 q7, q7, q9 - - // interleave 32-bit words in state n, n+1 - vzip.32 q0, q1 - vzip.32 q2, q3 - vzip.32 q4, q5 - vzip.32 q6, q7 - - // interleave 64-bit words in state n, n+2 + bne .Ldoubleround4 + + // x0..7[0-3] are in q0-q7, x10..15[0-3] are in q10-q15. + // x8..9[0-3] are on the stack. + + // Re-interleave the words in the first two rows of each block (x0..7). + // Also add the counter values 0-3 to x12[0-3]. + vld1.32 {q8}, [r5, :128] // load counter values 0-3 + vzip.32 q0, q1 // => (0 1 0 1) (0 1 0 1) + vzip.32 q2, q3 // => (2 3 2 3) (2 3 2 3) + vzip.32 q4, q5 // => (4 5 4 5) (4 5 4 5) + vzip.32 q6, q7 // => (6 7 6 7) (6 7 6 7) + vadd.u32 q12, q8 // x12 += counter values 0-3 vswp d1, d4 vswp d3, d6 + vld1.32 {q8-q9}, [r0]! // load s0..7 vswp d9, d12 vswp d11, d14 - // xor with corresponding input, write to output + // Swap q1 and q4 so that we'll free up consecutive registers (q0-q1) + // after XORing the first 32 bytes. + vswp q1, q4 + + // First two rows of each block are (q0 q1) (q2 q6) (q4 q5) (q3 q7) + + // x0..3[0-3] += s0..3[0-3] (add orig state to 1st row of each block) + vadd.u32 q0, q0, q8 + vadd.u32 q2, q2, q8 + vadd.u32 q4, q4, q8 + vadd.u32 q3, q3, q8 + + // x4..7[0-3] += s4..7[0-3] (add orig state to 2nd row of each block) + vadd.u32 q1, q1, q9 + vadd.u32 q6, q6, q9 + vadd.u32 q5, q5, q9 + vadd.u32 q7, q7, q9 + + // XOR first 32 bytes using keystream from first two rows of first block vld1.8 {q8-q9}, [r2]! veor q8, q8, q0 - veor q9, q9, q4 + veor q9, q9, q1 vst1.8 {q8-q9}, [r1]! + // Re-interleave the words in the last two rows of each block (x8..15). vld1.32 {q8-q9}, [sp, :256] - - // x8[0-3] += s2[0] - // x9[0-3] += s2[1] - // x10[0-3] += s2[2] - // x11[0-3] += s2[3] - ldmia r0!, {r3-r6} - vdup.32 q0, r3 - vdup.32 q4, r4 - vadd.i32 q8, q8, q0 - vadd.i32 q9, q9, q4 - vdup.32 q0, r5 - vdup.32 q4, r6 - vadd.i32 q10, q10, q0 - vadd.i32 q11, q11, q4 - - // x12[0-3] += s3[0] - // x13[0-3] += s3[1] - // x14[0-3] += s3[2] - // x15[0-3] += s3[3] - ldmia r0!, {r3-r6} - vdup.32 q0, r3 - vdup.32 q4, r4 - adr r3, CTRINC - vadd.i32 q12, q12, q0 - vld1.32 {q0}, [r3, :128] - vadd.i32 q13, q13, q4 - vadd.i32 q12, q12, q0 // x12 += counter values 0-3 - - vdup.32 q0, r5 - vdup.32 q4, r6 - vadd.i32 q14, q14, q0 - vadd.i32 q15, q15, q4 - - // interleave 32-bit words in state n, n+1 - vzip.32 q8, q9 - vzip.32 q10, q11 - vzip.32 q12, q13 - vzip.32 q14, q15 - - // interleave 64-bit words in state n, n+2 - vswp d17, d20 - vswp d19, d22 + vzip.32 q12, q13 // => (12 13 12 13) (12 13 12 13) + vzip.32 q14, q15 // => (14 15 14 15) (14 15 14 15) + vzip.32 q8, q9 // => (8 9 8 9) (8 9 8 9) + vzip.32 q10, q11 // => (10 11 10 11) (10 11 10 11) + vld1.32 {q0-q1}, [r0] // load s8..15 vswp d25, d28 vswp d27, d30 + vswp d17, d20 + vswp d19, d22 + + // Last two rows of each block are (q8 q12) (q10 q14) (q9 q13) (q11 q15) + + // x8..11[0-3] += s8..11[0-3] (add orig state to 3rd row of each block) + vadd.u32 q8, q8, q0 + vadd.u32 q10, q10, q0 + vadd.u32 q9, q9, q0 + vadd.u32 q11, q11, q0 + + // x12..15[0-3] += s12..15[0-3] (add orig state to 4th row of each block) + vadd.u32 q12, q12, q1 + vadd.u32 q14, q14, q1 + vadd.u32 q13, q13, q1 + vadd.u32 q15, q15, q1 - vmov q4, q1 + // XOR the rest of the data with the keystream vld1.8 {q0-q1}, [r2]! veor q0, q0, q8 @@ -509,13 +520,11 @@ ENTRY(chacha20_4block_xor_neon) vst1.8 {q0-q1}, [r1]! vld1.8 {q0-q1}, [r2] + mov sp, r4 // restore original stack pointer veor q0, q0, q11 veor q1, q1, q15 vst1.8 {q0-q1}, [r1] - mov sp, ip - pop {r4-r6, pc} + pop {r4-r5} + bx lr ENDPROC(chacha20_4block_xor_neon) - - .align 4 -CTRINC: .word 0, 1, 2, 3 -- GitLab From 7fd53dd7e0ad763845feeaec582f784a22fb6e58 Mon Sep 17 00:00:00 2001 From: Jinbum Park Date: Mon, 12 Feb 2018 22:52:37 +0900 Subject: [PATCH 1949/2269] UPSTREAM: crypto: arm/aes-cipher - move S-box to .rodata section Move the AES inverse S-box to the .rodata section where it is safe from abuse by speculation. Signed-off-by: Jinbum Park Acked-by: Ard Biesheuvel Signed-off-by: Herbert Xu (cherry picked from commit 4ff8b1dd814ba4c2dc4a8ce3cf77274e01bd1c93) Bug: 112008522 Test: As series, see Ic61c13b53facfd2173065be715a7ee5f3af8760b Change-Id: I87fb974fd13a7c8290b034412a88c36cf5dd94b7 Signed-off-by: Eric Biggers --- arch/arm/crypto/aes-cipher-core.S | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/arch/arm/crypto/aes-cipher-core.S b/arch/arm/crypto/aes-cipher-core.S index 54b384084637b..184d6c2d15d5e 100644 --- a/arch/arm/crypto/aes-cipher-core.S +++ b/arch/arm/crypto/aes-cipher-core.S @@ -174,6 +174,16 @@ .ltorg .endm +ENTRY(__aes_arm_encrypt) + do_crypt fround, crypto_ft_tab, crypto_ft_tab + 1, 2 +ENDPROC(__aes_arm_encrypt) + + .align 5 +ENTRY(__aes_arm_decrypt) + do_crypt iround, crypto_it_tab, __aes_arm_inverse_sbox, 0 +ENDPROC(__aes_arm_decrypt) + + .section ".rodata", "a" .align L1_CACHE_SHIFT .type __aes_arm_inverse_sbox, %object __aes_arm_inverse_sbox: @@ -210,12 +220,3 @@ __aes_arm_inverse_sbox: .byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26 .byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d .size __aes_arm_inverse_sbox, . - __aes_arm_inverse_sbox - -ENTRY(__aes_arm_encrypt) - do_crypt fround, crypto_ft_tab, crypto_ft_tab + 1, 2 -ENDPROC(__aes_arm_encrypt) - - .align 5 -ENTRY(__aes_arm_decrypt) - do_crypt iround, crypto_it_tab, __aes_arm_inverse_sbox, 0 -ENDPROC(__aes_arm_decrypt) -- GitLab From 14613fb1e0b0141c5c3869ff3191a15df7132916 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 17 Oct 2018 21:37:59 -0700 Subject: [PATCH 1950/2269] FROMGIT: crypto: arm/aes - add some hardening against cache-timing attacks Make the ARM scalar AES implementation closer to constant-time by disabling interrupts and prefetching the tables into L1 cache. This is feasible because due to ARM's "free" rotations, the main tables are only 1024 bytes instead of the usual 4096 used by most AES implementations. On ARM Cortex-A7, the speed loss is only about 5%. The resulting code is still over twice as fast as aes_ti.c. Responsiveness is potentially a concern, but interrupts are only disabled for a single AES block. Note that even after these changes, the implementation still isn't necessarily guaranteed to be constant-time; see https://cr.yp.to/antiforgery/cachetiming-20050414.pdf for a discussion of the many difficulties involved in writing truly constant-time AES software. But it's valuable to make such attacks more difficult. Much of this patch is based on patches suggested by Ard Biesheuvel. Suggested-by: Ard Biesheuvel Signed-off-by: Eric Biggers Reviewed-by: Ard Biesheuvel Signed-off-by: Herbert Xu (cherry picked from commit 913a3aa07d16e5b302f408d497a4b829910de247 https://git.kernel.org/pub/scm/linux/kernel/git/herbert/cryptodev-2.6.git master) Bug: 112008522 Test: As series, see Ic61c13b53facfd2173065be715a7ee5f3af8760b Change-Id: I453a7b71c3bb0051106b37cdb71d4511fd4e388a Signed-off-by: Eric Biggers --- arch/arm/crypto/Kconfig | 9 +++++ arch/arm/crypto/aes-cipher-core.S | 62 ++++++++++++++++++++++++++----- crypto/aes_generic.c | 9 +++-- 3 files changed, 66 insertions(+), 14 deletions(-) diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig index b8e69fe282b8d..13f7a5c1fe3f9 100644 --- a/arch/arm/crypto/Kconfig +++ b/arch/arm/crypto/Kconfig @@ -69,6 +69,15 @@ config CRYPTO_AES_ARM help Use optimized AES assembler routines for ARM platforms. + On ARM processors without the Crypto Extensions, this is the + fastest AES implementation for single blocks. For multiple + blocks, the NEON bit-sliced implementation is usually faster. + + This implementation may be vulnerable to cache timing attacks, + since it uses lookup tables. However, as countermeasures it + disables IRQs and preloads the tables; it is hoped this makes + such attacks very difficult. + config CRYPTO_AES_ARM_BS tristate "Bit sliced AES using NEON instructions" depends on KERNEL_MODE_NEON diff --git a/arch/arm/crypto/aes-cipher-core.S b/arch/arm/crypto/aes-cipher-core.S index 184d6c2d15d5e..f2d67c095e596 100644 --- a/arch/arm/crypto/aes-cipher-core.S +++ b/arch/arm/crypto/aes-cipher-core.S @@ -10,6 +10,7 @@ */ #include +#include #include .text @@ -41,7 +42,7 @@ .endif .endm - .macro __hround, out0, out1, in0, in1, in2, in3, t3, t4, enc, sz, op + .macro __hround, out0, out1, in0, in1, in2, in3, t3, t4, enc, sz, op, oldcpsr __select \out0, \in0, 0 __select t0, \in1, 1 __load \out0, \out0, 0, \sz, \op @@ -73,6 +74,14 @@ __load t0, t0, 3, \sz, \op __load \t4, \t4, 3, \sz, \op + .ifnb \oldcpsr + /* + * This is the final round and we're done with all data-dependent table + * lookups, so we can safely re-enable interrupts. + */ + restore_irqs \oldcpsr + .endif + eor \out1, \out1, t1, ror #24 eor \out0, \out0, t2, ror #16 ldm rk!, {t1, t2} @@ -83,14 +92,14 @@ eor \out1, \out1, t2 .endm - .macro fround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op + .macro fround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op, oldcpsr __hround \out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1, \sz, \op - __hround \out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1, \sz, \op + __hround \out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1, \sz, \op, \oldcpsr .endm - .macro iround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op + .macro iround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op, oldcpsr __hround \out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0, \sz, \op - __hround \out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0, \sz, \op + __hround \out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0, \sz, \op, \oldcpsr .endm .macro __rev, out, in @@ -118,13 +127,14 @@ .macro do_crypt, round, ttab, ltab, bsz push {r3-r11, lr} + // Load keys first, to reduce latency in case they're not cached yet. + ldm rk!, {r8-r11} + ldr r4, [in] ldr r5, [in, #4] ldr r6, [in, #8] ldr r7, [in, #12] - ldm rk!, {r8-r11} - #ifdef CONFIG_CPU_BIG_ENDIAN __rev r4, r4 __rev r5, r5 @@ -138,6 +148,25 @@ eor r7, r7, r11 __adrl ttab, \ttab + /* + * Disable interrupts and prefetch the 1024-byte 'ft' or 'it' table into + * L1 cache, assuming cacheline size >= 32. This is a hardening measure + * intended to make cache-timing attacks more difficult. They may not + * be fully prevented, however; see the paper + * https://cr.yp.to/antiforgery/cachetiming-20050414.pdf + * ("Cache-timing attacks on AES") for a discussion of the many + * difficulties involved in writing truly constant-time AES software. + */ + save_and_disable_irqs t0 + .set i, 0 + .rept 1024 / 128 + ldr r8, [ttab, #i + 0] + ldr r9, [ttab, #i + 32] + ldr r10, [ttab, #i + 64] + ldr r11, [ttab, #i + 96] + .set i, i + 128 + .endr + push {t0} // oldcpsr tst rounds, #2 bne 1f @@ -151,8 +180,21 @@ \round r4, r5, r6, r7, r8, r9, r10, r11 b 0b -2: __adrl ttab, \ltab - \round r4, r5, r6, r7, r8, r9, r10, r11, \bsz, b +2: .ifb \ltab + add ttab, ttab, #1 + .else + __adrl ttab, \ltab + // Prefetch inverse S-box for final round; see explanation above + .set i, 0 + .rept 256 / 64 + ldr t0, [ttab, #i + 0] + ldr t1, [ttab, #i + 32] + .set i, i + 64 + .endr + .endif + + pop {rounds} // oldcpsr + \round r4, r5, r6, r7, r8, r9, r10, r11, \bsz, b, rounds #ifdef CONFIG_CPU_BIG_ENDIAN __rev r4, r4 @@ -175,7 +217,7 @@ .endm ENTRY(__aes_arm_encrypt) - do_crypt fround, crypto_ft_tab, crypto_ft_tab + 1, 2 + do_crypt fround, crypto_ft_tab,, 2 ENDPROC(__aes_arm_encrypt) .align 5 diff --git a/crypto/aes_generic.c b/crypto/aes_generic.c index ca554d57d01e9..13df33aca4631 100644 --- a/crypto/aes_generic.c +++ b/crypto/aes_generic.c @@ -63,7 +63,8 @@ static inline u8 byte(const u32 x, const unsigned n) static const u32 rco_tab[10] = { 1, 2, 4, 8, 16, 32, 64, 128, 27, 54 }; -__visible const u32 crypto_ft_tab[4][256] = { +/* cacheline-aligned to facilitate prefetching into cache */ +__visible const u32 crypto_ft_tab[4][256] __cacheline_aligned = { { 0xa56363c6, 0x847c7cf8, 0x997777ee, 0x8d7b7bf6, 0x0df2f2ff, 0xbd6b6bd6, 0xb16f6fde, 0x54c5c591, @@ -327,7 +328,7 @@ __visible const u32 crypto_ft_tab[4][256] = { } }; -__visible const u32 crypto_fl_tab[4][256] = { +__visible const u32 crypto_fl_tab[4][256] __cacheline_aligned = { { 0x00000063, 0x0000007c, 0x00000077, 0x0000007b, 0x000000f2, 0x0000006b, 0x0000006f, 0x000000c5, @@ -591,7 +592,7 @@ __visible const u32 crypto_fl_tab[4][256] = { } }; -__visible const u32 crypto_it_tab[4][256] = { +__visible const u32 crypto_it_tab[4][256] __cacheline_aligned = { { 0x50a7f451, 0x5365417e, 0xc3a4171a, 0x965e273a, 0xcb6bab3b, 0xf1459d1f, 0xab58faac, 0x9303e34b, @@ -855,7 +856,7 @@ __visible const u32 crypto_it_tab[4][256] = { } }; -__visible const u32 crypto_il_tab[4][256] = { +__visible const u32 crypto_il_tab[4][256] __cacheline_aligned = { { 0x00000052, 0x00000009, 0x0000006a, 0x000000d5, 0x00000030, 0x00000036, 0x000000a5, 0x00000038, -- GitLab From 20d97843e56cd5ca8db586e6eac901791e47cc0e Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 16 Nov 2018 17:26:18 -0800 Subject: [PATCH 1951/2269] FROMGIT: crypto: chacha20-generic - add HChaCha20 library function Refactor the unkeyed permutation part of chacha20_block() into its own function, then add hchacha20_block() which is the ChaCha equivalent of HSalsa20 and is an intermediate step towards XChaCha20 (see https://cr.yp.to/snuffle/xsalsa-20081128.pdf). HChaCha20 skips the final addition of the initial state, and outputs only certain words of the state. It should not be used for streaming directly. Reviewed-by: Ard Biesheuvel Acked-by: Martin Willi Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu (cherry picked from commit dd333449d0fb667c5250c42488a7e90470e16c77 https://git.kernel.org/pub/scm/linux/kernel/git/herbert/cryptodev-2.6.git master) Bug: 112008522 Test: As series, see Ic61c13b53facfd2173065be715a7ee5f3af8760b Change-Id: I5b7e92b39ada49343cbdf21e4c6d7c1aa1adf183 Signed-off-by: Eric Biggers --- include/crypto/chacha20.h | 2 ++ lib/chacha20.c | 50 ++++++++++++++++++++++++++++++++++----- 2 files changed, 46 insertions(+), 6 deletions(-) diff --git a/include/crypto/chacha20.h b/include/crypto/chacha20.h index f76302d99e2be..fbec4e6a87890 100644 --- a/include/crypto/chacha20.h +++ b/include/crypto/chacha20.h @@ -19,6 +19,8 @@ struct chacha20_ctx { }; void chacha20_block(u32 *state, u8 *stream); +void hchacha20_block(const u32 *in, u32 *out); + void crypto_chacha20_init(u32 *state, struct chacha20_ctx *ctx, u8 *iv); int crypto_chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key, unsigned int keysize); diff --git a/lib/chacha20.c b/lib/chacha20.c index d907fec6a9ed1..6a484e16171d1 100644 --- a/lib/chacha20.c +++ b/lib/chacha20.c @@ -1,5 +1,5 @@ /* - * ChaCha20 256-bit cipher algorithm, RFC7539 + * The "hash function" used as the core of the ChaCha20 stream cipher (RFC7539) * * Copyright (C) 2015 Martin Willi * @@ -16,14 +16,10 @@ #include #include -void chacha20_block(u32 *state, u8 *stream) +static void chacha20_permute(u32 *x) { - u32 x[16]; int i; - for (i = 0; i < ARRAY_SIZE(x); i++) - x[i] = state[i]; - for (i = 0; i < 20; i += 2) { x[0] += x[4]; x[12] = rol32(x[12] ^ x[0], 16); x[1] += x[5]; x[13] = rol32(x[13] ^ x[1], 16); @@ -65,6 +61,25 @@ void chacha20_block(u32 *state, u8 *stream) x[8] += x[13]; x[7] = rol32(x[7] ^ x[8], 7); x[9] += x[14]; x[4] = rol32(x[4] ^ x[9], 7); } +} + +/** + * chacha20_block - generate one keystream block and increment block counter + * @state: input state matrix (16 32-bit words) + * @stream: output keystream block (64 bytes) + * + * This is the ChaCha20 core, a function from 64-byte strings to 64-byte + * strings. The caller has already converted the endianness of the input. This + * function also handles incrementing the block counter in the input matrix. + */ +void chacha20_block(u32 *state, u8 *stream) +{ + u32 x[16]; + int i; + + memcpy(x, state, 64); + + chacha20_permute(x); for (i = 0; i < ARRAY_SIZE(x); i++) put_unaligned_le32(x[i] + state[i], &stream[i * sizeof(u32)]); @@ -72,3 +87,26 @@ void chacha20_block(u32 *state, u8 *stream) state[12]++; } EXPORT_SYMBOL(chacha20_block); + +/** + * hchacha20_block - abbreviated ChaCha20 core, for XChaCha20 + * @in: input state matrix (16 32-bit words) + * @out: output (8 32-bit words) + * + * HChaCha20 is the ChaCha equivalent of HSalsa20 and is an intermediate step + * towards XChaCha20 (see https://cr.yp.to/snuffle/xsalsa-20081128.pdf). + * HChaCha20 skips the final addition of the initial state, and outputs only + * certain words of the state. It should not be used for streaming directly. + */ +void hchacha20_block(const u32 *in, u32 *out) +{ + u32 x[16]; + + memcpy(x, in, 64); + + chacha20_permute(x); + + memcpy(&out[0], &x[0], 16); + memcpy(&out[4], &x[12], 16); +} +EXPORT_SYMBOL(hchacha20_block); -- GitLab From 19217c4071dccf1e003e17014c0b91c74fc27547 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 16 Nov 2018 17:26:19 -0800 Subject: [PATCH 1952/2269] FROMGIT: crypto: chacha20-generic - don't unnecessarily use atomic walk chacha20-generic doesn't use SIMD instructions or otherwise disable preemption, so passing atomic=true to skcipher_walk_virt() is unnecessary. Suggested-by: Ard Biesheuvel Acked-by: Martin Willi Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu (cherry picked from commit 5e04542a0e0763294e9fced73a149c38c4e0cee5 https://git.kernel.org/pub/scm/linux/kernel/git/herbert/cryptodev-2.6.git master) Bug: 112008522 Test: As series, see Ic61c13b53facfd2173065be715a7ee5f3af8760b Change-Id: I28a63d6f8aa59f60aed8d35107b3c546ca5152f7 Signed-off-by: Eric Biggers --- crypto/chacha20_generic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crypto/chacha20_generic.c b/crypto/chacha20_generic.c index 3ae96587caf9a..3529521d72a41 100644 --- a/crypto/chacha20_generic.c +++ b/crypto/chacha20_generic.c @@ -81,7 +81,7 @@ int crypto_chacha20_crypt(struct skcipher_request *req) u32 state[16]; int err; - err = skcipher_walk_virt(&walk, req, true); + err = skcipher_walk_virt(&walk, req, false); crypto_chacha20_init(state, ctx, walk.iv); -- GitLab From 5fad54249fc2bcadf636eff7152462ddf2c69a7c Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 16 Nov 2018 17:26:20 -0800 Subject: [PATCH 1953/2269] BACKPORT, FROMGIT: crypto: chacha20-generic - add XChaCha20 support Add support for the XChaCha20 stream cipher. XChaCha20 is the application of the XSalsa20 construction (https://cr.yp.to/snuffle/xsalsa-20081128.pdf) to ChaCha20 rather than to Salsa20. XChaCha20 extends ChaCha20's nonce length from 64 bits (or 96 bits, depending on convention) to 192 bits, while provably retaining ChaCha20's security. XChaCha20 uses the ChaCha20 permutation to map the key and first 128 nonce bits to a 256-bit subkey. Then, it does the ChaCha20 stream cipher with the subkey and remaining 64 bits of nonce. We need XChaCha support in order to add support for the Adiantum encryption mode. Note that to meet our performance requirements, we actually plan to primarily use the variant XChaCha12. But we believe it's wise to first add XChaCha20 as a baseline with a higher security margin, in case there are any situations where it can be used. Supporting both variants is straightforward. Since XChaCha20's subkey differs for each request, XChaCha20 can't be a template that wraps ChaCha20; that would require re-keying the underlying ChaCha20 for every request, which wouldn't be thread-safe. Instead, we make XChaCha20 its own top-level algorithm which calls the ChaCha20 streaming implementation internally. Similar to the existing ChaCha20 implementation, we define the IV to be the nonce and stream position concatenated together. This allows users to seek to any position in the stream. I considered splitting the code into separate chacha20-common, chacha20, and xchacha20 modules, so that chacha20 and xchacha20 could be enabled/disabled independently. However, since nearly all the code is shared anyway, I ultimately decided there would have been little benefit to the added complexity of separate modules. Reviewed-by: Ard Biesheuvel Acked-by: Martin Willi Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu (cherry picked from commit de61d7ae5d3789dcba3749a418f76613fbee8414 https://git.kernel.org/pub/scm/linux/kernel/git/herbert/cryptodev-2.6.git master) (adjusted test vector formatting for old testmgr) Bug: 112008522 Test: As series, see Ic61c13b53facfd2173065be715a7ee5f3af8760b Change-Id: I5c878e1d6577abda11d7b737cbb650baf16b6886 Signed-off-by: Eric Biggers --- crypto/Kconfig | 14 +- crypto/chacha20_generic.c | 120 +++++--- crypto/testmgr.c | 9 + crypto/testmgr.h | 583 ++++++++++++++++++++++++++++++++++++++ include/crypto/chacha20.h | 14 +- 5 files changed, 698 insertions(+), 42 deletions(-) diff --git a/crypto/Kconfig b/crypto/Kconfig index e3ab31af413b4..9db49135bdfb1 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1325,18 +1325,22 @@ config CRYPTO_SALSA20 Bernstein . See config CRYPTO_CHACHA20 - tristate "ChaCha20 cipher algorithm" + tristate "ChaCha20 stream cipher algorithms" select CRYPTO_BLKCIPHER help - ChaCha20 cipher algorithm, RFC7539. + The ChaCha20 and XChaCha20 stream cipher algorithms. ChaCha20 is a 256-bit high-speed stream cipher designed by Daniel J. Bernstein and further specified in RFC7539 for use in IETF protocols. - This is the portable C implementation of ChaCha20. - - See also: + This is the portable C implementation of ChaCha20. See also: + XChaCha20 is the application of the XSalsa20 construction to ChaCha20 + rather than to Salsa20. XChaCha20 extends ChaCha20's nonce length + from 64 bits (or 96 bits using the RFC7539 convention) to 192 bits, + while provably retaining ChaCha20's security. See also: + + config CRYPTO_CHACHA20_X86_64 tristate "ChaCha20 cipher algorithm (x86_64/SSSE3/AVX2)" depends on X86 && 64BIT diff --git a/crypto/chacha20_generic.c b/crypto/chacha20_generic.c index 3529521d72a41..4305b1b62b16a 100644 --- a/crypto/chacha20_generic.c +++ b/crypto/chacha20_generic.c @@ -1,7 +1,8 @@ /* - * ChaCha20 256-bit cipher algorithm, RFC7539 + * ChaCha20 (RFC7539) and XChaCha20 stream cipher algorithms * * Copyright (C) 2015 Martin Willi + * Copyright (C) 2018 Google LLC * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -36,6 +37,31 @@ static void chacha20_docrypt(u32 *state, u8 *dst, const u8 *src, } } +static int chacha20_stream_xor(struct skcipher_request *req, + struct chacha20_ctx *ctx, u8 *iv) +{ + struct skcipher_walk walk; + u32 state[16]; + int err; + + err = skcipher_walk_virt(&walk, req, false); + + crypto_chacha20_init(state, ctx, iv); + + while (walk.nbytes > 0) { + unsigned int nbytes = walk.nbytes; + + if (nbytes < walk.total) + nbytes = round_down(nbytes, walk.stride); + + chacha20_docrypt(state, walk.dst.virt.addr, walk.src.virt.addr, + nbytes); + err = skcipher_walk_done(&walk, walk.nbytes - nbytes); + } + + return err; +} + void crypto_chacha20_init(u32 *state, struct chacha20_ctx *ctx, u8 *iv) { state[0] = 0x61707865; /* "expa" */ @@ -77,54 +103,74 @@ int crypto_chacha20_crypt(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct chacha20_ctx *ctx = crypto_skcipher_ctx(tfm); - struct skcipher_walk walk; - u32 state[16]; - int err; - - err = skcipher_walk_virt(&walk, req, false); - crypto_chacha20_init(state, ctx, walk.iv); + return chacha20_stream_xor(req, ctx, req->iv); +} +EXPORT_SYMBOL_GPL(crypto_chacha20_crypt); - while (walk.nbytes > 0) { - unsigned int nbytes = walk.nbytes; +int crypto_xchacha20_crypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct chacha20_ctx *ctx = crypto_skcipher_ctx(tfm); + struct chacha20_ctx subctx; + u32 state[16]; + u8 real_iv[16]; - if (nbytes < walk.total) - nbytes = round_down(nbytes, walk.stride); + /* Compute the subkey given the original key and first 128 nonce bits */ + crypto_chacha20_init(state, ctx, req->iv); + hchacha20_block(state, subctx.key); - chacha20_docrypt(state, walk.dst.virt.addr, walk.src.virt.addr, - nbytes); - err = skcipher_walk_done(&walk, walk.nbytes - nbytes); - } + /* Build the real IV */ + memcpy(&real_iv[0], req->iv + 24, 8); /* stream position */ + memcpy(&real_iv[8], req->iv + 16, 8); /* remaining 64 nonce bits */ - return err; + /* Generate the stream and XOR it with the data */ + return chacha20_stream_xor(req, &subctx, real_iv); } -EXPORT_SYMBOL_GPL(crypto_chacha20_crypt); - -static struct skcipher_alg alg = { - .base.cra_name = "chacha20", - .base.cra_driver_name = "chacha20-generic", - .base.cra_priority = 100, - .base.cra_blocksize = 1, - .base.cra_ctxsize = sizeof(struct chacha20_ctx), - .base.cra_module = THIS_MODULE, - - .min_keysize = CHACHA20_KEY_SIZE, - .max_keysize = CHACHA20_KEY_SIZE, - .ivsize = CHACHA20_IV_SIZE, - .chunksize = CHACHA20_BLOCK_SIZE, - .setkey = crypto_chacha20_setkey, - .encrypt = crypto_chacha20_crypt, - .decrypt = crypto_chacha20_crypt, +EXPORT_SYMBOL_GPL(crypto_xchacha20_crypt); + +static struct skcipher_alg algs[] = { + { + .base.cra_name = "chacha20", + .base.cra_driver_name = "chacha20-generic", + .base.cra_priority = 100, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct chacha20_ctx), + .base.cra_module = THIS_MODULE, + + .min_keysize = CHACHA20_KEY_SIZE, + .max_keysize = CHACHA20_KEY_SIZE, + .ivsize = CHACHA20_IV_SIZE, + .chunksize = CHACHA20_BLOCK_SIZE, + .setkey = crypto_chacha20_setkey, + .encrypt = crypto_chacha20_crypt, + .decrypt = crypto_chacha20_crypt, + }, { + .base.cra_name = "xchacha20", + .base.cra_driver_name = "xchacha20-generic", + .base.cra_priority = 100, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct chacha20_ctx), + .base.cra_module = THIS_MODULE, + + .min_keysize = CHACHA20_KEY_SIZE, + .max_keysize = CHACHA20_KEY_SIZE, + .ivsize = XCHACHA20_IV_SIZE, + .chunksize = CHACHA20_BLOCK_SIZE, + .setkey = crypto_chacha20_setkey, + .encrypt = crypto_xchacha20_crypt, + .decrypt = crypto_xchacha20_crypt, + } }; static int __init chacha20_generic_mod_init(void) { - return crypto_register_skcipher(&alg); + return crypto_register_skciphers(algs, ARRAY_SIZE(algs)); } static void __exit chacha20_generic_mod_fini(void) { - crypto_unregister_skcipher(&alg); + crypto_unregister_skciphers(algs, ARRAY_SIZE(algs)); } module_init(chacha20_generic_mod_init); @@ -132,6 +178,8 @@ module_exit(chacha20_generic_mod_fini); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Martin Willi "); -MODULE_DESCRIPTION("chacha20 cipher algorithm"); +MODULE_DESCRIPTION("ChaCha20 and XChaCha20 stream ciphers (generic)"); MODULE_ALIAS_CRYPTO("chacha20"); MODULE_ALIAS_CRYPTO("chacha20-generic"); +MODULE_ALIAS_CRYPTO("xchacha20"); +MODULE_ALIAS_CRYPTO("xchacha20-generic"); diff --git a/crypto/testmgr.c b/crypto/testmgr.c index 8a124d3e91131..dd92cc29e6efe 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -3547,6 +3547,15 @@ static const struct alg_test_desc alg_test_descs[] = { .suite = { .hash = __VECS(aes_xcbc128_tv_template) } + }, { + .alg = "xchacha20", + .test = alg_test_skcipher, + .suite = { + .cipher = { + .enc = __VECS(xchacha20_tv_template), + .dec = __VECS(xchacha20_tv_template) + } + }, }, { .alg = "xts(aes)", .test = alg_test_skcipher, diff --git a/crypto/testmgr.h b/crypto/testmgr.h index d39431877e81f..76420201ed7e5 100644 --- a/crypto/testmgr.h +++ b/crypto/testmgr.h @@ -33059,6 +33059,589 @@ static const struct cipher_testvec chacha20_enc_tv_template[] = { }, }; +static const struct cipher_testvec xchacha20_tv_template[] = { + { /* from libsodium test/default/xchacha20.c */ + .key = "\x79\xc9\x97\x98\xac\x67\x30\x0b" + "\xbb\x27\x04\xc9\x5c\x34\x1e\x32" + "\x45\xf3\xdc\xb2\x17\x61\xb9\x8e" + "\x52\xff\x45\xb2\x4f\x30\x4f\xc4", + .klen = 32, + .iv = "\xb3\x3f\xfd\x30\x96\x47\x9b\xcf" + "\xbc\x9a\xee\x49\x41\x76\x88\xa0" + "\xa2\x55\x4f\x8d\x95\x38\x94\x19" + "\x00\x00\x00\x00\x00\x00\x00\x00", + .input = "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00", + .result = "\xc6\xe9\x75\x81\x60\x08\x3a\xc6" + "\x04\xef\x90\xe7\x12\xce\x6e\x75" + "\xd7\x79\x75\x90\x74\x4e\x0c\xf0" + "\x60\xf0\x13\x73\x9c", + .ilen = 29, + .rlen = 29, + }, { /* from libsodium test/default/xchacha20.c */ + .key = "\x9d\x23\xbd\x41\x49\xcb\x97\x9c" + "\xcf\x3c\x5c\x94\xdd\x21\x7e\x98" + "\x08\xcb\x0e\x50\xcd\x0f\x67\x81" + "\x22\x35\xea\xaf\x60\x1d\x62\x32", + .klen = 32, + .iv = "\xc0\x47\x54\x82\x66\xb7\xc3\x70" + "\xd3\x35\x66\xa2\x42\x5c\xbf\x30" + "\xd8\x2d\x1e\xaf\x52\x94\x10\x9e" + "\x00\x00\x00\x00\x00\x00\x00\x00", + .input = "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00", + .result = "\xa2\x12\x09\x09\x65\x94\xde\x8c" + "\x56\x67\xb1\xd1\x3a\xd9\x3f\x74" + "\x41\x06\xd0\x54\xdf\x21\x0e\x47" + "\x82\xcd\x39\x6f\xec\x69\x2d\x35" + "\x15\xa2\x0b\xf3\x51\xee\xc0\x11" + "\xa9\x2c\x36\x78\x88\xbc\x46\x4c" + "\x32\xf0\x80\x7a\xcd\x6c\x20\x3a" + "\x24\x7e\x0d\xb8\x54\x14\x84\x68" + "\xe9\xf9\x6b\xee\x4c\xf7\x18\xd6" + "\x8d\x5f\x63\x7c\xbd\x5a\x37\x64" + "\x57\x78\x8e\x6f\xae\x90\xfc\x31" + "\x09\x7c\xfc", + .ilen = 91, + .rlen = 91, + }, { /* Taken from the ChaCha20 test vectors, appended 16 random bytes + to nonce, and recomputed the ciphertext with libsodium */ + .key = "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00", + .klen = 32, + .iv = "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x67\xc6\x69\x73" + "\x51\xff\x4a\xec\x29\xcd\xba\xab" + "\x00\x00\x00\x00\x00\x00\x00\x00", + .input = "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00", + .result = "\x9c\x49\x2a\xe7\x8a\x2f\x93\xc7" + "\xb3\x33\x6f\x82\x17\xd8\xc4\x1e" + "\xad\x80\x11\x11\x1d\x4c\x16\x18" + "\x07\x73\x9b\x4f\xdb\x7c\xcb\x47" + "\xfd\xef\x59\x74\xfa\x3f\xe5\x4c" + "\x9b\xd0\xea\xbc\xba\x56\xad\x32" + "\x03\xdc\xf8\x2b\xc1\xe1\x75\x67" + "\x23\x7b\xe6\xfc\xd4\x03\x86\x54", + .ilen = 64, + .rlen = 64, + }, { /* Taken from the ChaCha20 test vectors, appended 16 random bytes + to nonce, and recomputed the ciphertext with libsodium */ + .key = "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x01", + .klen = 32, + .iv = "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x02\xf2\xfb\xe3\x46" + "\x7c\xc2\x54\xf8\x1b\xe8\xe7\x8d" + "\x01\x00\x00\x00\x00\x00\x00\x00", + .input = "\x41\x6e\x79\x20\x73\x75\x62\x6d" + "\x69\x73\x73\x69\x6f\x6e\x20\x74" + "\x6f\x20\x74\x68\x65\x20\x49\x45" + "\x54\x46\x20\x69\x6e\x74\x65\x6e" + "\x64\x65\x64\x20\x62\x79\x20\x74" + "\x68\x65\x20\x43\x6f\x6e\x74\x72" + "\x69\x62\x75\x74\x6f\x72\x20\x66" + "\x6f\x72\x20\x70\x75\x62\x6c\x69" + "\x63\x61\x74\x69\x6f\x6e\x20\x61" + "\x73\x20\x61\x6c\x6c\x20\x6f\x72" + "\x20\x70\x61\x72\x74\x20\x6f\x66" + "\x20\x61\x6e\x20\x49\x45\x54\x46" + "\x20\x49\x6e\x74\x65\x72\x6e\x65" + "\x74\x2d\x44\x72\x61\x66\x74\x20" + "\x6f\x72\x20\x52\x46\x43\x20\x61" + "\x6e\x64\x20\x61\x6e\x79\x20\x73" + "\x74\x61\x74\x65\x6d\x65\x6e\x74" + "\x20\x6d\x61\x64\x65\x20\x77\x69" + "\x74\x68\x69\x6e\x20\x74\x68\x65" + "\x20\x63\x6f\x6e\x74\x65\x78\x74" + "\x20\x6f\x66\x20\x61\x6e\x20\x49" + "\x45\x54\x46\x20\x61\x63\x74\x69" + "\x76\x69\x74\x79\x20\x69\x73\x20" + "\x63\x6f\x6e\x73\x69\x64\x65\x72" + "\x65\x64\x20\x61\x6e\x20\x22\x49" + "\x45\x54\x46\x20\x43\x6f\x6e\x74" + "\x72\x69\x62\x75\x74\x69\x6f\x6e" + "\x22\x2e\x20\x53\x75\x63\x68\x20" + "\x73\x74\x61\x74\x65\x6d\x65\x6e" + "\x74\x73\x20\x69\x6e\x63\x6c\x75" + "\x64\x65\x20\x6f\x72\x61\x6c\x20" + "\x73\x74\x61\x74\x65\x6d\x65\x6e" + "\x74\x73\x20\x69\x6e\x20\x49\x45" + "\x54\x46\x20\x73\x65\x73\x73\x69" + "\x6f\x6e\x73\x2c\x20\x61\x73\x20" + "\x77\x65\x6c\x6c\x20\x61\x73\x20" + "\x77\x72\x69\x74\x74\x65\x6e\x20" + "\x61\x6e\x64\x20\x65\x6c\x65\x63" + "\x74\x72\x6f\x6e\x69\x63\x20\x63" + "\x6f\x6d\x6d\x75\x6e\x69\x63\x61" + "\x74\x69\x6f\x6e\x73\x20\x6d\x61" + "\x64\x65\x20\x61\x74\x20\x61\x6e" + "\x79\x20\x74\x69\x6d\x65\x20\x6f" + "\x72\x20\x70\x6c\x61\x63\x65\x2c" + "\x20\x77\x68\x69\x63\x68\x20\x61" + "\x72\x65\x20\x61\x64\x64\x72\x65" + "\x73\x73\x65\x64\x20\x74\x6f", + .result = "\xf9\xab\x7a\x4a\x60\xb8\x5f\xa0" + "\x50\xbb\x57\xce\xef\x8c\xc1\xd9" + "\x24\x15\xb3\x67\x5e\x7f\x01\xf6" + "\x1c\x22\xf6\xe5\x71\xb1\x43\x64" + "\x63\x05\xd5\xfc\x5c\x3d\xc0\x0e" + "\x23\xef\xd3\x3b\xd9\xdc\x7f\xa8" + "\x58\x26\xb3\xd0\xc2\xd5\x04\x3f" + "\x0a\x0e\x8f\x17\xe4\xcd\xf7\x2a" + "\xb4\x2c\x09\xe4\x47\xec\x8b\xfb" + "\x59\x37\x7a\xa1\xd0\x04\x7e\xaa" + "\xf1\x98\x5f\x24\x3d\x72\x9a\x43" + "\xa4\x36\x51\x92\x22\x87\xff\x26" + "\xce\x9d\xeb\x59\x78\x84\x5e\x74" + "\x97\x2e\x63\xc0\xef\x29\xf7\x8a" + "\xb9\xee\x35\x08\x77\x6a\x35\x9a" + "\x3e\xe6\x4f\x06\x03\x74\x1b\xc1" + "\x5b\xb3\x0b\x89\x11\x07\xd3\xb7" + "\x53\xd6\x25\x04\xd9\x35\xb4\x5d" + "\x4c\x33\x5a\xc2\x42\x4c\xe6\xa4" + "\x97\x6e\x0e\xd2\xb2\x8b\x2f\x7f" + "\x28\xe5\x9f\xac\x4b\x2e\x02\xab" + "\x85\xfa\xa9\x0d\x7c\x2d\x10\xe6" + "\x91\xab\x55\x63\xf0\xde\x3a\x94" + "\x25\x08\x10\x03\xc2\x68\xd1\xf4" + "\xaf\x7d\x9c\x99\xf7\x86\x96\x30" + "\x60\xfc\x0b\xe6\xa8\x80\x15\xb0" + "\x81\xb1\x0c\xbe\xb9\x12\x18\x25" + "\xe9\x0e\xb1\xe7\x23\xb2\xef\x4a" + "\x22\x8f\xc5\x61\x89\xd4\xe7\x0c" + "\x64\x36\x35\x61\xb6\x34\x60\xf7" + "\x7b\x61\x37\x37\x12\x10\xa2\xf6" + "\x7e\xdb\x7f\x39\x3f\xb6\x8e\x89" + "\x9e\xf3\xfe\x13\x98\xbb\x66\x5a" + "\xec\xea\xab\x3f\x9c\x87\xc4\x8c" + "\x8a\x04\x18\x49\xfc\x77\x11\x50" + "\x16\xe6\x71\x2b\xee\xc0\x9c\xb6" + "\x87\xfd\x80\xff\x0b\x1d\x73\x38" + "\xa4\x1d\x6f\xae\xe4\x12\xd7\x93" + "\x9d\xcd\x38\x26\x09\x40\x52\xcd" + "\x67\x01\x67\x26\xe0\x3e\x98\xa8" + "\xe8\x1a\x13\x41\xbb\x90\x4d\x87" + "\xbb\x42\x82\x39\xce\x3a\xd0\x18" + "\x6d\x7b\x71\x8f\xbb\x2c\x6a\xd1" + "\xbd\xf5\xc7\x8a\x7e\xe1\x1e\x0f" + "\x0d\x0d\x13\x7c\xd9\xd8\x3c\x91" + "\xab\xff\x1f\x12\xc3\xee\xe5\x65" + "\x12\x8d\x7b\x61\xe5\x1f\x98", + .ilen = 375, + .rlen = 375, + .also_non_np = 1, + .np = 3, + .tap = { 375 - 20, 4, 16 }, + + }, { /* Taken from the ChaCha20 test vectors, appended 16 random bytes + to nonce, and recomputed the ciphertext with libsodium */ + .key = "\x1c\x92\x40\xa5\xeb\x55\xd3\x8a" + "\xf3\x33\x88\x86\x04\xf6\xb5\xf0" + "\x47\x39\x17\xc1\x40\x2b\x80\x09" + "\x9d\xca\x5c\xbc\x20\x70\x75\xc0", + .klen = 32, + .iv = "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x02\x76\x5a\x2e\x63" + "\x33\x9f\xc9\x9a\x66\x32\x0d\xb7" + "\x2a\x00\x00\x00\x00\x00\x00\x00", + .input = "\x27\x54\x77\x61\x73\x20\x62\x72" + "\x69\x6c\x6c\x69\x67\x2c\x20\x61" + "\x6e\x64\x20\x74\x68\x65\x20\x73" + "\x6c\x69\x74\x68\x79\x20\x74\x6f" + "\x76\x65\x73\x0a\x44\x69\x64\x20" + "\x67\x79\x72\x65\x20\x61\x6e\x64" + "\x20\x67\x69\x6d\x62\x6c\x65\x20" + "\x69\x6e\x20\x74\x68\x65\x20\x77" + "\x61\x62\x65\x3a\x0a\x41\x6c\x6c" + "\x20\x6d\x69\x6d\x73\x79\x20\x77" + "\x65\x72\x65\x20\x74\x68\x65\x20" + "\x62\x6f\x72\x6f\x67\x6f\x76\x65" + "\x73\x2c\x0a\x41\x6e\x64\x20\x74" + "\x68\x65\x20\x6d\x6f\x6d\x65\x20" + "\x72\x61\x74\x68\x73\x20\x6f\x75" + "\x74\x67\x72\x61\x62\x65\x2e", + .result = "\x95\xb9\x51\xe7\x8f\xb4\xa4\x03" + "\xca\x37\xcc\xde\x60\x1d\x8c\xe2" + "\xf1\xbb\x8a\x13\x7f\x61\x85\xcc" + "\xad\xf4\xf0\xdc\x86\xa6\x1e\x10" + "\xbc\x8e\xcb\x38\x2b\xa5\xc8\x8f" + "\xaa\x03\x3d\x53\x4a\x42\xb1\x33" + "\xfc\xd3\xef\xf0\x8e\x7e\x10\x9c" + "\x6f\x12\x5e\xd4\x96\xfe\x5b\x08" + "\xb6\x48\xf0\x14\x74\x51\x18\x7c" + "\x07\x92\xfc\xac\x9d\xf1\x94\xc0" + "\xc1\x9d\xc5\x19\x43\x1f\x1d\xbb" + "\x07\xf0\x1b\x14\x25\x45\xbb\xcb" + "\x5c\xe2\x8b\x28\xf3\xcf\x47\x29" + "\x27\x79\x67\x24\xa6\x87\xc2\x11" + "\x65\x03\xfa\x45\xf7\x9e\x53\x7a" + "\x99\xf1\x82\x25\x4f\x8d\x07", + .ilen = 127, + .rlen = 127, + }, { /* Taken from the ChaCha20 test vectors, appended 16 random bytes + to nonce, and recomputed the ciphertext with libsodium */ + .key = "\x1c\x92\x40\xa5\xeb\x55\xd3\x8a" + "\xf3\x33\x88\x86\x04\xf6\xb5\xf0" + "\x47\x39\x17\xc1\x40\x2b\x80\x09" + "\x9d\xca\x5c\xbc\x20\x70\x75\xc0", + .klen = 32, + .iv = "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x01\x31\x58\xa3\x5a" + "\x25\x5d\x05\x17\x58\xe9\x5e\xd4" + "\x1c\x00\x00\x00\x00\x00\x00\x00", + .input = "\x49\xee\xe0\xdc\x24\x90\x40\xcd" + "\xc5\x40\x8f\x47\x05\xbc\xdd\x81" + "\x47\xc6\x8d\xe6\xb1\x8f\xd7\xcb" + "\x09\x0e\x6e\x22\x48\x1f\xbf\xb8" + "\x5c\xf7\x1e\x8a\xc1\x23\xf2\xd4" + "\x19\x4b\x01\x0f\x4e\xa4\x43\xce" + "\x01\xc6\x67\xda\x03\x91\x18\x90" + "\xa5\xa4\x8e\x45\x03\xb3\x2d\xac" + "\x74\x92\xd3\x53\x47\xc8\xdd\x25" + "\x53\x6c\x02\x03\x87\x0d\x11\x0c" + "\x58\xe3\x12\x18\xfd\x2a\x5b\x40" + "\x0c\x30\xf0\xb8\x3f\x43\xce\xae" + "\x65\x3a\x7d\x7c\xf4\x54\xaa\xcc" + "\x33\x97\xc3\x77\xba\xc5\x70\xde" + "\xd7\xd5\x13\xa5\x65\xc4\x5f\x0f" + "\x46\x1a\x0d\x97\xb5\xf3\xbb\x3c" + "\x84\x0f\x2b\xc5\xaa\xea\xf2\x6c" + "\xc9\xb5\x0c\xee\x15\xf3\x7d\xbe" + "\x9f\x7b\x5a\xa6\xae\x4f\x83\xb6" + "\x79\x49\x41\xf4\x58\x18\xcb\x86" + "\x7f\x30\x0e\xf8\x7d\x44\x36\xea" + "\x75\xeb\x88\x84\x40\x3c\xad\x4f" + "\x6f\x31\x6b\xaa\x5d\xe5\xa5\xc5" + "\x21\x66\xe9\xa7\xe3\xb2\x15\x88" + "\x78\xf6\x79\xa1\x59\x47\x12\x4e" + "\x9f\x9f\x64\x1a\xa0\x22\x5b\x08" + "\xbe\x7c\x36\xc2\x2b\x66\x33\x1b" + "\xdd\x60\x71\xf7\x47\x8c\x61\xc3" + "\xda\x8a\x78\x1e\x16\xfa\x1e\x86" + "\x81\xa6\x17\x2a\xa7\xb5\xc2\xe7" + "\xa4\xc7\x42\xf1\xcf\x6a\xca\xb4" + "\x45\xcf\xf3\x93\xf0\xe7\xea\xf6" + "\xf4\xe6\x33\x43\x84\x93\xa5\x67" + "\x9b\x16\x58\x58\x80\x0f\x2b\x5c" + "\x24\x74\x75\x7f\x95\x81\xb7\x30" + "\x7a\x33\xa7\xf7\x94\x87\x32\x27" + "\x10\x5d\x14\x4c\x43\x29\xdd\x26" + "\xbd\x3e\x3c\x0e\xfe\x0e\xa5\x10" + "\xea\x6b\x64\xfd\x73\xc6\xed\xec" + "\xa8\xc9\xbf\xb3\xba\x0b\x4d\x07" + "\x70\xfc\x16\xfd\x79\x1e\xd7\xc5" + "\x49\x4e\x1c\x8b\x8d\x79\x1b\xb1" + "\xec\xca\x60\x09\x4c\x6a\xd5\x09" + "\x49\x46\x00\x88\x22\x8d\xce\xea" + "\xb1\x17\x11\xde\x42\xd2\x23\xc1" + "\x72\x11\xf5\x50\x73\x04\x40\x47" + "\xf9\x5d\xe7\xa7\x26\xb1\x7e\xb0" + "\x3f\x58\xc1\x52\xab\x12\x67\x9d" + "\x3f\x43\x4b\x68\xd4\x9c\x68\x38" + "\x07\x8a\x2d\x3e\xf3\xaf\x6a\x4b" + "\xf9\xe5\x31\x69\x22\xf9\xa6\x69" + "\xc6\x9c\x96\x9a\x12\x35\x95\x1d" + "\x95\xd5\xdd\xbe\xbf\x93\x53\x24" + "\xfd\xeb\xc2\x0a\x64\xb0\x77\x00" + "\x6f\x88\xc4\x37\x18\x69\x7c\xd7" + "\x41\x92\x55\x4c\x03\xa1\x9a\x4b" + "\x15\xe5\xdf\x7f\x37\x33\x72\xc1" + "\x8b\x10\x67\xa3\x01\x57\x94\x25" + "\x7b\x38\x71\x7e\xdd\x1e\xcc\x73" + "\x55\xd2\x8e\xeb\x07\xdd\xf1\xda" + "\x58\xb1\x47\x90\xfe\x42\x21\x72" + "\xa3\x54\x7a\xa0\x40\xec\x9f\xdd" + "\xc6\x84\x6e\xca\xae\xe3\x68\xb4" + "\x9d\xe4\x78\xff\x57\xf2\xf8\x1b" + "\x03\xa1\x31\xd9\xde\x8d\xf5\x22" + "\x9c\xdd\x20\xa4\x1e\x27\xb1\x76" + "\x4f\x44\x55\xe2\x9b\xa1\x9c\xfe" + "\x54\xf7\x27\x1b\xf4\xde\x02\xf5" + "\x1b\x55\x48\x5c\xdc\x21\x4b\x9e" + "\x4b\x6e\xed\x46\x23\xdc\x65\xb2" + "\xcf\x79\x5f\x28\xe0\x9e\x8b\xe7" + "\x4c\x9d\x8a\xff\xc1\xa6\x28\xb8" + "\x65\x69\x8a\x45\x29\xef\x74\x85" + "\xde\x79\xc7\x08\xae\x30\xb0\xf4" + "\xa3\x1d\x51\x41\xab\xce\xcb\xf6" + "\xb5\xd8\x6d\xe0\x85\xe1\x98\xb3" + "\x43\xbb\x86\x83\x0a\xa0\xf5\xb7" + "\x04\x0b\xfa\x71\x1f\xb0\xf6\xd9" + "\x13\x00\x15\xf0\xc7\xeb\x0d\x5a" + "\x9f\xd7\xb9\x6c\x65\x14\x22\x45" + "\x6e\x45\x32\x3e\x7e\x60\x1a\x12" + "\x97\x82\x14\xfb\xaa\x04\x22\xfa" + "\xa0\xe5\x7e\x8c\x78\x02\x48\x5d" + "\x78\x33\x5a\x7c\xad\xdb\x29\xce" + "\xbb\x8b\x61\xa4\xb7\x42\xe2\xac" + "\x8b\x1a\xd9\x2f\x0b\x8b\x62\x21" + "\x83\x35\x7e\xad\x73\xc2\xb5\x6c" + "\x10\x26\x38\x07\xe5\xc7\x36\x80" + "\xe2\x23\x12\x61\xf5\x48\x4b\x2b" + "\xc5\xdf\x15\xd9\x87\x01\xaa\xac" + "\x1e\x7c\xad\x73\x78\x18\x63\xe0" + "\x8b\x9f\x81\xd8\x12\x6a\x28\x10" + "\xbe\x04\x68\x8a\x09\x7c\x1b\x1c" + "\x83\x66\x80\x47\x80\xe8\xfd\x35" + "\x1c\x97\x6f\xae\x49\x10\x66\xcc" + "\xc6\xd8\xcc\x3a\x84\x91\x20\x77" + "\x72\xe4\x24\xd2\x37\x9f\xc5\xc9" + "\x25\x94\x10\x5f\x40\x00\x64\x99" + "\xdc\xae\xd7\x21\x09\x78\x50\x15" + "\xac\x5f\xc6\x2c\xa2\x0b\xa9\x39" + "\x87\x6e\x6d\xab\xde\x08\x51\x16" + "\xc7\x13\xe9\xea\xed\x06\x8e\x2c" + "\xf8\x37\x8c\xf0\xa6\x96\x8d\x43" + "\xb6\x98\x37\xb2\x43\xed\xde\xdf" + "\x89\x1a\xe7\xeb\x9d\xa1\x7b\x0b" + "\x77\xb0\xe2\x75\xc0\xf1\x98\xd9" + "\x80\x55\xc9\x34\x91\xd1\x59\xe8" + "\x4b\x0f\xc1\xa9\x4b\x7a\x84\x06" + "\x20\xa8\x5d\xfa\xd1\xde\x70\x56" + "\x2f\x9e\x91\x9c\x20\xb3\x24\xd8" + "\x84\x3d\xe1\x8c\x7e\x62\x52\xe5" + "\x44\x4b\x9f\xc2\x93\x03\xea\x2b" + "\x59\xc5\xfa\x3f\x91\x2b\xbb\x23" + "\xf5\xb2\x7b\xf5\x38\xaf\xb3\xee" + "\x63\xdc\x7b\xd1\xff\xaa\x8b\xab" + "\x82\x6b\x37\x04\xeb\x74\xbe\x79" + "\xb9\x83\x90\xef\x20\x59\x46\xff" + "\xe9\x97\x3e\x2f\xee\xb6\x64\x18" + "\x38\x4c\x7a\x4a\xf9\x61\xe8\x9a" + "\xa1\xb5\x01\xa6\x47\xd3\x11\xd4" + "\xce\xd3\x91\x49\x88\xc7\xb8\x4d" + "\xb1\xb9\x07\x6d\x16\x72\xae\x46" + "\x5e\x03\xa1\x4b\xb6\x02\x30\xa8" + "\x3d\xa9\x07\x2a\x7c\x19\xe7\x62" + "\x87\xe3\x82\x2f\x6f\xe1\x09\xd9" + "\x94\x97\xea\xdd\x58\x9e\xae\x76" + "\x7e\x35\xe5\xb4\xda\x7e\xf4\xde" + "\xf7\x32\x87\xcd\x93\xbf\x11\x56" + "\x11\xbe\x08\x74\xe1\x69\xad\xe2" + "\xd7\xf8\x86\x75\x8a\x3c\xa4\xbe" + "\x70\xa7\x1b\xfc\x0b\x44\x2a\x76" + "\x35\xea\x5d\x85\x81\xaf\x85\xeb" + "\xa0\x1c\x61\xc2\xf7\x4f\xa5\xdc" + "\x02\x7f\xf6\x95\x40\x6e\x8a\x9a" + "\xf3\x5d\x25\x6e\x14\x3a\x22\xc9" + "\x37\x1c\xeb\x46\x54\x3f\xa5\x91" + "\xc2\xb5\x8c\xfe\x53\x08\x97\x32" + "\x1b\xb2\x30\x27\xfe\x25\x5d\xdc" + "\x08\x87\xd0\xe5\x94\x1a\xd4\xf1" + "\xfe\xd6\xb4\xa3\xe6\x74\x81\x3c" + "\x1b\xb7\x31\xa7\x22\xfd\xd4\xdd" + "\x20\x4e\x7c\x51\xb0\x60\x73\xb8" + "\x9c\xac\x91\x90\x7e\x01\xb0\xe1" + "\x8a\x2f\x75\x1c\x53\x2a\x98\x2a" + "\x06\x52\x95\x52\xb2\xe9\x25\x2e" + "\x4c\xe2\x5a\x00\xb2\x13\x81\x03" + "\x77\x66\x0d\xa5\x99\xda\x4e\x8c" + "\xac\xf3\x13\x53\x27\x45\xaf\x64" + "\x46\xdc\xea\x23\xda\x97\xd1\xab" + "\x7d\x6c\x30\x96\x1f\xbc\x06\x34" + "\x18\x0b\x5e\x21\x35\x11\x8d\x4c" + "\xe0\x2d\xe9\x50\x16\x74\x81\xa8" + "\xb4\x34\xb9\x72\x42\xa6\xcc\xbc" + "\xca\x34\x83\x27\x10\x5b\x68\x45" + "\x8f\x52\x22\x0c\x55\x3d\x29\x7c" + "\xe3\xc0\x66\x05\x42\x91\x5f\x58" + "\xfe\x4a\x62\xd9\x8c\xa9\x04\x19" + "\x04\xa9\x08\x4b\x57\xfc\x67\x53" + "\x08\x7c\xbc\x66\x8a\xb0\xb6\x9f" + "\x92\xd6\x41\x7c\x5b\x2a\x00\x79" + "\x72", + .result = "\x3a\x92\xee\x53\x31\xaf\x2b\x60" + "\x5f\x55\x8d\x00\x5d\xfc\x74\x97" + "\x28\x54\xf4\xa5\x75\xf1\x9b\x25" + "\x62\x1c\xc0\xe0\x13\xc8\x87\x53" + "\xd0\xf3\xa7\x97\x1f\x3b\x1e\xea" + "\xe0\xe5\x2a\xd1\xdd\xa4\x3b\x50" + "\x45\xa3\x0d\x7e\x1b\xc9\xa0\xad" + "\xb9\x2c\x54\xa6\xc7\x55\x16\xd0" + "\xc5\x2e\x02\x44\x35\xd0\x7e\x67" + "\xf2\xc4\x9b\xcd\x95\x10\xcc\x29" + "\x4b\xfa\x86\x87\xbe\x40\x36\xbe" + "\xe1\xa3\x52\x89\x55\x20\x9b\xc2" + "\xab\xf2\x31\x34\x16\xad\xc8\x17" + "\x65\x24\xc0\xff\x12\x37\xfe\x5a" + "\x62\x3b\x59\x47\x6c\x5f\x3a\x8e" + "\x3b\xd9\x30\xc8\x7f\x2f\x88\xda" + "\x80\xfd\x02\xda\x7f\x9a\x7a\x73" + "\x59\xc5\x34\x09\x9a\x11\xcb\xa7" + "\xfc\xf6\xa1\xa0\x60\xfb\x43\xbb" + "\xf1\xe9\xd7\xc6\x79\x27\x4e\xff" + "\x22\xb4\x24\xbf\x76\xee\x47\xb9" + "\x6d\x3f\x8b\xb0\x9c\x3c\x43\xdd" + "\xff\x25\x2e\x6d\xa4\x2b\xfb\x5d" + "\x1b\x97\x6c\x55\x0a\x82\x7a\x7b" + "\x94\x34\xc2\xdb\x2f\x1f\xc1\xea" + "\xd4\x4d\x17\x46\x3b\x51\x69\x09" + "\xe4\x99\x32\x25\xfd\x94\xaf\xfb" + "\x10\xf7\x4f\xdd\x0b\x3c\x8b\x41" + "\xb3\x6a\xb7\xd1\x33\xa8\x0c\x2f" + "\x62\x4c\x72\x11\xd7\x74\xe1\x3b" + "\x38\x43\x66\x7b\x6c\x36\x48\xe7" + "\xe3\xe7\x9d\xb9\x42\x73\x7a\x2a" + "\x89\x20\x1a\x41\x80\x03\xf7\x8f" + "\x61\x78\x13\xbf\xfe\x50\xf5\x04" + "\x52\xf9\xac\x47\xf8\x62\x4b\xb2" + "\x24\xa9\xbf\x64\xb0\x18\x69\xd2" + "\xf5\xe4\xce\xc8\xb1\x87\x75\xd6" + "\x2c\x24\x79\x00\x7d\x26\xfb\x44" + "\xe7\x45\x7a\xee\x58\xa5\x83\xc1" + "\xb4\x24\xab\x23\x2f\x4d\xd7\x4f" + "\x1c\xc7\xaa\xa9\x50\xf4\xa3\x07" + "\x12\x13\x89\x74\xdc\x31\x6a\xb2" + "\xf5\x0f\x13\x8b\xb9\xdb\x85\x1f" + "\xf5\xbc\x88\xd9\x95\xea\x31\x6c" + "\x36\x60\xb6\x49\xdc\xc4\xf7\x55" + "\x3f\x21\xc1\xb5\x92\x18\x5e\xbc" + "\x9f\x87\x7f\xe7\x79\x25\x40\x33" + "\xd6\xb9\x33\xd5\x50\xb3\xc7\x89" + "\x1b\x12\xa0\x46\xdd\xa7\xd8\x3e" + "\x71\xeb\x6f\x66\xa1\x26\x0c\x67" + "\xab\xb2\x38\x58\x17\xd8\x44\x3b" + "\x16\xf0\x8e\x62\x8d\x16\x10\x00" + "\x32\x8b\xef\xb9\x28\xd3\xc5\xad" + "\x0a\x19\xa2\xe4\x03\x27\x7d\x94" + "\x06\x18\xcd\xd6\x27\x00\xf9\x1f" + "\xb6\xb3\xfe\x96\x35\x5f\xc4\x1c" + "\x07\x62\x10\x79\x68\x50\xf1\x7e" + "\x29\xe7\xc4\xc4\xe7\xee\x54\xd6" + "\x58\x76\x84\x6d\x8d\xe4\x59\x31" + "\xe9\xf4\xdc\xa1\x1f\xe5\x1a\xd6" + "\xe6\x64\x46\xf5\x77\x9c\x60\x7a" + "\x5e\x62\xe3\x0a\xd4\x9f\x7a\x2d" + "\x7a\xa5\x0a\x7b\x29\x86\x7a\x74" + "\x74\x71\x6b\xca\x7d\x1d\xaa\xba" + "\x39\x84\x43\x76\x35\xfe\x4f\x9b" + "\xbb\xbb\xb5\x6a\x32\xb5\x5d\x41" + "\x51\xf0\x5b\x68\x03\x47\x4b\x8a" + "\xca\x88\xf6\x37\xbd\x73\x51\x70" + "\x66\xfe\x9e\x5f\x21\x9c\xf3\xdd" + "\xc3\xea\x27\xf9\x64\x94\xe1\x19" + "\xa0\xa9\xab\x60\xe0\x0e\xf7\x78" + "\x70\x86\xeb\xe0\xd1\x5c\x05\xd3" + "\xd7\xca\xe0\xc0\x47\x47\x34\xee" + "\x11\xa3\xa3\x54\x98\xb7\x49\x8e" + "\x84\x28\x70\x2c\x9e\xfb\x55\x54" + "\x4d\xf8\x86\xf7\x85\x7c\xbd\xf3" + "\x17\xd8\x47\xcb\xac\xf4\x20\x85" + "\x34\x66\xad\x37\x2d\x5e\x52\xda" + "\x8a\xfe\x98\x55\x30\xe7\x2d\x2b" + "\x19\x10\x8e\x7b\x66\x5e\xdc\xe0" + "\x45\x1f\x7b\xb4\x08\xfb\x8f\xf6" + "\x8c\x89\x21\x34\x55\x27\xb2\x76" + "\xb2\x07\xd9\xd6\x68\x9b\xea\x6b" + "\x2d\xb4\xc4\x35\xdd\xd2\x79\xae" + "\xc7\xd6\x26\x7f\x12\x01\x8c\xa7" + "\xe3\xdb\xa8\xf4\xf7\x2b\xec\x99" + "\x11\x00\xf1\x35\x8c\xcf\xd5\xc9" + "\xbd\x91\x36\x39\x70\xcf\x7d\x70" + "\x47\x1a\xfc\x6b\x56\xe0\x3f\x9c" + "\x60\x49\x01\x72\xa9\xaf\x2c\x9c" + "\xe8\xab\xda\x8c\x14\x19\xf3\x75" + "\x07\x17\x9d\x44\x67\x7a\x2e\xef" + "\xb7\x83\x35\x4a\xd1\x3d\x1c\x84" + "\x32\xdd\xaa\xea\xca\x1d\xdc\x72" + "\x2c\xcc\x43\xcd\x5d\xe3\x21\xa4" + "\xd0\x8a\x4b\x20\x12\xa3\xd5\x86" + "\x76\x96\xff\x5f\x04\x57\x0f\xe6" + "\xba\xe8\x76\x50\x0c\x64\x1d\x83" + "\x9c\x9b\x9a\x9a\x58\x97\x9c\x5c" + "\xb4\xa4\xa6\x3e\x19\xeb\x8f\x5a" + "\x61\xb2\x03\x7b\x35\x19\xbe\xa7" + "\x63\x0c\xfd\xdd\xf9\x90\x6c\x08" + "\x19\x11\xd3\x65\x4a\xf5\x96\x92" + "\x59\xaa\x9c\x61\x0c\x29\xa7\xf8" + "\x14\x39\x37\xbf\x3c\xf2\x16\x72" + "\x02\xfa\xa2\xf3\x18\x67\x5d\xcb" + "\xdc\x4d\xbb\x96\xff\x70\x08\x2d" + "\xc2\xa8\x52\xe1\x34\x5f\x72\xfe" + "\x64\xbf\xca\xa7\x74\x38\xfb\x74" + "\x55\x9c\xfa\x8a\xed\xfb\x98\xeb" + "\x58\x2e\x6c\xe1\x52\x76\x86\xd7" + "\xcf\xa1\xa4\xfc\xb2\x47\x41\x28" + "\xa3\xc1\xe5\xfd\x53\x19\x28\x2b" + "\x37\x04\x65\x96\x99\x7a\x28\x0f" + "\x07\x68\x4b\xc7\x52\x0a\x55\x35" + "\x40\x19\x95\x61\xe8\x59\x40\x1f" + "\x9d\xbf\x78\x7d\x8f\x84\xff\x6f" + "\xd0\xd5\x63\xd2\x22\xbd\xc8\x4e" + "\xfb\xe7\x9f\x06\xe6\xe7\x39\x6d" + "\x6a\x96\x9f\xf0\x74\x7e\xc9\x35" + "\xb7\x26\xb8\x1c\x0a\xa6\x27\x2c" + "\xa2\x2b\xfe\xbe\x0f\x07\x73\xae" + "\x7f\x7f\x54\xf5\x7c\x6a\x0a\x56" + "\x49\xd4\x81\xe5\x85\x53\x99\x1f" + "\x95\x05\x13\x58\x8d\x0e\x1b\x90" + "\xc3\x75\x48\x64\x58\x98\x67\x84" + "\xae\xe2\x21\xa2\x8a\x04\x0a\x0b" + "\x61\xaa\xb0\xd4\x28\x60\x7a\xf8" + "\xbc\x52\xfb\x24\x7f\xed\x0d\x2a" + "\x0a\xb2\xf9\xc6\x95\xb5\x11\xc9" + "\xf4\x0f\x26\x11\xcf\x2a\x57\x87" + "\x7a\xf3\xe7\x94\x65\xc2\xb5\xb3" + "\xab\x98\xe3\xc1\x2b\x59\x19\x7c" + "\xd6\xf3\xf9\xbf\xff\x6d\xc6\x82" + "\x13\x2f\x4a\x2e\xcd\x26\xfe\x2d" + "\x01\x70\xf4\xc2\x7f\x1f\x4c\xcb" + "\x47\x77\x0c\xa0\xa3\x03\xec\xda" + "\xa9\xbf\x0d\x2d\xae\xe4\xb8\x7b" + "\xa9\xbc\x08\xb4\x68\x2e\xc5\x60" + "\x8d\x87\x41\x2b\x0f\x69\xf0\xaf" + "\x5f\xba\x72\x20\x0f\x33\xcd\x6d" + "\x36\x7d\x7b\xd5\x05\xf1\x4b\x05" + "\xc4\xfc\x7f\x80\xb9\x4d\xbd\xf7" + "\x7c\x84\x07\x01\xc2\x40\x66\x5b" + "\x98\xc7\x2c\xe3\x97\xfa\xdf\x87" + "\xa0\x1f\xe9\x21\x42\x0f\x3b\xeb" + "\x89\x1c\x3b\xca\x83\x61\x77\x68" + "\x84\xbb\x60\x87\x38\x2e\x25\xd5" + "\x9e\x04\x41\x70\xac\xda\xc0\x9c" + "\x9c\x69\xea\x8d\x4e\x55\x2a\x29" + "\xed\x05\x4b\x7b\x73\x71\x90\x59" + "\x4d\xc8\xd8\x44\xf0\x4c\xe1\x5e" + "\x84\x47\x55\xcc\x32\x3f\xe7\x97" + "\x42\xc6\x32\xac\x40\xe5\xa5\xc7" + "\x8b\xed\xdb\xf7\x83\xd6\xb1\xc2" + "\x52\x5e\x34\xb7\xeb\x6e\xd9\xfc" + "\xe5\x93\x9a\x97\x3e\xb0\xdc\xd9" + "\xd7\x06\x10\xb6\x1d\x80\x59\xdd" + "\x0d\xfe\x64\x35\xcd\x5d\xec\xf0" + "\xba\xd0\x34\xc9\x2d\x91\xc5\x17" + "\x11", + .ilen = 1281, + .rlen = 1281, + .also_non_np = 1, + .np = 3, + .tap = { 1200, 1, 80 }, + }, +}; + /* * CTS (Cipher Text Stealing) mode tests */ diff --git a/include/crypto/chacha20.h b/include/crypto/chacha20.h index fbec4e6a87890..6290d997060ec 100644 --- a/include/crypto/chacha20.h +++ b/include/crypto/chacha20.h @@ -1,6 +1,10 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* - * Common values for the ChaCha20 algorithm + * Common values and helper functions for the ChaCha20 and XChaCha20 algorithms. + * + * XChaCha20 extends ChaCha20's nonce to 192 bits, while provably retaining + * ChaCha20's security. Here they share the same key size, tfm context, and + * setkey function; only their IV size and encrypt/decrypt function differ. */ #ifndef _CRYPTO_CHACHA20_H @@ -10,10 +14,15 @@ #include #include +/* 32-bit stream position, then 96-bit nonce (RFC7539 convention) */ #define CHACHA20_IV_SIZE 16 + #define CHACHA20_KEY_SIZE 32 #define CHACHA20_BLOCK_SIZE 64 +/* 192-bit nonce, then 64-bit stream position */ +#define XCHACHA20_IV_SIZE 32 + struct chacha20_ctx { u32 key[8]; }; @@ -22,8 +31,11 @@ void chacha20_block(u32 *state, u8 *stream); void hchacha20_block(const u32 *in, u32 *out); void crypto_chacha20_init(u32 *state, struct chacha20_ctx *ctx, u8 *iv); + int crypto_chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key, unsigned int keysize); + int crypto_chacha20_crypt(struct skcipher_request *req); +int crypto_xchacha20_crypt(struct skcipher_request *req); #endif -- GitLab From fd5e6015bc3646ab5efa1152b4d4989d6e3cfb32 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 16 Nov 2018 17:26:21 -0800 Subject: [PATCH 1954/2269] BACKPORT, FROMGIT: crypto: chacha20-generic - refactor to allow varying number of rounds In preparation for adding XChaCha12 support, rename/refactor chacha20-generic to support different numbers of rounds. The justification for needing XChaCha12 support is explained in more detail in the patch "crypto: chacha - add XChaCha12 support". The only difference between ChaCha{8,12,20} are the number of rounds itself; all other parts of the algorithm are the same. Therefore, remove the "20" from all definitions, structures, functions, files, etc. that will be shared by all ChaCha versions. Also make ->setkey() store the round count in the chacha_ctx (previously chacha20_ctx). The generic code then passes the round count through to chacha_block(). There will be a ->setkey() function for each explicitly allowed round count; the encrypt/decrypt functions will be the same. I decided not to do it the opposite way (same ->setkey() function for all round counts, with different encrypt/decrypt functions) because that would have required more boilerplate code in architecture-specific implementations of ChaCha and XChaCha. Reviewed-by: Ard Biesheuvel Acked-by: Martin Willi Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu (cherry picked from commit 1ca1b917940c24ca3d1f490118c5474168622953 https://git.kernel.org/pub/scm/linux/kernel/git/herbert/cryptodev-2.6.git master) Conflicts: arch/arm64/crypto/chacha20-neon-glue.c arch/x86/crypto/chacha20_glue.c drivers/crypto/caam/caamalg.c drivers/crypto/caam/caamalg_qi2.c drivers/crypto/caam/compat.h include/crypto/chacha20.h Bug: 112008522 Test: As series, see Ic61c13b53facfd2173065be715a7ee5f3af8760b Change-Id: I7fa203ddc7095ce8675a32f49b8a5230cd0cf5f6 Signed-off-by: Eric Biggers --- arch/arm/crypto/chacha20-neon-glue.c | 40 +++---- arch/arm64/crypto/chacha20-neon-glue.c | 40 +++---- arch/x86/crypto/chacha20_glue.c | 52 ++++----- crypto/Makefile | 2 +- crypto/chacha20poly1305.c | 10 +- .../{chacha20_generic.c => chacha_generic.c} | 110 ++++++++++-------- drivers/char/random.c | 51 ++++---- include/crypto/chacha.h | 46 ++++++++ include/crypto/chacha20.h | 41 ------- lib/Makefile | 2 +- lib/{chacha20.c => chacha.c} | 43 ++++--- 11 files changed, 227 insertions(+), 210 deletions(-) rename crypto/{chacha20_generic.c => chacha_generic.c} (57%) create mode 100644 include/crypto/chacha.h delete mode 100644 include/crypto/chacha20.h rename lib/{chacha20.c => chacha.c} (67%) diff --git a/arch/arm/crypto/chacha20-neon-glue.c b/arch/arm/crypto/chacha20-neon-glue.c index 59a7be08e80ce..7386eb1c1889d 100644 --- a/arch/arm/crypto/chacha20-neon-glue.c +++ b/arch/arm/crypto/chacha20-neon-glue.c @@ -19,7 +19,7 @@ */ #include -#include +#include #include #include #include @@ -34,20 +34,20 @@ asmlinkage void chacha20_4block_xor_neon(u32 *state, u8 *dst, const u8 *src); static void chacha20_doneon(u32 *state, u8 *dst, const u8 *src, unsigned int bytes) { - u8 buf[CHACHA20_BLOCK_SIZE]; + u8 buf[CHACHA_BLOCK_SIZE]; - while (bytes >= CHACHA20_BLOCK_SIZE * 4) { + while (bytes >= CHACHA_BLOCK_SIZE * 4) { chacha20_4block_xor_neon(state, dst, src); - bytes -= CHACHA20_BLOCK_SIZE * 4; - src += CHACHA20_BLOCK_SIZE * 4; - dst += CHACHA20_BLOCK_SIZE * 4; + bytes -= CHACHA_BLOCK_SIZE * 4; + src += CHACHA_BLOCK_SIZE * 4; + dst += CHACHA_BLOCK_SIZE * 4; state[12] += 4; } - while (bytes >= CHACHA20_BLOCK_SIZE) { + while (bytes >= CHACHA_BLOCK_SIZE) { chacha20_block_xor_neon(state, dst, src); - bytes -= CHACHA20_BLOCK_SIZE; - src += CHACHA20_BLOCK_SIZE; - dst += CHACHA20_BLOCK_SIZE; + bytes -= CHACHA_BLOCK_SIZE; + src += CHACHA_BLOCK_SIZE; + dst += CHACHA_BLOCK_SIZE; state[12]++; } if (bytes) { @@ -60,17 +60,17 @@ static void chacha20_doneon(u32 *state, u8 *dst, const u8 *src, static int chacha20_neon(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct chacha20_ctx *ctx = crypto_skcipher_ctx(tfm); + struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); struct skcipher_walk walk; u32 state[16]; int err; - if (req->cryptlen <= CHACHA20_BLOCK_SIZE || !may_use_simd()) - return crypto_chacha20_crypt(req); + if (req->cryptlen <= CHACHA_BLOCK_SIZE || !may_use_simd()) + return crypto_chacha_crypt(req); err = skcipher_walk_virt(&walk, req, true); - crypto_chacha20_init(state, ctx, walk.iv); + crypto_chacha_init(state, ctx, walk.iv); kernel_neon_begin(); while (walk.nbytes > 0) { @@ -93,14 +93,14 @@ static struct skcipher_alg alg = { .base.cra_driver_name = "chacha20-neon", .base.cra_priority = 300, .base.cra_blocksize = 1, - .base.cra_ctxsize = sizeof(struct chacha20_ctx), + .base.cra_ctxsize = sizeof(struct chacha_ctx), .base.cra_module = THIS_MODULE, - .min_keysize = CHACHA20_KEY_SIZE, - .max_keysize = CHACHA20_KEY_SIZE, - .ivsize = CHACHA20_IV_SIZE, - .chunksize = CHACHA20_BLOCK_SIZE, - .walksize = 4 * CHACHA20_BLOCK_SIZE, + .min_keysize = CHACHA_KEY_SIZE, + .max_keysize = CHACHA_KEY_SIZE, + .ivsize = CHACHA_IV_SIZE, + .chunksize = CHACHA_BLOCK_SIZE, + .walksize = 4 * CHACHA_BLOCK_SIZE, .setkey = crypto_chacha20_setkey, .encrypt = chacha20_neon, .decrypt = chacha20_neon, diff --git a/arch/arm64/crypto/chacha20-neon-glue.c b/arch/arm64/crypto/chacha20-neon-glue.c index cbdb75d15cd03..f6cfab1bc0e2a 100644 --- a/arch/arm64/crypto/chacha20-neon-glue.c +++ b/arch/arm64/crypto/chacha20-neon-glue.c @@ -19,7 +19,7 @@ */ #include -#include +#include #include #include #include @@ -34,20 +34,20 @@ asmlinkage void chacha20_4block_xor_neon(u32 *state, u8 *dst, const u8 *src); static void chacha20_doneon(u32 *state, u8 *dst, const u8 *src, unsigned int bytes) { - u8 buf[CHACHA20_BLOCK_SIZE]; + u8 buf[CHACHA_BLOCK_SIZE]; - while (bytes >= CHACHA20_BLOCK_SIZE * 4) { + while (bytes >= CHACHA_BLOCK_SIZE * 4) { chacha20_4block_xor_neon(state, dst, src); - bytes -= CHACHA20_BLOCK_SIZE * 4; - src += CHACHA20_BLOCK_SIZE * 4; - dst += CHACHA20_BLOCK_SIZE * 4; + bytes -= CHACHA_BLOCK_SIZE * 4; + src += CHACHA_BLOCK_SIZE * 4; + dst += CHACHA_BLOCK_SIZE * 4; state[12] += 4; } - while (bytes >= CHACHA20_BLOCK_SIZE) { + while (bytes >= CHACHA_BLOCK_SIZE) { chacha20_block_xor_neon(state, dst, src); - bytes -= CHACHA20_BLOCK_SIZE; - src += CHACHA20_BLOCK_SIZE; - dst += CHACHA20_BLOCK_SIZE; + bytes -= CHACHA_BLOCK_SIZE; + src += CHACHA_BLOCK_SIZE; + dst += CHACHA_BLOCK_SIZE; state[12]++; } if (bytes) { @@ -60,17 +60,17 @@ static void chacha20_doneon(u32 *state, u8 *dst, const u8 *src, static int chacha20_neon(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct chacha20_ctx *ctx = crypto_skcipher_ctx(tfm); + struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); struct skcipher_walk walk; u32 state[16]; int err; - if (!may_use_simd() || req->cryptlen <= CHACHA20_BLOCK_SIZE) - return crypto_chacha20_crypt(req); + if (!may_use_simd() || req->cryptlen <= CHACHA_BLOCK_SIZE) + return crypto_chacha_crypt(req); err = skcipher_walk_virt(&walk, req, true); - crypto_chacha20_init(state, ctx, walk.iv); + crypto_chacha_init(state, ctx, walk.iv); kernel_neon_begin(); while (walk.nbytes > 0) { @@ -93,14 +93,14 @@ static struct skcipher_alg alg = { .base.cra_driver_name = "chacha20-neon", .base.cra_priority = 300, .base.cra_blocksize = 1, - .base.cra_ctxsize = sizeof(struct chacha20_ctx), + .base.cra_ctxsize = sizeof(struct chacha_ctx), .base.cra_module = THIS_MODULE, - .min_keysize = CHACHA20_KEY_SIZE, - .max_keysize = CHACHA20_KEY_SIZE, - .ivsize = CHACHA20_IV_SIZE, - .chunksize = CHACHA20_BLOCK_SIZE, - .walksize = 4 * CHACHA20_BLOCK_SIZE, + .min_keysize = CHACHA_KEY_SIZE, + .max_keysize = CHACHA_KEY_SIZE, + .ivsize = CHACHA_IV_SIZE, + .chunksize = CHACHA_BLOCK_SIZE, + .walksize = 4 * CHACHA_BLOCK_SIZE, .setkey = crypto_chacha20_setkey, .encrypt = chacha20_neon, .decrypt = chacha20_neon, diff --git a/arch/x86/crypto/chacha20_glue.c b/arch/x86/crypto/chacha20_glue.c index 1e6af1b35f7b4..dfc91b938323c 100644 --- a/arch/x86/crypto/chacha20_glue.c +++ b/arch/x86/crypto/chacha20_glue.c @@ -10,7 +10,7 @@ */ #include -#include +#include #include #include #include @@ -29,31 +29,31 @@ static bool chacha20_use_avx2; static void chacha20_dosimd(u32 *state, u8 *dst, const u8 *src, unsigned int bytes) { - u8 buf[CHACHA20_BLOCK_SIZE]; + u8 buf[CHACHA_BLOCK_SIZE]; #ifdef CONFIG_AS_AVX2 if (chacha20_use_avx2) { - while (bytes >= CHACHA20_BLOCK_SIZE * 8) { + while (bytes >= CHACHA_BLOCK_SIZE * 8) { chacha20_8block_xor_avx2(state, dst, src); - bytes -= CHACHA20_BLOCK_SIZE * 8; - src += CHACHA20_BLOCK_SIZE * 8; - dst += CHACHA20_BLOCK_SIZE * 8; + bytes -= CHACHA_BLOCK_SIZE * 8; + src += CHACHA_BLOCK_SIZE * 8; + dst += CHACHA_BLOCK_SIZE * 8; state[12] += 8; } } #endif - while (bytes >= CHACHA20_BLOCK_SIZE * 4) { + while (bytes >= CHACHA_BLOCK_SIZE * 4) { chacha20_4block_xor_ssse3(state, dst, src); - bytes -= CHACHA20_BLOCK_SIZE * 4; - src += CHACHA20_BLOCK_SIZE * 4; - dst += CHACHA20_BLOCK_SIZE * 4; + bytes -= CHACHA_BLOCK_SIZE * 4; + src += CHACHA_BLOCK_SIZE * 4; + dst += CHACHA_BLOCK_SIZE * 4; state[12] += 4; } - while (bytes >= CHACHA20_BLOCK_SIZE) { + while (bytes >= CHACHA_BLOCK_SIZE) { chacha20_block_xor_ssse3(state, dst, src); - bytes -= CHACHA20_BLOCK_SIZE; - src += CHACHA20_BLOCK_SIZE; - dst += CHACHA20_BLOCK_SIZE; + bytes -= CHACHA_BLOCK_SIZE; + src += CHACHA_BLOCK_SIZE; + dst += CHACHA_BLOCK_SIZE; state[12]++; } if (bytes) { @@ -66,7 +66,7 @@ static void chacha20_dosimd(u32 *state, u8 *dst, const u8 *src, static int chacha20_simd(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct chacha20_ctx *ctx = crypto_skcipher_ctx(tfm); + struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); u32 *state, state_buf[16 + 2] __aligned(8); struct skcipher_walk walk; int err; @@ -74,20 +74,20 @@ static int chacha20_simd(struct skcipher_request *req) BUILD_BUG_ON(CHACHA20_STATE_ALIGN != 16); state = PTR_ALIGN(state_buf + 0, CHACHA20_STATE_ALIGN); - if (req->cryptlen <= CHACHA20_BLOCK_SIZE || !may_use_simd()) - return crypto_chacha20_crypt(req); + if (req->cryptlen <= CHACHA_BLOCK_SIZE || !may_use_simd()) + return crypto_chacha_crypt(req); err = skcipher_walk_virt(&walk, req, true); - crypto_chacha20_init(state, ctx, walk.iv); + crypto_chacha_init(state, ctx, walk.iv); kernel_fpu_begin(); - while (walk.nbytes >= CHACHA20_BLOCK_SIZE) { + while (walk.nbytes >= CHACHA_BLOCK_SIZE) { chacha20_dosimd(state, walk.dst.virt.addr, walk.src.virt.addr, - rounddown(walk.nbytes, CHACHA20_BLOCK_SIZE)); + rounddown(walk.nbytes, CHACHA_BLOCK_SIZE)); err = skcipher_walk_done(&walk, - walk.nbytes % CHACHA20_BLOCK_SIZE); + walk.nbytes % CHACHA_BLOCK_SIZE); } if (walk.nbytes) { @@ -106,14 +106,14 @@ static struct skcipher_alg alg = { .base.cra_driver_name = "chacha20-simd", .base.cra_priority = 300, .base.cra_blocksize = 1, - .base.cra_ctxsize = sizeof(struct chacha20_ctx), + .base.cra_ctxsize = sizeof(struct chacha_ctx), .base.cra_alignmask = sizeof(u32) - 1, .base.cra_module = THIS_MODULE, - .min_keysize = CHACHA20_KEY_SIZE, - .max_keysize = CHACHA20_KEY_SIZE, - .ivsize = CHACHA20_IV_SIZE, - .chunksize = CHACHA20_BLOCK_SIZE, + .min_keysize = CHACHA_KEY_SIZE, + .max_keysize = CHACHA_KEY_SIZE, + .ivsize = CHACHA_IV_SIZE, + .chunksize = CHACHA_BLOCK_SIZE, .setkey = crypto_chacha20_setkey, .encrypt = chacha20_simd, .decrypt = chacha20_simd, diff --git a/crypto/Makefile b/crypto/Makefile index e24b837d786bc..4d77e00507285 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -110,7 +110,7 @@ obj-$(CONFIG_CRYPTO_KHAZAD) += khazad.o obj-$(CONFIG_CRYPTO_ANUBIS) += anubis.o obj-$(CONFIG_CRYPTO_SEED) += seed.o obj-$(CONFIG_CRYPTO_SALSA20) += salsa20_generic.o -obj-$(CONFIG_CRYPTO_CHACHA20) += chacha20_generic.o +obj-$(CONFIG_CRYPTO_CHACHA20) += chacha_generic.o obj-$(CONFIG_CRYPTO_POLY1305) += poly1305_generic.o obj-$(CONFIG_CRYPTO_DEFLATE) += deflate.o obj-$(CONFIG_CRYPTO_MICHAEL_MIC) += michael_mic.o diff --git a/crypto/chacha20poly1305.c b/crypto/chacha20poly1305.c index 600afa99941fe..573c07e6f189e 100644 --- a/crypto/chacha20poly1305.c +++ b/crypto/chacha20poly1305.c @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include #include @@ -51,7 +51,7 @@ struct poly_req { }; struct chacha_req { - u8 iv[CHACHA20_IV_SIZE]; + u8 iv[CHACHA_IV_SIZE]; struct scatterlist src[1]; struct skcipher_request req; /* must be last member */ }; @@ -91,7 +91,7 @@ static void chacha_iv(u8 *iv, struct aead_request *req, u32 icb) memcpy(iv, &leicb, sizeof(leicb)); memcpy(iv + sizeof(leicb), ctx->salt, ctx->saltlen); memcpy(iv + sizeof(leicb) + ctx->saltlen, req->iv, - CHACHA20_IV_SIZE - sizeof(leicb) - ctx->saltlen); + CHACHA_IV_SIZE - sizeof(leicb) - ctx->saltlen); } static int poly_verify_tag(struct aead_request *req) @@ -494,7 +494,7 @@ static int chachapoly_setkey(struct crypto_aead *aead, const u8 *key, struct chachapoly_ctx *ctx = crypto_aead_ctx(aead); int err; - if (keylen != ctx->saltlen + CHACHA20_KEY_SIZE) + if (keylen != ctx->saltlen + CHACHA_KEY_SIZE) return -EINVAL; keylen -= ctx->saltlen; @@ -639,7 +639,7 @@ static int chachapoly_create(struct crypto_template *tmpl, struct rtattr **tb, err = -EINVAL; /* Need 16-byte IV size, including Initial Block Counter value */ - if (crypto_skcipher_alg_ivsize(chacha) != CHACHA20_IV_SIZE) + if (crypto_skcipher_alg_ivsize(chacha) != CHACHA_IV_SIZE) goto out_drop_chacha; /* Not a stream cipher? */ if (chacha->base.cra_blocksize != 1) diff --git a/crypto/chacha20_generic.c b/crypto/chacha_generic.c similarity index 57% rename from crypto/chacha20_generic.c rename to crypto/chacha_generic.c index 4305b1b62b16a..438f15a14054f 100644 --- a/crypto/chacha20_generic.c +++ b/crypto/chacha_generic.c @@ -12,33 +12,33 @@ #include #include -#include +#include #include #include -static void chacha20_docrypt(u32 *state, u8 *dst, const u8 *src, - unsigned int bytes) +static void chacha_docrypt(u32 *state, u8 *dst, const u8 *src, + unsigned int bytes, int nrounds) { /* aligned to potentially speed up crypto_xor() */ - u8 stream[CHACHA20_BLOCK_SIZE] __aligned(sizeof(long)); + u8 stream[CHACHA_BLOCK_SIZE] __aligned(sizeof(long)); if (dst != src) memcpy(dst, src, bytes); - while (bytes >= CHACHA20_BLOCK_SIZE) { - chacha20_block(state, stream); - crypto_xor(dst, stream, CHACHA20_BLOCK_SIZE); - bytes -= CHACHA20_BLOCK_SIZE; - dst += CHACHA20_BLOCK_SIZE; + while (bytes >= CHACHA_BLOCK_SIZE) { + chacha_block(state, stream, nrounds); + crypto_xor(dst, stream, CHACHA_BLOCK_SIZE); + bytes -= CHACHA_BLOCK_SIZE; + dst += CHACHA_BLOCK_SIZE; } if (bytes) { - chacha20_block(state, stream); + chacha_block(state, stream, nrounds); crypto_xor(dst, stream, bytes); } } -static int chacha20_stream_xor(struct skcipher_request *req, - struct chacha20_ctx *ctx, u8 *iv) +static int chacha_stream_xor(struct skcipher_request *req, + struct chacha_ctx *ctx, u8 *iv) { struct skcipher_walk walk; u32 state[16]; @@ -46,7 +46,7 @@ static int chacha20_stream_xor(struct skcipher_request *req, err = skcipher_walk_virt(&walk, req, false); - crypto_chacha20_init(state, ctx, iv); + crypto_chacha_init(state, ctx, iv); while (walk.nbytes > 0) { unsigned int nbytes = walk.nbytes; @@ -54,15 +54,15 @@ static int chacha20_stream_xor(struct skcipher_request *req, if (nbytes < walk.total) nbytes = round_down(nbytes, walk.stride); - chacha20_docrypt(state, walk.dst.virt.addr, walk.src.virt.addr, - nbytes); + chacha_docrypt(state, walk.dst.virt.addr, walk.src.virt.addr, + nbytes, ctx->nrounds); err = skcipher_walk_done(&walk, walk.nbytes - nbytes); } return err; } -void crypto_chacha20_init(u32 *state, struct chacha20_ctx *ctx, u8 *iv) +void crypto_chacha_init(u32 *state, struct chacha_ctx *ctx, u8 *iv) { state[0] = 0x61707865; /* "expa" */ state[1] = 0x3320646e; /* "nd 3" */ @@ -81,53 +81,61 @@ void crypto_chacha20_init(u32 *state, struct chacha20_ctx *ctx, u8 *iv) state[14] = get_unaligned_le32(iv + 8); state[15] = get_unaligned_le32(iv + 12); } -EXPORT_SYMBOL_GPL(crypto_chacha20_init); +EXPORT_SYMBOL_GPL(crypto_chacha_init); -int crypto_chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key, - unsigned int keysize) +static int chacha_setkey(struct crypto_skcipher *tfm, const u8 *key, + unsigned int keysize, int nrounds) { - struct chacha20_ctx *ctx = crypto_skcipher_ctx(tfm); + struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); int i; - if (keysize != CHACHA20_KEY_SIZE) + if (keysize != CHACHA_KEY_SIZE) return -EINVAL; for (i = 0; i < ARRAY_SIZE(ctx->key); i++) ctx->key[i] = get_unaligned_le32(key + i * sizeof(u32)); + ctx->nrounds = nrounds; return 0; } + +int crypto_chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key, + unsigned int keysize) +{ + return chacha_setkey(tfm, key, keysize, 20); +} EXPORT_SYMBOL_GPL(crypto_chacha20_setkey); -int crypto_chacha20_crypt(struct skcipher_request *req) +int crypto_chacha_crypt(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct chacha20_ctx *ctx = crypto_skcipher_ctx(tfm); + struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); - return chacha20_stream_xor(req, ctx, req->iv); + return chacha_stream_xor(req, ctx, req->iv); } -EXPORT_SYMBOL_GPL(crypto_chacha20_crypt); +EXPORT_SYMBOL_GPL(crypto_chacha_crypt); -int crypto_xchacha20_crypt(struct skcipher_request *req) +int crypto_xchacha_crypt(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct chacha20_ctx *ctx = crypto_skcipher_ctx(tfm); - struct chacha20_ctx subctx; + struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); + struct chacha_ctx subctx; u32 state[16]; u8 real_iv[16]; /* Compute the subkey given the original key and first 128 nonce bits */ - crypto_chacha20_init(state, ctx, req->iv); - hchacha20_block(state, subctx.key); + crypto_chacha_init(state, ctx, req->iv); + hchacha_block(state, subctx.key, ctx->nrounds); + subctx.nrounds = ctx->nrounds; /* Build the real IV */ memcpy(&real_iv[0], req->iv + 24, 8); /* stream position */ memcpy(&real_iv[8], req->iv + 16, 8); /* remaining 64 nonce bits */ /* Generate the stream and XOR it with the data */ - return chacha20_stream_xor(req, &subctx, real_iv); + return chacha_stream_xor(req, &subctx, real_iv); } -EXPORT_SYMBOL_GPL(crypto_xchacha20_crypt); +EXPORT_SYMBOL_GPL(crypto_xchacha_crypt); static struct skcipher_alg algs[] = { { @@ -135,50 +143,50 @@ static struct skcipher_alg algs[] = { .base.cra_driver_name = "chacha20-generic", .base.cra_priority = 100, .base.cra_blocksize = 1, - .base.cra_ctxsize = sizeof(struct chacha20_ctx), + .base.cra_ctxsize = sizeof(struct chacha_ctx), .base.cra_module = THIS_MODULE, - .min_keysize = CHACHA20_KEY_SIZE, - .max_keysize = CHACHA20_KEY_SIZE, - .ivsize = CHACHA20_IV_SIZE, - .chunksize = CHACHA20_BLOCK_SIZE, + .min_keysize = CHACHA_KEY_SIZE, + .max_keysize = CHACHA_KEY_SIZE, + .ivsize = CHACHA_IV_SIZE, + .chunksize = CHACHA_BLOCK_SIZE, .setkey = crypto_chacha20_setkey, - .encrypt = crypto_chacha20_crypt, - .decrypt = crypto_chacha20_crypt, + .encrypt = crypto_chacha_crypt, + .decrypt = crypto_chacha_crypt, }, { .base.cra_name = "xchacha20", .base.cra_driver_name = "xchacha20-generic", .base.cra_priority = 100, .base.cra_blocksize = 1, - .base.cra_ctxsize = sizeof(struct chacha20_ctx), + .base.cra_ctxsize = sizeof(struct chacha_ctx), .base.cra_module = THIS_MODULE, - .min_keysize = CHACHA20_KEY_SIZE, - .max_keysize = CHACHA20_KEY_SIZE, - .ivsize = XCHACHA20_IV_SIZE, - .chunksize = CHACHA20_BLOCK_SIZE, + .min_keysize = CHACHA_KEY_SIZE, + .max_keysize = CHACHA_KEY_SIZE, + .ivsize = XCHACHA_IV_SIZE, + .chunksize = CHACHA_BLOCK_SIZE, .setkey = crypto_chacha20_setkey, - .encrypt = crypto_xchacha20_crypt, - .decrypt = crypto_xchacha20_crypt, + .encrypt = crypto_xchacha_crypt, + .decrypt = crypto_xchacha_crypt, } }; -static int __init chacha20_generic_mod_init(void) +static int __init chacha_generic_mod_init(void) { return crypto_register_skciphers(algs, ARRAY_SIZE(algs)); } -static void __exit chacha20_generic_mod_fini(void) +static void __exit chacha_generic_mod_fini(void) { crypto_unregister_skciphers(algs, ARRAY_SIZE(algs)); } -module_init(chacha20_generic_mod_init); -module_exit(chacha20_generic_mod_fini); +module_init(chacha_generic_mod_init); +module_exit(chacha_generic_mod_fini); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Martin Willi "); -MODULE_DESCRIPTION("ChaCha20 and XChaCha20 stream ciphers (generic)"); +MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (generic)"); MODULE_ALIAS_CRYPTO("chacha20"); MODULE_ALIAS_CRYPTO("chacha20-generic"); MODULE_ALIAS_CRYPTO("xchacha20"); diff --git a/drivers/char/random.c b/drivers/char/random.c index 817a8227d1db4..7eba05e2d5f2e 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -265,7 +265,7 @@ #include #include #include -#include +#include #include #include @@ -431,11 +431,10 @@ static int crng_init = 0; #define crng_ready() (likely(crng_init > 1)) static int crng_init_cnt = 0; static unsigned long crng_global_init_time = 0; -#define CRNG_INIT_CNT_THRESH (2*CHACHA20_KEY_SIZE) -static void _extract_crng(struct crng_state *crng, - __u8 out[CHACHA20_BLOCK_SIZE]); +#define CRNG_INIT_CNT_THRESH (2*CHACHA_KEY_SIZE) +static void _extract_crng(struct crng_state *crng, __u8 out[CHACHA_BLOCK_SIZE]); static void _crng_backtrack_protect(struct crng_state *crng, - __u8 tmp[CHACHA20_BLOCK_SIZE], int used); + __u8 tmp[CHACHA_BLOCK_SIZE], int used); static void process_random_ready_list(void); static void _get_random_bytes(void *buf, int nbytes); @@ -848,7 +847,7 @@ static int crng_fast_load(const char *cp, size_t len) } p = (unsigned char *) &primary_crng.state[4]; while (len > 0 && crng_init_cnt < CRNG_INIT_CNT_THRESH) { - p[crng_init_cnt % CHACHA20_KEY_SIZE] ^= *cp; + p[crng_init_cnt % CHACHA_KEY_SIZE] ^= *cp; cp++; crng_init_cnt++; len--; } spin_unlock_irqrestore(&primary_crng.lock, flags); @@ -880,7 +879,7 @@ static int crng_slow_load(const char *cp, size_t len) unsigned long flags; static unsigned char lfsr = 1; unsigned char tmp; - unsigned i, max = CHACHA20_KEY_SIZE; + unsigned i, max = CHACHA_KEY_SIZE; const char * src_buf = cp; char * dest_buf = (char *) &primary_crng.state[4]; @@ -898,8 +897,8 @@ static int crng_slow_load(const char *cp, size_t len) lfsr >>= 1; if (tmp & 1) lfsr ^= 0xE1; - tmp = dest_buf[i % CHACHA20_KEY_SIZE]; - dest_buf[i % CHACHA20_KEY_SIZE] ^= src_buf[i % len] ^ lfsr; + tmp = dest_buf[i % CHACHA_KEY_SIZE]; + dest_buf[i % CHACHA_KEY_SIZE] ^= src_buf[i % len] ^ lfsr; lfsr += (tmp << 3) | (tmp >> 5); } spin_unlock_irqrestore(&primary_crng.lock, flags); @@ -911,7 +910,7 @@ static void crng_reseed(struct crng_state *crng, struct entropy_store *r) unsigned long flags; int i, num; union { - __u8 block[CHACHA20_BLOCK_SIZE]; + __u8 block[CHACHA_BLOCK_SIZE]; __u32 key[8]; } buf; @@ -922,7 +921,7 @@ static void crng_reseed(struct crng_state *crng, struct entropy_store *r) } else { _extract_crng(&primary_crng, buf.block); _crng_backtrack_protect(&primary_crng, buf.block, - CHACHA20_KEY_SIZE); + CHACHA_KEY_SIZE); } spin_lock_irqsave(&crng->lock, flags); for (i = 0; i < 8; i++) { @@ -958,7 +957,7 @@ static void crng_reseed(struct crng_state *crng, struct entropy_store *r) } static void _extract_crng(struct crng_state *crng, - __u8 out[CHACHA20_BLOCK_SIZE]) + __u8 out[CHACHA_BLOCK_SIZE]) { unsigned long v, flags; @@ -975,7 +974,7 @@ static void _extract_crng(struct crng_state *crng, spin_unlock_irqrestore(&crng->lock, flags); } -static void extract_crng(__u8 out[CHACHA20_BLOCK_SIZE]) +static void extract_crng(__u8 out[CHACHA_BLOCK_SIZE]) { struct crng_state *crng = NULL; @@ -993,14 +992,14 @@ static void extract_crng(__u8 out[CHACHA20_BLOCK_SIZE]) * enough) to mutate the CRNG key to provide backtracking protection. */ static void _crng_backtrack_protect(struct crng_state *crng, - __u8 tmp[CHACHA20_BLOCK_SIZE], int used) + __u8 tmp[CHACHA_BLOCK_SIZE], int used) { unsigned long flags; __u32 *s, *d; int i; used = round_up(used, sizeof(__u32)); - if (used + CHACHA20_KEY_SIZE > CHACHA20_BLOCK_SIZE) { + if (used + CHACHA_KEY_SIZE > CHACHA_BLOCK_SIZE) { extract_crng(tmp); used = 0; } @@ -1012,7 +1011,7 @@ static void _crng_backtrack_protect(struct crng_state *crng, spin_unlock_irqrestore(&crng->lock, flags); } -static void crng_backtrack_protect(__u8 tmp[CHACHA20_BLOCK_SIZE], int used) +static void crng_backtrack_protect(__u8 tmp[CHACHA_BLOCK_SIZE], int used) { struct crng_state *crng = NULL; @@ -1027,8 +1026,8 @@ static void crng_backtrack_protect(__u8 tmp[CHACHA20_BLOCK_SIZE], int used) static ssize_t extract_crng_user(void __user *buf, size_t nbytes) { - ssize_t ret = 0, i = CHACHA20_BLOCK_SIZE; - __u8 tmp[CHACHA20_BLOCK_SIZE] __aligned(4); + ssize_t ret = 0, i = CHACHA_BLOCK_SIZE; + __u8 tmp[CHACHA_BLOCK_SIZE] __aligned(4); int large_request = (nbytes > 256); while (nbytes) { @@ -1042,7 +1041,7 @@ static ssize_t extract_crng_user(void __user *buf, size_t nbytes) } extract_crng(tmp); - i = min_t(int, nbytes, CHACHA20_BLOCK_SIZE); + i = min_t(int, nbytes, CHACHA_BLOCK_SIZE); if (copy_to_user(buf, tmp, i)) { ret = -EFAULT; break; @@ -1614,14 +1613,14 @@ static void _warn_unseeded_randomness(const char *func_name, void *caller, */ static void _get_random_bytes(void *buf, int nbytes) { - __u8 tmp[CHACHA20_BLOCK_SIZE] __aligned(4); + __u8 tmp[CHACHA_BLOCK_SIZE] __aligned(4); trace_get_random_bytes(nbytes, _RET_IP_); - while (nbytes >= CHACHA20_BLOCK_SIZE) { + while (nbytes >= CHACHA_BLOCK_SIZE) { extract_crng(buf); - buf += CHACHA20_BLOCK_SIZE; - nbytes -= CHACHA20_BLOCK_SIZE; + buf += CHACHA_BLOCK_SIZE; + nbytes -= CHACHA_BLOCK_SIZE; } if (nbytes > 0) { @@ -1629,7 +1628,7 @@ static void _get_random_bytes(void *buf, int nbytes) memcpy(buf, tmp, nbytes); crng_backtrack_protect(tmp, nbytes); } else - crng_backtrack_protect(tmp, CHACHA20_BLOCK_SIZE); + crng_backtrack_protect(tmp, CHACHA_BLOCK_SIZE); memzero_explicit(tmp, sizeof(tmp)); } @@ -2184,8 +2183,8 @@ struct ctl_table random_table[] = { struct batched_entropy { union { - u64 entropy_u64[CHACHA20_BLOCK_SIZE / sizeof(u64)]; - u32 entropy_u32[CHACHA20_BLOCK_SIZE / sizeof(u32)]; + u64 entropy_u64[CHACHA_BLOCK_SIZE / sizeof(u64)]; + u32 entropy_u32[CHACHA_BLOCK_SIZE / sizeof(u32)]; }; unsigned int position; }; diff --git a/include/crypto/chacha.h b/include/crypto/chacha.h new file mode 100644 index 0000000000000..ae79e9983c72f --- /dev/null +++ b/include/crypto/chacha.h @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Common values and helper functions for the ChaCha and XChaCha stream ciphers. + * + * XChaCha extends ChaCha's nonce to 192 bits, while provably retaining ChaCha's + * security. Here they share the same key size, tfm context, and setkey + * function; only their IV size and encrypt/decrypt function differ. + */ + +#ifndef _CRYPTO_CHACHA_H +#define _CRYPTO_CHACHA_H + +#include +#include +#include + +/* 32-bit stream position, then 96-bit nonce (RFC7539 convention) */ +#define CHACHA_IV_SIZE 16 + +#define CHACHA_KEY_SIZE 32 +#define CHACHA_BLOCK_SIZE 64 + +/* 192-bit nonce, then 64-bit stream position */ +#define XCHACHA_IV_SIZE 32 + +struct chacha_ctx { + u32 key[8]; + int nrounds; +}; + +void chacha_block(u32 *state, u8 *stream, int nrounds); +static inline void chacha20_block(u32 *state, u8 *stream) +{ + chacha_block(state, stream, 20); +} +void hchacha_block(const u32 *in, u32 *out, int nrounds); + +void crypto_chacha_init(u32 *state, struct chacha_ctx *ctx, u8 *iv); + +int crypto_chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key, + unsigned int keysize); + +int crypto_chacha_crypt(struct skcipher_request *req); +int crypto_xchacha_crypt(struct skcipher_request *req); + +#endif /* _CRYPTO_CHACHA_H */ diff --git a/include/crypto/chacha20.h b/include/crypto/chacha20.h deleted file mode 100644 index 6290d997060ec..0000000000000 --- a/include/crypto/chacha20.h +++ /dev/null @@ -1,41 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Common values and helper functions for the ChaCha20 and XChaCha20 algorithms. - * - * XChaCha20 extends ChaCha20's nonce to 192 bits, while provably retaining - * ChaCha20's security. Here they share the same key size, tfm context, and - * setkey function; only their IV size and encrypt/decrypt function differ. - */ - -#ifndef _CRYPTO_CHACHA20_H -#define _CRYPTO_CHACHA20_H - -#include -#include -#include - -/* 32-bit stream position, then 96-bit nonce (RFC7539 convention) */ -#define CHACHA20_IV_SIZE 16 - -#define CHACHA20_KEY_SIZE 32 -#define CHACHA20_BLOCK_SIZE 64 - -/* 192-bit nonce, then 64-bit stream position */ -#define XCHACHA20_IV_SIZE 32 - -struct chacha20_ctx { - u32 key[8]; -}; - -void chacha20_block(u32 *state, u8 *stream); -void hchacha20_block(const u32 *in, u32 *out); - -void crypto_chacha20_init(u32 *state, struct chacha20_ctx *ctx, u8 *iv); - -int crypto_chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key, - unsigned int keysize); - -int crypto_chacha20_crypt(struct skcipher_request *req); -int crypto_xchacha20_crypt(struct skcipher_request *req); - -#endif diff --git a/lib/Makefile b/lib/Makefile index b8f2c16fccaa9..a7c93b916010f 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -20,7 +20,7 @@ KCOV_INSTRUMENT_dynamic_debug.o := n lib-y := ctype.o string.o vsprintf.o cmdline.o \ rbtree.o radix-tree.o dump_stack.o timerqueue.o\ idr.o int_sqrt.o extable.o \ - sha1.o chacha20.o irq_regs.o argv_split.o \ + sha1.o chacha.o irq_regs.o argv_split.o \ flex_proportions.o ratelimit.o show_mem.o \ is_single_threaded.o plist.o decompress.o kobject_uevent.o \ earlycpio.o seq_buf.o siphash.o \ diff --git a/lib/chacha20.c b/lib/chacha.c similarity index 67% rename from lib/chacha20.c rename to lib/chacha.c index 6a484e16171d1..1bdc688c18df6 100644 --- a/lib/chacha20.c +++ b/lib/chacha.c @@ -1,5 +1,5 @@ /* - * The "hash function" used as the core of the ChaCha20 stream cipher (RFC7539) + * The "hash function" used as the core of the ChaCha stream cipher (RFC7539) * * Copyright (C) 2015 Martin Willi * @@ -14,13 +14,16 @@ #include #include #include -#include +#include -static void chacha20_permute(u32 *x) +static void chacha_permute(u32 *x, int nrounds) { int i; - for (i = 0; i < 20; i += 2) { + /* whitelist the allowed round counts */ + WARN_ON_ONCE(nrounds != 20); + + for (i = 0; i < nrounds; i += 2) { x[0] += x[4]; x[12] = rol32(x[12] ^ x[0], 16); x[1] += x[5]; x[13] = rol32(x[13] ^ x[1], 16); x[2] += x[6]; x[14] = rol32(x[14] ^ x[2], 16); @@ -64,49 +67,51 @@ static void chacha20_permute(u32 *x) } /** - * chacha20_block - generate one keystream block and increment block counter + * chacha_block - generate one keystream block and increment block counter * @state: input state matrix (16 32-bit words) * @stream: output keystream block (64 bytes) + * @nrounds: number of rounds (currently must be 20) * - * This is the ChaCha20 core, a function from 64-byte strings to 64-byte - * strings. The caller has already converted the endianness of the input. This - * function also handles incrementing the block counter in the input matrix. + * This is the ChaCha core, a function from 64-byte strings to 64-byte strings. + * The caller has already converted the endianness of the input. This function + * also handles incrementing the block counter in the input matrix. */ -void chacha20_block(u32 *state, u8 *stream) +void chacha_block(u32 *state, u8 *stream, int nrounds) { u32 x[16]; int i; memcpy(x, state, 64); - chacha20_permute(x); + chacha_permute(x, nrounds); for (i = 0; i < ARRAY_SIZE(x); i++) put_unaligned_le32(x[i] + state[i], &stream[i * sizeof(u32)]); state[12]++; } -EXPORT_SYMBOL(chacha20_block); +EXPORT_SYMBOL(chacha_block); /** - * hchacha20_block - abbreviated ChaCha20 core, for XChaCha20 + * hchacha_block - abbreviated ChaCha core, for XChaCha * @in: input state matrix (16 32-bit words) * @out: output (8 32-bit words) + * @nrounds: number of rounds (currently must be 20) * - * HChaCha20 is the ChaCha equivalent of HSalsa20 and is an intermediate step - * towards XChaCha20 (see https://cr.yp.to/snuffle/xsalsa-20081128.pdf). - * HChaCha20 skips the final addition of the initial state, and outputs only - * certain words of the state. It should not be used for streaming directly. + * HChaCha is the ChaCha equivalent of HSalsa and is an intermediate step + * towards XChaCha (see https://cr.yp.to/snuffle/xsalsa-20081128.pdf). HChaCha + * skips the final addition of the initial state, and outputs only certain words + * of the state. It should not be used for streaming directly. */ -void hchacha20_block(const u32 *in, u32 *out) +void hchacha_block(const u32 *in, u32 *out, int nrounds) { u32 x[16]; memcpy(x, in, 64); - chacha20_permute(x); + chacha_permute(x, nrounds); memcpy(&out[0], &x[0], 16); memcpy(&out[4], &x[12], 16); } -EXPORT_SYMBOL(hchacha20_block); +EXPORT_SYMBOL(hchacha_block); -- GitLab From 4c11fdd7011e33345609a07b3514cdf63ced92ed Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 16 Nov 2018 17:26:22 -0800 Subject: [PATCH 1955/2269] BACKPORT, FROMGIT: crypto: chacha - add XChaCha12 support Now that the generic implementation of ChaCha20 has been refactored to allow varying the number of rounds, add support for XChaCha12, which is the XSalsa construction applied to ChaCha12. ChaCha12 is one of the three ciphers specified by the original ChaCha paper (https://cr.yp.to/chacha/chacha-20080128.pdf: "ChaCha, a variant of Salsa20"), alongside ChaCha8 and ChaCha20. ChaCha12 is faster than ChaCha20 but has a lower, but still large, security margin. We need XChaCha12 support so that it can be used in the Adiantum encryption mode, which enables disk/file encryption on low-end mobile devices where AES-XTS is too slow as the CPUs lack AES instructions. We'd prefer XChaCha20 (the more popular variant), but it's too slow on some of our target devices, so at least in some cases we do need the XChaCha12-based version. In more detail, the problem is that Adiantum is still much slower than we're happy with, and encryption still has a quite noticeable effect on the feel of low-end devices. Users and vendors push back hard against encryption that degrades the user experience, which always risks encryption being disabled entirely. So we need to choose the fastest option that gives us a solid margin of security, and here that's XChaCha12. The best known attack on ChaCha breaks only 7 rounds and has 2^235 time complexity, so ChaCha12's security margin is still better than AES-256's. Much has been learned about cryptanalysis of ARX ciphers since Salsa20 was originally designed in 2005, and it now seems we can be comfortable with a smaller number of rounds. The eSTREAM project also suggests the 12-round version of Salsa20 as providing the best balance among the different variants: combining very good performance with a "comfortable margin of security". Note that it would be trivial to add vanilla ChaCha12 in addition to XChaCha12. However, it's unneeded for now and therefore is omitted. As discussed in the patch that introduced XChaCha20 support, I considered splitting the code into separate chacha-common, chacha20, xchacha20, and xchacha12 modules, so that these algorithms could be enabled/disabled independently. However, since nearly all the code is shared anyway, I ultimately decided there would have been little benefit to the added complexity. Reviewed-by: Ard Biesheuvel Acked-by: Martin Willi Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu (cherry picked from commit aa7624093cb7fbf4fea95e612580d8d29a819f67 https://git.kernel.org/pub/scm/linux/kernel/git/herbert/cryptodev-2.6.git master) (adjusted test vector formatting for old testmgr) Bug: 112008522 Test: As series, see Ic61c13b53facfd2173065be715a7ee5f3af8760b Change-Id: I876a5be92e9f583effcd35a4b66a36608ac581f0 Signed-off-by: Eric Biggers --- crypto/Kconfig | 8 +- crypto/chacha_generic.c | 26 +- crypto/testmgr.c | 9 + crypto/testmgr.h | 584 ++++++++++++++++++++++++++++++++++++++++ include/crypto/chacha.h | 7 + lib/chacha.c | 6 +- 6 files changed, 634 insertions(+), 6 deletions(-) diff --git a/crypto/Kconfig b/crypto/Kconfig index 9db49135bdfb1..4d1f995cb4e48 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1325,10 +1325,10 @@ config CRYPTO_SALSA20 Bernstein . See config CRYPTO_CHACHA20 - tristate "ChaCha20 stream cipher algorithms" + tristate "ChaCha stream cipher algorithms" select CRYPTO_BLKCIPHER help - The ChaCha20 and XChaCha20 stream cipher algorithms. + The ChaCha20, XChaCha20, and XChaCha12 stream cipher algorithms. ChaCha20 is a 256-bit high-speed stream cipher designed by Daniel J. Bernstein and further specified in RFC7539 for use in IETF protocols. @@ -1341,6 +1341,10 @@ config CRYPTO_CHACHA20 while provably retaining ChaCha20's security. See also: + XChaCha12 is XChaCha20 reduced to 12 rounds, with correspondingly + reduced security margin but increased performance. It can be needed + in some performance-sensitive scenarios. + config CRYPTO_CHACHA20_X86_64 tristate "ChaCha20 cipher algorithm (x86_64/SSSE3/AVX2)" depends on X86 && 64BIT diff --git a/crypto/chacha_generic.c b/crypto/chacha_generic.c index 438f15a14054f..35b583101f4f3 100644 --- a/crypto/chacha_generic.c +++ b/crypto/chacha_generic.c @@ -1,5 +1,5 @@ /* - * ChaCha20 (RFC7539) and XChaCha20 stream cipher algorithms + * ChaCha and XChaCha stream ciphers, including ChaCha20 (RFC7539) * * Copyright (C) 2015 Martin Willi * Copyright (C) 2018 Google LLC @@ -106,6 +106,13 @@ int crypto_chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key, } EXPORT_SYMBOL_GPL(crypto_chacha20_setkey); +int crypto_chacha12_setkey(struct crypto_skcipher *tfm, const u8 *key, + unsigned int keysize) +{ + return chacha_setkey(tfm, key, keysize, 12); +} +EXPORT_SYMBOL_GPL(crypto_chacha12_setkey); + int crypto_chacha_crypt(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); @@ -168,6 +175,21 @@ static struct skcipher_alg algs[] = { .setkey = crypto_chacha20_setkey, .encrypt = crypto_xchacha_crypt, .decrypt = crypto_xchacha_crypt, + }, { + .base.cra_name = "xchacha12", + .base.cra_driver_name = "xchacha12-generic", + .base.cra_priority = 100, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct chacha_ctx), + .base.cra_module = THIS_MODULE, + + .min_keysize = CHACHA_KEY_SIZE, + .max_keysize = CHACHA_KEY_SIZE, + .ivsize = XCHACHA_IV_SIZE, + .chunksize = CHACHA_BLOCK_SIZE, + .setkey = crypto_chacha12_setkey, + .encrypt = crypto_xchacha_crypt, + .decrypt = crypto_xchacha_crypt, } }; @@ -191,3 +213,5 @@ MODULE_ALIAS_CRYPTO("chacha20"); MODULE_ALIAS_CRYPTO("chacha20-generic"); MODULE_ALIAS_CRYPTO("xchacha20"); MODULE_ALIAS_CRYPTO("xchacha20-generic"); +MODULE_ALIAS_CRYPTO("xchacha12"); +MODULE_ALIAS_CRYPTO("xchacha12-generic"); diff --git a/crypto/testmgr.c b/crypto/testmgr.c index dd92cc29e6efe..b93c367e99649 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -3547,6 +3547,15 @@ static const struct alg_test_desc alg_test_descs[] = { .suite = { .hash = __VECS(aes_xcbc128_tv_template) } + }, { + .alg = "xchacha12", + .test = alg_test_skcipher, + .suite = { + .cipher = { + .enc = __VECS(xchacha12_tv_template), + .dec = __VECS(xchacha12_tv_template) + } + }, }, { .alg = "xchacha20", .test = alg_test_skcipher, diff --git a/crypto/testmgr.h b/crypto/testmgr.h index 76420201ed7e5..5ad625a8ed2dc 100644 --- a/crypto/testmgr.h +++ b/crypto/testmgr.h @@ -33642,6 +33642,590 @@ static const struct cipher_testvec xchacha20_tv_template[] = { }, }; +/* + * Same as XChaCha20 test vectors above, but recomputed the ciphertext with + * XChaCha12, using a modified libsodium. + */ +static const struct cipher_testvec xchacha12_tv_template[] = { + { + .key = "\x79\xc9\x97\x98\xac\x67\x30\x0b" + "\xbb\x27\x04\xc9\x5c\x34\x1e\x32" + "\x45\xf3\xdc\xb2\x17\x61\xb9\x8e" + "\x52\xff\x45\xb2\x4f\x30\x4f\xc4", + .klen = 32, + .iv = "\xb3\x3f\xfd\x30\x96\x47\x9b\xcf" + "\xbc\x9a\xee\x49\x41\x76\x88\xa0" + "\xa2\x55\x4f\x8d\x95\x38\x94\x19" + "\x00\x00\x00\x00\x00\x00\x00\x00", + .input = "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00", + .result = "\x1b\x78\x7f\xd7\xa1\x41\x68\xab" + "\x3d\x3f\xd1\x7b\x69\x56\xb2\xd5" + "\x43\xce\xeb\xaf\x36\xf0\x29\x9d" + "\x3a\xfb\x18\xae\x1b", + .ilen = 29, + .rlen = 29, + }, { + .key = "\x9d\x23\xbd\x41\x49\xcb\x97\x9c" + "\xcf\x3c\x5c\x94\xdd\x21\x7e\x98" + "\x08\xcb\x0e\x50\xcd\x0f\x67\x81" + "\x22\x35\xea\xaf\x60\x1d\x62\x32", + .klen = 32, + .iv = "\xc0\x47\x54\x82\x66\xb7\xc3\x70" + "\xd3\x35\x66\xa2\x42\x5c\xbf\x30" + "\xd8\x2d\x1e\xaf\x52\x94\x10\x9e" + "\x00\x00\x00\x00\x00\x00\x00\x00", + .input = "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00", + .result = "\xfb\x32\x09\x1d\x83\x05\xae\x4c" + "\x13\x1f\x12\x71\xf2\xca\xb2\xeb" + "\x5b\x83\x14\x7d\x83\xf6\x57\x77" + "\x2e\x40\x1f\x92\x2c\xf9\xec\x35" + "\x34\x1f\x93\xdf\xfb\x30\xd7\x35" + "\x03\x05\x78\xc1\x20\x3b\x7a\xe3" + "\x62\xa3\x89\xdc\x11\x11\x45\xa8" + "\x82\x89\xa0\xf1\x4e\xc7\x0f\x11" + "\x69\xdd\x0c\x84\x2b\x89\x5c\xdc" + "\xf0\xde\x01\xef\xc5\x65\x79\x23" + "\x87\x67\xd6\x50\xd9\x8d\xd9\x92" + "\x54\x5b\x0e", + .ilen = 91, + .rlen = 91, + }, { + .key = "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00", + .klen = 32, + .iv = "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x67\xc6\x69\x73" + "\x51\xff\x4a\xec\x29\xcd\xba\xab" + "\x00\x00\x00\x00\x00\x00\x00\x00", + .input = "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00", + .result = "\xdf\x2d\xc6\x21\x2a\x9d\xa1\xbb" + "\xc2\x77\x66\x0c\x5c\x46\xef\xa7" + "\x79\x1b\xb9\xdf\x55\xe2\xf9\x61" + "\x4c\x7b\xa4\x52\x24\xaf\xa2\xda" + "\xd1\x8f\x8f\xa2\x9e\x53\x4d\xc4" + "\xb8\x55\x98\x08\x7c\x08\xd4\x18" + "\x67\x8f\xef\x50\xb1\x5f\xa5\x77" + "\x4c\x25\xe7\x86\x26\x42\xca\x44", + .ilen = 64, + .rlen = 64, + }, { + .key = "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x01", + .klen = 32, + .iv = "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x02\xf2\xfb\xe3\x46" + "\x7c\xc2\x54\xf8\x1b\xe8\xe7\x8d" + "\x01\x00\x00\x00\x00\x00\x00\x00", + .input = "\x41\x6e\x79\x20\x73\x75\x62\x6d" + "\x69\x73\x73\x69\x6f\x6e\x20\x74" + "\x6f\x20\x74\x68\x65\x20\x49\x45" + "\x54\x46\x20\x69\x6e\x74\x65\x6e" + "\x64\x65\x64\x20\x62\x79\x20\x74" + "\x68\x65\x20\x43\x6f\x6e\x74\x72" + "\x69\x62\x75\x74\x6f\x72\x20\x66" + "\x6f\x72\x20\x70\x75\x62\x6c\x69" + "\x63\x61\x74\x69\x6f\x6e\x20\x61" + "\x73\x20\x61\x6c\x6c\x20\x6f\x72" + "\x20\x70\x61\x72\x74\x20\x6f\x66" + "\x20\x61\x6e\x20\x49\x45\x54\x46" + "\x20\x49\x6e\x74\x65\x72\x6e\x65" + "\x74\x2d\x44\x72\x61\x66\x74\x20" + "\x6f\x72\x20\x52\x46\x43\x20\x61" + "\x6e\x64\x20\x61\x6e\x79\x20\x73" + "\x74\x61\x74\x65\x6d\x65\x6e\x74" + "\x20\x6d\x61\x64\x65\x20\x77\x69" + "\x74\x68\x69\x6e\x20\x74\x68\x65" + "\x20\x63\x6f\x6e\x74\x65\x78\x74" + "\x20\x6f\x66\x20\x61\x6e\x20\x49" + "\x45\x54\x46\x20\x61\x63\x74\x69" + "\x76\x69\x74\x79\x20\x69\x73\x20" + "\x63\x6f\x6e\x73\x69\x64\x65\x72" + "\x65\x64\x20\x61\x6e\x20\x22\x49" + "\x45\x54\x46\x20\x43\x6f\x6e\x74" + "\x72\x69\x62\x75\x74\x69\x6f\x6e" + "\x22\x2e\x20\x53\x75\x63\x68\x20" + "\x73\x74\x61\x74\x65\x6d\x65\x6e" + "\x74\x73\x20\x69\x6e\x63\x6c\x75" + "\x64\x65\x20\x6f\x72\x61\x6c\x20" + "\x73\x74\x61\x74\x65\x6d\x65\x6e" + "\x74\x73\x20\x69\x6e\x20\x49\x45" + "\x54\x46\x20\x73\x65\x73\x73\x69" + "\x6f\x6e\x73\x2c\x20\x61\x73\x20" + "\x77\x65\x6c\x6c\x20\x61\x73\x20" + "\x77\x72\x69\x74\x74\x65\x6e\x20" + "\x61\x6e\x64\x20\x65\x6c\x65\x63" + "\x74\x72\x6f\x6e\x69\x63\x20\x63" + "\x6f\x6d\x6d\x75\x6e\x69\x63\x61" + "\x74\x69\x6f\x6e\x73\x20\x6d\x61" + "\x64\x65\x20\x61\x74\x20\x61\x6e" + "\x79\x20\x74\x69\x6d\x65\x20\x6f" + "\x72\x20\x70\x6c\x61\x63\x65\x2c" + "\x20\x77\x68\x69\x63\x68\x20\x61" + "\x72\x65\x20\x61\x64\x64\x72\x65" + "\x73\x73\x65\x64\x20\x74\x6f", + .result = "\xe4\xa6\xc8\x30\xc4\x23\x13\xd6" + "\x08\x4d\xc9\xb7\xa5\x64\x7c\xb9" + "\x71\xe2\xab\x3e\xa8\x30\x8a\x1c" + "\x4a\x94\x6d\x9b\xe0\xb3\x6f\xf1" + "\xdc\xe3\x1b\xb3\xa9\x6d\x0d\xd6" + "\xd0\xca\x12\xef\xe7\x5f\xd8\x61" + "\x3c\x82\xd3\x99\x86\x3c\x6f\x66" + "\x02\x06\xdc\x55\xf9\xed\xdf\x38" + "\xb4\xa6\x17\x00\x7f\xef\xbf\x4f" + "\xf8\x36\xf1\x60\x7e\x47\xaf\xdb" + "\x55\x9b\x12\xcb\x56\x44\xa7\x1f" + "\xd3\x1a\x07\x3b\x00\xec\xe6\x4c" + "\xa2\x43\x27\xdf\x86\x19\x4f\x16" + "\xed\xf9\x4a\xf3\x63\x6f\xfa\x7f" + "\x78\x11\xf6\x7d\x97\x6f\xec\x6f" + "\x85\x0f\x5c\x36\x13\x8d\x87\xe0" + "\x80\xb1\x69\x0b\x98\x89\x9c\x4e" + "\xf8\xdd\xee\x5c\x0a\x85\xce\xd4" + "\xea\x1b\x48\xbe\x08\xf8\xe2\xa8" + "\xa5\xb0\x3c\x79\xb1\x15\xb4\xb9" + "\x75\x10\x95\x35\x81\x7e\x26\xe6" + "\x78\xa4\x88\xcf\xdb\x91\x34\x18" + "\xad\xd7\x8e\x07\x7d\xab\x39\xf9" + "\xa3\x9e\xa5\x1d\xbb\xed\x61\xfd" + "\xdc\xb7\x5a\x27\xfc\xb5\xc9\x10" + "\xa8\xcc\x52\x7f\x14\x76\x90\xe7" + "\x1b\x29\x60\x74\xc0\x98\x77\xbb" + "\xe0\x54\xbb\x27\x49\x59\x1e\x62" + "\x3d\xaf\x74\x06\xa4\x42\x6f\xc6" + "\x52\x97\xc4\x1d\xc4\x9f\xe2\xe5" + "\x38\x57\x91\xd1\xa2\x28\xcc\x40" + "\xcc\x70\x59\x37\xfc\x9f\x4b\xda" + "\xa0\xeb\x97\x9a\x7d\xed\x14\x5c" + "\x9c\xb7\x93\x26\x41\xa8\x66\xdd" + "\x87\x6a\xc0\xd3\xc2\xa9\x3e\xae" + "\xe9\x72\xfe\xd1\xb3\xac\x38\xea" + "\x4d\x15\xa9\xd5\x36\x61\xe9\x96" + "\x6c\x23\xf8\x43\xe4\x92\x29\xd9" + "\x8b\x78\xf7\x0a\x52\xe0\x19\x5b" + "\x59\x69\x5b\x5d\xa1\x53\xc4\x68" + "\xe1\xbb\xac\x89\x14\xe2\xe2\x85" + "\x41\x18\xf5\xb3\xd1\xfa\x68\x19" + "\x44\x78\xdc\xcf\xe7\x88\x2d\x52" + "\x5f\x40\xb5\x7e\xf8\x88\xa2\xae" + "\x4a\xb2\x07\x35\x9d\x9b\x07\x88" + "\xb7\x00\xd0\x0c\xb6\xa0\x47\x59" + "\xda\x4e\xc9\xab\x9b\x8a\x7b", + + .ilen = 375, + .rlen = 375, + .also_non_np = 1, + .np = 3, + .tap = { 375 - 20, 4, 16 }, + + }, { + .key = "\x1c\x92\x40\xa5\xeb\x55\xd3\x8a" + "\xf3\x33\x88\x86\x04\xf6\xb5\xf0" + "\x47\x39\x17\xc1\x40\x2b\x80\x09" + "\x9d\xca\x5c\xbc\x20\x70\x75\xc0", + .klen = 32, + .iv = "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x02\x76\x5a\x2e\x63" + "\x33\x9f\xc9\x9a\x66\x32\x0d\xb7" + "\x2a\x00\x00\x00\x00\x00\x00\x00", + .input = "\x27\x54\x77\x61\x73\x20\x62\x72" + "\x69\x6c\x6c\x69\x67\x2c\x20\x61" + "\x6e\x64\x20\x74\x68\x65\x20\x73" + "\x6c\x69\x74\x68\x79\x20\x74\x6f" + "\x76\x65\x73\x0a\x44\x69\x64\x20" + "\x67\x79\x72\x65\x20\x61\x6e\x64" + "\x20\x67\x69\x6d\x62\x6c\x65\x20" + "\x69\x6e\x20\x74\x68\x65\x20\x77" + "\x61\x62\x65\x3a\x0a\x41\x6c\x6c" + "\x20\x6d\x69\x6d\x73\x79\x20\x77" + "\x65\x72\x65\x20\x74\x68\x65\x20" + "\x62\x6f\x72\x6f\x67\x6f\x76\x65" + "\x73\x2c\x0a\x41\x6e\x64\x20\x74" + "\x68\x65\x20\x6d\x6f\x6d\x65\x20" + "\x72\x61\x74\x68\x73\x20\x6f\x75" + "\x74\x67\x72\x61\x62\x65\x2e", + .result = "\xb9\x68\xbc\x6a\x24\xbc\xcc\xd8" + "\x9b\x2a\x8d\x5b\x96\xaf\x56\xe3" + "\x11\x61\xe7\xa7\x9b\xce\x4e\x7d" + "\x60\x02\x48\xac\xeb\xd5\x3a\x26" + "\x9d\x77\x3b\xb5\x32\x13\x86\x8e" + "\x20\x82\x26\x72\xae\x64\x1b\x7e" + "\x2e\x01\x68\xb4\x87\x45\xa1\x24" + "\xe4\x48\x40\xf0\xaa\xac\xee\xa9" + "\xfc\x31\xad\x9d\x89\xa3\xbb\xd2" + "\xe4\x25\x13\xad\x0f\x5e\xdf\x3c" + "\x27\xab\xb8\x62\x46\x22\x30\x48" + "\x55\x2c\x4e\x84\x78\x1d\x0d\x34" + "\x8d\x3c\x91\x0a\x7f\x5b\x19\x9f" + "\x97\x05\x4c\xa7\x62\x47\x8b\xc5" + "\x44\x2e\x20\x33\xdd\xa0\x82\xa9" + "\x25\x76\x37\xe6\x3c\x67\x5b", + .ilen = 127, + .rlen = 127, + }, { + .key = "\x1c\x92\x40\xa5\xeb\x55\xd3\x8a" + "\xf3\x33\x88\x86\x04\xf6\xb5\xf0" + "\x47\x39\x17\xc1\x40\x2b\x80\x09" + "\x9d\xca\x5c\xbc\x20\x70\x75\xc0", + .klen = 32, + .iv = "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x01\x31\x58\xa3\x5a" + "\x25\x5d\x05\x17\x58\xe9\x5e\xd4" + "\x1c\x00\x00\x00\x00\x00\x00\x00", + .input = "\x49\xee\xe0\xdc\x24\x90\x40\xcd" + "\xc5\x40\x8f\x47\x05\xbc\xdd\x81" + "\x47\xc6\x8d\xe6\xb1\x8f\xd7\xcb" + "\x09\x0e\x6e\x22\x48\x1f\xbf\xb8" + "\x5c\xf7\x1e\x8a\xc1\x23\xf2\xd4" + "\x19\x4b\x01\x0f\x4e\xa4\x43\xce" + "\x01\xc6\x67\xda\x03\x91\x18\x90" + "\xa5\xa4\x8e\x45\x03\xb3\x2d\xac" + "\x74\x92\xd3\x53\x47\xc8\xdd\x25" + "\x53\x6c\x02\x03\x87\x0d\x11\x0c" + "\x58\xe3\x12\x18\xfd\x2a\x5b\x40" + "\x0c\x30\xf0\xb8\x3f\x43\xce\xae" + "\x65\x3a\x7d\x7c\xf4\x54\xaa\xcc" + "\x33\x97\xc3\x77\xba\xc5\x70\xde" + "\xd7\xd5\x13\xa5\x65\xc4\x5f\x0f" + "\x46\x1a\x0d\x97\xb5\xf3\xbb\x3c" + "\x84\x0f\x2b\xc5\xaa\xea\xf2\x6c" + "\xc9\xb5\x0c\xee\x15\xf3\x7d\xbe" + "\x9f\x7b\x5a\xa6\xae\x4f\x83\xb6" + "\x79\x49\x41\xf4\x58\x18\xcb\x86" + "\x7f\x30\x0e\xf8\x7d\x44\x36\xea" + "\x75\xeb\x88\x84\x40\x3c\xad\x4f" + "\x6f\x31\x6b\xaa\x5d\xe5\xa5\xc5" + "\x21\x66\xe9\xa7\xe3\xb2\x15\x88" + "\x78\xf6\x79\xa1\x59\x47\x12\x4e" + "\x9f\x9f\x64\x1a\xa0\x22\x5b\x08" + "\xbe\x7c\x36\xc2\x2b\x66\x33\x1b" + "\xdd\x60\x71\xf7\x47\x8c\x61\xc3" + "\xda\x8a\x78\x1e\x16\xfa\x1e\x86" + "\x81\xa6\x17\x2a\xa7\xb5\xc2\xe7" + "\xa4\xc7\x42\xf1\xcf\x6a\xca\xb4" + "\x45\xcf\xf3\x93\xf0\xe7\xea\xf6" + "\xf4\xe6\x33\x43\x84\x93\xa5\x67" + "\x9b\x16\x58\x58\x80\x0f\x2b\x5c" + "\x24\x74\x75\x7f\x95\x81\xb7\x30" + "\x7a\x33\xa7\xf7\x94\x87\x32\x27" + "\x10\x5d\x14\x4c\x43\x29\xdd\x26" + "\xbd\x3e\x3c\x0e\xfe\x0e\xa5\x10" + "\xea\x6b\x64\xfd\x73\xc6\xed\xec" + "\xa8\xc9\xbf\xb3\xba\x0b\x4d\x07" + "\x70\xfc\x16\xfd\x79\x1e\xd7\xc5" + "\x49\x4e\x1c\x8b\x8d\x79\x1b\xb1" + "\xec\xca\x60\x09\x4c\x6a\xd5\x09" + "\x49\x46\x00\x88\x22\x8d\xce\xea" + "\xb1\x17\x11\xde\x42\xd2\x23\xc1" + "\x72\x11\xf5\x50\x73\x04\x40\x47" + "\xf9\x5d\xe7\xa7\x26\xb1\x7e\xb0" + "\x3f\x58\xc1\x52\xab\x12\x67\x9d" + "\x3f\x43\x4b\x68\xd4\x9c\x68\x38" + "\x07\x8a\x2d\x3e\xf3\xaf\x6a\x4b" + "\xf9\xe5\x31\x69\x22\xf9\xa6\x69" + "\xc6\x9c\x96\x9a\x12\x35\x95\x1d" + "\x95\xd5\xdd\xbe\xbf\x93\x53\x24" + "\xfd\xeb\xc2\x0a\x64\xb0\x77\x00" + "\x6f\x88\xc4\x37\x18\x69\x7c\xd7" + "\x41\x92\x55\x4c\x03\xa1\x9a\x4b" + "\x15\xe5\xdf\x7f\x37\x33\x72\xc1" + "\x8b\x10\x67\xa3\x01\x57\x94\x25" + "\x7b\x38\x71\x7e\xdd\x1e\xcc\x73" + "\x55\xd2\x8e\xeb\x07\xdd\xf1\xda" + "\x58\xb1\x47\x90\xfe\x42\x21\x72" + "\xa3\x54\x7a\xa0\x40\xec\x9f\xdd" + "\xc6\x84\x6e\xca\xae\xe3\x68\xb4" + "\x9d\xe4\x78\xff\x57\xf2\xf8\x1b" + "\x03\xa1\x31\xd9\xde\x8d\xf5\x22" + "\x9c\xdd\x20\xa4\x1e\x27\xb1\x76" + "\x4f\x44\x55\xe2\x9b\xa1\x9c\xfe" + "\x54\xf7\x27\x1b\xf4\xde\x02\xf5" + "\x1b\x55\x48\x5c\xdc\x21\x4b\x9e" + "\x4b\x6e\xed\x46\x23\xdc\x65\xb2" + "\xcf\x79\x5f\x28\xe0\x9e\x8b\xe7" + "\x4c\x9d\x8a\xff\xc1\xa6\x28\xb8" + "\x65\x69\x8a\x45\x29\xef\x74\x85" + "\xde\x79\xc7\x08\xae\x30\xb0\xf4" + "\xa3\x1d\x51\x41\xab\xce\xcb\xf6" + "\xb5\xd8\x6d\xe0\x85\xe1\x98\xb3" + "\x43\xbb\x86\x83\x0a\xa0\xf5\xb7" + "\x04\x0b\xfa\x71\x1f\xb0\xf6\xd9" + "\x13\x00\x15\xf0\xc7\xeb\x0d\x5a" + "\x9f\xd7\xb9\x6c\x65\x14\x22\x45" + "\x6e\x45\x32\x3e\x7e\x60\x1a\x12" + "\x97\x82\x14\xfb\xaa\x04\x22\xfa" + "\xa0\xe5\x7e\x8c\x78\x02\x48\x5d" + "\x78\x33\x5a\x7c\xad\xdb\x29\xce" + "\xbb\x8b\x61\xa4\xb7\x42\xe2\xac" + "\x8b\x1a\xd9\x2f\x0b\x8b\x62\x21" + "\x83\x35\x7e\xad\x73\xc2\xb5\x6c" + "\x10\x26\x38\x07\xe5\xc7\x36\x80" + "\xe2\x23\x12\x61\xf5\x48\x4b\x2b" + "\xc5\xdf\x15\xd9\x87\x01\xaa\xac" + "\x1e\x7c\xad\x73\x78\x18\x63\xe0" + "\x8b\x9f\x81\xd8\x12\x6a\x28\x10" + "\xbe\x04\x68\x8a\x09\x7c\x1b\x1c" + "\x83\x66\x80\x47\x80\xe8\xfd\x35" + "\x1c\x97\x6f\xae\x49\x10\x66\xcc" + "\xc6\xd8\xcc\x3a\x84\x91\x20\x77" + "\x72\xe4\x24\xd2\x37\x9f\xc5\xc9" + "\x25\x94\x10\x5f\x40\x00\x64\x99" + "\xdc\xae\xd7\x21\x09\x78\x50\x15" + "\xac\x5f\xc6\x2c\xa2\x0b\xa9\x39" + "\x87\x6e\x6d\xab\xde\x08\x51\x16" + "\xc7\x13\xe9\xea\xed\x06\x8e\x2c" + "\xf8\x37\x8c\xf0\xa6\x96\x8d\x43" + "\xb6\x98\x37\xb2\x43\xed\xde\xdf" + "\x89\x1a\xe7\xeb\x9d\xa1\x7b\x0b" + "\x77\xb0\xe2\x75\xc0\xf1\x98\xd9" + "\x80\x55\xc9\x34\x91\xd1\x59\xe8" + "\x4b\x0f\xc1\xa9\x4b\x7a\x84\x06" + "\x20\xa8\x5d\xfa\xd1\xde\x70\x56" + "\x2f\x9e\x91\x9c\x20\xb3\x24\xd8" + "\x84\x3d\xe1\x8c\x7e\x62\x52\xe5" + "\x44\x4b\x9f\xc2\x93\x03\xea\x2b" + "\x59\xc5\xfa\x3f\x91\x2b\xbb\x23" + "\xf5\xb2\x7b\xf5\x38\xaf\xb3\xee" + "\x63\xdc\x7b\xd1\xff\xaa\x8b\xab" + "\x82\x6b\x37\x04\xeb\x74\xbe\x79" + "\xb9\x83\x90\xef\x20\x59\x46\xff" + "\xe9\x97\x3e\x2f\xee\xb6\x64\x18" + "\x38\x4c\x7a\x4a\xf9\x61\xe8\x9a" + "\xa1\xb5\x01\xa6\x47\xd3\x11\xd4" + "\xce\xd3\x91\x49\x88\xc7\xb8\x4d" + "\xb1\xb9\x07\x6d\x16\x72\xae\x46" + "\x5e\x03\xa1\x4b\xb6\x02\x30\xa8" + "\x3d\xa9\x07\x2a\x7c\x19\xe7\x62" + "\x87\xe3\x82\x2f\x6f\xe1\x09\xd9" + "\x94\x97\xea\xdd\x58\x9e\xae\x76" + "\x7e\x35\xe5\xb4\xda\x7e\xf4\xde" + "\xf7\x32\x87\xcd\x93\xbf\x11\x56" + "\x11\xbe\x08\x74\xe1\x69\xad\xe2" + "\xd7\xf8\x86\x75\x8a\x3c\xa4\xbe" + "\x70\xa7\x1b\xfc\x0b\x44\x2a\x76" + "\x35\xea\x5d\x85\x81\xaf\x85\xeb" + "\xa0\x1c\x61\xc2\xf7\x4f\xa5\xdc" + "\x02\x7f\xf6\x95\x40\x6e\x8a\x9a" + "\xf3\x5d\x25\x6e\x14\x3a\x22\xc9" + "\x37\x1c\xeb\x46\x54\x3f\xa5\x91" + "\xc2\xb5\x8c\xfe\x53\x08\x97\x32" + "\x1b\xb2\x30\x27\xfe\x25\x5d\xdc" + "\x08\x87\xd0\xe5\x94\x1a\xd4\xf1" + "\xfe\xd6\xb4\xa3\xe6\x74\x81\x3c" + "\x1b\xb7\x31\xa7\x22\xfd\xd4\xdd" + "\x20\x4e\x7c\x51\xb0\x60\x73\xb8" + "\x9c\xac\x91\x90\x7e\x01\xb0\xe1" + "\x8a\x2f\x75\x1c\x53\x2a\x98\x2a" + "\x06\x52\x95\x52\xb2\xe9\x25\x2e" + "\x4c\xe2\x5a\x00\xb2\x13\x81\x03" + "\x77\x66\x0d\xa5\x99\xda\x4e\x8c" + "\xac\xf3\x13\x53\x27\x45\xaf\x64" + "\x46\xdc\xea\x23\xda\x97\xd1\xab" + "\x7d\x6c\x30\x96\x1f\xbc\x06\x34" + "\x18\x0b\x5e\x21\x35\x11\x8d\x4c" + "\xe0\x2d\xe9\x50\x16\x74\x81\xa8" + "\xb4\x34\xb9\x72\x42\xa6\xcc\xbc" + "\xca\x34\x83\x27\x10\x5b\x68\x45" + "\x8f\x52\x22\x0c\x55\x3d\x29\x7c" + "\xe3\xc0\x66\x05\x42\x91\x5f\x58" + "\xfe\x4a\x62\xd9\x8c\xa9\x04\x19" + "\x04\xa9\x08\x4b\x57\xfc\x67\x53" + "\x08\x7c\xbc\x66\x8a\xb0\xb6\x9f" + "\x92\xd6\x41\x7c\x5b\x2a\x00\x79" + "\x72", + .result = "\xe1\xb6\x8b\x5c\x80\xb8\xcc\x08" + "\x1b\x84\xb2\xd1\xad\xa4\x70\xac" + "\x67\xa9\x39\x27\xac\xb4\x5b\xb7" + "\x4c\x26\x77\x23\x1d\xce\x0a\xbe" + "\x18\x9e\x42\x8b\xbd\x7f\xd6\xf1" + "\xf1\x6b\xe2\x6d\x7f\x92\x0e\xcb" + "\xb8\x79\xba\xb4\xac\x7e\x2d\xc0" + "\x9e\x83\x81\x91\xd5\xea\xc3\x12" + "\x8d\xa4\x26\x70\xa4\xf9\x71\x0b" + "\xbd\x2e\xe1\xb3\x80\x42\x25\xb3" + "\x0b\x31\x99\xe1\x0d\xde\xa6\x90" + "\xf2\xa3\x10\xf7\xe5\xf3\x83\x1e" + "\x2c\xfb\x4d\xf0\x45\x3d\x28\x3c" + "\xb8\xf1\xcb\xbf\x67\xd8\x43\x5a" + "\x9d\x7b\x73\x29\x88\x0f\x13\x06" + "\x37\x50\x0d\x7c\xe6\x9b\x07\xdd" + "\x7e\x01\x1f\x81\x90\x10\x69\xdb" + "\xa4\xad\x8a\x5e\xac\x30\x72\xf2" + "\x36\xcd\xe3\x23\x49\x02\x93\xfa" + "\x3d\xbb\xe2\x98\x83\xeb\xe9\x8d" + "\xb3\x8f\x11\xaa\x53\xdb\xaf\x2e" + "\x95\x13\x99\x3d\x71\xbd\x32\x92" + "\xdd\xfc\x9d\x5e\x6f\x63\x2c\xee" + "\x91\x1f\x4c\x64\x3d\x87\x55\x0f" + "\xcc\x3d\x89\x61\x53\x02\x57\x8f" + "\xe4\x77\x29\x32\xaf\xa6\x2f\x0a" + "\xae\x3c\x3f\x3f\xf4\xfb\x65\x52" + "\xc5\xc1\x78\x78\x53\x28\xad\xed" + "\xd1\x67\x37\xc7\x59\x70\xcd\x0a" + "\xb8\x0f\x80\x51\x9f\xc0\x12\x5e" + "\x06\x0a\x7e\xec\x24\x5f\x73\x00" + "\xb1\x0b\x31\x47\x4f\x73\x8d\xb4" + "\xce\xf3\x55\x45\x6c\x84\x27\xba" + "\xb9\x6f\x03\x4a\xeb\x98\x88\x6e" + "\x53\xed\x25\x19\x0d\x8f\xfe\xca" + "\x60\xe5\x00\x93\x6e\x3c\xff\x19" + "\xae\x08\x3b\x8a\xa6\x84\x05\xfe" + "\x9b\x59\xa0\x8c\xc8\x05\x45\xf5" + "\x05\x37\xdc\x45\x6f\x8b\x95\x8c" + "\x4e\x11\x45\x7a\xce\x21\xa5\xf7" + "\x71\x67\xb9\xce\xd7\xf9\xe9\x5e" + "\x60\xf5\x53\x7a\xa8\x85\x14\x03" + "\xa0\x92\xec\xf3\x51\x80\x84\xc4" + "\xdc\x11\x9e\x57\xce\x4b\x45\xcf" + "\x90\x95\x85\x0b\x96\xe9\xee\x35" + "\x10\xb8\x9b\xf2\x59\x4a\xc6\x7e" + "\x85\xe5\x6f\x38\x51\x93\x40\x0c" + "\x99\xd7\x7f\x32\xa8\x06\x27\xd1" + "\x2b\xd5\xb5\x3a\x1a\xe1\x5e\xda" + "\xcd\x5a\x50\x30\x3c\xc7\xe7\x65" + "\xa6\x07\x0b\x98\x91\xc6\x20\x27" + "\x2a\x03\x63\x1b\x1e\x3d\xaf\xc8" + "\x71\x48\x46\x6a\x64\x28\xf9\x3d" + "\xd1\x1d\xab\xc8\x40\x76\xc2\x39" + "\x4e\x00\x75\xd2\x0e\x82\x58\x8c" + "\xd3\x73\x5a\xea\x46\x89\xbe\xfd" + "\x4e\x2c\x0d\x94\xaa\x9b\x68\xac" + "\x86\x87\x30\x7e\xa9\x16\xcd\x59" + "\xd2\xa6\xbe\x0a\xd8\xf5\xfd\x2d" + "\x49\x69\xd2\x1a\x90\xd2\x1b\xed" + "\xff\x71\x04\x87\x87\x21\xc4\xb8" + "\x1f\x5b\x51\x33\xd0\xd6\x59\x9a" + "\x03\x0e\xd3\x8b\xfb\x57\x73\xfd" + "\x5a\x52\x63\x82\xc8\x85\x2f\xcb" + "\x74\x6d\x4e\xd9\x68\x37\x85\x6a" + "\xd4\xfb\x94\xed\x8d\xd1\x1a\xaf" + "\x76\xa7\xb7\x88\xd0\x2b\x4e\xda" + "\xec\x99\x94\x27\x6f\x87\x8c\xdf" + "\x4b\x5e\xa6\x66\xdd\xcb\x33\x7b" + "\x64\x94\x31\xa8\x37\xa6\x1d\xdb" + "\x0d\x5c\x93\xa4\x40\xf9\x30\x53" + "\x4b\x74\x8d\xdd\xf6\xde\x3c\xac" + "\x5c\x80\x01\x3a\xef\xb1\x9a\x02" + "\x0c\x22\x8e\xe7\x44\x09\x74\x4c" + "\xf2\x9a\x27\x69\x7f\x12\x32\x36" + "\xde\x92\xdf\xde\x8f\x5b\x31\xab" + "\x4a\x01\x26\xe0\xb1\xda\xe8\x37" + "\x21\x64\xe8\xff\x69\xfc\x9e\x41" + "\xd2\x96\x2d\x18\x64\x98\x33\x78" + "\x24\x61\x73\x9b\x47\x29\xf1\xa7" + "\xcb\x27\x0f\xf0\x85\x6d\x8c\x9d" + "\x2c\x95\x9e\xe5\xb2\x8e\x30\x29" + "\x78\x8a\x9d\x65\xb4\x8e\xde\x7b" + "\xd9\x00\x50\xf5\x7f\x81\xc3\x1b" + "\x25\x85\xeb\xc2\x8c\x33\x22\x1e" + "\x68\x38\x22\x30\xd8\x2e\x00\x98" + "\x85\x16\x06\x56\xb4\x81\x74\x20" + "\x95\xdb\x1c\x05\x19\xe8\x23\x4d" + "\x65\x5d\xcc\xd8\x7f\xc4\x2d\x0f" + "\x57\x26\x71\x07\xad\xaa\x71\x9f" + "\x19\x76\x2f\x25\x51\x88\xe4\xc0" + "\x82\x6e\x08\x05\x37\x04\xee\x25" + "\x23\x90\xe9\x4e\xce\x9b\x16\xc1" + "\x31\xe7\x6e\x2c\x1b\xe1\x85\x9a" + "\x0c\x8c\xbb\x12\x1e\x68\x7b\x93" + "\xa9\x3c\x39\x56\x23\x3e\x6e\xc7" + "\x77\x84\xd3\xe0\x86\x59\xaa\xb9" + "\xd5\x53\x58\xc9\x0a\x83\x5f\x85" + "\xd8\x47\x14\x67\x8a\x3c\x17\xe0" + "\xab\x02\x51\xea\xf1\xf0\x4f\x30" + "\x7d\xe0\x92\xc2\x5f\xfb\x19\x5a" + "\x3f\xbd\xf4\x39\xa4\x31\x0c\x39" + "\xd1\xae\x4e\xf7\x65\x7f\x1f\xce" + "\xc2\x39\xd1\x84\xd4\xe5\x02\xe0" + "\x58\xaa\xf1\x5e\x81\xaf\x7f\x72" + "\x0f\x08\x99\x43\xb9\xd8\xac\x41" + "\x35\x55\xf2\xb2\xd4\x98\xb8\x3b" + "\x2b\x3c\x3e\x16\x06\x31\xfc\x79" + "\x47\x38\x63\x51\xc5\xd0\x26\xd7" + "\x43\xb4\x2b\xd9\xc5\x05\xf2\x9d" + "\x18\xc9\x26\x82\x56\xd2\x11\x05" + "\xb6\x89\xb4\x43\x9c\xb5\x9d\x11" + "\x6c\x83\x37\x71\x27\x1c\xae\xbf" + "\xcd\x57\xd2\xee\x0d\x5a\x15\x26" + "\x67\x88\x80\x80\x1b\xdc\xc1\x62" + "\xdd\x4c\xff\x92\x5c\x6c\xe1\xa0" + "\xe3\x79\xa9\x65\x8c\x8c\x14\x42" + "\xe5\x11\xd2\x1a\xad\xa9\x56\x6f" + "\x98\xfc\x8a\x7b\x56\x1f\xc6\xc1" + "\x52\x12\x92\x9b\x41\x0f\x4b\xae" + "\x1b\x4a\xbc\xfe\x23\xb6\x94\x70" + "\x04\x30\x9e\x69\x47\xbe\xb8\x8f" + "\xca\x45\xd7\x8a\xf4\x78\x3e\xaa" + "\x71\x17\xd8\x1e\xb8\x11\x8f\xbc" + "\xc8\x1a\x65\x7b\x41\x89\x72\xc7" + "\x5f\xbe\xc5\x2a\xdb\x5c\x54\xf9" + "\x25\xa3\x7a\x80\x56\x9c\x8c\xab" + "\x26\x19\x10\x36\xa6\xf3\x14\x79" + "\x40\x98\x70\x68\xb7\x35\xd9\xb9" + "\x27\xd4\xe7\x74\x5b\x3d\x97\xb4" + "\xd9\xaa\xd9\xf2\xb5\x14\x84\x1f" + "\xa9\xde\x12\x44\x5b\x00\xc0\xbc" + "\xc8\x11\x25\x1b\x67\x7a\x15\x72" + "\xa6\x31\x6f\xf4\x68\x7a\x86\x9d" + "\x43\x1c\x5f\x16\xd3\xad\x2e\x52" + "\xf3\xb4\xc3\xfa\x27\x2e\x68\x6c" + "\x06\xe7\x4c\x4f\xa2\xe0\xe4\x21" + "\x5d\x9e\x33\x58\x8d\xbf\xd5\x70" + "\xf8\x80\xa5\xdd\xe7\x18\x79\xfa" + "\x7b\xfd\x09\x69\x2c\x37\x32\xa8" + "\x65\xfa\x8d\x8b\x5c\xcc\xe8\xf3" + "\x37\xf6\xa6\xc6\x5c\xa2\x66\x79" + "\xfa\x8a\xa7\xd1\x0b\x2e\x1b\x5e" + "\x95\x35\x00\x76\xae\x42\xf7\x50" + "\x51\x78\xfb\xb4\x28\x24\xde\x1a" + "\x70\x8b\xed\xca\x3c\x5e\xe4\xbd" + "\x28\xb5\xf3\x76\x4f\x67\x5d\x81" + "\xb2\x60\x87\xd9\x7b\x19\x1a\xa7" + "\x79\xa2\xfa\x3f\x9e\xa9\xd7\x25" + "\x61\xe1\x74\x31\xa2\x77\xa0\x1b" + "\xf6\xf7\xcb\xc5\xaa\x9e\xce\xf9" + "\x9b\x96\xef\x51\xc3\x1a\x44\x96" + "\xae\x17\x50\xab\x29\x08\xda\xcc" + "\x1a\xb3\x12\xd0\x24\xe4\xe2\xe0" + "\xc6\xe3\xcc\x82\xd0\xba\x47\x4c" + "\x3f\x49\xd7\xe8\xb6\x61\xaa\x65" + "\x25\x18\x40\x2d\x62\x25\x02\x71" + "\x61\xa2\xc1\xb2\x13\xd2\x71\x3f" + "\x43\x1a\xc9\x09\x92\xff\xd5\x57" + "\xf0\xfc\x5e\x1c\xf1\xf5\xf9\xf3" + "\x5b", + .ilen = 1281, + .rlen = 1281, + .also_non_np = 1, + .np = 3, + .tap = { 1200, 1, 80 }, + }, +}; + /* * CTS (Cipher Text Stealing) mode tests */ diff --git a/include/crypto/chacha.h b/include/crypto/chacha.h index ae79e9983c72f..3d261f5cd156d 100644 --- a/include/crypto/chacha.h +++ b/include/crypto/chacha.h @@ -5,6 +5,11 @@ * XChaCha extends ChaCha's nonce to 192 bits, while provably retaining ChaCha's * security. Here they share the same key size, tfm context, and setkey * function; only their IV size and encrypt/decrypt function differ. + * + * The ChaCha paper specifies 20, 12, and 8-round variants. In general, it is + * recommended to use the 20-round variant ChaCha20. However, the other + * variants can be needed in some performance-sensitive scenarios. The generic + * ChaCha code currently allows only the 20 and 12-round variants. */ #ifndef _CRYPTO_CHACHA_H @@ -39,6 +44,8 @@ void crypto_chacha_init(u32 *state, struct chacha_ctx *ctx, u8 *iv); int crypto_chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key, unsigned int keysize); +int crypto_chacha12_setkey(struct crypto_skcipher *tfm, const u8 *key, + unsigned int keysize); int crypto_chacha_crypt(struct skcipher_request *req); int crypto_xchacha_crypt(struct skcipher_request *req); diff --git a/lib/chacha.c b/lib/chacha.c index 1bdc688c18df6..a46d2832dbabd 100644 --- a/lib/chacha.c +++ b/lib/chacha.c @@ -21,7 +21,7 @@ static void chacha_permute(u32 *x, int nrounds) int i; /* whitelist the allowed round counts */ - WARN_ON_ONCE(nrounds != 20); + WARN_ON_ONCE(nrounds != 20 && nrounds != 12); for (i = 0; i < nrounds; i += 2) { x[0] += x[4]; x[12] = rol32(x[12] ^ x[0], 16); @@ -70,7 +70,7 @@ static void chacha_permute(u32 *x, int nrounds) * chacha_block - generate one keystream block and increment block counter * @state: input state matrix (16 32-bit words) * @stream: output keystream block (64 bytes) - * @nrounds: number of rounds (currently must be 20) + * @nrounds: number of rounds (20 or 12; 20 is recommended) * * This is the ChaCha core, a function from 64-byte strings to 64-byte strings. * The caller has already converted the endianness of the input. This function @@ -96,7 +96,7 @@ EXPORT_SYMBOL(chacha_block); * hchacha_block - abbreviated ChaCha core, for XChaCha * @in: input state matrix (16 32-bit words) * @out: output (8 32-bit words) - * @nrounds: number of rounds (currently must be 20) + * @nrounds: number of rounds (20 or 12; 20 is recommended) * * HChaCha is the ChaCha equivalent of HSalsa and is an intermediate step * towards XChaCha (see https://cr.yp.to/snuffle/xsalsa-20081128.pdf). HChaCha -- GitLab From 1fe9e8ba3f7b9bb062dd284b325839a427fb88a1 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 16 Nov 2018 17:26:23 -0800 Subject: [PATCH 1956/2269] FROMGIT: crypto: arm/chacha20 - limit the preemption-disabled section To improve responsivesess, disable preemption for each step of the walk (which is at most PAGE_SIZE) rather than for the entire encryption/decryption operation. Suggested-by: Ard Biesheuvel Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu (cherry picked from commit be2830b15b60011845ad701076511e8b93b2fd76 https://git.kernel.org/pub/scm/linux/kernel/git/herbert/cryptodev-2.6.git master) Bug: 112008522 Test: As series, see Ic61c13b53facfd2173065be715a7ee5f3af8760b Change-Id: I21bfa9c14635e695b128c87df53fea505c3cdd4e Signed-off-by: Eric Biggers --- arch/arm/crypto/chacha20-neon-glue.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm/crypto/chacha20-neon-glue.c b/arch/arm/crypto/chacha20-neon-glue.c index 7386eb1c1889d..2bc035cb8f23a 100644 --- a/arch/arm/crypto/chacha20-neon-glue.c +++ b/arch/arm/crypto/chacha20-neon-glue.c @@ -68,22 +68,22 @@ static int chacha20_neon(struct skcipher_request *req) if (req->cryptlen <= CHACHA_BLOCK_SIZE || !may_use_simd()) return crypto_chacha_crypt(req); - err = skcipher_walk_virt(&walk, req, true); + err = skcipher_walk_virt(&walk, req, false); crypto_chacha_init(state, ctx, walk.iv); - kernel_neon_begin(); while (walk.nbytes > 0) { unsigned int nbytes = walk.nbytes; if (nbytes < walk.total) nbytes = round_down(nbytes, walk.stride); + kernel_neon_begin(); chacha20_doneon(state, walk.dst.virt.addr, walk.src.virt.addr, nbytes); + kernel_neon_end(); err = skcipher_walk_done(&walk, walk.nbytes - nbytes); } - kernel_neon_end(); return err; } -- GitLab From 3d83a028bdbc9586041cb31590f1363b3189923a Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 16 Nov 2018 17:26:24 -0800 Subject: [PATCH 1957/2269] FROMGIT: crypto: arm/chacha20 - add XChaCha20 support Add an XChaCha20 implementation that is hooked up to the ARM NEON implementation of ChaCha20. This is needed for use in the Adiantum encryption mode; see the generic code patch, "crypto: chacha20-generic - add XChaCha20 support", for more details. We also update the NEON code to support HChaCha20 on one block, so we can use that in XChaCha20 rather than calling the generic HChaCha20. This required factoring the permutation out into its own macro. Reviewed-by: Ard Biesheuvel Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu (cherry picked from commit d97a94309d764ed907d4281da6246f5d935166f8 https://git.kernel.org/pub/scm/linux/kernel/git/herbert/cryptodev-2.6.git master) Bug: 112008522 Test: As series, see Ic61c13b53facfd2173065be715a7ee5f3af8760b Change-Id: I84c3a019e22598f8f8eb25e7a0fefbc79c9660c9 Signed-off-by: Eric Biggers --- arch/arm/crypto/Kconfig | 2 +- arch/arm/crypto/chacha20-neon-core.S | 70 ++++++++++++------ arch/arm/crypto/chacha20-neon-glue.c | 103 ++++++++++++++++++++------- 3 files changed, 126 insertions(+), 49 deletions(-) diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig index 13f7a5c1fe3f9..686eae8c93555 100644 --- a/arch/arm/crypto/Kconfig +++ b/arch/arm/crypto/Kconfig @@ -125,7 +125,7 @@ config CRYPTO_CRC32_ARM_CE select CRYPTO_HASH config CRYPTO_CHACHA20_NEON - tristate "NEON accelerated ChaCha20 symmetric cipher" + tristate "NEON accelerated ChaCha20 stream cipher algorithms" depends on KERNEL_MODE_NEON select CRYPTO_BLKCIPHER select CRYPTO_CHACHA20 diff --git a/arch/arm/crypto/chacha20-neon-core.S b/arch/arm/crypto/chacha20-neon-core.S index 50e7b98968189..2335e5055d2b1 100644 --- a/arch/arm/crypto/chacha20-neon-core.S +++ b/arch/arm/crypto/chacha20-neon-core.S @@ -52,27 +52,16 @@ .fpu neon .align 5 -ENTRY(chacha20_block_xor_neon) - // r0: Input state matrix, s - // r1: 1 data block output, o - // r2: 1 data block input, i - - // - // This function encrypts one ChaCha20 block by loading the state matrix - // in four NEON registers. It performs matrix operation on four words in - // parallel, but requireds shuffling to rearrange the words after each - // round. - // - - // x0..3 = s0..3 - add ip, r0, #0x20 - vld1.32 {q0-q1}, [r0] - vld1.32 {q2-q3}, [ip] - - vmov q8, q0 - vmov q9, q1 - vmov q10, q2 - vmov q11, q3 +/* + * chacha20_permute - permute one block + * + * Permute one 64-byte block where the state matrix is stored in the four NEON + * registers q0-q3. It performs matrix operations on four words in parallel, + * but requires shuffling to rearrange the words after each round. + * + * Clobbers: r3, ip, q4-q5 + */ +chacha20_permute: adr ip, .Lrol8_table mov r3, #10 @@ -142,6 +131,27 @@ ENTRY(chacha20_block_xor_neon) subs r3, r3, #1 bne .Ldoubleround + bx lr +ENDPROC(chacha20_permute) + +ENTRY(chacha20_block_xor_neon) + // r0: Input state matrix, s + // r1: 1 data block output, o + // r2: 1 data block input, i + push {lr} + + // x0..3 = s0..3 + add ip, r0, #0x20 + vld1.32 {q0-q1}, [r0] + vld1.32 {q2-q3}, [ip] + + vmov q8, q0 + vmov q9, q1 + vmov q10, q2 + vmov q11, q3 + + bl chacha20_permute + add ip, r2, #0x20 vld1.8 {q4-q5}, [r2] vld1.8 {q6-q7}, [ip] @@ -166,9 +176,25 @@ ENTRY(chacha20_block_xor_neon) vst1.8 {q0-q1}, [r1] vst1.8 {q2-q3}, [ip] - bx lr + pop {pc} ENDPROC(chacha20_block_xor_neon) +ENTRY(hchacha20_block_neon) + // r0: Input state matrix, s + // r1: output (8 32-bit words) + push {lr} + + vld1.32 {q0-q1}, [r0]! + vld1.32 {q2-q3}, [r0] + + bl chacha20_permute + + vst1.32 {q0}, [r1]! + vst1.32 {q3}, [r1] + + pop {pc} +ENDPROC(hchacha20_block_neon) + .align 4 .Lctrinc: .word 0, 1, 2, 3 .Lrol8_table: .byte 3, 0, 1, 2, 7, 4, 5, 6 diff --git a/arch/arm/crypto/chacha20-neon-glue.c b/arch/arm/crypto/chacha20-neon-glue.c index 2bc035cb8f23a..f2d3b0f70a8de 100644 --- a/arch/arm/crypto/chacha20-neon-glue.c +++ b/arch/arm/crypto/chacha20-neon-glue.c @@ -1,5 +1,5 @@ /* - * ChaCha20 256-bit cipher algorithm, RFC7539, ARM NEON functions + * ChaCha20 (RFC7539) and XChaCha20 stream ciphers, NEON accelerated * * Copyright (C) 2016 Linaro, Ltd. * @@ -30,6 +30,7 @@ asmlinkage void chacha20_block_xor_neon(u32 *state, u8 *dst, const u8 *src); asmlinkage void chacha20_4block_xor_neon(u32 *state, u8 *dst, const u8 *src); +asmlinkage void hchacha20_block_neon(const u32 *state, u32 *out); static void chacha20_doneon(u32 *state, u8 *dst, const u8 *src, unsigned int bytes) @@ -57,20 +58,16 @@ static void chacha20_doneon(u32 *state, u8 *dst, const u8 *src, } } -static int chacha20_neon(struct skcipher_request *req) +static int chacha20_neon_stream_xor(struct skcipher_request *req, + struct chacha_ctx *ctx, u8 *iv) { - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); struct skcipher_walk walk; u32 state[16]; int err; - if (req->cryptlen <= CHACHA_BLOCK_SIZE || !may_use_simd()) - return crypto_chacha_crypt(req); - err = skcipher_walk_virt(&walk, req, false); - crypto_chacha_init(state, ctx, walk.iv); + crypto_chacha_init(state, ctx, iv); while (walk.nbytes > 0) { unsigned int nbytes = walk.nbytes; @@ -88,22 +85,73 @@ static int chacha20_neon(struct skcipher_request *req) return err; } -static struct skcipher_alg alg = { - .base.cra_name = "chacha20", - .base.cra_driver_name = "chacha20-neon", - .base.cra_priority = 300, - .base.cra_blocksize = 1, - .base.cra_ctxsize = sizeof(struct chacha_ctx), - .base.cra_module = THIS_MODULE, - - .min_keysize = CHACHA_KEY_SIZE, - .max_keysize = CHACHA_KEY_SIZE, - .ivsize = CHACHA_IV_SIZE, - .chunksize = CHACHA_BLOCK_SIZE, - .walksize = 4 * CHACHA_BLOCK_SIZE, - .setkey = crypto_chacha20_setkey, - .encrypt = chacha20_neon, - .decrypt = chacha20_neon, +static int chacha20_neon(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); + + if (req->cryptlen <= CHACHA_BLOCK_SIZE || !may_use_simd()) + return crypto_chacha_crypt(req); + + return chacha20_neon_stream_xor(req, ctx, req->iv); +} + +static int xchacha20_neon(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); + struct chacha_ctx subctx; + u32 state[16]; + u8 real_iv[16]; + + if (req->cryptlen <= CHACHA_BLOCK_SIZE || !may_use_simd()) + return crypto_xchacha_crypt(req); + + crypto_chacha_init(state, ctx, req->iv); + + kernel_neon_begin(); + hchacha20_block_neon(state, subctx.key); + kernel_neon_end(); + + memcpy(&real_iv[0], req->iv + 24, 8); + memcpy(&real_iv[8], req->iv + 16, 8); + return chacha20_neon_stream_xor(req, &subctx, real_iv); +} + +static struct skcipher_alg algs[] = { + { + .base.cra_name = "chacha20", + .base.cra_driver_name = "chacha20-neon", + .base.cra_priority = 300, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct chacha_ctx), + .base.cra_module = THIS_MODULE, + + .min_keysize = CHACHA_KEY_SIZE, + .max_keysize = CHACHA_KEY_SIZE, + .ivsize = CHACHA_IV_SIZE, + .chunksize = CHACHA_BLOCK_SIZE, + .walksize = 4 * CHACHA_BLOCK_SIZE, + .setkey = crypto_chacha20_setkey, + .encrypt = chacha20_neon, + .decrypt = chacha20_neon, + }, { + .base.cra_name = "xchacha20", + .base.cra_driver_name = "xchacha20-neon", + .base.cra_priority = 300, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct chacha_ctx), + .base.cra_module = THIS_MODULE, + + .min_keysize = CHACHA_KEY_SIZE, + .max_keysize = CHACHA_KEY_SIZE, + .ivsize = XCHACHA_IV_SIZE, + .chunksize = CHACHA_BLOCK_SIZE, + .walksize = 4 * CHACHA_BLOCK_SIZE, + .setkey = crypto_chacha20_setkey, + .encrypt = xchacha20_neon, + .decrypt = xchacha20_neon, + } }; static int __init chacha20_simd_mod_init(void) @@ -111,12 +159,12 @@ static int __init chacha20_simd_mod_init(void) if (!(elf_hwcap & HWCAP_NEON)) return -ENODEV; - return crypto_register_skcipher(&alg); + return crypto_register_skciphers(algs, ARRAY_SIZE(algs)); } static void __exit chacha20_simd_mod_fini(void) { - crypto_unregister_skcipher(&alg); + crypto_unregister_skciphers(algs, ARRAY_SIZE(algs)); } module_init(chacha20_simd_mod_init); @@ -125,3 +173,6 @@ module_exit(chacha20_simd_mod_fini); MODULE_AUTHOR("Ard Biesheuvel "); MODULE_LICENSE("GPL v2"); MODULE_ALIAS_CRYPTO("chacha20"); +MODULE_ALIAS_CRYPTO("chacha20-neon"); +MODULE_ALIAS_CRYPTO("xchacha20"); +MODULE_ALIAS_CRYPTO("xchacha20-neon"); -- GitLab From 25b77ab33627c3293634584203c65ad5fe861e2a Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 16 Nov 2018 17:26:25 -0800 Subject: [PATCH 1958/2269] FROMGIT: crypto: arm/chacha20 - refactor to allow varying number of rounds In preparation for adding XChaCha12 support, rename/refactor the NEON implementation of ChaCha20 to support different numbers of rounds. Reviewed-by: Ard Biesheuvel Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu (cherry picked from commit 3cc215198eac75cc4130729ddd94a5cdbdb4d300 https://git.kernel.org/pub/scm/linux/kernel/git/herbert/cryptodev-2.6.git master) Bug: 112008522 Test: As series, see Ic61c13b53facfd2173065be715a7ee5f3af8760b Change-Id: Iba4b7ea5ecee72c28b7e4188df438335d0d5b9aa Signed-off-by: Eric Biggers --- arch/arm/crypto/Makefile | 4 +- ...hacha20-neon-core.S => chacha-neon-core.S} | 44 ++++++++------- ...hacha20-neon-glue.c => chacha-neon-glue.c} | 56 ++++++++++--------- 3 files changed, 56 insertions(+), 48 deletions(-) rename arch/arm/crypto/{chacha20-neon-core.S => chacha-neon-core.S} (94%) rename arch/arm/crypto/{chacha20-neon-glue.c => chacha-neon-glue.c} (72%) diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile index c9919c2b7ad11..85f7940bfb990 100644 --- a/arch/arm/crypto/Makefile +++ b/arch/arm/crypto/Makefile @@ -9,7 +9,7 @@ obj-$(CONFIG_CRYPTO_SHA1_ARM) += sha1-arm.o obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sha1-arm-neon.o obj-$(CONFIG_CRYPTO_SHA256_ARM) += sha256-arm.o obj-$(CONFIG_CRYPTO_SHA512_ARM) += sha512-arm.o -obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha20-neon.o +obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha-neon.o ce-obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o ce-obj-$(CONFIG_CRYPTO_SHA1_ARM_CE) += sha1-arm-ce.o @@ -52,7 +52,7 @@ aes-arm-ce-y := aes-ce-core.o aes-ce-glue.o ghash-arm-ce-y := ghash-ce-core.o ghash-ce-glue.o crct10dif-arm-ce-y := crct10dif-ce-core.o crct10dif-ce-glue.o crc32-arm-ce-y:= crc32-ce-core.o crc32-ce-glue.o -chacha20-neon-y := chacha20-neon-core.o chacha20-neon-glue.o +chacha-neon-y := chacha-neon-core.o chacha-neon-glue.o ifdef REGENERATE_ARM_CRYPTO quiet_cmd_perl = PERL $@ diff --git a/arch/arm/crypto/chacha20-neon-core.S b/arch/arm/crypto/chacha-neon-core.S similarity index 94% rename from arch/arm/crypto/chacha20-neon-core.S rename to arch/arm/crypto/chacha-neon-core.S index 2335e5055d2b1..eb22926d49127 100644 --- a/arch/arm/crypto/chacha20-neon-core.S +++ b/arch/arm/crypto/chacha-neon-core.S @@ -1,5 +1,5 @@ /* - * ChaCha20 256-bit cipher algorithm, RFC7539, ARM NEON functions + * ChaCha/XChaCha NEON helper functions * * Copyright (C) 2016 Linaro, Ltd. * @@ -27,9 +27,9 @@ * (d) vtbl.8 + vtbl.8 (multiple of 8 bits rotations only, * needs index vector) * - * ChaCha20 has 16, 12, 8, and 7-bit rotations. For the 12 and 7-bit - * rotations, the only choices are (a) and (b). We use (a) since it takes - * two-thirds the cycles of (b) on both Cortex-A7 and Cortex-A53. + * ChaCha has 16, 12, 8, and 7-bit rotations. For the 12 and 7-bit rotations, + * the only choices are (a) and (b). We use (a) since it takes two-thirds the + * cycles of (b) on both Cortex-A7 and Cortex-A53. * * For the 16-bit rotation, we use vrev32.16 since it's consistently fastest * and doesn't need a temporary register. @@ -53,18 +53,19 @@ .align 5 /* - * chacha20_permute - permute one block + * chacha_permute - permute one block * * Permute one 64-byte block where the state matrix is stored in the four NEON * registers q0-q3. It performs matrix operations on four words in parallel, * but requires shuffling to rearrange the words after each round. * + * The round count is given in r3. + * * Clobbers: r3, ip, q4-q5 */ -chacha20_permute: +chacha_permute: adr ip, .Lrol8_table - mov r3, #10 vld1.8 {d10}, [ip, :64] .Ldoubleround: @@ -128,16 +129,17 @@ chacha20_permute: // x3 = shuffle32(x3, MASK(0, 3, 2, 1)) vext.8 q3, q3, q3, #4 - subs r3, r3, #1 + subs r3, r3, #2 bne .Ldoubleround bx lr -ENDPROC(chacha20_permute) +ENDPROC(chacha_permute) -ENTRY(chacha20_block_xor_neon) +ENTRY(chacha_block_xor_neon) // r0: Input state matrix, s // r1: 1 data block output, o // r2: 1 data block input, i + // r3: nrounds push {lr} // x0..3 = s0..3 @@ -150,7 +152,7 @@ ENTRY(chacha20_block_xor_neon) vmov q10, q2 vmov q11, q3 - bl chacha20_permute + bl chacha_permute add ip, r2, #0x20 vld1.8 {q4-q5}, [r2] @@ -177,30 +179,32 @@ ENTRY(chacha20_block_xor_neon) vst1.8 {q2-q3}, [ip] pop {pc} -ENDPROC(chacha20_block_xor_neon) +ENDPROC(chacha_block_xor_neon) -ENTRY(hchacha20_block_neon) +ENTRY(hchacha_block_neon) // r0: Input state matrix, s // r1: output (8 32-bit words) + // r2: nrounds push {lr} vld1.32 {q0-q1}, [r0]! vld1.32 {q2-q3}, [r0] - bl chacha20_permute + mov r3, r2 + bl chacha_permute vst1.32 {q0}, [r1]! vst1.32 {q3}, [r1] pop {pc} -ENDPROC(hchacha20_block_neon) +ENDPROC(hchacha_block_neon) .align 4 .Lctrinc: .word 0, 1, 2, 3 .Lrol8_table: .byte 3, 0, 1, 2, 7, 4, 5, 6 .align 5 -ENTRY(chacha20_4block_xor_neon) +ENTRY(chacha_4block_xor_neon) push {r4-r5} mov r4, sp // preserve the stack pointer sub ip, sp, #0x20 // allocate a 32 byte buffer @@ -210,9 +214,10 @@ ENTRY(chacha20_4block_xor_neon) // r0: Input state matrix, s // r1: 4 data blocks output, o // r2: 4 data blocks input, i + // r3: nrounds // - // This function encrypts four consecutive ChaCha20 blocks by loading + // This function encrypts four consecutive ChaCha blocks by loading // the state matrix in NEON registers four times. The algorithm performs // each operation on the corresponding word of each state matrix, hence // requires no word shuffling. The words are re-interleaved before the @@ -245,7 +250,6 @@ ENTRY(chacha20_4block_xor_neon) vdup.32 q0, d0[0] adr ip, .Lrol8_table - mov r3, #10 b 1f .Ldoubleround4: @@ -443,7 +447,7 @@ ENTRY(chacha20_4block_xor_neon) vsri.u32 q5, q8, #25 vsri.u32 q6, q9, #25 - subs r3, r3, #1 + subs r3, r3, #2 bne .Ldoubleround4 // x0..7[0-3] are in q0-q7, x10..15[0-3] are in q10-q15. @@ -553,4 +557,4 @@ ENTRY(chacha20_4block_xor_neon) pop {r4-r5} bx lr -ENDPROC(chacha20_4block_xor_neon) +ENDPROC(chacha_4block_xor_neon) diff --git a/arch/arm/crypto/chacha20-neon-glue.c b/arch/arm/crypto/chacha-neon-glue.c similarity index 72% rename from arch/arm/crypto/chacha20-neon-glue.c rename to arch/arm/crypto/chacha-neon-glue.c index f2d3b0f70a8de..385557d386341 100644 --- a/arch/arm/crypto/chacha20-neon-glue.c +++ b/arch/arm/crypto/chacha-neon-glue.c @@ -28,24 +28,26 @@ #include #include -asmlinkage void chacha20_block_xor_neon(u32 *state, u8 *dst, const u8 *src); -asmlinkage void chacha20_4block_xor_neon(u32 *state, u8 *dst, const u8 *src); -asmlinkage void hchacha20_block_neon(const u32 *state, u32 *out); - -static void chacha20_doneon(u32 *state, u8 *dst, const u8 *src, - unsigned int bytes) +asmlinkage void chacha_block_xor_neon(const u32 *state, u8 *dst, const u8 *src, + int nrounds); +asmlinkage void chacha_4block_xor_neon(const u32 *state, u8 *dst, const u8 *src, + int nrounds); +asmlinkage void hchacha_block_neon(const u32 *state, u32 *out, int nrounds); + +static void chacha_doneon(u32 *state, u8 *dst, const u8 *src, + unsigned int bytes, int nrounds) { u8 buf[CHACHA_BLOCK_SIZE]; while (bytes >= CHACHA_BLOCK_SIZE * 4) { - chacha20_4block_xor_neon(state, dst, src); + chacha_4block_xor_neon(state, dst, src, nrounds); bytes -= CHACHA_BLOCK_SIZE * 4; src += CHACHA_BLOCK_SIZE * 4; dst += CHACHA_BLOCK_SIZE * 4; state[12] += 4; } while (bytes >= CHACHA_BLOCK_SIZE) { - chacha20_block_xor_neon(state, dst, src); + chacha_block_xor_neon(state, dst, src, nrounds); bytes -= CHACHA_BLOCK_SIZE; src += CHACHA_BLOCK_SIZE; dst += CHACHA_BLOCK_SIZE; @@ -53,13 +55,13 @@ static void chacha20_doneon(u32 *state, u8 *dst, const u8 *src, } if (bytes) { memcpy(buf, src, bytes); - chacha20_block_xor_neon(state, buf, buf); + chacha_block_xor_neon(state, buf, buf, nrounds); memcpy(dst, buf, bytes); } } -static int chacha20_neon_stream_xor(struct skcipher_request *req, - struct chacha_ctx *ctx, u8 *iv) +static int chacha_neon_stream_xor(struct skcipher_request *req, + struct chacha_ctx *ctx, u8 *iv) { struct skcipher_walk walk; u32 state[16]; @@ -76,8 +78,8 @@ static int chacha20_neon_stream_xor(struct skcipher_request *req, nbytes = round_down(nbytes, walk.stride); kernel_neon_begin(); - chacha20_doneon(state, walk.dst.virt.addr, walk.src.virt.addr, - nbytes); + chacha_doneon(state, walk.dst.virt.addr, walk.src.virt.addr, + nbytes, ctx->nrounds); kernel_neon_end(); err = skcipher_walk_done(&walk, walk.nbytes - nbytes); } @@ -85,7 +87,7 @@ static int chacha20_neon_stream_xor(struct skcipher_request *req, return err; } -static int chacha20_neon(struct skcipher_request *req) +static int chacha_neon(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); @@ -93,10 +95,10 @@ static int chacha20_neon(struct skcipher_request *req) if (req->cryptlen <= CHACHA_BLOCK_SIZE || !may_use_simd()) return crypto_chacha_crypt(req); - return chacha20_neon_stream_xor(req, ctx, req->iv); + return chacha_neon_stream_xor(req, ctx, req->iv); } -static int xchacha20_neon(struct skcipher_request *req) +static int xchacha_neon(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); @@ -110,12 +112,13 @@ static int xchacha20_neon(struct skcipher_request *req) crypto_chacha_init(state, ctx, req->iv); kernel_neon_begin(); - hchacha20_block_neon(state, subctx.key); + hchacha_block_neon(state, subctx.key, ctx->nrounds); kernel_neon_end(); + subctx.nrounds = ctx->nrounds; memcpy(&real_iv[0], req->iv + 24, 8); memcpy(&real_iv[8], req->iv + 16, 8); - return chacha20_neon_stream_xor(req, &subctx, real_iv); + return chacha_neon_stream_xor(req, &subctx, real_iv); } static struct skcipher_alg algs[] = { @@ -133,8 +136,8 @@ static struct skcipher_alg algs[] = { .chunksize = CHACHA_BLOCK_SIZE, .walksize = 4 * CHACHA_BLOCK_SIZE, .setkey = crypto_chacha20_setkey, - .encrypt = chacha20_neon, - .decrypt = chacha20_neon, + .encrypt = chacha_neon, + .decrypt = chacha_neon, }, { .base.cra_name = "xchacha20", .base.cra_driver_name = "xchacha20-neon", @@ -149,12 +152,12 @@ static struct skcipher_alg algs[] = { .chunksize = CHACHA_BLOCK_SIZE, .walksize = 4 * CHACHA_BLOCK_SIZE, .setkey = crypto_chacha20_setkey, - .encrypt = xchacha20_neon, - .decrypt = xchacha20_neon, + .encrypt = xchacha_neon, + .decrypt = xchacha_neon, } }; -static int __init chacha20_simd_mod_init(void) +static int __init chacha_simd_mod_init(void) { if (!(elf_hwcap & HWCAP_NEON)) return -ENODEV; @@ -162,14 +165,15 @@ static int __init chacha20_simd_mod_init(void) return crypto_register_skciphers(algs, ARRAY_SIZE(algs)); } -static void __exit chacha20_simd_mod_fini(void) +static void __exit chacha_simd_mod_fini(void) { crypto_unregister_skciphers(algs, ARRAY_SIZE(algs)); } -module_init(chacha20_simd_mod_init); -module_exit(chacha20_simd_mod_fini); +module_init(chacha_simd_mod_init); +module_exit(chacha_simd_mod_fini); +MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (NEON accelerated)"); MODULE_AUTHOR("Ard Biesheuvel "); MODULE_LICENSE("GPL v2"); MODULE_ALIAS_CRYPTO("chacha20"); -- GitLab From 398ff9d18d706a03e6bdf9dd566a14f2e3a8773c Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 16 Nov 2018 17:26:26 -0800 Subject: [PATCH 1959/2269] FROMGIT: crypto: arm/chacha - add XChaCha12 support Now that the 32-bit ARM NEON implementation of ChaCha20 and XChaCha20 has been refactored to support varying the number of rounds, add support for XChaCha12. This is identical to XChaCha20 except for the number of rounds, which is 12 instead of 20. XChaCha12 is faster than XChaCha20 but has a lower security margin, though still greater than AES-256's since the best known attacks make it through only 7 rounds. See the patch "crypto: chacha - add XChaCha12 support" for more details about why we need XChaCha12 support. Reviewed-by: Ard Biesheuvel Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu (cherry picked from commit bdb063a79f6da589af1de3f10a7c8f654fba9ae8 https://git.kernel.org/pub/scm/linux/kernel/git/herbert/cryptodev-2.6.git master) Bug: 112008522 Test: As series, see Ic61c13b53facfd2173065be715a7ee5f3af8760b Change-Id: I08fba7f6f8bc9f1d08a75f5e6f6b73ceba6b8109 Signed-off-by: Eric Biggers --- arch/arm/crypto/Kconfig | 2 +- arch/arm/crypto/chacha-neon-glue.c | 21 ++++++++++++++++++++- 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig index 686eae8c93555..8284a40edf5aa 100644 --- a/arch/arm/crypto/Kconfig +++ b/arch/arm/crypto/Kconfig @@ -125,7 +125,7 @@ config CRYPTO_CRC32_ARM_CE select CRYPTO_HASH config CRYPTO_CHACHA20_NEON - tristate "NEON accelerated ChaCha20 stream cipher algorithms" + tristate "NEON accelerated ChaCha stream cipher algorithms" depends on KERNEL_MODE_NEON select CRYPTO_BLKCIPHER select CRYPTO_CHACHA20 diff --git a/arch/arm/crypto/chacha-neon-glue.c b/arch/arm/crypto/chacha-neon-glue.c index 385557d386341..9d6fda81986da 100644 --- a/arch/arm/crypto/chacha-neon-glue.c +++ b/arch/arm/crypto/chacha-neon-glue.c @@ -1,5 +1,6 @@ /* - * ChaCha20 (RFC7539) and XChaCha20 stream ciphers, NEON accelerated + * ARM NEON accelerated ChaCha and XChaCha stream ciphers, + * including ChaCha20 (RFC7539) * * Copyright (C) 2016 Linaro, Ltd. * @@ -154,6 +155,22 @@ static struct skcipher_alg algs[] = { .setkey = crypto_chacha20_setkey, .encrypt = xchacha_neon, .decrypt = xchacha_neon, + }, { + .base.cra_name = "xchacha12", + .base.cra_driver_name = "xchacha12-neon", + .base.cra_priority = 300, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct chacha_ctx), + .base.cra_module = THIS_MODULE, + + .min_keysize = CHACHA_KEY_SIZE, + .max_keysize = CHACHA_KEY_SIZE, + .ivsize = XCHACHA_IV_SIZE, + .chunksize = CHACHA_BLOCK_SIZE, + .walksize = 4 * CHACHA_BLOCK_SIZE, + .setkey = crypto_chacha12_setkey, + .encrypt = xchacha_neon, + .decrypt = xchacha_neon, } }; @@ -180,3 +197,5 @@ MODULE_ALIAS_CRYPTO("chacha20"); MODULE_ALIAS_CRYPTO("chacha20-neon"); MODULE_ALIAS_CRYPTO("xchacha20"); MODULE_ALIAS_CRYPTO("xchacha20-neon"); +MODULE_ALIAS_CRYPTO("xchacha12"); +MODULE_ALIAS_CRYPTO("xchacha12-neon"); -- GitLab From 14b22e55f1f763a3aebeaa04c04353f4436f0640 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 16 Nov 2018 17:26:27 -0800 Subject: [PATCH 1960/2269] FROMGIT: crypto: poly1305 - use structures for key and accumulator MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In preparation for exposing a low-level Poly1305 API which implements the ε-almost-∆-universal (εA∆U) hash function underlying the Poly1305 MAC and supports block-aligned inputs only, create structures poly1305_key and poly1305_state which hold the limbs of the Poly1305 "r" key and accumulator, respectively. These structures could actually have the same type (e.g. poly1305_val), but different types are preferable, to prevent misuse. Acked-by: Martin Willi Signed-off-by: Eric Biggers Acked-by: Ard Biesheuvel Signed-off-by: Herbert Xu (cherry picked from commit 878afc35cd28bcd93cd3c5e1985ef39a104a4d45 https://git.kernel.org/pub/scm/linux/kernel/git/herbert/cryptodev-2.6.git master) Bug: 112008522 Test: As series, see Ic61c13b53facfd2173065be715a7ee5f3af8760b Change-Id: If20a0f9d29d8ba1efd43a5eb3fafce7720afe565 Signed-off-by: Eric Biggers --- arch/x86/crypto/poly1305_glue.c | 20 +++++++------ crypto/poly1305_generic.c | 52 ++++++++++++++++----------------- include/crypto/poly1305.h | 12 ++++++-- 3 files changed, 47 insertions(+), 37 deletions(-) diff --git a/arch/x86/crypto/poly1305_glue.c b/arch/x86/crypto/poly1305_glue.c index 28c372003e44c..a69670b5b849c 100644 --- a/arch/x86/crypto/poly1305_glue.c +++ b/arch/x86/crypto/poly1305_glue.c @@ -83,35 +83,37 @@ static unsigned int poly1305_simd_blocks(struct poly1305_desc_ctx *dctx, if (poly1305_use_avx2 && srclen >= POLY1305_BLOCK_SIZE * 4) { if (unlikely(!sctx->wset)) { if (!sctx->uset) { - memcpy(sctx->u, dctx->r, sizeof(sctx->u)); - poly1305_simd_mult(sctx->u, dctx->r); + memcpy(sctx->u, dctx->r.r, sizeof(sctx->u)); + poly1305_simd_mult(sctx->u, dctx->r.r); sctx->uset = true; } memcpy(sctx->u + 5, sctx->u, sizeof(sctx->u)); - poly1305_simd_mult(sctx->u + 5, dctx->r); + poly1305_simd_mult(sctx->u + 5, dctx->r.r); memcpy(sctx->u + 10, sctx->u + 5, sizeof(sctx->u)); - poly1305_simd_mult(sctx->u + 10, dctx->r); + poly1305_simd_mult(sctx->u + 10, dctx->r.r); sctx->wset = true; } blocks = srclen / (POLY1305_BLOCK_SIZE * 4); - poly1305_4block_avx2(dctx->h, src, dctx->r, blocks, sctx->u); + poly1305_4block_avx2(dctx->h.h, src, dctx->r.r, blocks, + sctx->u); src += POLY1305_BLOCK_SIZE * 4 * blocks; srclen -= POLY1305_BLOCK_SIZE * 4 * blocks; } #endif if (likely(srclen >= POLY1305_BLOCK_SIZE * 2)) { if (unlikely(!sctx->uset)) { - memcpy(sctx->u, dctx->r, sizeof(sctx->u)); - poly1305_simd_mult(sctx->u, dctx->r); + memcpy(sctx->u, dctx->r.r, sizeof(sctx->u)); + poly1305_simd_mult(sctx->u, dctx->r.r); sctx->uset = true; } blocks = srclen / (POLY1305_BLOCK_SIZE * 2); - poly1305_2block_sse2(dctx->h, src, dctx->r, blocks, sctx->u); + poly1305_2block_sse2(dctx->h.h, src, dctx->r.r, blocks, + sctx->u); src += POLY1305_BLOCK_SIZE * 2 * blocks; srclen -= POLY1305_BLOCK_SIZE * 2 * blocks; } if (srclen >= POLY1305_BLOCK_SIZE) { - poly1305_block_sse2(dctx->h, src, dctx->r, 1); + poly1305_block_sse2(dctx->h.h, src, dctx->r.r, 1); srclen -= POLY1305_BLOCK_SIZE; } return srclen; diff --git a/crypto/poly1305_generic.c b/crypto/poly1305_generic.c index 38a13ae0f2a34..18b1aae408053 100644 --- a/crypto/poly1305_generic.c +++ b/crypto/poly1305_generic.c @@ -38,7 +38,7 @@ int crypto_poly1305_init(struct shash_desc *desc) { struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); - memset(dctx->h, 0, sizeof(dctx->h)); + memset(dctx->h.h, 0, sizeof(dctx->h.h)); dctx->buflen = 0; dctx->rset = false; dctx->sset = false; @@ -50,11 +50,11 @@ EXPORT_SYMBOL_GPL(crypto_poly1305_init); static void poly1305_setrkey(struct poly1305_desc_ctx *dctx, const u8 *key) { /* r &= 0xffffffc0ffffffc0ffffffc0fffffff */ - dctx->r[0] = (get_unaligned_le32(key + 0) >> 0) & 0x3ffffff; - dctx->r[1] = (get_unaligned_le32(key + 3) >> 2) & 0x3ffff03; - dctx->r[2] = (get_unaligned_le32(key + 6) >> 4) & 0x3ffc0ff; - dctx->r[3] = (get_unaligned_le32(key + 9) >> 6) & 0x3f03fff; - dctx->r[4] = (get_unaligned_le32(key + 12) >> 8) & 0x00fffff; + dctx->r.r[0] = (get_unaligned_le32(key + 0) >> 0) & 0x3ffffff; + dctx->r.r[1] = (get_unaligned_le32(key + 3) >> 2) & 0x3ffff03; + dctx->r.r[2] = (get_unaligned_le32(key + 6) >> 4) & 0x3ffc0ff; + dctx->r.r[3] = (get_unaligned_le32(key + 9) >> 6) & 0x3f03fff; + dctx->r.r[4] = (get_unaligned_le32(key + 12) >> 8) & 0x00fffff; } static void poly1305_setskey(struct poly1305_desc_ctx *dctx, const u8 *key) @@ -107,22 +107,22 @@ static unsigned int poly1305_blocks(struct poly1305_desc_ctx *dctx, srclen = datalen; } - r0 = dctx->r[0]; - r1 = dctx->r[1]; - r2 = dctx->r[2]; - r3 = dctx->r[3]; - r4 = dctx->r[4]; + r0 = dctx->r.r[0]; + r1 = dctx->r.r[1]; + r2 = dctx->r.r[2]; + r3 = dctx->r.r[3]; + r4 = dctx->r.r[4]; s1 = r1 * 5; s2 = r2 * 5; s3 = r3 * 5; s4 = r4 * 5; - h0 = dctx->h[0]; - h1 = dctx->h[1]; - h2 = dctx->h[2]; - h3 = dctx->h[3]; - h4 = dctx->h[4]; + h0 = dctx->h.h[0]; + h1 = dctx->h.h[1]; + h2 = dctx->h.h[2]; + h3 = dctx->h.h[3]; + h4 = dctx->h.h[4]; while (likely(srclen >= POLY1305_BLOCK_SIZE)) { @@ -157,11 +157,11 @@ static unsigned int poly1305_blocks(struct poly1305_desc_ctx *dctx, srclen -= POLY1305_BLOCK_SIZE; } - dctx->h[0] = h0; - dctx->h[1] = h1; - dctx->h[2] = h2; - dctx->h[3] = h3; - dctx->h[4] = h4; + dctx->h.h[0] = h0; + dctx->h.h[1] = h1; + dctx->h.h[2] = h2; + dctx->h.h[3] = h3; + dctx->h.h[4] = h4; return srclen; } @@ -220,11 +220,11 @@ int crypto_poly1305_final(struct shash_desc *desc, u8 *dst) } /* fully carry h */ - h0 = dctx->h[0]; - h1 = dctx->h[1]; - h2 = dctx->h[2]; - h3 = dctx->h[3]; - h4 = dctx->h[4]; + h0 = dctx->h.h[0]; + h1 = dctx->h.h[1]; + h2 = dctx->h.h[2]; + h3 = dctx->h.h[3]; + h4 = dctx->h.h[4]; h2 += (h1 >> 26); h1 = h1 & 0x3ffffff; h3 += (h2 >> 26); h2 = h2 & 0x3ffffff; diff --git a/include/crypto/poly1305.h b/include/crypto/poly1305.h index f718a19da82f7..493244c46664c 100644 --- a/include/crypto/poly1305.h +++ b/include/crypto/poly1305.h @@ -13,13 +13,21 @@ #define POLY1305_KEY_SIZE 32 #define POLY1305_DIGEST_SIZE 16 +struct poly1305_key { + u32 r[5]; /* key, base 2^26 */ +}; + +struct poly1305_state { + u32 h[5]; /* accumulator, base 2^26 */ +}; + struct poly1305_desc_ctx { /* key */ - u32 r[5]; + struct poly1305_key r; /* finalize key */ u32 s[4]; /* accumulator */ - u32 h[5]; + struct poly1305_state h; /* partial buffer */ u8 buf[POLY1305_BLOCK_SIZE]; /* bytes used in partial buffer */ -- GitLab From daa2c34e09f20d0e4312395c8ad34d4bb88b266b Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 16 Nov 2018 17:26:28 -0800 Subject: [PATCH 1961/2269] FROMGIT: crypto: poly1305 - add Poly1305 core API MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Expose a low-level Poly1305 API which implements the ε-almost-∆-universal (εA∆U) hash function underlying the Poly1305 MAC and supports block-aligned inputs only. This is needed for Adiantum hashing, which builds an εA∆U hash function from NH and a polynomial evaluation in GF(2^{130}-5); this polynomial evaluation is identical to the one the Poly1305 MAC does. However, the crypto_shash Poly1305 API isn't very appropriate for this because its calling convention assumes it is used as a MAC, with a 32-byte "one-time key" provided for every digest. But by design, in Adiantum hashing the performance of the polynomial evaluation isn't nearly as critical as NH. So it suffices to just have some C helper functions. Thus, this patch adds such functions. Acked-by: Martin Willi Signed-off-by: Eric Biggers Acked-by: Ard Biesheuvel Signed-off-by: Herbert Xu (cherry picked from commit 1b6fd3d5d18bbc1b1abf3b0cbc4b95a9a63d407b https://git.kernel.org/pub/scm/linux/kernel/git/herbert/cryptodev-2.6.git master) Bug: 112008522 Test: As series, see Ic61c13b53facfd2173065be715a7ee5f3af8760b Change-Id: I5c7da7832b84dfe29c300e117a158740d3e39069 Signed-off-by: Eric Biggers --- crypto/poly1305_generic.c | 174 ++++++++++++++++++++++---------------- include/crypto/poly1305.h | 16 ++++ 2 files changed, 115 insertions(+), 75 deletions(-) diff --git a/crypto/poly1305_generic.c b/crypto/poly1305_generic.c index 18b1aae408053..b60c1eee9839f 100644 --- a/crypto/poly1305_generic.c +++ b/crypto/poly1305_generic.c @@ -38,7 +38,7 @@ int crypto_poly1305_init(struct shash_desc *desc) { struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); - memset(dctx->h.h, 0, sizeof(dctx->h.h)); + poly1305_core_init(&dctx->h); dctx->buflen = 0; dctx->rset = false; dctx->sset = false; @@ -47,23 +47,16 @@ int crypto_poly1305_init(struct shash_desc *desc) } EXPORT_SYMBOL_GPL(crypto_poly1305_init); -static void poly1305_setrkey(struct poly1305_desc_ctx *dctx, const u8 *key) +void poly1305_core_setkey(struct poly1305_key *key, const u8 *raw_key) { /* r &= 0xffffffc0ffffffc0ffffffc0fffffff */ - dctx->r.r[0] = (get_unaligned_le32(key + 0) >> 0) & 0x3ffffff; - dctx->r.r[1] = (get_unaligned_le32(key + 3) >> 2) & 0x3ffff03; - dctx->r.r[2] = (get_unaligned_le32(key + 6) >> 4) & 0x3ffc0ff; - dctx->r.r[3] = (get_unaligned_le32(key + 9) >> 6) & 0x3f03fff; - dctx->r.r[4] = (get_unaligned_le32(key + 12) >> 8) & 0x00fffff; -} - -static void poly1305_setskey(struct poly1305_desc_ctx *dctx, const u8 *key) -{ - dctx->s[0] = get_unaligned_le32(key + 0); - dctx->s[1] = get_unaligned_le32(key + 4); - dctx->s[2] = get_unaligned_le32(key + 8); - dctx->s[3] = get_unaligned_le32(key + 12); + key->r[0] = (get_unaligned_le32(raw_key + 0) >> 0) & 0x3ffffff; + key->r[1] = (get_unaligned_le32(raw_key + 3) >> 2) & 0x3ffff03; + key->r[2] = (get_unaligned_le32(raw_key + 6) >> 4) & 0x3ffc0ff; + key->r[3] = (get_unaligned_le32(raw_key + 9) >> 6) & 0x3f03fff; + key->r[4] = (get_unaligned_le32(raw_key + 12) >> 8) & 0x00fffff; } +EXPORT_SYMBOL_GPL(poly1305_core_setkey); /* * Poly1305 requires a unique key for each tag, which implies that we can't set @@ -75,13 +68,16 @@ unsigned int crypto_poly1305_setdesckey(struct poly1305_desc_ctx *dctx, { if (!dctx->sset) { if (!dctx->rset && srclen >= POLY1305_BLOCK_SIZE) { - poly1305_setrkey(dctx, src); + poly1305_core_setkey(&dctx->r, src); src += POLY1305_BLOCK_SIZE; srclen -= POLY1305_BLOCK_SIZE; dctx->rset = true; } if (srclen >= POLY1305_BLOCK_SIZE) { - poly1305_setskey(dctx, src); + dctx->s[0] = get_unaligned_le32(src + 0); + dctx->s[1] = get_unaligned_le32(src + 4); + dctx->s[2] = get_unaligned_le32(src + 8); + dctx->s[3] = get_unaligned_le32(src + 12); src += POLY1305_BLOCK_SIZE; srclen -= POLY1305_BLOCK_SIZE; dctx->sset = true; @@ -91,41 +87,37 @@ unsigned int crypto_poly1305_setdesckey(struct poly1305_desc_ctx *dctx, } EXPORT_SYMBOL_GPL(crypto_poly1305_setdesckey); -static unsigned int poly1305_blocks(struct poly1305_desc_ctx *dctx, - const u8 *src, unsigned int srclen, - u32 hibit) +static void poly1305_blocks_internal(struct poly1305_state *state, + const struct poly1305_key *key, + const void *src, unsigned int nblocks, + u32 hibit) { u32 r0, r1, r2, r3, r4; u32 s1, s2, s3, s4; u32 h0, h1, h2, h3, h4; u64 d0, d1, d2, d3, d4; - unsigned int datalen; - if (unlikely(!dctx->sset)) { - datalen = crypto_poly1305_setdesckey(dctx, src, srclen); - src += srclen - datalen; - srclen = datalen; - } + if (!nblocks) + return; - r0 = dctx->r.r[0]; - r1 = dctx->r.r[1]; - r2 = dctx->r.r[2]; - r3 = dctx->r.r[3]; - r4 = dctx->r.r[4]; + r0 = key->r[0]; + r1 = key->r[1]; + r2 = key->r[2]; + r3 = key->r[3]; + r4 = key->r[4]; s1 = r1 * 5; s2 = r2 * 5; s3 = r3 * 5; s4 = r4 * 5; - h0 = dctx->h.h[0]; - h1 = dctx->h.h[1]; - h2 = dctx->h.h[2]; - h3 = dctx->h.h[3]; - h4 = dctx->h.h[4]; - - while (likely(srclen >= POLY1305_BLOCK_SIZE)) { + h0 = state->h[0]; + h1 = state->h[1]; + h2 = state->h[2]; + h3 = state->h[3]; + h4 = state->h[4]; + do { /* h += m[i] */ h0 += (get_unaligned_le32(src + 0) >> 0) & 0x3ffffff; h1 += (get_unaligned_le32(src + 3) >> 2) & 0x3ffffff; @@ -154,16 +146,36 @@ static unsigned int poly1305_blocks(struct poly1305_desc_ctx *dctx, h1 += h0 >> 26; h0 = h0 & 0x3ffffff; src += POLY1305_BLOCK_SIZE; - srclen -= POLY1305_BLOCK_SIZE; - } + } while (--nblocks); - dctx->h.h[0] = h0; - dctx->h.h[1] = h1; - dctx->h.h[2] = h2; - dctx->h.h[3] = h3; - dctx->h.h[4] = h4; + state->h[0] = h0; + state->h[1] = h1; + state->h[2] = h2; + state->h[3] = h3; + state->h[4] = h4; +} - return srclen; +void poly1305_core_blocks(struct poly1305_state *state, + const struct poly1305_key *key, + const void *src, unsigned int nblocks) +{ + poly1305_blocks_internal(state, key, src, nblocks, 1 << 24); +} +EXPORT_SYMBOL_GPL(poly1305_core_blocks); + +static void poly1305_blocks(struct poly1305_desc_ctx *dctx, + const u8 *src, unsigned int srclen, u32 hibit) +{ + unsigned int datalen; + + if (unlikely(!dctx->sset)) { + datalen = crypto_poly1305_setdesckey(dctx, src, srclen); + src += srclen - datalen; + srclen = datalen; + } + + poly1305_blocks_internal(&dctx->h, &dctx->r, + src, srclen / POLY1305_BLOCK_SIZE, hibit); } int crypto_poly1305_update(struct shash_desc *desc, @@ -187,9 +199,9 @@ int crypto_poly1305_update(struct shash_desc *desc, } if (likely(srclen >= POLY1305_BLOCK_SIZE)) { - bytes = poly1305_blocks(dctx, src, srclen, 1 << 24); - src += srclen - bytes; - srclen = bytes; + poly1305_blocks(dctx, src, srclen, 1 << 24); + src += srclen - (srclen % POLY1305_BLOCK_SIZE); + srclen %= POLY1305_BLOCK_SIZE; } if (unlikely(srclen)) { @@ -201,30 +213,18 @@ int crypto_poly1305_update(struct shash_desc *desc, } EXPORT_SYMBOL_GPL(crypto_poly1305_update); -int crypto_poly1305_final(struct shash_desc *desc, u8 *dst) +void poly1305_core_emit(const struct poly1305_state *state, void *dst) { - struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); u32 h0, h1, h2, h3, h4; u32 g0, g1, g2, g3, g4; u32 mask; - u64 f = 0; - - if (unlikely(!dctx->sset)) - return -ENOKEY; - - if (unlikely(dctx->buflen)) { - dctx->buf[dctx->buflen++] = 1; - memset(dctx->buf + dctx->buflen, 0, - POLY1305_BLOCK_SIZE - dctx->buflen); - poly1305_blocks(dctx, dctx->buf, POLY1305_BLOCK_SIZE, 0); - } /* fully carry h */ - h0 = dctx->h.h[0]; - h1 = dctx->h.h[1]; - h2 = dctx->h.h[2]; - h3 = dctx->h.h[3]; - h4 = dctx->h.h[4]; + h0 = state->h[0]; + h1 = state->h[1]; + h2 = state->h[2]; + h3 = state->h[3]; + h4 = state->h[4]; h2 += (h1 >> 26); h1 = h1 & 0x3ffffff; h3 += (h2 >> 26); h2 = h2 & 0x3ffffff; @@ -254,16 +254,40 @@ int crypto_poly1305_final(struct shash_desc *desc, u8 *dst) h4 = (h4 & mask) | g4; /* h = h % (2^128) */ - h0 = (h0 >> 0) | (h1 << 26); - h1 = (h1 >> 6) | (h2 << 20); - h2 = (h2 >> 12) | (h3 << 14); - h3 = (h3 >> 18) | (h4 << 8); + put_unaligned_le32((h0 >> 0) | (h1 << 26), dst + 0); + put_unaligned_le32((h1 >> 6) | (h2 << 20), dst + 4); + put_unaligned_le32((h2 >> 12) | (h3 << 14), dst + 8); + put_unaligned_le32((h3 >> 18) | (h4 << 8), dst + 12); +} +EXPORT_SYMBOL_GPL(poly1305_core_emit); + +int crypto_poly1305_final(struct shash_desc *desc, u8 *dst) +{ + struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); + __le32 digest[4]; + u64 f = 0; + + if (unlikely(!dctx->sset)) + return -ENOKEY; + + if (unlikely(dctx->buflen)) { + dctx->buf[dctx->buflen++] = 1; + memset(dctx->buf + dctx->buflen, 0, + POLY1305_BLOCK_SIZE - dctx->buflen); + poly1305_blocks(dctx, dctx->buf, POLY1305_BLOCK_SIZE, 0); + } + + poly1305_core_emit(&dctx->h, digest); /* mac = (h + s) % (2^128) */ - f = (f >> 32) + h0 + dctx->s[0]; put_unaligned_le32(f, dst + 0); - f = (f >> 32) + h1 + dctx->s[1]; put_unaligned_le32(f, dst + 4); - f = (f >> 32) + h2 + dctx->s[2]; put_unaligned_le32(f, dst + 8); - f = (f >> 32) + h3 + dctx->s[3]; put_unaligned_le32(f, dst + 12); + f = (f >> 32) + le32_to_cpu(digest[0]) + dctx->s[0]; + put_unaligned_le32(f, dst + 0); + f = (f >> 32) + le32_to_cpu(digest[1]) + dctx->s[1]; + put_unaligned_le32(f, dst + 4); + f = (f >> 32) + le32_to_cpu(digest[2]) + dctx->s[2]; + put_unaligned_le32(f, dst + 8); + f = (f >> 32) + le32_to_cpu(digest[3]) + dctx->s[3]; + put_unaligned_le32(f, dst + 12); return 0; } diff --git a/include/crypto/poly1305.h b/include/crypto/poly1305.h index 493244c46664c..34317ed2071e6 100644 --- a/include/crypto/poly1305.h +++ b/include/crypto/poly1305.h @@ -38,6 +38,22 @@ struct poly1305_desc_ctx { bool sset; }; +/* + * Poly1305 core functions. These implement the ε-almost-∆-universal hash + * function underlying the Poly1305 MAC, i.e. they don't add an encrypted nonce + * ("s key") at the end. They also only support block-aligned inputs. + */ +void poly1305_core_setkey(struct poly1305_key *key, const u8 *raw_key); +static inline void poly1305_core_init(struct poly1305_state *state) +{ + memset(state->h, 0, sizeof(state->h)); +} +void poly1305_core_blocks(struct poly1305_state *state, + const struct poly1305_key *key, + const void *src, unsigned int nblocks); +void poly1305_core_emit(const struct poly1305_state *state, void *dst); + +/* Crypto API helper functions for the Poly1305 MAC */ int crypto_poly1305_init(struct shash_desc *desc); unsigned int crypto_poly1305_setdesckey(struct poly1305_desc_ctx *dctx, const u8 *src, unsigned int srclen); -- GitLab From 7db2b9c5161ce58b54ab37a6a614f3188429302d Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 16 Nov 2018 17:26:29 -0800 Subject: [PATCH 1962/2269] BACKPORT, FROMGIT: crypto: nhpoly1305 - add NHPoly1305 support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a generic implementation of NHPoly1305, an ε-almost-∆-universal hash function used in the Adiantum encryption mode. CONFIG_NHPOLY1305 is not selectable by itself since there won't be any real reason to enable it without also enabling Adiantum support. Signed-off-by: Eric Biggers Acked-by: Ard Biesheuvel Signed-off-by: Herbert Xu (cherry picked from commit 26609a21a9460145e37d90947ad957b358a05288 https://git.kernel.org/pub/scm/linux/kernel/git/herbert/cryptodev-2.6.git master) Conflicts: crypto/testmgr.c crypto/testmgr.h Bug: 112008522 Test: As series, see Ic61c13b53facfd2173065be715a7ee5f3af8760b Change-Id: If6f00c01fab530fc2458c44ca111f84604cb85c1 Signed-off-by: Eric Biggers --- crypto/Kconfig | 5 + crypto/Makefile | 1 + crypto/nhpoly1305.c | 254 +++++++ crypto/testmgr.c | 6 + crypto/testmgr.h | 1239 ++++++++++++++++++++++++++++++++++- include/crypto/nhpoly1305.h | 74 +++ 6 files changed, 1575 insertions(+), 4 deletions(-) create mode 100644 crypto/nhpoly1305.c create mode 100644 include/crypto/nhpoly1305.h diff --git a/crypto/Kconfig b/crypto/Kconfig index 4d1f995cb4e48..e92d51eb13789 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -389,6 +389,11 @@ config CRYPTO_KEYWRAP Support for key wrapping (NIST SP800-38F / RFC3394) without padding. +config CRYPTO_NHPOLY1305 + tristate + select CRYPTO_HASH + select CRYPTO_POLY1305 + comment "Hash modes" config CRYPTO_CMAC diff --git a/crypto/Makefile b/crypto/Makefile index 4d77e00507285..bfa498533d6a6 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -83,6 +83,7 @@ obj-$(CONFIG_CRYPTO_LRW) += lrw.o obj-$(CONFIG_CRYPTO_XTS) += xts.o obj-$(CONFIG_CRYPTO_CTR) += ctr.o obj-$(CONFIG_CRYPTO_KEYWRAP) += keywrap.o +obj-$(CONFIG_CRYPTO_NHPOLY1305) += nhpoly1305.o obj-$(CONFIG_CRYPTO_GCM) += gcm.o obj-$(CONFIG_CRYPTO_CCM) += ccm.o obj-$(CONFIG_CRYPTO_CHACHA20POLY1305) += chacha20poly1305.o diff --git a/crypto/nhpoly1305.c b/crypto/nhpoly1305.c new file mode 100644 index 0000000000000..c8385853f699d --- /dev/null +++ b/crypto/nhpoly1305.c @@ -0,0 +1,254 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * NHPoly1305 - ε-almost-∆-universal hash function for Adiantum + * + * Copyright 2018 Google LLC + */ + +/* + * "NHPoly1305" is the main component of Adiantum hashing. + * Specifically, it is the calculation + * + * H_M ← Poly1305_{K_M}(NH_{K_N}(pad_{128}(M))) + * + * from the procedure in section A.5 of the Adiantum paper [1]. It is an + * ε-almost-∆-universal (εA∆U) hash function for equal-length inputs over + * Z/(2^{128}Z), where the "∆" operation is addition. It hashes 1024-byte + * chunks of the input with the NH hash function [2], reducing the input length + * by 32x. The resulting NH digests are evaluated as a polynomial in + * GF(2^{130}-5), like in the Poly1305 MAC [3]. Note that the polynomial + * evaluation by itself would suffice to achieve the εA∆U property; NH is used + * for performance since it's over twice as fast as Poly1305. + * + * This is *not* a cryptographic hash function; do not use it as such! + * + * [1] Adiantum: length-preserving encryption for entry-level processors + * (https://eprint.iacr.org/2018/720.pdf) + * [2] UMAC: Fast and Secure Message Authentication + * (https://fastcrypto.org/umac/umac_proc.pdf) + * [3] The Poly1305-AES message-authentication code + * (https://cr.yp.to/mac/poly1305-20050329.pdf) + */ + +#include +#include +#include +#include +#include +#include +#include + +static void nh_generic(const u32 *key, const u8 *message, size_t message_len, + __le64 hash[NH_NUM_PASSES]) +{ + u64 sums[4] = { 0, 0, 0, 0 }; + + BUILD_BUG_ON(NH_PAIR_STRIDE != 2); + BUILD_BUG_ON(NH_NUM_PASSES != 4); + + while (message_len) { + u32 m0 = get_unaligned_le32(message + 0); + u32 m1 = get_unaligned_le32(message + 4); + u32 m2 = get_unaligned_le32(message + 8); + u32 m3 = get_unaligned_le32(message + 12); + + sums[0] += (u64)(u32)(m0 + key[ 0]) * (u32)(m2 + key[ 2]); + sums[1] += (u64)(u32)(m0 + key[ 4]) * (u32)(m2 + key[ 6]); + sums[2] += (u64)(u32)(m0 + key[ 8]) * (u32)(m2 + key[10]); + sums[3] += (u64)(u32)(m0 + key[12]) * (u32)(m2 + key[14]); + sums[0] += (u64)(u32)(m1 + key[ 1]) * (u32)(m3 + key[ 3]); + sums[1] += (u64)(u32)(m1 + key[ 5]) * (u32)(m3 + key[ 7]); + sums[2] += (u64)(u32)(m1 + key[ 9]) * (u32)(m3 + key[11]); + sums[3] += (u64)(u32)(m1 + key[13]) * (u32)(m3 + key[15]); + key += NH_MESSAGE_UNIT / sizeof(key[0]); + message += NH_MESSAGE_UNIT; + message_len -= NH_MESSAGE_UNIT; + } + + hash[0] = cpu_to_le64(sums[0]); + hash[1] = cpu_to_le64(sums[1]); + hash[2] = cpu_to_le64(sums[2]); + hash[3] = cpu_to_le64(sums[3]); +} + +/* Pass the next NH hash value through Poly1305 */ +static void process_nh_hash_value(struct nhpoly1305_state *state, + const struct nhpoly1305_key *key) +{ + BUILD_BUG_ON(NH_HASH_BYTES % POLY1305_BLOCK_SIZE != 0); + + poly1305_core_blocks(&state->poly_state, &key->poly_key, state->nh_hash, + NH_HASH_BYTES / POLY1305_BLOCK_SIZE); +} + +/* + * Feed the next portion of the source data, as a whole number of 16-byte + * "NH message units", through NH and Poly1305. Each NH hash is taken over + * 1024 bytes, except possibly the final one which is taken over a multiple of + * 16 bytes up to 1024. Also, in the case where data is passed in misaligned + * chunks, we combine partial hashes; the end result is the same either way. + */ +static void nhpoly1305_units(struct nhpoly1305_state *state, + const struct nhpoly1305_key *key, + const u8 *src, unsigned int srclen, nh_t nh_fn) +{ + do { + unsigned int bytes; + + if (state->nh_remaining == 0) { + /* Starting a new NH message */ + bytes = min_t(unsigned int, srclen, NH_MESSAGE_BYTES); + nh_fn(key->nh_key, src, bytes, state->nh_hash); + state->nh_remaining = NH_MESSAGE_BYTES - bytes; + } else { + /* Continuing a previous NH message */ + __le64 tmp_hash[NH_NUM_PASSES]; + unsigned int pos; + int i; + + pos = NH_MESSAGE_BYTES - state->nh_remaining; + bytes = min(srclen, state->nh_remaining); + nh_fn(&key->nh_key[pos / 4], src, bytes, tmp_hash); + for (i = 0; i < NH_NUM_PASSES; i++) + le64_add_cpu(&state->nh_hash[i], + le64_to_cpu(tmp_hash[i])); + state->nh_remaining -= bytes; + } + if (state->nh_remaining == 0) + process_nh_hash_value(state, key); + src += bytes; + srclen -= bytes; + } while (srclen); +} + +int crypto_nhpoly1305_setkey(struct crypto_shash *tfm, + const u8 *key, unsigned int keylen) +{ + struct nhpoly1305_key *ctx = crypto_shash_ctx(tfm); + int i; + + if (keylen != NHPOLY1305_KEY_SIZE) + return -EINVAL; + + poly1305_core_setkey(&ctx->poly_key, key); + key += POLY1305_BLOCK_SIZE; + + for (i = 0; i < NH_KEY_WORDS; i++) + ctx->nh_key[i] = get_unaligned_le32(key + i * sizeof(u32)); + + return 0; +} +EXPORT_SYMBOL(crypto_nhpoly1305_setkey); + +int crypto_nhpoly1305_init(struct shash_desc *desc) +{ + struct nhpoly1305_state *state = shash_desc_ctx(desc); + + poly1305_core_init(&state->poly_state); + state->buflen = 0; + state->nh_remaining = 0; + return 0; +} +EXPORT_SYMBOL(crypto_nhpoly1305_init); + +int crypto_nhpoly1305_update_helper(struct shash_desc *desc, + const u8 *src, unsigned int srclen, + nh_t nh_fn) +{ + struct nhpoly1305_state *state = shash_desc_ctx(desc); + const struct nhpoly1305_key *key = crypto_shash_ctx(desc->tfm); + unsigned int bytes; + + if (state->buflen) { + bytes = min(srclen, (int)NH_MESSAGE_UNIT - state->buflen); + memcpy(&state->buffer[state->buflen], src, bytes); + state->buflen += bytes; + if (state->buflen < NH_MESSAGE_UNIT) + return 0; + nhpoly1305_units(state, key, state->buffer, NH_MESSAGE_UNIT, + nh_fn); + state->buflen = 0; + src += bytes; + srclen -= bytes; + } + + if (srclen >= NH_MESSAGE_UNIT) { + bytes = round_down(srclen, NH_MESSAGE_UNIT); + nhpoly1305_units(state, key, src, bytes, nh_fn); + src += bytes; + srclen -= bytes; + } + + if (srclen) { + memcpy(state->buffer, src, srclen); + state->buflen = srclen; + } + return 0; +} +EXPORT_SYMBOL(crypto_nhpoly1305_update_helper); + +int crypto_nhpoly1305_update(struct shash_desc *desc, + const u8 *src, unsigned int srclen) +{ + return crypto_nhpoly1305_update_helper(desc, src, srclen, nh_generic); +} +EXPORT_SYMBOL(crypto_nhpoly1305_update); + +int crypto_nhpoly1305_final_helper(struct shash_desc *desc, u8 *dst, nh_t nh_fn) +{ + struct nhpoly1305_state *state = shash_desc_ctx(desc); + const struct nhpoly1305_key *key = crypto_shash_ctx(desc->tfm); + + if (state->buflen) { + memset(&state->buffer[state->buflen], 0, + NH_MESSAGE_UNIT - state->buflen); + nhpoly1305_units(state, key, state->buffer, NH_MESSAGE_UNIT, + nh_fn); + } + + if (state->nh_remaining) + process_nh_hash_value(state, key); + + poly1305_core_emit(&state->poly_state, dst); + return 0; +} +EXPORT_SYMBOL(crypto_nhpoly1305_final_helper); + +int crypto_nhpoly1305_final(struct shash_desc *desc, u8 *dst) +{ + return crypto_nhpoly1305_final_helper(desc, dst, nh_generic); +} +EXPORT_SYMBOL(crypto_nhpoly1305_final); + +static struct shash_alg nhpoly1305_alg = { + .base.cra_name = "nhpoly1305", + .base.cra_driver_name = "nhpoly1305-generic", + .base.cra_priority = 100, + .base.cra_ctxsize = sizeof(struct nhpoly1305_key), + .base.cra_module = THIS_MODULE, + .digestsize = POLY1305_DIGEST_SIZE, + .init = crypto_nhpoly1305_init, + .update = crypto_nhpoly1305_update, + .final = crypto_nhpoly1305_final, + .setkey = crypto_nhpoly1305_setkey, + .descsize = sizeof(struct nhpoly1305_state), +}; + +static int __init nhpoly1305_mod_init(void) +{ + return crypto_register_shash(&nhpoly1305_alg); +} + +static void __exit nhpoly1305_mod_exit(void) +{ + crypto_unregister_shash(&nhpoly1305_alg); +} + +module_init(nhpoly1305_mod_init); +module_exit(nhpoly1305_mod_exit); + +MODULE_DESCRIPTION("NHPoly1305 ε-almost-∆-universal hash function"); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Eric Biggers "); +MODULE_ALIAS_CRYPTO("nhpoly1305"); +MODULE_ALIAS_CRYPTO("nhpoly1305-generic"); diff --git a/crypto/testmgr.c b/crypto/testmgr.c index b93c367e99649..ace2eef6d86bc 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -3296,6 +3296,12 @@ static const struct alg_test_desc alg_test_descs[] = { .suite = { .hash = __VECS(michael_mic_tv_template) } + }, { + .alg = "nhpoly1305", + .test = alg_test_hash, + .suite = { + .hash = __VECS(nhpoly1305_tv_template) + } }, { .alg = "ofb(aes)", .test = alg_test_skcipher, diff --git a/crypto/testmgr.h b/crypto/testmgr.h index 5ad625a8ed2dc..97f49f2df88f3 100644 --- a/crypto/testmgr.h +++ b/crypto/testmgr.h @@ -29,7 +29,7 @@ #define MAX_DIGEST_SIZE 64 #define MAX_TAP 8 -#define MAX_KEYLEN 160 +#define MAX_KEYLEN 1088 #define MAX_IVLEN 32 struct hash_testvec { @@ -37,10 +37,10 @@ struct hash_testvec { const char *key; const char *plaintext; const char *digest; - unsigned char tap[MAX_TAP]; + unsigned short tap[MAX_TAP]; + unsigned short np; unsigned short psize; - unsigned char np; - unsigned char ksize; + unsigned short ksize; }; /* @@ -4663,6 +4663,1237 @@ static const struct hash_testvec poly1305_tv_template[] = { }, }; +/* NHPoly1305 test vectors from https://github.com/google/adiantum */ +static const struct hash_testvec nhpoly1305_tv_template[] = { + { + .key = "\xd2\x5d\x4c\xdd\x8d\x2b\x7f\x7a" + "\xd9\xbe\x71\xec\xd1\x83\x52\xe3" + "\xe1\xad\xd7\x5c\x0a\x75\x9d\xec" + "\x1d\x13\x7e\x5d\x71\x07\xc9\xe4" + "\x57\x2d\x44\x68\xcf\xd8\xd6\xc5" + "\x39\x69\x7d\x32\x75\x51\x4f\x7e" + "\xb2\x4c\xc6\x90\x51\x6e\xd9\xd6" + "\xa5\x8b\x2d\xf1\x94\xf9\xf7\x5e" + "\x2c\x84\x7b\x41\x0f\x88\x50\x89" + "\x30\xd9\xa1\x38\x46\x6c\xc0\x4f" + "\xe8\xdf\xdc\x66\xab\x24\x43\x41" + "\x91\x55\x29\x65\x86\x28\x5e\x45" + "\xd5\x2d\xb7\x80\x08\x9a\xc3\xd4" + "\x9a\x77\x0a\xd4\xef\x3e\xe6\x3f" + "\x6f\x2f\x9b\x3a\x7d\x12\x1e\x80" + "\x6c\x44\xa2\x25\xe1\xf6\x60\xe9" + "\x0d\xaf\xc5\x3c\xa5\x79\xae\x64" + "\xbc\xa0\x39\xa3\x4d\x10\xe5\x4d" + "\xd5\xe7\x89\x7a\x13\xee\x06\x78" + "\xdc\xa4\xdc\x14\x27\xe6\x49\x38" + "\xd0\xe0\x45\x25\x36\xc5\xf4\x79" + "\x2e\x9a\x98\x04\xe4\x2b\x46\x52" + "\x7c\x33\xca\xe2\x56\x51\x50\xe2" + "\xa5\x9a\xae\x18\x6a\x13\xf8\xd2" + "\x21\x31\x66\x02\xe2\xda\x8d\x7e" + "\x41\x19\xb2\x61\xee\x48\x8f\xf1" + "\x65\x24\x2e\x1e\x68\xce\x05\xd9" + "\x2a\xcf\xa5\x3a\x57\xdd\x35\x91" + "\x93\x01\xca\x95\xfc\x2b\x36\x04" + "\xe6\x96\x97\x28\xf6\x31\xfe\xa3" + "\x9d\xf6\x6a\x1e\x80\x8d\xdc\xec" + "\xaf\x66\x11\x13\x02\x88\xd5\x27" + "\x33\xb4\x1a\xcd\xa3\xf6\xde\x31" + "\x8e\xc0\x0e\x6c\xd8\x5a\x97\x5e" + "\xdd\xfd\x60\x69\x38\x46\x3f\x90" + "\x5e\x97\xd3\x32\x76\xc7\x82\x49" + "\xfe\xba\x06\x5f\x2f\xa2\xfd\xff" + "\x80\x05\x40\xe4\x33\x03\xfb\x10" + "\xc0\xde\x65\x8c\xc9\x8d\x3a\x9d" + "\xb5\x7b\x36\x4b\xb5\x0c\xcf\x00" + "\x9c\x87\xe4\x49\xad\x90\xda\x4a" + "\xdd\xbd\xff\xe2\x32\x57\xd6\x78" + "\x36\x39\x6c\xd3\x5b\x9b\x88\x59" + "\x2d\xf0\x46\xe4\x13\x0e\x2b\x35" + "\x0d\x0f\x73\x8a\x4f\x26\x84\x75" + "\x88\x3c\xc5\x58\x66\x18\x1a\xb4" + "\x64\x51\x34\x27\x1b\xa4\x11\xc9" + "\x6d\x91\x8a\xfa\x32\x60\x9d\xd7" + "\x87\xe5\xaa\x43\x72\xf8\xda\xd1" + "\x48\x44\x13\x61\xdc\x8c\x76\x17" + "\x0c\x85\x4e\xf3\xdd\xa2\x42\xd2" + "\x74\xc1\x30\x1b\xeb\x35\x31\x29" + "\x5b\xd7\x4c\x94\x46\x35\xa1\x23" + "\x50\xf2\xa2\x8e\x7e\x4f\x23\x4f" + "\x51\xff\xe2\xc9\xa3\x7d\x56\x8b" + "\x41\xf2\xd0\xc5\x57\x7e\x59\xac" + "\xbb\x65\xf3\xfe\xf7\x17\xef\x63" + "\x7c\x6f\x23\xdd\x22\x8e\xed\x84" + "\x0e\x3b\x09\xb3\xf3\xf4\x8f\xcd" + "\x37\xa8\xe1\xa7\x30\xdb\xb1\xa2" + "\x9c\xa2\xdf\x34\x17\x3e\x68\x44" + "\xd0\xde\x03\x50\xd1\x48\x6b\x20" + "\xe2\x63\x45\xa5\xea\x87\xc2\x42" + "\x95\x03\x49\x05\xed\xe0\x90\x29" + "\x1a\xb8\xcf\x9b\x43\xcf\x29\x7a" + "\x63\x17\x41\x9f\xe0\xc9\x10\xfd" + "\x2c\x56\x8c\x08\x55\xb4\xa9\x27" + "\x0f\x23\xb1\x05\x6a\x12\x46\xc7" + "\xe1\xfe\x28\x93\x93\xd7\x2f\xdc" + "\x98\x30\xdb\x75\x8a\xbe\x97\x7a" + "\x02\xfb\x8c\xba\xbe\x25\x09\xbe" + "\xce\xcb\xa2\xef\x79\x4d\x0e\x9d" + "\x1b\x9d\xb6\x39\x34\x38\xfa\x07" + "\xec\xe8\xfc\x32\x85\x1d\xf7\x85" + "\x63\xc3\x3c\xc0\x02\x75\xd7\x3f" + "\xb2\x68\x60\x66\x65\x81\xc6\xb1" + "\x42\x65\x4b\x4b\x28\xd7\xc7\xaa" + "\x9b\xd2\xdc\x1b\x01\xe0\x26\x39" + "\x01\xc1\x52\x14\xd1\x3f\xb7\xe6" + "\x61\x41\xc7\x93\xd2\xa2\x67\xc6" + "\xf7\x11\xb5\xf5\xea\xdd\x19\xfb" + "\x4d\x21\x12\xd6\x7d\xf1\x10\xb0" + "\x89\x07\xc7\x5a\x52\x73\x70\x2f" + "\x32\xef\x65\x2b\x12\xb2\xf0\xf5" + "\x20\xe0\x90\x59\x7e\x64\xf1\x4c" + "\x41\xb3\xa5\x91\x08\xe6\x5e\x5f" + "\x05\x56\x76\xb4\xb0\xcd\x70\x53" + "\x10\x48\x9c\xff\xc2\x69\x55\x24" + "\x87\xef\x84\xea\xfb\xa7\xbf\xa0" + "\x91\x04\xad\x4f\x8b\x57\x54\x4b" + "\xb6\xe9\xd1\xac\x37\x2f\x1d\x2e" + "\xab\xa5\xa4\xe8\xff\xfb\xd9\x39" + "\x2f\xb7\xac\xd1\xfe\x0b\x9a\x80" + "\x0f\xb6\xf4\x36\x39\x90\x51\xe3" + "\x0a\x2f\xb6\x45\x76\x89\xcd\x61" + "\xfe\x48\x5f\x75\x1d\x13\x00\x62" + "\x80\x24\x47\xe7\xbc\x37\xd7\xe3" + "\x15\xe8\x68\x22\xaf\x80\x6f\x4b" + "\xa8\x9f\x01\x10\x48\x14\xc3\x02" + "\x52\xd2\xc7\x75\x9b\x52\x6d\x30" + "\xac\x13\x85\xc8\xf7\xa3\x58\x4b" + "\x49\xf7\x1c\x45\x55\x8c\x39\x9a" + "\x99\x6d\x97\x27\x27\xe6\xab\xdd" + "\x2c\x42\x1b\x35\xdd\x9d\x73\xbb" + "\x6c\xf3\x64\xf1\xfb\xb9\xf7\xe6" + "\x4a\x3c\xc0\x92\xc0\x2e\xb7\x1a" + "\xbe\xab\xb3\x5a\xe5\xea\xb1\x48" + "\x58\x13\x53\x90\xfd\xc3\x8e\x54" + "\xf9\x18\x16\x73\xe8\xcb\x6d\x39" + "\x0e\xd7\xe0\xfe\xb6\x9f\x43\x97" + "\xe8\xd0\x85\x56\x83\x3e\x98\x68" + "\x7f\xbd\x95\xa8\x9a\x61\x21\x8f" + "\x06\x98\x34\xa6\xc8\xd6\x1d\xf3" + "\x3d\x43\xa4\x9a\x8c\xe5\xd3\x5a" + "\x32\xa2\x04\x22\xa4\x19\x1a\x46" + "\x42\x7e\x4d\xe5\xe0\xe6\x0e\xca" + "\xd5\x58\x9d\x2c\xaf\xda\x33\x5c" + "\xb0\x79\x9e\xc9\xfc\xca\xf0\x2f" + "\xa8\xb2\x77\xeb\x7a\xa2\xdd\x37" + "\x35\x83\x07\xd6\x02\x1a\xb6\x6c" + "\x24\xe2\x59\x08\x0e\xfd\x3e\x46" + "\xec\x40\x93\xf4\x00\x26\x4f\x2a" + "\xff\x47\x2f\xeb\x02\x92\x26\x5b" + "\x53\x17\xc2\x8d\x2a\xc7\xa3\x1b" + "\xcd\xbc\xa7\xe8\xd1\x76\xe3\x80" + "\x21\xca\x5d\x3b\xe4\x9c\x8f\xa9" + "\x5b\x7f\x29\x7f\x7c\xd8\xed\x6d" + "\x8c\xb2\x86\x85\xe7\x77\xf2\x85" + "\xab\x38\xa9\x9d\xc1\x4e\xc5\x64" + "\x33\x73\x8b\x59\x03\xad\x05\xdf" + "\x25\x98\x31\xde\xef\x13\xf1\x9b" + "\x3c\x91\x9d\x7b\xb1\xfa\xe6\xbf" + "\x5b\xed\xa5\x55\xe6\xea\x6c\x74" + "\xf4\xb9\xe4\x45\x64\x72\x81\xc2" + "\x4c\x28\xd4\xcd\xac\xe2\xde\xf9" + "\xeb\x5c\xeb\x61\x60\x5a\xe5\x28", + .ksize = 1088, + .plaintext = "", + .psize = 0, + .digest = "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00", + }, { + .key = "\x29\x21\x43\xcb\xcb\x13\x07\xde" + "\xbf\x48\xdf\x8a\x7f\xa2\x84\xde" + "\x72\x23\x9d\xf5\xf0\x07\xf2\x4c" + "\x20\x3a\x93\xb9\xcd\x5d\xfe\xcb" + "\x99\x2c\x2b\x58\xc6\x50\x5f\x94" + "\x56\xc3\x7c\x0d\x02\x3f\xb8\x5e" + "\x7b\xc0\x6c\x51\x34\x76\xc0\x0e" + "\xc6\x22\xc8\x9e\x92\xa0\x21\xc9" + "\x85\x5c\x7c\xf8\xe2\x64\x47\xc9" + "\xe4\xa2\x57\x93\xf8\xa2\x69\xcd" + "\x62\x98\x99\xf4\xd7\x7b\x14\xb1" + "\xd8\x05\xff\x04\x15\xc9\xe1\x6e" + "\x9b\xe6\x50\x6b\x0b\x3f\x22\x1f" + "\x08\xde\x0c\x5b\x08\x7e\xc6\x2f" + "\x6c\xed\xd6\xb2\x15\xa4\xb3\xf9" + "\xa7\x46\x38\x2a\xea\x69\xa5\xde" + "\x02\xc3\x96\x89\x4d\x55\x3b\xed" + "\x3d\x3a\x85\x77\xbf\x97\x45\x5c" + "\x9e\x02\x69\xe2\x1b\x68\xbe\x96" + "\xfb\x64\x6f\x0f\xf6\x06\x40\x67" + "\xfa\x04\xe3\x55\xfa\xbe\xa4\x60" + "\xef\x21\x66\x97\xe6\x9d\x5c\x1f" + "\x62\x37\xaa\x31\xde\xe4\x9c\x28" + "\x95\xe0\x22\x86\xf4\x4d\xf3\x07" + "\xfd\x5f\x3a\x54\x2c\x51\x80\x71" + "\xba\x78\x69\x5b\x65\xab\x1f\x81" + "\xed\x3b\xff\x34\xa3\xfb\xbc\x73" + "\x66\x7d\x13\x7f\xdf\x6e\xe2\xe2" + "\xeb\x4f\x6c\xda\x7d\x33\x57\xd0" + "\xd3\x7c\x95\x4f\x33\x58\x21\xc7" + "\xc0\xe5\x6f\x42\x26\xc6\x1f\x5e" + "\x85\x1b\x98\x9a\xa2\x1e\x55\x77" + "\x23\xdf\x81\x5e\x79\x55\x05\xfc" + "\xfb\xda\xee\xba\x5a\xba\xf7\x77" + "\x7f\x0e\xd3\xe1\x37\xfe\x8d\x2b" + "\xd5\x3f\xfb\xd0\xc0\x3c\x0b\x3f" + "\xcf\x3c\x14\xcf\xfb\x46\x72\x4c" + "\x1f\x39\xe2\xda\x03\x71\x6d\x23" + "\xef\x93\xcd\x39\xd9\x37\x80\x4d" + "\x65\x61\xd1\x2c\x03\xa9\x47\x72" + "\x4d\x1e\x0e\x16\x33\x0f\x21\x17" + "\xec\x92\xea\x6f\x37\x22\xa4\xd8" + "\x03\x33\x9e\xd8\x03\x69\x9a\xe8" + "\xb2\x57\xaf\x78\x99\x05\x12\xab" + "\x48\x90\x80\xf0\x12\x9b\x20\x64" + "\x7a\x1d\x47\x5f\xba\x3c\xf9\xc3" + "\x0a\x0d\x8d\xa1\xf9\x1b\x82\x13" + "\x3e\x0d\xec\x0a\x83\xc0\x65\xe1" + "\xe9\x95\xff\x97\xd6\xf2\xe4\xd5" + "\x86\xc0\x1f\x29\x27\x63\xd7\xde" + "\xb7\x0a\x07\x99\x04\x2d\xa3\x89" + "\xa2\x43\xcf\xf3\xe1\x43\xac\x4a" + "\x06\x97\xd0\x05\x4f\x87\xfa\xf9" + "\x9b\xbf\x52\x70\xbd\xbc\x6c\xf3" + "\x03\x13\x60\x41\x28\x09\xec\xcc" + "\xb1\x1a\xec\xd6\xfb\x6f\x2a\x89" + "\x5d\x0b\x53\x9c\x59\xc1\x84\x21" + "\x33\x51\x47\x19\x31\x9c\xd4\x0a" + "\x4d\x04\xec\x50\x90\x61\xbd\xbc" + "\x7e\xc8\xd9\x6c\x98\x1d\x45\x41" + "\x17\x5e\x97\x1c\xc5\xa8\xe8\xea" + "\x46\x58\x53\xf7\x17\xd5\xad\x11" + "\xc8\x54\xf5\x7a\x33\x90\xf5\x19" + "\xba\x36\xb4\xfc\x52\xa5\x72\x3d" + "\x14\xbb\x55\xa7\xe9\xe3\x12\xf7" + "\x1c\x30\xa2\x82\x03\xbf\x53\x91" + "\x2e\x60\x41\x9f\x5b\x69\x39\xf6" + "\x4d\xc8\xf8\x46\x7a\x7f\xa4\x98" + "\x36\xff\x06\xcb\xca\xe7\x33\xf2" + "\xc0\x4a\xf4\x3c\x14\x44\x5f\x6b" + "\x75\xef\x02\x36\x75\x08\x14\xfd" + "\x10\x8e\xa5\x58\xd0\x30\x46\x49" + "\xaf\x3a\xf8\x40\x3d\x35\xdb\x84" + "\x11\x2e\x97\x6a\xb7\x87\x7f\xad" + "\xf1\xfa\xa5\x63\x60\xd8\x5e\xbf" + "\x41\x78\x49\xcf\x77\xbb\x56\xbb" + "\x7d\x01\x67\x05\x22\xc8\x8f\x41" + "\xba\x81\xd2\xca\x2c\x38\xac\x76" + "\x06\xc1\x1a\xc2\xce\xac\x90\x67" + "\x57\x3e\x20\x12\x5b\xd9\x97\x58" + "\x65\x05\xb7\x04\x61\x7e\xd8\x3a" + "\xbf\x55\x3b\x13\xe9\x34\x5a\x37" + "\x36\xcb\x94\x45\xc5\x32\xb3\xa0" + "\x0c\x3e\x49\xc5\xd3\xed\xa7\xf0" + "\x1c\x69\xcc\xea\xcc\x83\xc9\x16" + "\x95\x72\x4b\xf4\x89\xd5\xb9\x10" + "\xf6\x2d\x60\x15\xea\x3c\x06\x66" + "\x9f\x82\xad\x17\xce\xd2\xa4\x48" + "\x7c\x65\xd9\xf8\x02\x4d\x9b\x4c" + "\x89\x06\x3a\x34\x85\x48\x89\x86" + "\xf9\x24\xa9\x54\x72\xdb\x44\x95" + "\xc7\x44\x1c\x19\x11\x4c\x04\xdc" + "\x13\xb9\x67\xc8\xc3\x3a\x6a\x50" + "\xfa\xd1\xfb\xe1\x88\xb6\xf1\xa3" + "\xc5\x3b\xdc\x38\x45\x16\x26\x02" + "\x3b\xb8\x8f\x8b\x58\x7d\x23\x04" + "\x50\x6b\x81\x9f\xae\x66\xac\x6f" + "\xcf\x2a\x9d\xf1\xfd\x1d\x57\x07" + "\xbe\x58\xeb\x77\x0c\xe3\xc2\x19" + "\x14\x74\x1b\x51\x1c\x4f\x41\xf3" + "\x32\x89\xb3\xe7\xde\x62\xf6\x5f" + "\xc7\x6a\x4a\x2a\x5b\x0f\x5f\x87" + "\x9c\x08\xb9\x02\x88\xc8\x29\xb7" + "\x94\x52\xfa\x52\xfe\xaa\x50\x10" + "\xba\x48\x75\x5e\x11\x1b\xe6\x39" + "\xd7\x82\x2c\x87\xf1\x1e\xa4\x38" + "\x72\x3e\x51\xe7\xd8\x3e\x5b\x7b" + "\x31\x16\x89\xba\xd6\xad\x18\x5e" + "\xba\xf8\x12\xb3\xf4\x6c\x47\x30" + "\xc0\x38\x58\xb3\x10\x8d\x58\x5d" + "\xb4\xfb\x19\x7e\x41\xc3\x66\xb8" + "\xd6\x72\x84\xe1\x1a\xc2\x71\x4c" + "\x0d\x4a\x21\x7a\xab\xa2\xc0\x36" + "\x15\xc5\xe9\x46\xd7\x29\x17\x76" + "\x5e\x47\x36\x7f\x72\x05\xa7\xcc" + "\x36\x63\xf9\x47\x7d\xe6\x07\x3c" + "\x8b\x79\x1d\x96\x61\x8d\x90\x65" + "\x7c\xf5\xeb\x4e\x6e\x09\x59\x6d" + "\x62\x50\x1b\x0f\xe0\xdc\x78\xf2" + "\x5b\x83\x1a\xa1\x11\x75\xfd\x18" + "\xd7\xe2\x8d\x65\x14\x21\xce\xbe" + "\xb5\x87\xe3\x0a\xda\x24\x0a\x64" + "\xa9\x9f\x03\x8d\x46\x5d\x24\x1a" + "\x8a\x0c\x42\x01\xca\xb1\x5f\x7c" + "\xa5\xac\x32\x4a\xb8\x07\x91\x18" + "\x6f\xb0\x71\x3c\xc9\xb1\xa8\xf8" + "\x5f\x69\xa5\xa1\xca\x9e\x7a\xaa" + "\xac\xe9\xc7\x47\x41\x75\x25\xc3" + "\x73\xe2\x0b\xdd\x6d\x52\x71\xbe" + "\xc5\xdc\xb4\xe7\x01\x26\x53\x77" + "\x86\x90\x85\x68\x6b\x7b\x03\x53" + "\xda\x52\x52\x51\x68\xc8\xf3\xec" + "\x6c\xd5\x03\x7a\xa3\x0e\xb4\x02" + "\x5f\x1a\xab\xee\xca\x67\x29\x7b" + "\xbd\x96\x59\xb3\x8b\x32\x7a\x92" + "\x9f\xd8\x25\x2b\xdf\xc0\x4c\xda", + .ksize = 1088, + .plaintext = "\xbc\xda\x81\xa8\x78\x79\x1c\xbf" + "\x77\x53\xba\x4c\x30\x5b\xb8\x33", + .psize = 16, + .digest = "\x04\xbf\x7f\x6a\xce\x72\xea\x6a" + "\x79\xdb\xb0\xc9\x60\xf6\x12\xcc", + .np = 6, + .tap = { 4, 4, 1, 1, 1, 5 }, + }, { + .key = "\x65\x4d\xe3\xf8\xd2\x4c\xac\x28" + "\x68\xf5\xb3\x81\x71\x4b\xa1\xfa" + "\x04\x0e\xd3\x81\x36\xbe\x0c\x81" + "\x5e\xaf\xbc\x3a\xa4\xc0\x8e\x8b" + "\x55\x63\xd3\x52\x97\x88\xd6\x19" + "\xbc\x96\xdf\x49\xff\x04\x63\xf5" + "\x0c\x11\x13\xaa\x9e\x1f\x5a\xf7" + "\xdd\xbd\x37\x80\xc3\xd0\xbe\xa7" + "\x05\xc8\x3c\x98\x1e\x05\x3c\x84" + "\x39\x61\xc4\xed\xed\x71\x1b\xc4" + "\x74\x45\x2c\xa1\x56\x70\x97\xfd" + "\x44\x18\x07\x7d\xca\x60\x1f\x73" + "\x3b\x6d\x21\xcb\x61\x87\x70\x25" + "\x46\x21\xf1\x1f\x21\x91\x31\x2d" + "\x5d\xcc\xb7\xd1\x84\x3e\x3d\xdb" + "\x03\x53\x2a\x82\xa6\x9a\x95\xbc" + "\x1a\x1e\x0a\x5e\x07\x43\xab\x43" + "\xaf\x92\x82\x06\x91\x04\x09\xf4" + "\x17\x0a\x9a\x2c\x54\xdb\xb8\xf4" + "\xd0\xf0\x10\x66\x24\x8d\xcd\xda" + "\xfe\x0e\x45\x9d\x6f\xc4\x4e\xf4" + "\x96\xaf\x13\xdc\xa9\xd4\x8c\xc4" + "\xc8\x57\x39\x3c\xc2\xd3\x0a\x76" + "\x4a\x1f\x75\x83\x44\xc7\xd1\x39" + "\xd8\xb5\x41\xba\x73\x87\xfa\x96" + "\xc7\x18\x53\xfb\x9b\xda\xa0\x97" + "\x1d\xee\x60\x85\x9e\x14\xc3\xce" + "\xc4\x05\x29\x3b\x95\x30\xa3\xd1" + "\x9f\x82\x6a\x04\xf5\xa7\x75\x57" + "\x82\x04\xfe\x71\x51\x71\xb1\x49" + "\x50\xf8\xe0\x96\xf1\xfa\xa8\x88" + "\x3f\xa0\x86\x20\xd4\x60\x79\x59" + "\x17\x2d\xd1\x09\xf4\xec\x05\x57" + "\xcf\x62\x7e\x0e\x7e\x60\x78\xe6" + "\x08\x60\x29\xd8\xd5\x08\x1a\x24" + "\xc4\x6c\x24\xe7\x92\x08\x3d\x8a" + "\x98\x7a\xcf\x99\x0a\x65\x0e\xdc" + "\x8c\x8a\xbe\x92\x82\x91\xcc\x62" + "\x30\xb6\xf4\x3f\xc6\x8a\x7f\x12" + "\x4a\x8a\x49\xfa\x3f\x5c\xd4\x5a" + "\xa6\x82\xa3\xe6\xaa\x34\x76\xb2" + "\xab\x0a\x30\xef\x6c\x77\x58\x3f" + "\x05\x6b\xcc\x5c\xae\xdc\xd7\xb9" + "\x51\x7e\x8d\x32\x5b\x24\x25\xbe" + "\x2b\x24\x01\xcf\x80\xda\x16\xd8" + "\x90\x72\x2c\xad\x34\x8d\x0c\x74" + "\x02\xcb\xfd\xcf\x6e\xef\x97\xb5" + "\x4c\xf2\x68\xca\xde\x43\x9e\x8a" + "\xc5\x5f\x31\x7f\x14\x71\x38\xec" + "\xbd\x98\xe5\x71\xc4\xb5\xdb\xef" + "\x59\xd2\xca\xc0\xc1\x86\x75\x01" + "\xd4\x15\x0d\x6f\xa4\xf7\x7b\x37" + "\x47\xda\x18\x93\x63\xda\xbe\x9e" + "\x07\xfb\xb2\x83\xd5\xc4\x34\x55" + "\xee\x73\xa1\x42\x96\xf9\x66\x41" + "\xa4\xcc\xd2\x93\x6e\xe1\x0a\xbb" + "\xd2\xdd\x18\x23\xe6\x6b\x98\x0b" + "\x8a\x83\x59\x2c\xc3\xa6\x59\x5b" + "\x01\x22\x59\xf7\xdc\xb0\x87\x7e" + "\xdb\x7d\xf4\x71\x41\xab\xbd\xee" + "\x79\xbe\x3c\x01\x76\x0b\x2d\x0a" + "\x42\xc9\x77\x8c\xbb\x54\x95\x60" + "\x43\x2e\xe0\x17\x52\xbd\x90\xc9" + "\xc2\x2c\xdd\x90\x24\x22\x76\x40" + "\x5c\xb9\x41\xc9\xa1\xd5\xbd\xe3" + "\x44\xe0\xa4\xab\xcc\xb8\xe2\x32" + "\x02\x15\x04\x1f\x8c\xec\x5d\x14" + "\xac\x18\xaa\xef\x6e\x33\x19\x6e" + "\xde\xfe\x19\xdb\xeb\x61\xca\x18" + "\xad\xd8\x3d\xbf\x09\x11\xc7\xa5" + "\x86\x0b\x0f\xe5\x3e\xde\xe8\xd9" + "\x0a\x69\x9e\x4c\x20\xff\xf9\xc5" + "\xfa\xf8\xf3\x7f\xa5\x01\x4b\x5e" + "\x0f\xf0\x3b\x68\xf0\x46\x8c\x2a" + "\x7a\xc1\x8f\xa0\xfe\x6a\x5b\x44" + "\x70\x5c\xcc\x92\x2c\x6f\x0f\xbd" + "\x25\x3e\xb7\x8e\x73\x58\xda\xc9" + "\xa5\xaa\x9e\xf3\x9b\xfd\x37\x3e" + "\xe2\x88\xa4\x7b\xc8\x5c\xa8\x93" + "\x0e\xe7\x9a\x9c\x2e\x95\x18\x9f" + "\xc8\x45\x0c\x88\x9e\x53\x4f\x3a" + "\x76\xc1\x35\xfa\x17\xd8\xac\xa0" + "\x0c\x2d\x47\x2e\x4f\x69\x9b\xf7" + "\xd0\xb6\x96\x0c\x19\xb3\x08\x01" + "\x65\x7a\x1f\xc7\x31\x86\xdb\xc8" + "\xc1\x99\x8f\xf8\x08\x4a\x9d\x23" + "\x22\xa8\xcf\x27\x01\x01\x88\x93" + "\x9c\x86\x45\xbd\xe0\x51\xca\x52" + "\x84\xba\xfe\x03\xf7\xda\xc5\xce" + "\x3e\x77\x75\x86\xaf\x84\xc8\x05" + "\x44\x01\x0f\x02\xf3\x58\xb0\x06" + "\x5a\xd7\x12\x30\x8d\xdf\x1f\x1f" + "\x0a\xe6\xd2\xea\xf6\x3a\x7a\x99" + "\x63\xe8\xd2\xc1\x4a\x45\x8b\x40" + "\x4d\x0a\xa9\x76\x92\xb3\xda\x87" + "\x36\x33\xf0\x78\xc3\x2f\x5f\x02" + "\x1a\x6a\x2c\x32\xcd\x76\xbf\xbd" + "\x5a\x26\x20\x28\x8c\x8c\xbc\x52" + "\x3d\x0a\xc9\xcb\xab\xa4\x21\xb0" + "\x54\x40\x81\x44\xc7\xd6\x1c\x11" + "\x44\xc6\x02\x92\x14\x5a\xbf\x1a" + "\x09\x8a\x18\xad\xcd\x64\x3d\x53" + "\x4a\xb6\xa5\x1b\x57\x0e\xef\xe0" + "\x8c\x44\x5f\x7d\xbd\x6c\xfd\x60" + "\xae\x02\x24\xb6\x99\xdd\x8c\xaf" + "\x59\x39\x75\x3c\xd1\x54\x7b\x86" + "\xcc\x99\xd9\x28\x0c\xb0\x94\x62" + "\xf9\x51\xd1\x19\x96\x2d\x66\xf5" + "\x55\xcf\x9e\x59\xe2\x6b\x2c\x08" + "\xc0\x54\x48\x24\x45\xc3\x8c\x73" + "\xea\x27\x6e\x66\x7d\x1d\x0e\x6e" + "\x13\xe8\x56\x65\x3a\xb0\x81\x5c" + "\xf0\xe8\xd8\x00\x6b\xcd\x8f\xad" + "\xdd\x53\xf3\xa4\x6c\x43\xd6\x31" + "\xaf\xd2\x76\x1e\x91\x12\xdb\x3c" + "\x8c\xc2\x81\xf0\x49\xdb\xe2\x6b" + "\x76\x62\x0a\x04\xe4\xaa\x8a\x7c" + "\x08\x0b\x5d\xd0\xee\x1d\xfb\xc4" + "\x02\x75\x42\xd6\xba\xa7\x22\xa8" + "\x47\x29\xb7\x85\x6d\x93\x3a\xdb" + "\x00\x53\x0b\xa2\xeb\xf8\xfe\x01" + "\x6f\x8a\x31\xd6\x17\x05\x6f\x67" + "\x88\x95\x32\xfe\x4f\xa6\x4b\xf8" + "\x03\xe4\xcd\x9a\x18\xe8\x4e\x2d" + "\xf7\x97\x9a\x0c\x7d\x9f\x7e\x44" + "\x69\x51\xe0\x32\x6b\x62\x86\x8f" + "\xa6\x8e\x0b\x21\x96\xe5\xaf\x77" + "\xc0\x83\xdf\xa5\x0e\xd0\xa1\x04" + "\xaf\xc1\x10\xcb\x5a\x40\xe4\xe3" + "\x38\x7e\x07\xe8\x4d\xfa\xed\xc5" + "\xf0\x37\xdf\xbb\x8a\xcf\x3d\xdc" + "\x61\xd2\xc6\x2b\xff\x07\xc9\x2f" + "\x0c\x2d\x5c\x07\xa8\x35\x6a\xfc" + "\xae\x09\x03\x45\x74\x51\x4d\xc4" + "\xb8\x23\x87\x4a\x99\x27\x20\x87" + "\x62\x44\x0a\x4a\xce\x78\x47\x22", + .ksize = 1088, + .plaintext = "\x8e\xb0\x4c\xde\x9c\x4a\x04\x5a" + "\xf6\xa9\x7f\x45\x25\xa5\x7b\x3a" + "\xbc\x4d\x73\x39\x81\xb5\xbd\x3d" + "\x21\x6f\xd7\x37\x50\x3c\x7b\x28" + "\xd1\x03\x3a\x17\xed\x7b\x7c\x2a" + "\x16\xbc\xdf\x19\x89\x52\x71\x31" + "\xb6\xc0\xfd\xb5\xd3\xba\x96\x99" + "\xb6\x34\x0b\xd0\x99\x93\xfc\x1a" + "\x01\x3c\x85\xc6\x9b\x78\x5c\x8b" + "\xfe\xae\xd2\xbf\xb2\x6f\xf9\xed" + "\xc8\x25\x17\xfe\x10\x3b\x7d\xda" + "\xf4\x8d\x35\x4b\x7c\x7b\x82\xe7" + "\xc2\xb3\xee\x60\x4a\x03\x86\xc9" + "\x4e\xb5\xc4\xbe\xd2\xbd\x66\xf1" + "\x13\xf1\x09\xab\x5d\xca\x63\x1f" + "\xfc\xfb\x57\x2a\xfc\xca\x66\xd8" + "\x77\x84\x38\x23\x1d\xac\xd3\xb3" + "\x7a\xad\x4c\x70\xfa\x9c\xc9\x61" + "\xa6\x1b\xba\x33\x4b\x4e\x33\xec" + "\xa0\xa1\x64\x39\x40\x05\x1c\xc2" + "\x3f\x49\x9d\xae\xf2\xc5\xf2\xc5" + "\xfe\xe8\xf4\xc2\xf9\x96\x2d\x28" + "\x92\x30\x44\xbc\xd2\x7f\xe1\x6e" + "\x62\x02\x8f\x3d\x1c\x80\xda\x0e" + "\x6a\x90\x7e\x75\xff\xec\x3e\xc4" + "\xcd\x16\x34\x3b\x05\x6d\x4d\x20" + "\x1c\x7b\xf5\x57\x4f\xfa\x3d\xac" + "\xd0\x13\x55\xe8\xb3\xe1\x1b\x78" + "\x30\xe6\x9f\x84\xd4\x69\xd1\x08" + "\x12\x77\xa7\x4a\xbd\xc0\xf2\xd2" + "\x78\xdd\xa3\x81\x12\xcb\x6c\x14" + "\x90\x61\xe2\x84\xc6\x2b\x16\xcc" + "\x40\x99\x50\x88\x01\x09\x64\x4f" + "\x0a\x80\xbe\x61\xae\x46\xc9\x0a" + "\x5d\xe0\xfb\x72\x7a\x1a\xdd\x61" + "\x63\x20\x05\xa0\x4a\xf0\x60\x69" + "\x7f\x92\xbc\xbf\x4e\x39\x4d\xdd" + "\x74\xd1\xb7\xc0\x5a\x34\xb7\xae" + "\x76\x65\x2e\xbc\x36\xb9\x04\x95" + "\x42\xe9\x6f\xca\x78\xb3\x72\x07" + "\xa3\xba\x02\x94\x67\x4c\xb1\xd7" + "\xe9\x30\x0d\xf0\x3b\xb8\x10\x6d" + "\xea\x2b\x21\xbf\x74\x59\x82\x97" + "\x85\xaa\xf1\xd7\x54\x39\xeb\x05" + "\xbd\xf3\x40\xa0\x97\xe6\x74\xfe" + "\xb4\x82\x5b\xb1\x36\xcb\xe8\x0d" + "\xce\x14\xd9\xdf\xf1\x94\x22\xcd" + "\xd6\x00\xba\x04\x4c\x05\x0c\xc0" + "\xd1\x5a\xeb\x52\xd5\xa8\x8e\xc8" + "\x97\xa1\xaa\xc1\xea\xc1\xbe\x7c" + "\x36\xb3\x36\xa0\xc6\x76\x66\xc5" + "\xe2\xaf\xd6\x5c\xe2\xdb\x2c\xb3" + "\x6c\xb9\x99\x7f\xff\x9f\x03\x24" + "\xe1\x51\x44\x66\xd8\x0c\x5d\x7f" + "\x5c\x85\x22\x2a\xcf\x6d\x79\x28" + "\xab\x98\x01\x72\xfe\x80\x87\x5f" + "\x46\xba\xef\x81\x24\xee\xbf\xb0" + "\x24\x74\xa3\x65\x97\x12\xc4\xaf" + "\x8b\xa0\x39\xda\x8a\x7e\x74\x6e" + "\x1b\x42\xb4\x44\x37\xfc\x59\xfd" + "\x86\xed\xfb\x8c\x66\x33\xda\x63" + "\x75\xeb\xe1\xa4\x85\x4f\x50\x8f" + "\x83\x66\x0d\xd3\x37\xfa\xe6\x9c" + "\x4f\x30\x87\x35\x18\xe3\x0b\xb7" + "\x6e\x64\x54\xcd\x70\xb3\xde\x54" + "\xb7\x1d\xe6\x4c\x4d\x55\x12\x12" + "\xaf\x5f\x7f\x5e\xee\x9d\xe8\x8e" + "\x32\x9d\x4e\x75\xeb\xc6\xdd\xaa" + "\x48\x82\xa4\x3f\x3c\xd7\xd3\xa8" + "\x63\x9e\x64\xfe\xe3\x97\x00\x62" + "\xe5\x40\x5d\xc3\xad\x72\xe1\x28" + "\x18\x50\xb7\x75\xef\xcd\x23\xbf" + "\x3f\xc0\x51\x36\xf8\x41\xc3\x08" + "\xcb\xf1\x8d\x38\x34\xbd\x48\x45" + "\x75\xed\xbc\x65\x7b\xb5\x0c\x9b" + "\xd7\x67\x7d\x27\xb4\xc4\x80\xd7" + "\xa9\xb9\xc7\x4a\x97\xaa\xda\xc8" + "\x3c\x74\xcf\x36\x8f\xe4\x41\xe3" + "\xd4\xd3\x26\xa7\xf3\x23\x9d\x8f" + "\x6c\x20\x05\x32\x3e\xe0\xc3\xc8" + "\x56\x3f\xa7\x09\xb7\xfb\xc7\xf7" + "\xbe\x2a\xdd\x0f\x06\x7b\x0d\xdd" + "\xb0\xb4\x86\x17\xfd\xb9\x04\xe5" + "\xc0\x64\x5d\xad\x2a\x36\x38\xdb" + "\x24\xaf\x5b\xff\xca\xf9\x41\xe8" + "\xf9\x2f\x1e\x5e\xf9\xf5\xd5\xf2" + "\xb2\x88\xca\xc9\xa1\x31\xe2\xe8" + "\x10\x95\x65\xbf\xf1\x11\x61\x7a" + "\x30\x1a\x54\x90\xea\xd2\x30\xf6" + "\xa5\xad\x60\xf9\x4d\x84\x21\x1b" + "\xe4\x42\x22\xc8\x12\x4b\xb0\x58" + "\x3e\x9c\x2d\x32\x95\x0a\x8e\xb0" + "\x0a\x7e\x77\x2f\xe8\x97\x31\x6a" + "\xf5\x59\xb4\x26\xe6\x37\x12\xc9" + "\xcb\xa0\x58\x33\x6f\xd5\x55\x55" + "\x3c\xa1\x33\xb1\x0b\x7e\x2e\xb4" + "\x43\x2a\x84\x39\xf0\x9c\xf4\x69" + "\x4f\x1e\x79\xa6\x15\x1b\x87\xbb" + "\xdb\x9b\xe0\xf1\x0b\xba\xe3\x6e" + "\xcc\x2f\x49\x19\x22\x29\xfc\x71" + "\xbb\x77\x38\x18\x61\xaf\x85\x76" + "\xeb\xd1\x09\xcc\x86\x04\x20\x9a" + "\x66\x53\x2f\x44\x8b\xc6\xa3\xd2" + "\x5f\xc7\x79\x82\x66\xa8\x6e\x75" + "\x7d\x94\xd1\x86\x75\x0f\xa5\x4f" + "\x3c\x7a\x33\xce\xd1\x6e\x9d\x7b" + "\x1f\x91\x37\xb8\x37\x80\xfb\xe0" + "\x52\x26\xd0\x9a\xd4\x48\x02\x41" + "\x05\xe3\x5a\x94\xf1\x65\x61\x19" + "\xb8\x88\x4e\x2b\xea\xba\x8b\x58" + "\x8b\x42\x01\x00\xa8\xfe\x00\x5c" + "\xfe\x1c\xee\x31\x15\x69\xfa\xb3" + "\x9b\x5f\x22\x8e\x0d\x2c\xe3\xa5" + "\x21\xb9\x99\x8a\x8e\x94\x5a\xef" + "\x13\x3e\x99\x96\x79\x6e\xd5\x42" + "\x36\x03\xa9\xe2\xca\x65\x4e\x8a" + "\x8a\x30\xd2\x7d\x74\xe7\xf0\xaa" + "\x23\x26\xdd\xcb\x82\x39\xfc\x9d" + "\x51\x76\x21\x80\xa2\xbe\x93\x03" + "\x47\xb0\xc1\xb6\xdc\x63\xfd\x9f" + "\xca\x9d\xa5\xca\x27\x85\xe2\xd8" + "\x15\x5b\x7e\x14\x7a\xc4\x89\xcc" + "\x74\x14\x4b\x46\xd2\xce\xac\x39" + "\x6b\x6a\x5a\xa4\x0e\xe3\x7b\x15" + "\x94\x4b\x0f\x74\xcb\x0c\x7f\xa9" + "\xbe\x09\x39\xa3\xdd\x56\x5c\xc7" + "\x99\x56\x65\x39\xf4\x0b\x7d\x87" + "\xec\xaa\xe3\x4d\x22\x65\x39\x4e", + .psize = 1024, + .digest = "\x64\x3a\xbc\xc3\x3f\x74\x40\x51" + "\x6e\x56\x01\x1a\x51\xec\x36\xde", + .np = 8, + .tap = { 64, 203, 267, 28, 263, 62, 54, 83 }, + }, { + .key = "\x1b\x82\x2e\x1b\x17\x23\xb9\x6d" + "\xdc\x9c\xda\x99\x07\xe3\x5f\xd8" + "\xd2\xf8\x43\x80\x8d\x86\x7d\x80" + "\x1a\xd0\xcc\x13\xb9\x11\x05\x3f" + "\x7e\xcf\x7e\x80\x0e\xd8\x25\x48" + "\x8b\xaa\x63\x83\x92\xd0\x72\xf5" + "\x4f\x67\x7e\x50\x18\x25\xa4\xd1" + "\xe0\x7e\x1e\xba\xd8\xa7\x6e\xdb" + "\x1a\xcc\x0d\xfe\x9f\x6d\x22\x35" + "\xe1\xe6\xe0\xa8\x7b\x9c\xb1\x66" + "\xa3\xf8\xff\x4d\x90\x84\x28\xbc" + "\xdc\x19\xc7\x91\x49\xfc\xf6\x33" + "\xc9\x6e\x65\x7f\x28\x6f\x68\x2e" + "\xdf\x1a\x75\xe9\xc2\x0c\x96\xb9" + "\x31\x22\xc4\x07\xc6\x0a\x2f\xfd" + "\x36\x06\x5f\x5c\xc5\xb1\x3a\xf4" + "\x5e\x48\xa4\x45\x2b\x88\xa7\xee" + "\xa9\x8b\x52\xcc\x99\xd9\x2f\xb8" + "\xa4\x58\x0a\x13\xeb\x71\x5a\xfa" + "\xe5\x5e\xbe\xf2\x64\xad\x75\xbc" + "\x0b\x5b\x34\x13\x3b\x23\x13\x9a" + "\x69\x30\x1e\x9a\xb8\x03\xb8\x8b" + "\x3e\x46\x18\x6d\x38\xd9\xb3\xd8" + "\xbf\xf1\xd0\x28\xe6\x51\x57\x80" + "\x5e\x99\xfb\xd0\xce\x1e\x83\xf7" + "\xe9\x07\x5a\x63\xa9\xef\xce\xa5" + "\xfb\x3f\x37\x17\xfc\x0b\x37\x0e" + "\xbb\x4b\x21\x62\xb7\x83\x0e\xa9" + "\x9e\xb0\xc4\xad\x47\xbe\x35\xe7" + "\x51\xb2\xf2\xac\x2b\x65\x7b\x48" + "\xe3\x3f\x5f\xb6\x09\x04\x0c\x58" + "\xce\x99\xa9\x15\x2f\x4e\xc1\xf2" + "\x24\x48\xc0\xd8\x6c\xd3\x76\x17" + "\x83\x5d\xe6\xe3\xfd\x01\x8e\xf7" + "\x42\xa5\x04\x29\x30\xdf\xf9\x00" + "\x4a\xdc\x71\x22\x1a\x33\x15\xb6" + "\xd7\x72\xfb\x9a\xb8\xeb\x2b\x38" + "\xea\xa8\x61\xa8\x90\x11\x9d\x73" + "\x2e\x6c\xce\x81\x54\x5a\x9f\xcd" + "\xcf\xd5\xbd\x26\x5d\x66\xdb\xfb" + "\xdc\x1e\x7c\x10\xfe\x58\x82\x10" + "\x16\x24\x01\xce\x67\x55\x51\xd1" + "\xdd\x6b\x44\xa3\x20\x8e\xa9\xa6" + "\x06\xa8\x29\x77\x6e\x00\x38\x5b" + "\xde\x4d\x58\xd8\x1f\x34\xdf\xf9" + "\x2c\xac\x3e\xad\xfb\x92\x0d\x72" + "\x39\xa4\xac\x44\x10\xc0\x43\xc4" + "\xa4\x77\x3b\xfc\xc4\x0d\x37\xd3" + "\x05\x84\xda\x53\x71\xf8\x80\xd3" + "\x34\x44\xdb\x09\xb4\x2b\x8e\xe3" + "\x00\x75\x50\x9e\x43\x22\x00\x0b" + "\x7c\x70\xab\xd4\x41\xf1\x93\xcd" + "\x25\x2d\x84\x74\xb5\xf2\x92\xcd" + "\x0a\x28\xea\x9a\x49\x02\x96\xcb" + "\x85\x9e\x2f\x33\x03\x86\x1d\xdc" + "\x1d\x31\xd5\xfc\x9d\xaa\xc5\xe9" + "\x9a\xc4\x57\xf5\x35\xed\xf4\x4b" + "\x3d\x34\xc2\x29\x13\x86\x36\x42" + "\x5d\xbf\x90\x86\x13\x77\xe5\xc3" + "\x62\xb4\xfe\x0b\x70\x39\x35\x65" + "\x02\xea\xf6\xce\x57\x0c\xbb\x74" + "\x29\xe3\xfd\x60\x90\xfd\x10\x38" + "\xd5\x4e\x86\xbd\x37\x70\xf0\x97" + "\xa6\xab\x3b\x83\x64\x52\xca\x66" + "\x2f\xf9\xa4\xca\x3a\x55\x6b\xb0" + "\xe8\x3a\x34\xdb\x9e\x48\x50\x2f" + "\x3b\xef\xfd\x08\x2d\x5f\xc1\x37" + "\x5d\xbe\x73\xe4\xd8\xe9\xac\xca" + "\x8a\xaa\x48\x7c\x5c\xf4\xa6\x96" + "\x5f\xfa\x70\xa6\xb7\x8b\x50\xcb" + "\xa6\xf5\xa9\xbd\x7b\x75\x4c\x22" + "\x0b\x19\x40\x2e\xc9\x39\x39\x32" + "\x83\x03\xa8\xa4\x98\xe6\x8e\x16" + "\xb9\xde\x08\xc5\xfc\xbf\xad\x39" + "\xa8\xc7\x93\x6c\x6f\x23\xaf\xc1" + "\xab\xe1\xdf\xbb\x39\xae\x93\x29" + "\x0e\x7d\x80\x8d\x3e\x65\xf3\xfd" + "\x96\x06\x65\x90\xa1\x28\x64\x4b" + "\x69\xf9\xa8\x84\x27\x50\xfc\x87" + "\xf7\xbf\x55\x8e\x56\x13\x58\x7b" + "\x85\xb4\x6a\x72\x0f\x40\xf1\x4f" + "\x83\x81\x1f\x76\xde\x15\x64\x7a" + "\x7a\x80\xe4\xc7\x5e\x63\x01\x91" + "\xd7\x6b\xea\x0b\x9b\xa2\x99\x3b" + "\x6c\x88\xd8\xfd\x59\x3c\x8d\x22" + "\x86\x56\xbe\xab\xa1\x37\x08\x01" + "\x50\x85\x69\x29\xee\x9f\xdf\x21" + "\x3e\x20\x20\xf5\xb0\xbb\x6b\xd0" + "\x9c\x41\x38\xec\x54\x6f\x2d\xbd" + "\x0f\xe1\xbd\xf1\x2b\x6e\x60\x56" + "\x29\xe5\x7a\x70\x1c\xe2\xfc\x97" + "\x82\x68\x67\xd9\x3d\x1f\xfb\xd8" + "\x07\x9f\xbf\x96\x74\xba\x6a\x0e" + "\x10\x48\x20\xd8\x13\x1e\xb5\x44" + "\xf2\xcc\xb1\x8b\xfb\xbb\xec\xd7" + "\x37\x70\x1f\x7c\x55\xd2\x4b\xb9" + "\xfd\x70\x5e\xa3\x91\x73\x63\x52" + "\x13\x47\x5a\x06\xfb\x01\x67\xa5" + "\xc0\xd0\x49\x19\x56\x66\x9a\x77" + "\x64\xaf\x8c\x25\x91\x52\x87\x0e" + "\x18\xf3\x5f\x97\xfd\x71\x13\xf8" + "\x05\xa5\x39\xcc\x65\xd3\xcc\x63" + "\x5b\xdb\x5f\x7e\x5f\x6e\xad\xc4" + "\xf4\xa0\xc5\xc2\x2b\x4d\x97\x38" + "\x4f\xbc\xfa\x33\x17\xb4\x47\xb9" + "\x43\x24\x15\x8d\xd2\xed\x80\x68" + "\x84\xdb\x04\x80\xca\x5e\x6a\x35" + "\x2c\x2c\xe7\xc5\x03\x5f\x54\xb0" + "\x5e\x4f\x1d\x40\x54\x3d\x78\x9a" + "\xac\xda\x80\x27\x4d\x15\x4c\x1a" + "\x6e\x80\xc9\xc4\x3b\x84\x0e\xd9" + "\x2e\x93\x01\x8c\xc3\xc8\x91\x4b" + "\xb3\xaa\x07\x04\x68\x5b\x93\xa5" + "\xe7\xc4\x9d\xe7\x07\xee\xf5\x3b" + "\x40\x89\xcc\x60\x34\x9d\xb4\x06" + "\x1b\xef\x92\xe6\xc1\x2a\x7d\x0f" + "\x81\xaa\x56\xe3\xd7\xed\xa7\xd4" + "\xa7\x3a\x49\xc4\xad\x81\x5c\x83" + "\x55\x8e\x91\x54\xb7\x7d\x65\xa5" + "\x06\x16\xd5\x9a\x16\xc1\xb0\xa2" + "\x06\xd8\x98\x47\x73\x7e\x73\xa0" + "\xb8\x23\xb1\x52\xbf\x68\x74\x5d" + "\x0b\xcb\xfa\x8c\x46\xe3\x24\xe6" + "\xab\xd4\x69\x8d\x8c\xf2\x8a\x59" + "\xbe\x48\x46\x50\x8c\x9a\xe8\xe3" + "\x31\x55\x0a\x06\xed\x4f\xf8\xb7" + "\x4f\xe3\x85\x17\x30\xbd\xd5\x20" + "\xe7\x5b\xb2\x32\xcf\x6b\x16\x44" + "\xd2\xf5\x7e\xd7\xd1\x2f\xee\x64" + "\x3e\x9d\x10\xef\x27\x35\x43\x64" + "\x67\xfb\x7a\x7b\xe0\x62\x31\x9a" + "\x4d\xdf\xa5\xab\xc0\x20\xbb\x01" + "\xe9\x7b\x54\xf1\xde\xb2\x79\x50" + "\x6c\x4b\x91\xdb\x7f\xbb\x50\xc1" + "\x55\x44\x38\x9a\xe0\x9f\xe8\x29" + "\x6f\x15\xf8\x4e\xa6\xec\xa0\x60", + .ksize = 1088, + .plaintext = "\x15\x68\x9e\x2f\xad\x15\x52\xdf" + "\xf0\x42\x62\x24\x2a\x2d\xea\xbf" + "\xc7\xf3\xb4\x1a\xf5\xed\xb2\x08" + "\x15\x60\x1c\x00\x77\xbf\x0b\x0e" + "\xb7\x2c\xcf\x32\x3a\xc7\x01\x77" + "\xef\xa6\x75\xd0\x29\xc7\x68\x20" + "\xb2\x92\x25\xbf\x12\x34\xe9\xa4" + "\xfd\x32\x7b\x3f\x7c\xbd\xa5\x02" + "\x38\x41\xde\xc9\xc1\x09\xd9\xfc" + "\x6e\x78\x22\x83\x18\xf7\x50\x8d" + "\x8f\x9c\x2d\x02\xa5\x30\xac\xff" + "\xea\x63\x2e\x80\x37\x83\xb0\x58" + "\xda\x2f\xef\x21\x55\xba\x7b\xb1" + "\xb6\xed\xf5\xd2\x4d\xaa\x8c\xa9" + "\xdd\xdb\x0f\xb4\xce\xc1\x9a\xb1" + "\xc1\xdc\xbd\xab\x86\xc2\xdf\x0b" + "\xe1\x2c\xf9\xbe\xf6\xd8\xda\x62" + "\x72\xdd\x98\x09\x52\xc0\xc4\xb6" + "\x7b\x17\x5c\xf5\xd8\x4b\x88\xd6" + "\x6b\xbf\x84\x4a\x3f\xf5\x4d\xd2" + "\x94\xe2\x9c\xff\xc7\x3c\xd9\xc8" + "\x37\x38\xbc\x8c\xf3\xe7\xb7\xd0" + "\x1d\x78\xc4\x39\x07\xc8\x5e\x79" + "\xb6\x5a\x90\x5b\x6e\x97\xc9\xd4" + "\x82\x9c\xf3\x83\x7a\xe7\x97\xfc" + "\x1d\xbb\xef\xdb\xce\xe0\x82\xad" + "\xca\x07\x6c\x54\x62\x6f\x81\xe6" + "\x7a\x5a\x96\x6e\x80\x3a\xa2\x37" + "\x6f\xc6\xa4\x29\xc3\x9e\x19\x94" + "\x9f\xb0\x3e\x38\xfb\x3c\x2b\x7d" + "\xaa\xb8\x74\xda\x54\x23\x51\x12" + "\x4b\x96\x36\x8f\x91\x4f\x19\x37" + "\x83\xc9\xdd\xc7\x1a\x32\x2d\xab" + "\xc7\x89\xe2\x07\x47\x6c\xe8\xa6" + "\x70\x6b\x8e\x0c\xda\x5c\x6a\x59" + "\x27\x33\x0e\xe1\xe1\x20\xe8\xc8" + "\xae\xdc\xd0\xe3\x6d\xa8\xa6\x06" + "\x41\xb4\xd4\xd4\xcf\x91\x3e\x06" + "\xb0\x9a\xf7\xf1\xaa\xa6\x23\x92" + "\x10\x86\xf0\x94\xd1\x7c\x2e\x07" + "\x30\xfb\xc5\xd8\xf3\x12\xa9\xe8" + "\x22\x1c\x97\x1a\xad\x96\xb0\xa1" + "\x72\x6a\x6b\xb4\xfd\xf7\xe8\xfa" + "\xe2\x74\xd8\x65\x8d\x35\x17\x4b" + "\x00\x23\x5c\x8c\x70\xad\x71\xa2" + "\xca\xc5\x6c\x59\xbf\xb4\xc0\x6d" + "\x86\x98\x3e\x19\x5a\x90\x92\xb1" + "\x66\x57\x6a\x91\x68\x7c\xbc\xf3" + "\xf1\xdb\x94\xf8\x48\xf1\x36\xd8" + "\x78\xac\x1c\xa9\xcc\xd6\x27\xba" + "\x91\x54\x22\xf5\xe6\x05\x3f\xcc" + "\xc2\x8f\x2c\x3b\x2b\xc3\x2b\x2b" + "\x3b\xb8\xb6\x29\xb7\x2f\x94\xb6" + "\x7b\xfc\x94\x3e\xd0\x7a\x41\x59" + "\x7b\x1f\x9a\x09\xa6\xed\x4a\x82" + "\x9d\x34\x1c\xbd\x4e\x1c\x3a\x66" + "\x80\x74\x0e\x9a\x4f\x55\x54\x47" + "\x16\xba\x2a\x0a\x03\x35\x99\xa3" + "\x5c\x63\x8d\xa2\x72\x8b\x17\x15" + "\x68\x39\x73\xeb\xec\xf2\xe8\xf5" + "\x95\x32\x27\xd6\xc4\xfe\xb0\x51" + "\xd5\x0c\x50\xc5\xcd\x6d\x16\xb3" + "\xa3\x1e\x95\x69\xad\x78\x95\x06" + "\xb9\x46\xf2\x6d\x24\x5a\x99\x76" + "\x73\x6a\x91\xa6\xac\x12\xe1\x28" + "\x79\xbc\x08\x4e\x97\x00\x98\x63" + "\x07\x1c\x4e\xd1\x68\xf3\xb3\x81" + "\xa8\xa6\x5f\xf1\x01\xc9\xc1\xaf" + "\x3a\x96\xf9\x9d\xb5\x5a\x5f\x8f" + "\x7e\xc1\x7e\x77\x0a\x40\xc8\x8e" + "\xfc\x0e\xed\xe1\x0d\xb0\xe5\x5e" + "\x5e\x6f\xf5\x7f\xab\x33\x7d\xcd" + "\xf0\x09\x4b\xb2\x11\x37\xdc\x65" + "\x97\x32\x62\x71\x3a\x29\x54\xb9" + "\xc7\xa4\xbf\x75\x0f\xf9\x40\xa9" + "\x8d\xd7\x8b\xa7\xe0\x9a\xbe\x15" + "\xc6\xda\xd8\x00\x14\x69\x1a\xaf" + "\x5f\x79\xc3\xf5\xbb\x6c\x2a\x9d" + "\xdd\x3c\x5f\x97\x21\xe1\x3a\x03" + "\x84\x6a\xe9\x76\x11\x1f\xd3\xd5" + "\xf0\x54\x20\x4d\xc2\x91\xc3\xa4" + "\x36\x25\xbe\x1b\x2a\x06\xb7\xf3" + "\xd1\xd0\x55\x29\x81\x4c\x83\xa3" + "\xa6\x84\x1e\x5c\xd1\xd0\x6c\x90" + "\xa4\x11\xf0\xd7\x63\x6a\x48\x05" + "\xbc\x48\x18\x53\xcd\xb0\x8d\xdb" + "\xdc\xfe\x55\x11\x5c\x51\xb3\xab" + "\xab\x63\x3e\x31\x5a\x8b\x93\x63" + "\x34\xa9\xba\x2b\x69\x1a\xc0\xe3" + "\xcb\x41\xbc\xd7\xf5\x7f\x82\x3e" + "\x01\xa3\x3c\x72\xf4\xfe\xdf\xbe" + "\xb1\x67\x17\x2b\x37\x60\x0d\xca" + "\x6f\xc3\x94\x2c\xd2\x92\x6d\x9d" + "\x75\x18\x77\xaa\x29\x38\x96\xed" + "\x0e\x20\x70\x92\xd5\xd0\xb4\x00" + "\xc0\x31\xf2\xc9\x43\x0e\x75\x1d" + "\x4b\x64\xf2\x1f\xf2\x29\x6c\x7b" + "\x7f\xec\x59\x7d\x8c\x0d\xd4\xd3" + "\xac\x53\x4c\xa3\xde\x42\x92\x95" + "\x6d\xa3\x4f\xd0\xe6\x3d\xe7\xec" + "\x7a\x4d\x68\xf1\xfe\x67\x66\x09" + "\x83\x22\xb1\x98\x43\x8c\xab\xb8" + "\x45\xe6\x6d\xdf\x5e\x50\x71\xce" + "\xf5\x4e\x40\x93\x2b\xfa\x86\x0e" + "\xe8\x30\xbd\x82\xcc\x1c\x9c\x5f" + "\xad\xfd\x08\x31\xbe\x52\xe7\xe6" + "\xf2\x06\x01\x62\x25\x15\x99\x74" + "\x33\x51\x52\x57\x3f\x57\x87\x61" + "\xb9\x7f\x29\x3d\xcd\x92\x5e\xa6" + "\x5c\x3b\xf1\xed\x5f\xeb\x82\xed" + "\x56\x7b\x61\xe7\xfd\x02\x47\x0e" + "\x2a\x15\xa4\xce\x43\x86\x9b\xe1" + "\x2b\x4c\x2a\xd9\x42\x97\xf7\x9a" + "\xe5\x47\x46\x48\xd3\x55\x6f\x4d" + "\xd9\xeb\x4b\xdd\x7b\x21\x2f\xb3" + "\xa8\x36\x28\xdf\xca\xf1\xf6\xd9" + "\x10\xf6\x1c\xfd\x2e\x0c\x27\xe0" + "\x01\xb3\xff\x6d\x47\x08\x4d\xd4" + "\x00\x25\xee\x55\x4a\xe9\xe8\x5b" + "\xd8\xf7\x56\x12\xd4\x50\xb2\xe5" + "\x51\x6f\x34\x63\x69\xd2\x4e\x96" + "\x4e\xbc\x79\xbf\x18\xae\xc6\x13" + "\x80\x92\x77\xb0\xb4\x0f\x29\x94" + "\x6f\x4c\xbb\x53\x11\x36\xc3\x9f" + "\x42\x8e\x96\x8a\x91\xc8\xe9\xfc" + "\xfe\xbf\x7c\x2d\x6f\xf9\xb8\x44" + "\x89\x1b\x09\x53\x0a\x2a\x92\xc3" + "\x54\x7a\x3a\xf9\xe2\xe4\x75\x87" + "\xa0\x5e\x4b\x03\x7a\x0d\x8a\xf4" + "\x55\x59\x94\x2b\x63\x96\x0e\xf5", + .psize = 1040, + .digest = "\xb5\xb9\x08\xb3\x24\x3e\x03\xf0" + "\xd6\x0b\x57\xbc\x0a\x6d\x89\x59", + }, { + .key = "\xf6\x34\x42\x71\x35\x52\x8b\x58" + "\x02\x3a\x8e\x4a\x8d\x41\x13\xe9" + "\x7f\xba\xb9\x55\x9d\x73\x4d\xf8" + "\x3f\x5d\x73\x15\xff\xd3\x9e\x7f" + "\x20\x2a\x6a\xa8\xd1\xf0\x8f\x12" + "\x6b\x02\xd8\x6c\xde\xba\x80\x22" + "\x19\x37\xc8\xd0\x4e\x89\x17\x7c" + "\x7c\xdd\x88\xfd\x41\xc0\x04\xb7" + "\x1d\xac\x19\xe3\x20\xc7\x16\xcf" + "\x58\xee\x1d\x7a\x61\x69\xa9\x12" + "\x4b\xef\x4f\xb6\x38\xdd\x78\xf8" + "\x28\xee\x70\x08\xc7\x7c\xcc\xc8" + "\x1e\x41\xf5\x80\x86\x70\xd0\xf0" + "\xa3\x87\x6b\x0a\x00\xd2\x41\x28" + "\x74\x26\xf1\x24\xf3\xd0\x28\x77" + "\xd7\xcd\xf6\x2d\x61\xf4\xa2\x13" + "\x77\xb4\x6f\xa0\xf4\xfb\xd6\xb5" + "\x38\x9d\x5a\x0c\x51\xaf\xad\x63" + "\x27\x67\x8c\x01\xea\x42\x1a\x66" + "\xda\x16\x7c\x3c\x30\x0c\x66\x53" + "\x1c\x88\xa4\x5c\xb2\xe3\x78\x0a" + "\x13\x05\x6d\xe2\xaf\xb3\xe4\x75" + "\x00\x99\x58\xee\x76\x09\x64\xaa" + "\xbb\x2e\xb1\x81\xec\xd8\x0e\xd3" + "\x0c\x33\x5d\xb7\x98\xef\x36\xb6" + "\xd2\x65\x69\x41\x70\x12\xdc\x25" + "\x41\x03\x99\x81\x41\x19\x62\x13" + "\xd1\x0a\x29\xc5\x8c\xe0\x4c\xf3" + "\xd6\xef\x4c\xf4\x1d\x83\x2e\x6d" + "\x8e\x14\x87\xed\x80\xe0\xaa\xd3" + "\x08\x04\x73\x1a\x84\x40\xf5\x64" + "\xbd\x61\x32\x65\x40\x42\xfb\xb0" + "\x40\xf6\x40\x8d\xc7\x7f\x14\xd0" + "\x83\x99\xaa\x36\x7e\x60\xc6\xbf" + "\x13\x8a\xf9\x21\xe4\x7e\x68\x87" + "\xf3\x33\x86\xb4\xe0\x23\x7e\x0a" + "\x21\xb1\xf5\xad\x67\x3c\x9c\x9d" + "\x09\xab\xaf\x5f\xba\xe0\xd0\x82" + "\x48\x22\x70\xb5\x6d\x53\xd6\x0e" + "\xde\x64\x92\x41\xb0\xd3\xfb\xda" + "\x21\xfe\xab\xea\x20\xc4\x03\x58" + "\x18\x2e\x7d\x2f\x03\xa9\x47\x66" + "\xdf\x7b\xa4\x6b\x34\x6b\x55\x9c" + "\x4f\xd7\x9c\x47\xfb\xa9\x42\xec" + "\x5a\x12\xfd\xfe\x76\xa0\x92\x9d" + "\xfe\x1e\x16\xdd\x24\x2a\xe4\x27" + "\xd5\xa9\xf2\x05\x4f\x83\xa2\xaf" + "\xfe\xee\x83\x7a\xad\xde\xdf\x9a" + "\x80\xd5\x81\x14\x93\x16\x7e\x46" + "\x47\xc2\x14\xef\x49\x6e\xb9\xdb" + "\x40\xe8\x06\x6f\x9c\x2a\xfd\x62" + "\x06\x46\xfd\x15\x1d\x36\x61\x6f" + "\x77\x77\x5e\x64\xce\x78\x1b\x85" + "\xbf\x50\x9a\xfd\x67\xa6\x1a\x65" + "\xad\x5b\x33\x30\xf1\x71\xaa\xd9" + "\x23\x0d\x92\x24\x5f\xae\x57\xb0" + "\x24\x37\x0a\x94\x12\xfb\xb5\xb1" + "\xd3\xb8\x1d\x12\x29\xb0\x80\x24" + "\x2d\x47\x9f\x96\x1f\x95\xf1\xb1" + "\xda\x35\xf6\x29\xe0\xe1\x23\x96" + "\xc7\xe8\x22\x9b\x7c\xac\xf9\x41" + "\x39\x01\xe5\x73\x15\x5e\x99\xec" + "\xb4\xc1\xf4\xe7\xa7\x97\x6a\xd5" + "\x90\x9a\xa0\x1d\xf3\x5a\x8b\x5f" + "\xdf\x01\x52\xa4\x93\x31\x97\xb0" + "\x93\x24\xb5\xbc\xb2\x14\x24\x98" + "\x4a\x8f\x19\x85\xc3\x2d\x0f\x74" + "\x9d\x16\x13\x80\x5e\x59\x62\x62" + "\x25\xe0\xd1\x2f\x64\xef\xba\xac" + "\xcd\x09\x07\x15\x8a\xcf\x73\xb5" + "\x8b\xc9\xd8\x24\xb0\x53\xd5\x6f" + "\xe1\x2b\x77\xb1\xc5\xe4\xa7\x0e" + "\x18\x45\xab\x36\x03\x59\xa8\xbd" + "\x43\xf0\xd8\x2c\x1a\x69\x96\xbb" + "\x13\xdf\x6c\x33\x77\xdf\x25\x34" + "\x5b\xa5\x5b\x8c\xf9\x51\x05\xd4" + "\x8b\x8b\x44\x87\x49\xfc\xa0\x8f" + "\x45\x15\x5b\x40\x42\xc4\x09\x92" + "\x98\x0c\x4d\xf4\x26\x37\x1b\x13" + "\x76\x01\x93\x8d\x4f\xe6\xed\x18" + "\xd0\x79\x7b\x3f\x44\x50\xcb\xee" + "\xf7\x4a\xc9\x9e\xe0\x96\x74\xa7" + "\xe6\x93\xb2\x53\xca\x55\xa8\xdc" + "\x1e\x68\x07\x87\xb7\x2e\xc1\x08" + "\xb2\xa4\x5b\xaf\xc6\xdb\x5c\x66" + "\x41\x1c\x51\xd9\xb0\x07\x00\x0d" + "\xf0\x4c\xdc\x93\xde\xa9\x1e\x8e" + "\xd3\x22\x62\xd8\x8b\x88\x2c\xea" + "\x5e\xf1\x6e\x14\x40\xc7\xbe\xaa" + "\x42\x28\xd0\x26\x30\x78\x01\x9b" + "\x83\x07\xbc\x94\xc7\x57\xa2\x9f" + "\x03\x07\xff\x16\xff\x3c\x6e\x48" + "\x0a\xd0\xdd\x4c\xf6\x64\x9a\xf1" + "\xcd\x30\x12\x82\x2c\x38\xd3\x26" + "\x83\xdb\xab\x3e\xc6\xf8\xe6\xfa" + "\x77\x0a\x78\x82\x75\xf8\x63\x51" + "\x59\xd0\x8d\x24\x9f\x25\xe6\xa3" + "\x4c\xbc\x34\xfc\xe3\x10\xc7\x62" + "\xd4\x23\xc8\x3d\xa7\xc6\xa6\x0a" + "\x4f\x7e\x29\x9d\x6d\xbe\xb5\xf1" + "\xdf\xa4\x53\xfa\xc0\x23\x0f\x37" + "\x84\x68\xd0\xb5\xc8\xc6\xae\xf8" + "\xb7\x8d\xb3\x16\xfe\x8f\x87\xad" + "\xd0\xc1\x08\xee\x12\x1c\x9b\x1d" + "\x90\xf8\xd1\x63\xa4\x92\x3c\xf0" + "\xc7\x34\xd8\xf1\x14\xed\xa3\xbc" + "\x17\x7e\xd4\x62\x42\x54\x57\x2c" + "\x3e\x7a\x35\x35\x17\x0f\x0b\x7f" + "\x81\xa1\x3f\xd0\xcd\xc8\x3b\x96" + "\xe9\xe0\x4a\x04\xe1\xb6\x3c\xa1" + "\xd6\xca\xc4\xbd\xb6\xb5\x95\x34" + "\x12\x9d\xc5\x96\xf2\xdf\xba\x54" + "\x76\xd1\xb2\x6b\x3b\x39\xe0\xb9" + "\x18\x62\xfb\xf7\xfc\x12\xf1\x5f" + "\x7e\xc7\xe3\x59\x4c\xa6\xc2\x3d" + "\x40\x15\xf9\xa3\x95\x64\x4c\x74" + "\x8b\x73\x77\x33\x07\xa7\x04\x1d" + "\x33\x5a\x7e\x8f\xbd\x86\x01\x4f" + "\x3e\xb9\x27\x6f\xe2\x41\xf7\x09" + "\x67\xfd\x29\x28\xc5\xe4\xf6\x18" + "\x4c\x1b\x49\xb2\x9c\x5b\xf6\x81" + "\x4f\xbb\x5c\xcc\x0b\xdf\x84\x23" + "\x58\xd6\x28\x34\x93\x3a\x25\x97" + "\xdf\xb2\xc3\x9e\x97\x38\x0b\x7d" + "\x10\xb3\x54\x35\x23\x8c\x64\xee" + "\xf0\xd8\x66\xff\x8b\x22\xd2\x5b" + "\x05\x16\x3c\x89\xf7\xb1\x75\xaf" + "\xc0\xae\x6a\x4f\x3f\xaf\x9a\xf4" + "\xf4\x9a\x24\xd9\x80\x82\xc0\x12" + "\xde\x96\xd1\xbe\x15\x0b\x8d\x6a" + "\xd7\x12\xe4\x85\x9f\x83\xc9\xc3" + "\xff\x0b\xb5\xaf\x3b\xd8\x6d\x67" + "\x81\x45\xe6\xac\xec\xc1\x7b\x16" + "\x18\x0a\xce\x4b\xc0\x2e\x76\xbc" + "\x1b\xfa\xb4\x34\xb8\xfc\x3e\xc8" + "\x5d\x90\x71\x6d\x7a\x79\xef\x06", + .ksize = 1088, + .plaintext = "\xaa\x5d\x54\xcb\xea\x1e\x46\x0f" + "\x45\x87\x70\x51\x8a\x66\x7a\x33" + "\xb4\x18\xff\xa9\x82\xf9\x45\x4b" + "\x93\xae\x2e\x7f\xab\x98\xfe\xbf" + "\x01\xee\xe5\xa0\x37\x8f\x57\xa6" + "\xb0\x76\x0d\xa4\xd6\x28\x2b\x5d" + "\xe1\x03\xd6\x1c\x6f\x34\x0d\xe7" + "\x61\x2d\x2e\xe5\xae\x5d\x47\xc7" + "\x80\x4b\x18\x8f\xa8\x99\xbc\x28" + "\xed\x1d\x9d\x86\x7d\xd7\x41\xd1" + "\xe0\x2b\xe1\x8c\x93\x2a\xa7\x80" + "\xe1\x07\xa0\xa9\x9f\x8c\x8d\x1a" + "\x55\xfc\x6b\x24\x7a\xbd\x3e\x51" + "\x68\x4b\x26\x59\xc8\xa7\x16\xd9" + "\xb9\x61\x13\xde\x8b\x63\x1c\xf6" + "\x60\x01\xfb\x08\xb3\x5b\x0a\xbf" + "\x34\x73\xda\x87\x87\x3d\x6f\x97" + "\x4a\x0c\xa3\x58\x20\xa2\xc0\x81" + "\x5b\x8c\xef\xa9\xc2\x01\x1e\x64" + "\x83\x8c\xbc\x03\xb6\xd0\x29\x9f" + "\x54\xe2\xce\x8b\xc2\x07\x85\x78" + "\x25\x38\x96\x4c\xb4\xbe\x17\x4a" + "\x65\xa6\xfa\x52\x9d\x66\x9d\x65" + "\x4a\xd1\x01\x01\xf0\xcb\x13\xcc" + "\xa5\x82\xf3\xf2\x66\xcd\x3f\x9d" + "\xd1\xaa\xe4\x67\xea\xf2\xad\x88" + "\x56\x76\xa7\x9b\x59\x3c\xb1\x5d" + "\x78\xfd\x69\x79\x74\x78\x43\x26" + "\x7b\xde\x3f\xf1\xf5\x4e\x14\xd9" + "\x15\xf5\x75\xb5\x2e\x19\xf3\x0c" + "\x48\x72\xd6\x71\x6d\x03\x6e\xaa" + "\xa7\x08\xf9\xaa\x70\xa3\x0f\x4d" + "\x12\x8a\xdd\xe3\x39\x73\x7e\xa7" + "\xea\x1f\x6d\x06\x26\x2a\xf2\xc5" + "\x52\xb4\xbf\xfd\x52\x0c\x06\x60" + "\x90\xd1\xb2\x7b\x56\xae\xac\x58" + "\x5a\x6b\x50\x2a\xf5\xe0\x30\x3c" + "\x2a\x98\x0f\x1b\x5b\x0a\x84\x6c" + "\x31\xae\x92\xe2\xd4\xbb\x7f\x59" + "\x26\x10\xb9\x89\x37\x68\x26\xbf" + "\x41\xc8\x49\xc4\x70\x35\x7d\xff" + "\x2d\x7f\xf6\x8a\x93\x68\x8c\x78" + "\x0d\x53\xce\x7d\xff\x7d\xfb\xae" + "\x13\x1b\x75\xc4\x78\xd7\x71\xd8" + "\xea\xd3\xf4\x9d\x95\x64\x8e\xb4" + "\xde\xb8\xe4\xa6\x68\xc8\xae\x73" + "\x58\xaf\xa8\xb0\x5a\x20\xde\x87" + "\x43\xb9\x0f\xe3\xad\x41\x4b\xd5" + "\xb7\xad\x16\x00\xa6\xff\xf6\x74" + "\xbf\x8c\x9f\xb3\x58\x1b\xb6\x55" + "\xa9\x90\x56\x28\xf0\xb5\x13\x4e" + "\x9e\xf7\x25\x86\xe0\x07\x7b\x98" + "\xd8\x60\x5d\x38\x95\x3c\xe4\x22" + "\x16\x2f\xb2\xa2\xaf\xe8\x90\x17" + "\xec\x11\x83\x1a\xf4\xa9\x26\xda" + "\x39\x72\xf5\x94\x61\x05\x51\xec" + "\xa8\x30\x8b\x2c\x13\xd0\x72\xac" + "\xb9\xd2\xa0\x4c\x4b\x78\xe8\x6e" + "\x04\x85\xe9\x04\x49\x82\x91\xff" + "\x89\xe5\xab\x4c\xaa\x37\x03\x12" + "\xca\x8b\x74\x10\xfd\x9e\xd9\x7b" + "\xcb\xdb\x82\x6e\xce\x2e\x33\x39" + "\xce\xd2\x84\x6e\x34\x71\x51\x6e" + "\x0d\xd6\x01\x87\xc7\xfa\x0a\xd3" + "\xad\x36\xf3\x4c\x9f\x96\x5e\x62" + "\x62\x54\xc3\x03\x78\xd6\xab\xdd" + "\x89\x73\x55\x25\x30\xf8\xa7\xe6" + "\x4f\x11\x0c\x7c\x0a\xa1\x2b\x7b" + "\x3d\x0d\xde\x81\xd4\x9d\x0b\xae" + "\xdf\x00\xf9\x4c\xb6\x90\x8e\x16" + "\xcb\x11\xc8\xd1\x2e\x73\x13\x75" + "\x75\x3e\xaa\xf5\xee\x02\xb3\x18" + "\xa6\x2d\xf5\x3b\x51\xd1\x1f\x47" + "\x6b\x2c\xdb\xc4\x10\xe0\xc8\xba" + "\x9d\xac\xb1\x9d\x75\xd5\x41\x0e" + "\x7e\xbe\x18\x5b\xa4\x1f\xf8\x22" + "\x4c\xc1\x68\xda\x6d\x51\x34\x6c" + "\x19\x59\xec\xb5\xb1\xec\xa7\x03" + "\xca\x54\x99\x63\x05\x6c\xb1\xac" + "\x9c\x31\xd6\xdb\xba\x7b\x14\x12" + "\x7a\xc3\x2f\xbf\x8d\xdc\x37\x46" + "\xdb\xd2\xbc\xd4\x2f\xab\x30\xd5" + "\xed\x34\x99\x8e\x83\x3e\xbe\x4c" + "\x86\x79\x58\xe0\x33\x8d\x9a\xb8" + "\xa9\xa6\x90\x46\xa2\x02\xb8\xdd" + "\xf5\xf9\x1a\x5c\x8c\x01\xaa\x6e" + "\xb4\x22\x12\xf5\x0c\x1b\x9b\x7a" + "\xc3\x80\xf3\x06\x00\x5f\x30\xd5" + "\x06\xdb\x7d\x82\xc2\xd4\x0b\x4c" + "\x5f\xe9\xc5\xf5\xdf\x97\x12\xbf" + "\x56\xaf\x9b\x69\xcd\xee\x30\xb4" + "\xa8\x71\xff\x3e\x7d\x73\x7a\xb4" + "\x0d\xa5\x46\x7a\xf3\xf4\x15\x87" + "\x5d\x93\x2b\x8c\x37\x64\xb5\xdd" + "\x48\xd1\xe5\x8c\xae\xd4\xf1\x76" + "\xda\xf4\xba\x9e\x25\x0e\xad\xa3" + "\x0d\x08\x7c\xa8\x82\x16\x8d\x90" + "\x56\x40\x16\x84\xe7\x22\x53\x3a" + "\x58\xbc\xb9\x8f\x33\xc8\xc2\x84" + "\x22\xe6\x0d\xe7\xb3\xdc\x5d\xdf" + "\xd7\x2a\x36\xe4\x16\x06\x07\xd2" + "\x97\x60\xb2\xf5\x5e\x14\xc9\xfd" + "\x8b\x05\xd1\xce\xee\x9a\x65\x99" + "\xb7\xae\x19\xb7\xc8\xbc\xd5\xa2" + "\x7b\x95\xe1\xcc\xba\x0d\xdc\x8a" + "\x1d\x59\x52\x50\xaa\x16\x02\x82" + "\xdf\x61\x33\x2e\x44\xce\x49\xc7" + "\xe5\xc6\x2e\x76\xcf\x80\x52\xf0" + "\x3d\x17\x34\x47\x3f\xd3\x80\x48" + "\xa2\xba\xd5\xc7\x7b\x02\x28\xdb" + "\xac\x44\xc7\x6e\x05\x5c\xc2\x79" + "\xb3\x7d\x6a\x47\x77\x66\xf1\x38" + "\xf0\xf5\x4f\x27\x1a\x31\xca\x6c" + "\x72\x95\x92\x8e\x3f\xb0\xec\x1d" + "\xc7\x2a\xff\x73\xee\xdf\x55\x80" + "\x93\xd2\xbd\x34\xd3\x9f\x00\x51" + "\xfb\x2e\x41\xba\x6c\x5a\x7c\x17" + "\x7f\xe6\x70\xac\x8d\x39\x3f\x77" + "\xe2\x23\xac\x8f\x72\x4e\xe4\x53" + "\xcc\xf1\x1b\xf1\x35\xfe\x52\xa4" + "\xd6\xb8\x40\x6b\xc1\xfd\xa0\xa1" + "\xf5\x46\x65\xc2\x50\xbb\x43\xe2" + "\xd1\x43\x28\x34\x74\xf5\x87\xa0" + "\xf2\x5e\x27\x3b\x59\x2b\x3e\x49" + "\xdf\x46\xee\xaf\x71\xd7\x32\x36" + "\xc7\x14\x0b\x58\x6e\x3e\x2d\x41" + "\xfa\x75\x66\x3a\x54\xe0\xb2\xb9" + "\xaf\xdd\x04\x80\x15\x19\x3f\x6f" + "\xce\x12\xb4\xd8\xe8\x89\x3c\x05" + "\x30\xeb\xf3\x3d\xcd\x27\xec\xdc" + "\x56\x70\x12\xcf\x78\x2b\x77\xbf" + "\x22\xf0\x1b\x17\x9c\xcc\xd6\x1b" + "\x2d\x3d\xa0\x3b\xd8\xc9\x70\xa4" + "\x7a\x3e\x07\xb9\x06\xc3\xfa\xb0" + "\x33\xee\xc1\xd8\xf6\xe0\xf0\xb2" + "\x61\x12\x69\xb0\x5f\x28\x99\xda" + "\xc3\x61\x48\xfa\x07\x16\x03\xc4" + "\xa8\xe1\x3c\xe8\x0e\x64\x15\x30" + "\xc1\x9d\x84\x2f\x73\x98\x0e\x3a" + "\xf2\x86\x21\xa4\x9e\x1d\xb5\x86" + "\x16\xdb\x2b\x9a\x06\x64\x8e\x79" + "\x8d\x76\x3e\xc3\xc2\x64\x44\xe3" + "\xda\xbc\x1a\x52\xd7\x61\x03\x65" + "\x54\x32\x77\x01\xed\x9d\x8a\x43" + "\x25\x24\xe3\xc1\xbe\xb8\x2f\xcb" + "\x89\x14\x64\xab\xf6\xa0\x6e\x02" + "\x57\xe4\x7d\xa9\x4e\x9a\x03\x36" + "\xad\xf1\xb1\xfc\x0b\xe6\x79\x51" + "\x9f\x81\x77\xc4\x14\x78\x9d\xbf" + "\xb6\xd6\xa3\x8c\xba\x0b\x26\xe7" + "\xc8\xb9\x5c\xcc\xe1\x5f\xd5\xc6" + "\xc4\xca\xc2\xa3\x45\xba\x94\x13" + "\xb2\x8f\xc3\x54\x01\x09\xe7\x8b" + "\xda\x2a\x0a\x11\x02\x43\xcb\x57" + "\xc9\xcc\xb5\x5c\xab\xc4\xec\x54" + "\x00\x06\x34\xe1\x6e\x03\x89\x7c" + "\xc6\xfb\x6a\xc7\x60\x43\xd6\xc5" + "\xb5\x68\x72\x89\x8f\x42\xc3\x74" + "\xbd\x25\xaa\x9f\x67\xb5\xdf\x26" + "\x20\xe8\xb7\x01\x3c\xe4\x77\xce" + "\xc4\x65\xa7\x23\x79\xea\x33\xc7" + "\x82\x14\x5c\x82\xf2\x4e\x3d\xf6" + "\xc6\x4a\x0e\x29\xbb\xec\x44\xcd" + "\x2f\xd1\x4f\x21\x71\xa9\xce\x0f" + "\x5c\xf2\x72\x5c\x08\x2e\x21\xd2" + "\xc3\x29\x13\xd8\xac\xc3\xda\x13" + "\x1a\x9d\xa7\x71\x1d\x27\x1d\x27" + "\x1d\xea\xab\x44\x79\xad\xe5\xeb" + "\xef\x1f\x22\x0a\x44\x4f\xcb\x87" + "\xa7\x58\x71\x0e\x66\xf8\x60\xbf" + "\x60\x74\x4a\xb4\xec\x2e\xfe\xd3" + "\xf5\xb8\xfe\x46\x08\x50\x99\x6c" + "\x66\xa5\xa8\x34\x44\xb5\xe5\xf0" + "\xdd\x2c\x67\x4e\x35\x96\x8e\x67" + "\x48\x3f\x5f\x37\x44\x60\x51\x2e" + "\x14\x91\x5e\x57\xc3\x0e\x79\x77" + "\x2f\x03\xf4\xe2\x1c\x72\xbf\x85" + "\x5d\xd3\x17\xdf\x6c\xc5\x70\x24" + "\x42\xdf\x51\x4e\x2a\xb2\xd2\x5b" + "\x9e\x69\x83\x41\x11\xfe\x73\x22" + "\xde\x8a\x9e\xd8\x8a\xfb\x20\x38" + "\xd8\x47\x6f\xd5\xed\x8f\x41\xfd" + "\x13\x7a\x18\x03\x7d\x0f\xcd\x7d" + "\xa6\x7d\x31\x9e\xf1\x8f\x30\xa3" + "\x8b\x4c\x24\xb7\xf5\x48\xd7\xd9" + "\x12\xe7\x84\x97\x5c\x31\x6d\xfb" + "\xdf\xf3\xd3\xd1\xd5\x0c\x30\x06" + "\x01\x6a\xbc\x6c\x78\x7b\xa6\x50" + "\xfa\x0f\x3c\x42\x2d\xa5\xa3\x3b" + "\xcf\x62\x50\xff\x71\x6d\xe7\xda" + "\x27\xab\xc6\x67\x16\x65\x68\x64" + "\xc7\xd5\x5f\x81\xa9\xf6\x65\xb3" + "\x5e\x43\x91\x16\xcd\x3d\x55\x37" + "\x55\xb3\xf0\x28\xc5\x54\x19\xc0" + "\xe0\xd6\x2a\x61\xd4\xc8\x72\x51" + "\xe9\xa1\x7b\x48\x21\xad\x44\x09" + "\xe4\x01\x61\x3c\x8a\x5b\xf9\xa1" + "\x6e\x1b\xdf\xc0\x04\xa8\x8b\xf2" + "\x21\xbe\x34\x7b\xfc\xa1\xcd\xc9" + "\xa9\x96\xf4\xa4\x4c\xf7\x4e\x8f" + "\x84\xcc\xd3\xa8\x92\x77\x8f\x36" + "\xe2\x2e\x8c\x33\xe8\x84\xa6\x0c" + "\x6c\x8a\xda\x14\x32\xc2\x96\xff" + "\xc6\x4a\xc2\x9b\x30\x7f\xd1\x29" + "\xc0\xd5\x78\x41\x00\x80\x80\x03" + "\x2a\xb1\xde\x26\x03\x48\x49\xee" + "\x57\x14\x76\x51\x3c\x36\x5d\x0a" + "\x5c\x9f\xe8\xd8\x53\xdb\x4f\xd4" + "\x38\xbf\x66\xc9\x75\x12\x18\x75" + "\x34\x2d\x93\x22\x96\x51\x24\x6e" + "\x4e\xd9\x30\xea\x67\xff\x92\x1c" + "\x16\x26\xe9\xb5\x33\xab\x8c\x22" + "\x47\xdb\xa0\x2c\x08\xf0\x12\x69" + "\x7e\x93\x52\xda\xa5\xe5\xca\xc1" + "\x0f\x55\x2a\xbd\x09\x30\x88\x1b" + "\x9c\xc6\x9f\xe6\xdb\xa6\x92\xeb" + "\xf4\xbd\x5c\xc4\xdb\xc6\x71\x09" + "\xab\x5e\x48\x0c\xed\x6f\xda\x8e" + "\x8d\x0c\x98\x71\x7d\x10\xd0\x9c" + "\x20\x9b\x79\x53\x26\x5d\xb9\x85" + "\x8a\x31\xb8\xc5\x1c\x97\xde\x88" + "\x61\x55\x7f\x7c\x21\x06\xea\xc4" + "\x5f\xaf\xf2\xf0\xd5\x5e\x7d\xb4" + "\x6e\xcf\xe9\xae\x1b\x0e\x11\x80" + "\xc1\x9a\x74\x7e\x52\x6f\xa0\xb7" + "\x24\xcd\x8d\x0a\x11\x40\x63\x72" + "\xfa\xe2\xc5\xb3\x94\xef\x29\xa2" + "\x1a\x23\x43\x04\x37\x55\x0d\xe9" + "\x83\xb2\x29\x51\x49\x64\xa0\xbd" + "\xde\x73\xfd\xa5\x7c\x95\x70\x62" + "\x58\xdc\xe2\xd0\xbf\x98\xf5\x8a" + "\x6a\xfd\xce\xa8\x0e\x42\x2a\xeb" + "\xd2\xff\x83\x27\x53\x5c\xa0\x6e" + "\x93\xef\xe2\xb9\x5d\x35\xd6\x98" + "\xf6\x71\x19\x7a\x54\xa1\xa7\xe8" + "\x09\xfe\xf6\x9e\xc7\xbd\x3e\x29" + "\xbd\x6b\x17\xf4\xe7\x3e\x10\x5c" + "\xc1\xd2\x59\x4f\x4b\x12\x1a\x5b" + "\x50\x80\x59\xb9\xec\x13\x66\xa8" + "\xd2\x31\x7b\x6a\x61\x22\xdd\x7d" + "\x61\xee\x87\x16\x46\x9f\xf9\xc7" + "\x41\xee\x74\xf8\xd0\x96\x2c\x76" + "\x2a\xac\x7d\x6e\x9f\x0e\x7f\x95" + "\xfe\x50\x16\xb2\x23\xca\x62\xd5" + "\x68\xcf\x07\x3f\x3f\x97\x85\x2a" + "\x0c\x25\x45\xba\xdb\x32\xcb\x83" + "\x8c\x4f\xe0\x6d\x9a\x99\xf9\xc9" + "\xda\xd4\x19\x31\xc1\x7c\x6d\xd9" + "\x9c\x56\xd3\xec\xc1\x81\x4c\xed" + "\x28\x9d\x87\xeb\x19\xd7\x1a\x4f" + "\x04\x6a\xcb\x1f\xcf\x1f\xa2\x16" + "\xfc\x2a\x0d\xa1\x14\x2d\xfa\xc5" + "\x5a\xd2\xc5\xf9\x19\x7c\x20\x1f" + "\x2d\x10\xc0\x66\x7c\xd9\x2d\xe5" + "\x88\x70\x59\xa7\x85\xd5\x2e\x7c" + "\x5c\xe3\xb7\x12\xd6\x97\x3f\x29", + .psize = 2048, + .digest = "\x37\x90\x92\xc2\xeb\x01\x87\xd9" + "\x95\xc7\x91\xc3\x17\x8b\x38\x52", + } +}; + /* * DES test vectors. */ diff --git a/include/crypto/nhpoly1305.h b/include/crypto/nhpoly1305.h new file mode 100644 index 0000000000000..53c04423c582e --- /dev/null +++ b/include/crypto/nhpoly1305.h @@ -0,0 +1,74 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Common values and helper functions for the NHPoly1305 hash function. + */ + +#ifndef _NHPOLY1305_H +#define _NHPOLY1305_H + +#include +#include + +/* NH parameterization: */ + +/* Endianness: little */ +/* Word size: 32 bits (works well on NEON, SSE2, AVX2) */ + +/* Stride: 2 words (optimal on ARM32 NEON; works okay on other CPUs too) */ +#define NH_PAIR_STRIDE 2 +#define NH_MESSAGE_UNIT (NH_PAIR_STRIDE * 2 * sizeof(u32)) + +/* Num passes (Toeplitz iteration count): 4, to give ε = 2^{-128} */ +#define NH_NUM_PASSES 4 +#define NH_HASH_BYTES (NH_NUM_PASSES * sizeof(u64)) + +/* Max message size: 1024 bytes (32x compression factor) */ +#define NH_NUM_STRIDES 64 +#define NH_MESSAGE_WORDS (NH_PAIR_STRIDE * 2 * NH_NUM_STRIDES) +#define NH_MESSAGE_BYTES (NH_MESSAGE_WORDS * sizeof(u32)) +#define NH_KEY_WORDS (NH_MESSAGE_WORDS + \ + NH_PAIR_STRIDE * 2 * (NH_NUM_PASSES - 1)) +#define NH_KEY_BYTES (NH_KEY_WORDS * sizeof(u32)) + +#define NHPOLY1305_KEY_SIZE (POLY1305_BLOCK_SIZE + NH_KEY_BYTES) + +struct nhpoly1305_key { + struct poly1305_key poly_key; + u32 nh_key[NH_KEY_WORDS]; +}; + +struct nhpoly1305_state { + + /* Running total of polynomial evaluation */ + struct poly1305_state poly_state; + + /* Partial block buffer */ + u8 buffer[NH_MESSAGE_UNIT]; + unsigned int buflen; + + /* + * Number of bytes remaining until the current NH message reaches + * NH_MESSAGE_BYTES. When nonzero, 'nh_hash' holds the partial NH hash. + */ + unsigned int nh_remaining; + + __le64 nh_hash[NH_NUM_PASSES]; +}; + +typedef void (*nh_t)(const u32 *key, const u8 *message, size_t message_len, + __le64 hash[NH_NUM_PASSES]); + +int crypto_nhpoly1305_setkey(struct crypto_shash *tfm, + const u8 *key, unsigned int keylen); + +int crypto_nhpoly1305_init(struct shash_desc *desc); +int crypto_nhpoly1305_update(struct shash_desc *desc, + const u8 *src, unsigned int srclen); +int crypto_nhpoly1305_update_helper(struct shash_desc *desc, + const u8 *src, unsigned int srclen, + nh_t nh_fn); +int crypto_nhpoly1305_final(struct shash_desc *desc, u8 *dst); +int crypto_nhpoly1305_final_helper(struct shash_desc *desc, u8 *dst, + nh_t nh_fn); + +#endif /* _NHPOLY1305_H */ -- GitLab From 59e501683f150e22c808139d9233f9fa46048c9b Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 16 Nov 2018 17:26:30 -0800 Subject: [PATCH 1963/2269] FROMGIT: crypto: arm/nhpoly1305 - add NEON-accelerated NHPoly1305 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add an ARM NEON implementation of NHPoly1305, an ε-almost-∆-universal hash function used in the Adiantum encryption mode. For now, only the NH portion is actually NEON-accelerated; the Poly1305 part is less performance-critical so is just implemented in C. Signed-off-by: Eric Biggers Reviewed-by: Ard Biesheuvel Signed-off-by: Herbert Xu (cherry picked from commit 16aae3595a9d41c97d983889b341c455779c2ecf https://git.kernel.org/pub/scm/linux/kernel/git/herbert/cryptodev-2.6.git master) Bug: 112008522 Test: As series, see Ic61c13b53facfd2173065be715a7ee5f3af8760b Change-Id: Ibe2a3fd8ed8522f08e136e48b11492d9f01a3160 Signed-off-by: Eric Biggers --- arch/arm/crypto/Kconfig | 5 ++ arch/arm/crypto/Makefile | 2 + arch/arm/crypto/nh-neon-core.S | 116 +++++++++++++++++++++++++ arch/arm/crypto/nhpoly1305-neon-glue.c | 77 ++++++++++++++++ 4 files changed, 200 insertions(+) create mode 100644 arch/arm/crypto/nh-neon-core.S create mode 100644 arch/arm/crypto/nhpoly1305-neon-glue.c diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig index 8284a40edf5aa..5c278a8cce686 100644 --- a/arch/arm/crypto/Kconfig +++ b/arch/arm/crypto/Kconfig @@ -130,4 +130,9 @@ config CRYPTO_CHACHA20_NEON select CRYPTO_BLKCIPHER select CRYPTO_CHACHA20 +config CRYPTO_NHPOLY1305_NEON + tristate "NEON accelerated NHPoly1305 hash function (for Adiantum)" + depends on KERNEL_MODE_NEON + select CRYPTO_NHPOLY1305 + endif diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile index 85f7940bfb990..5668d910fb152 100644 --- a/arch/arm/crypto/Makefile +++ b/arch/arm/crypto/Makefile @@ -10,6 +10,7 @@ obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sha1-arm-neon.o obj-$(CONFIG_CRYPTO_SHA256_ARM) += sha256-arm.o obj-$(CONFIG_CRYPTO_SHA512_ARM) += sha512-arm.o obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha-neon.o +obj-$(CONFIG_CRYPTO_NHPOLY1305_NEON) += nhpoly1305-neon.o ce-obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o ce-obj-$(CONFIG_CRYPTO_SHA1_ARM_CE) += sha1-arm-ce.o @@ -53,6 +54,7 @@ ghash-arm-ce-y := ghash-ce-core.o ghash-ce-glue.o crct10dif-arm-ce-y := crct10dif-ce-core.o crct10dif-ce-glue.o crc32-arm-ce-y:= crc32-ce-core.o crc32-ce-glue.o chacha-neon-y := chacha-neon-core.o chacha-neon-glue.o +nhpoly1305-neon-y := nh-neon-core.o nhpoly1305-neon-glue.o ifdef REGENERATE_ARM_CRYPTO quiet_cmd_perl = PERL $@ diff --git a/arch/arm/crypto/nh-neon-core.S b/arch/arm/crypto/nh-neon-core.S new file mode 100644 index 0000000000000..434d80ab531c2 --- /dev/null +++ b/arch/arm/crypto/nh-neon-core.S @@ -0,0 +1,116 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * NH - ε-almost-universal hash function, NEON accelerated version + * + * Copyright 2018 Google LLC + * + * Author: Eric Biggers + */ + +#include + + .text + .fpu neon + + KEY .req r0 + MESSAGE .req r1 + MESSAGE_LEN .req r2 + HASH .req r3 + + PASS0_SUMS .req q0 + PASS0_SUM_A .req d0 + PASS0_SUM_B .req d1 + PASS1_SUMS .req q1 + PASS1_SUM_A .req d2 + PASS1_SUM_B .req d3 + PASS2_SUMS .req q2 + PASS2_SUM_A .req d4 + PASS2_SUM_B .req d5 + PASS3_SUMS .req q3 + PASS3_SUM_A .req d6 + PASS3_SUM_B .req d7 + K0 .req q4 + K1 .req q5 + K2 .req q6 + K3 .req q7 + T0 .req q8 + T0_L .req d16 + T0_H .req d17 + T1 .req q9 + T1_L .req d18 + T1_H .req d19 + T2 .req q10 + T2_L .req d20 + T2_H .req d21 + T3 .req q11 + T3_L .req d22 + T3_H .req d23 + +.macro _nh_stride k0, k1, k2, k3 + + // Load next message stride + vld1.8 {T3}, [MESSAGE]! + + // Load next key stride + vld1.32 {\k3}, [KEY]! + + // Add message words to key words + vadd.u32 T0, T3, \k0 + vadd.u32 T1, T3, \k1 + vadd.u32 T2, T3, \k2 + vadd.u32 T3, T3, \k3 + + // Multiply 32x32 => 64 and accumulate + vmlal.u32 PASS0_SUMS, T0_L, T0_H + vmlal.u32 PASS1_SUMS, T1_L, T1_H + vmlal.u32 PASS2_SUMS, T2_L, T2_H + vmlal.u32 PASS3_SUMS, T3_L, T3_H +.endm + +/* + * void nh_neon(const u32 *key, const u8 *message, size_t message_len, + * u8 hash[NH_HASH_BYTES]) + * + * It's guaranteed that message_len % 16 == 0. + */ +ENTRY(nh_neon) + + vld1.32 {K0,K1}, [KEY]! + vmov.u64 PASS0_SUMS, #0 + vmov.u64 PASS1_SUMS, #0 + vld1.32 {K2}, [KEY]! + vmov.u64 PASS2_SUMS, #0 + vmov.u64 PASS3_SUMS, #0 + + subs MESSAGE_LEN, MESSAGE_LEN, #64 + blt .Lloop4_done +.Lloop4: + _nh_stride K0, K1, K2, K3 + _nh_stride K1, K2, K3, K0 + _nh_stride K2, K3, K0, K1 + _nh_stride K3, K0, K1, K2 + subs MESSAGE_LEN, MESSAGE_LEN, #64 + bge .Lloop4 + +.Lloop4_done: + ands MESSAGE_LEN, MESSAGE_LEN, #63 + beq .Ldone + _nh_stride K0, K1, K2, K3 + + subs MESSAGE_LEN, MESSAGE_LEN, #16 + beq .Ldone + _nh_stride K1, K2, K3, K0 + + subs MESSAGE_LEN, MESSAGE_LEN, #16 + beq .Ldone + _nh_stride K2, K3, K0, K1 + +.Ldone: + // Sum the accumulators for each pass, then store the sums to 'hash' + vadd.u64 T0_L, PASS0_SUM_A, PASS0_SUM_B + vadd.u64 T0_H, PASS1_SUM_A, PASS1_SUM_B + vadd.u64 T1_L, PASS2_SUM_A, PASS2_SUM_B + vadd.u64 T1_H, PASS3_SUM_A, PASS3_SUM_B + vst1.8 {T0-T1}, [HASH] + bx lr +ENDPROC(nh_neon) diff --git a/arch/arm/crypto/nhpoly1305-neon-glue.c b/arch/arm/crypto/nhpoly1305-neon-glue.c new file mode 100644 index 0000000000000..49aae87cb2bcc --- /dev/null +++ b/arch/arm/crypto/nhpoly1305-neon-glue.c @@ -0,0 +1,77 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * NHPoly1305 - ε-almost-∆-universal hash function for Adiantum + * (NEON accelerated version) + * + * Copyright 2018 Google LLC + */ + +#include +#include +#include +#include +#include + +asmlinkage void nh_neon(const u32 *key, const u8 *message, size_t message_len, + u8 hash[NH_HASH_BYTES]); + +/* wrapper to avoid indirect call to assembly, which doesn't work with CFI */ +static void _nh_neon(const u32 *key, const u8 *message, size_t message_len, + __le64 hash[NH_NUM_PASSES]) +{ + nh_neon(key, message, message_len, (u8 *)hash); +} + +static int nhpoly1305_neon_update(struct shash_desc *desc, + const u8 *src, unsigned int srclen) +{ + if (srclen < 64 || !may_use_simd()) + return crypto_nhpoly1305_update(desc, src, srclen); + + do { + unsigned int n = min_t(unsigned int, srclen, PAGE_SIZE); + + kernel_neon_begin(); + crypto_nhpoly1305_update_helper(desc, src, n, _nh_neon); + kernel_neon_end(); + src += n; + srclen -= n; + } while (srclen); + return 0; +} + +static struct shash_alg nhpoly1305_alg = { + .base.cra_name = "nhpoly1305", + .base.cra_driver_name = "nhpoly1305-neon", + .base.cra_priority = 200, + .base.cra_ctxsize = sizeof(struct nhpoly1305_key), + .base.cra_module = THIS_MODULE, + .digestsize = POLY1305_DIGEST_SIZE, + .init = crypto_nhpoly1305_init, + .update = nhpoly1305_neon_update, + .final = crypto_nhpoly1305_final, + .setkey = crypto_nhpoly1305_setkey, + .descsize = sizeof(struct nhpoly1305_state), +}; + +static int __init nhpoly1305_mod_init(void) +{ + if (!(elf_hwcap & HWCAP_NEON)) + return -ENODEV; + + return crypto_register_shash(&nhpoly1305_alg); +} + +static void __exit nhpoly1305_mod_exit(void) +{ + crypto_unregister_shash(&nhpoly1305_alg); +} + +module_init(nhpoly1305_mod_init); +module_exit(nhpoly1305_mod_exit); + +MODULE_DESCRIPTION("NHPoly1305 ε-almost-∆-universal hash function (NEON-accelerated)"); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Eric Biggers "); +MODULE_ALIAS_CRYPTO("nhpoly1305"); +MODULE_ALIAS_CRYPTO("nhpoly1305-neon"); -- GitLab From 3975a6faad41ce4dc047aa83bb77e5dbc0f82713 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 16 Nov 2018 17:26:31 -0800 Subject: [PATCH 1964/2269] BACKPORT, FROMGIT: crypto: adiantum - add Adiantum support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add support for the Adiantum encryption mode. Adiantum was designed by Paul Crowley and is specified by our paper: Adiantum: length-preserving encryption for entry-level processors (https://eprint.iacr.org/2018/720.pdf) See our paper for full details; this patch only provides an overview. Adiantum is a tweakable, length-preserving encryption mode designed for fast and secure disk encryption, especially on CPUs without dedicated crypto instructions. Adiantum encrypts each sector using the XChaCha12 stream cipher, two passes of an ε-almost-∆-universal (εA∆U) hash function, and an invocation of the AES-256 block cipher on a single 16-byte block. On CPUs without AES instructions, Adiantum is much faster than AES-XTS; for example, on ARM Cortex-A7, on 4096-byte sectors Adiantum encryption is about 4 times faster than AES-256-XTS encryption, and decryption about 5 times faster. Adiantum is a specialization of the more general HBSH construction. Our earlier proposal, HPolyC, was also a HBSH specialization, but it used a different εA∆U hash function, one based on Poly1305 only. Adiantum's εA∆U hash function, which is based primarily on the "NH" hash function like that used in UMAC (RFC4418), is about twice as fast as HPolyC's; consequently, Adiantum is about 20% faster than HPolyC. This speed comes with no loss of security: Adiantum is provably just as secure as HPolyC, in fact slightly *more* secure. Like HPolyC, Adiantum's security is reducible to that of XChaCha12 and AES-256, subject to a security bound. XChaCha12 itself has a security reduction to ChaCha12. Therefore, one need not "trust" Adiantum; one need only trust ChaCha12 and AES-256. Note that the εA∆U hash function is only used for its proven combinatorical properties so cannot be "broken". Adiantum is also a true wide-block encryption mode, so flipping any plaintext bit in the sector scrambles the entire ciphertext, and vice versa. No other such mode is available in the kernel currently; doing the same with XTS scrambles only 16 bytes. Adiantum also supports arbitrary-length tweaks and naturally supports any length input >= 16 bytes without needing "ciphertext stealing". For the stream cipher, Adiantum uses XChaCha12 rather than XChaCha20 in order to make encryption feasible on the widest range of devices. Although the 20-round variant is quite popular, the best known attacks on ChaCha are on only 7 rounds, so ChaCha12 still has a substantial security margin; in fact, larger than AES-256's. 12-round Salsa20 is also the eSTREAM recommendation. For the block cipher, Adiantum uses AES-256, despite it having a lower security margin than XChaCha12 and needing table lookups, due to AES's extensive adoption and analysis making it the obvious first choice. Nevertheless, for flexibility this patch also permits the "adiantum" template to be instantiated with XChaCha20 and/or with an alternate block cipher. We need Adiantum support in the kernel for use in dm-crypt and fscrypt, where currently the only other suitable options are block cipher modes such as AES-XTS. A big problem with this is that many low-end mobile devices (e.g. Android Go phones sold primarily in developing countries, as well as some smartwatches) still have CPUs that lack AES instructions, e.g. ARM Cortex-A7. Sadly, AES-XTS encryption is much too slow to be viable on these devices. We did find that some "lightweight" block ciphers are fast enough, but these suffer from problems such as not having much cryptanalysis or being too controversial. The ChaCha stream cipher has excellent performance but is insecure to use directly for disk encryption, since each sector's IV is reused each time it is overwritten. Even restricting the threat model to offline attacks only isn't enough, since modern flash storage devices don't guarantee that "overwrites" are really overwrites, due to wear-leveling. Adiantum avoids this problem by constructing a "tweakable super-pseudorandom permutation"; this is the strongest possible security model for length-preserving encryption. Of course, storing random nonces along with the ciphertext would be the ideal solution. But doing that with existing hardware and filesystems runs into major practical problems; in most cases it would require data journaling (like dm-integrity) which severely degrades performance. Thus, for now length-preserving encryption is still needed. Signed-off-by: Eric Biggers Reviewed-by: Ard Biesheuvel Signed-off-by: Herbert Xu (cherry picked from commit 059c2a4d8e164dccc3078e49e7f286023b019a98 https://git.kernel.org/pub/scm/linux/kernel/git/herbert/cryptodev-2.6.git master) Conflicts: crypto/tcrypt.c crypto/testmgr.c (adjusted test vector formatting for old testmgr) Bug: 112008522 Test: Among other things, I ran the relevant crypto self-tests: 1.) Build kernel with CONFIG_CRYPTO_MANAGER_DISABLE_TESTS *unset*, and all relevant crypto algorithms built-in, including: CONFIG_CRYPTO_ADIANTUM=y CONFIG_CRYPTO_CHACHA20=y CONFIG_CRYPTO_CHACHA20_NEON=y CONFIG_CRYPTO_NHPOLY1305=y CONFIG_CRYPTO_NHPOLY1305_NEON=y CONFIG_CRYPTO_POLY1305=y CONFIG_CRYPTO_AES=y CONFIG_CRYPTO_AES_ARM=y 2.) Boot and check dmesg for test failures. 3.) Instantiate "adiantum(xchacha12,aes)" and "adiantum(xchacha20,aes)" to trigger them to be tested. There are many ways to do this, but one way is to create a dm-crypt target that uses them, e.g. key=$(hexdump -n 32 -e '16/4 "%08X" 1 "\n"' /dev/urandom) dmsetup create crypt --table "0 $((1<<17)) crypt xchacha12,aes-adiantum-plain64 $key 0 /dev/vdc 0" dmsetup remove crypt dmsetup create crypt --table "0 $((1<<17)) crypt xchacha20,aes-adiantum-plain64 $key 0 /dev/vdc 0" dmsetup remove crypt 4.) Check dmesg for test failures again. 5.) Do 1-4 on both x86_64 (for basic testing) and on arm32 (for testing the ARM32-specific implementations). I did the arm32 kernel testing on Raspberry Pi 2, which is a BCM2836-based device that can run the upstream and Android common kernels. The same ARM32 assembly files for ChaCha, NHPoly1305, and AES are also included in the userspace Adiantum benchmark suite at https://github.com/google/adiantum, where they have undergone additional correctness testing. Change-Id: Ic61c13b53facfd2173065be715a7ee5f3af8760b Signed-off-by: Eric Biggers --- crypto/Kconfig | 23 ++ crypto/Makefile | 1 + crypto/adiantum.c | 658 ++++++++++++++++++++++++++++++++ crypto/tcrypt.c | 10 + crypto/testmgr.c | 18 + crypto/testmgr.h | 935 ++++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 1645 insertions(+) create mode 100644 crypto/adiantum.c diff --git a/crypto/Kconfig b/crypto/Kconfig index e92d51eb13789..8ab3488fee905 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -394,6 +394,29 @@ config CRYPTO_NHPOLY1305 select CRYPTO_HASH select CRYPTO_POLY1305 +config CRYPTO_ADIANTUM + tristate "Adiantum support" + select CRYPTO_CHACHA20 + select CRYPTO_POLY1305 + select CRYPTO_NHPOLY1305 + help + Adiantum is a tweakable, length-preserving encryption mode + designed for fast and secure disk encryption, especially on + CPUs without dedicated crypto instructions. It encrypts + each sector using the XChaCha12 stream cipher, two passes of + an ε-almost-∆-universal hash function, and an invocation of + the AES-256 block cipher on a single 16-byte block. On CPUs + without AES instructions, Adiantum is much faster than + AES-XTS. + + Adiantum's security is provably reducible to that of its + underlying stream and block ciphers, subject to a security + bound. Unlike XTS, Adiantum is a true wide-block encryption + mode, so it actually provides an even stronger notion of + security than XTS, subject to the security bound. + + If unsure, say N. + comment "Hash modes" config CRYPTO_CMAC diff --git a/crypto/Makefile b/crypto/Makefile index bfa498533d6a6..3bbd4103accb0 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -83,6 +83,7 @@ obj-$(CONFIG_CRYPTO_LRW) += lrw.o obj-$(CONFIG_CRYPTO_XTS) += xts.o obj-$(CONFIG_CRYPTO_CTR) += ctr.o obj-$(CONFIG_CRYPTO_KEYWRAP) += keywrap.o +obj-$(CONFIG_CRYPTO_ADIANTUM) += adiantum.o obj-$(CONFIG_CRYPTO_NHPOLY1305) += nhpoly1305.o obj-$(CONFIG_CRYPTO_GCM) += gcm.o obj-$(CONFIG_CRYPTO_CCM) += ccm.o diff --git a/crypto/adiantum.c b/crypto/adiantum.c new file mode 100644 index 0000000000000..2dfcf12fd4529 --- /dev/null +++ b/crypto/adiantum.c @@ -0,0 +1,658 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Adiantum length-preserving encryption mode + * + * Copyright 2018 Google LLC + */ + +/* + * Adiantum is a tweakable, length-preserving encryption mode designed for fast + * and secure disk encryption, especially on CPUs without dedicated crypto + * instructions. Adiantum encrypts each sector using the XChaCha12 stream + * cipher, two passes of an ε-almost-∆-universal (εA∆U) hash function based on + * NH and Poly1305, and an invocation of the AES-256 block cipher on a single + * 16-byte block. See the paper for details: + * + * Adiantum: length-preserving encryption for entry-level processors + * (https://eprint.iacr.org/2018/720.pdf) + * + * For flexibility, this implementation also allows other ciphers: + * + * - Stream cipher: XChaCha12 or XChaCha20 + * - Block cipher: any with a 128-bit block size and 256-bit key + * + * This implementation doesn't currently allow other εA∆U hash functions, i.e. + * HPolyC is not supported. This is because Adiantum is ~20% faster than HPolyC + * but still provably as secure, and also the εA∆U hash function of HBSH is + * formally defined to take two inputs (tweak, message) which makes it difficult + * to wrap with the crypto_shash API. Rather, some details need to be handled + * here. Nevertheless, if needed in the future, support for other εA∆U hash + * functions could be added here. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "internal.h" + +/* + * Size of right-hand block of input data, in bytes; also the size of the block + * cipher's block size and the hash function's output. + */ +#define BLOCKCIPHER_BLOCK_SIZE 16 + +/* Size of the block cipher key (K_E) in bytes */ +#define BLOCKCIPHER_KEY_SIZE 32 + +/* Size of the hash key (K_H) in bytes */ +#define HASH_KEY_SIZE (POLY1305_BLOCK_SIZE + NHPOLY1305_KEY_SIZE) + +/* + * The specification allows variable-length tweaks, but Linux's crypto API + * currently only allows algorithms to support a single length. The "natural" + * tweak length for Adiantum is 16, since that fits into one Poly1305 block for + * the best performance. But longer tweaks are useful for fscrypt, to avoid + * needing to derive per-file keys. So instead we use two blocks, or 32 bytes. + */ +#define TWEAK_SIZE 32 + +struct adiantum_instance_ctx { + struct crypto_skcipher_spawn streamcipher_spawn; + struct crypto_spawn blockcipher_spawn; + struct crypto_shash_spawn hash_spawn; +}; + +struct adiantum_tfm_ctx { + struct crypto_skcipher *streamcipher; + struct crypto_cipher *blockcipher; + struct crypto_shash *hash; + struct poly1305_key header_hash_key; +}; + +struct adiantum_request_ctx { + + /* + * Buffer for right-hand block of data, i.e. + * + * P_L => P_M => C_M => C_R when encrypting, or + * C_R => C_M => P_M => P_L when decrypting. + * + * Also used to build the IV for the stream cipher. + */ + union { + u8 bytes[XCHACHA_IV_SIZE]; + __le32 words[XCHACHA_IV_SIZE / sizeof(__le32)]; + le128 bignum; /* interpret as element of Z/(2^{128}Z) */ + } rbuf; + + bool enc; /* true if encrypting, false if decrypting */ + + /* + * The result of the Poly1305 εA∆U hash function applied to + * (message length, tweak). + */ + le128 header_hash; + + /* Sub-requests, must be last */ + union { + struct shash_desc hash_desc; + struct skcipher_request streamcipher_req; + } u; +}; + +/* + * Given the XChaCha stream key K_S, derive the block cipher key K_E and the + * hash key K_H as follows: + * + * K_E || K_H || ... = XChaCha(key=K_S, nonce=1||0^191) + * + * Note that this denotes using bits from the XChaCha keystream, which here we + * get indirectly by encrypting a buffer containing all 0's. + */ +static int adiantum_setkey(struct crypto_skcipher *tfm, const u8 *key, + unsigned int keylen) +{ + struct adiantum_tfm_ctx *tctx = crypto_skcipher_ctx(tfm); + struct { + u8 iv[XCHACHA_IV_SIZE]; + u8 derived_keys[BLOCKCIPHER_KEY_SIZE + HASH_KEY_SIZE]; + struct scatterlist sg; + struct crypto_wait wait; + struct skcipher_request req; /* must be last */ + } *data; + u8 *keyp; + int err; + + /* Set the stream cipher key (K_S) */ + crypto_skcipher_clear_flags(tctx->streamcipher, CRYPTO_TFM_REQ_MASK); + crypto_skcipher_set_flags(tctx->streamcipher, + crypto_skcipher_get_flags(tfm) & + CRYPTO_TFM_REQ_MASK); + err = crypto_skcipher_setkey(tctx->streamcipher, key, keylen); + crypto_skcipher_set_flags(tfm, + crypto_skcipher_get_flags(tctx->streamcipher) & + CRYPTO_TFM_RES_MASK); + if (err) + return err; + + /* Derive the subkeys */ + data = kzalloc(sizeof(*data) + + crypto_skcipher_reqsize(tctx->streamcipher), GFP_KERNEL); + if (!data) + return -ENOMEM; + data->iv[0] = 1; + sg_init_one(&data->sg, data->derived_keys, sizeof(data->derived_keys)); + crypto_init_wait(&data->wait); + skcipher_request_set_tfm(&data->req, tctx->streamcipher); + skcipher_request_set_callback(&data->req, CRYPTO_TFM_REQ_MAY_SLEEP | + CRYPTO_TFM_REQ_MAY_BACKLOG, + crypto_req_done, &data->wait); + skcipher_request_set_crypt(&data->req, &data->sg, &data->sg, + sizeof(data->derived_keys), data->iv); + err = crypto_wait_req(crypto_skcipher_encrypt(&data->req), &data->wait); + if (err) + goto out; + keyp = data->derived_keys; + + /* Set the block cipher key (K_E) */ + crypto_cipher_clear_flags(tctx->blockcipher, CRYPTO_TFM_REQ_MASK); + crypto_cipher_set_flags(tctx->blockcipher, + crypto_skcipher_get_flags(tfm) & + CRYPTO_TFM_REQ_MASK); + err = crypto_cipher_setkey(tctx->blockcipher, keyp, + BLOCKCIPHER_KEY_SIZE); + crypto_skcipher_set_flags(tfm, + crypto_cipher_get_flags(tctx->blockcipher) & + CRYPTO_TFM_RES_MASK); + if (err) + goto out; + keyp += BLOCKCIPHER_KEY_SIZE; + + /* Set the hash key (K_H) */ + poly1305_core_setkey(&tctx->header_hash_key, keyp); + keyp += POLY1305_BLOCK_SIZE; + + crypto_shash_clear_flags(tctx->hash, CRYPTO_TFM_REQ_MASK); + crypto_shash_set_flags(tctx->hash, crypto_skcipher_get_flags(tfm) & + CRYPTO_TFM_REQ_MASK); + err = crypto_shash_setkey(tctx->hash, keyp, NHPOLY1305_KEY_SIZE); + crypto_skcipher_set_flags(tfm, crypto_shash_get_flags(tctx->hash) & + CRYPTO_TFM_RES_MASK); + keyp += NHPOLY1305_KEY_SIZE; + WARN_ON(keyp != &data->derived_keys[ARRAY_SIZE(data->derived_keys)]); +out: + kzfree(data); + return err; +} + +/* Addition in Z/(2^{128}Z) */ +static inline void le128_add(le128 *r, const le128 *v1, const le128 *v2) +{ + u64 x = le64_to_cpu(v1->b); + u64 y = le64_to_cpu(v2->b); + + r->b = cpu_to_le64(x + y); + r->a = cpu_to_le64(le64_to_cpu(v1->a) + le64_to_cpu(v2->a) + + (x + y < x)); +} + +/* Subtraction in Z/(2^{128}Z) */ +static inline void le128_sub(le128 *r, const le128 *v1, const le128 *v2) +{ + u64 x = le64_to_cpu(v1->b); + u64 y = le64_to_cpu(v2->b); + + r->b = cpu_to_le64(x - y); + r->a = cpu_to_le64(le64_to_cpu(v1->a) - le64_to_cpu(v2->a) - + (x - y > x)); +} + +/* + * Apply the Poly1305 εA∆U hash function to (message length, tweak) and save the + * result to rctx->header_hash. + * + * This value is reused in both the first and second hash steps. Specifically, + * it's added to the result of an independently keyed εA∆U hash function (for + * equal length inputs only) taken over the message. This gives the overall + * Adiantum hash of the (tweak, message) pair. + */ +static void adiantum_hash_header(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + const struct adiantum_tfm_ctx *tctx = crypto_skcipher_ctx(tfm); + struct adiantum_request_ctx *rctx = skcipher_request_ctx(req); + const unsigned int bulk_len = req->cryptlen - BLOCKCIPHER_BLOCK_SIZE; + struct { + __le64 message_bits; + __le64 padding; + } header = { + .message_bits = cpu_to_le64((u64)bulk_len * 8) + }; + struct poly1305_state state; + + poly1305_core_init(&state); + + BUILD_BUG_ON(sizeof(header) % POLY1305_BLOCK_SIZE != 0); + poly1305_core_blocks(&state, &tctx->header_hash_key, + &header, sizeof(header) / POLY1305_BLOCK_SIZE); + + BUILD_BUG_ON(TWEAK_SIZE % POLY1305_BLOCK_SIZE != 0); + poly1305_core_blocks(&state, &tctx->header_hash_key, req->iv, + TWEAK_SIZE / POLY1305_BLOCK_SIZE); + + poly1305_core_emit(&state, &rctx->header_hash); +} + +/* Hash the left-hand block (the "bulk") of the message using NHPoly1305 */ +static int adiantum_hash_message(struct skcipher_request *req, + struct scatterlist *sgl, le128 *digest) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + const struct adiantum_tfm_ctx *tctx = crypto_skcipher_ctx(tfm); + struct adiantum_request_ctx *rctx = skcipher_request_ctx(req); + const unsigned int bulk_len = req->cryptlen - BLOCKCIPHER_BLOCK_SIZE; + struct shash_desc *hash_desc = &rctx->u.hash_desc; + struct sg_mapping_iter miter; + unsigned int i, n; + int err; + + hash_desc->tfm = tctx->hash; + hash_desc->flags = 0; + + err = crypto_shash_init(hash_desc); + if (err) + return err; + + sg_miter_start(&miter, sgl, sg_nents(sgl), + SG_MITER_FROM_SG | SG_MITER_ATOMIC); + for (i = 0; i < bulk_len; i += n) { + sg_miter_next(&miter); + n = min_t(unsigned int, miter.length, bulk_len - i); + err = crypto_shash_update(hash_desc, miter.addr, n); + if (err) + break; + } + sg_miter_stop(&miter); + if (err) + return err; + + return crypto_shash_final(hash_desc, (u8 *)digest); +} + +/* Continue Adiantum encryption/decryption after the stream cipher step */ +static int adiantum_finish(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + const struct adiantum_tfm_ctx *tctx = crypto_skcipher_ctx(tfm); + struct adiantum_request_ctx *rctx = skcipher_request_ctx(req); + const unsigned int bulk_len = req->cryptlen - BLOCKCIPHER_BLOCK_SIZE; + le128 digest; + int err; + + /* If decrypting, decrypt C_M with the block cipher to get P_M */ + if (!rctx->enc) + crypto_cipher_decrypt_one(tctx->blockcipher, rctx->rbuf.bytes, + rctx->rbuf.bytes); + + /* + * Second hash step + * enc: C_R = C_M - H_{K_H}(T, C_L) + * dec: P_R = P_M - H_{K_H}(T, P_L) + */ + err = adiantum_hash_message(req, req->dst, &digest); + if (err) + return err; + le128_add(&digest, &digest, &rctx->header_hash); + le128_sub(&rctx->rbuf.bignum, &rctx->rbuf.bignum, &digest); + scatterwalk_map_and_copy(&rctx->rbuf.bignum, req->dst, + bulk_len, BLOCKCIPHER_BLOCK_SIZE, 1); + return 0; +} + +static void adiantum_streamcipher_done(struct crypto_async_request *areq, + int err) +{ + struct skcipher_request *req = areq->data; + + if (!err) + err = adiantum_finish(req); + + skcipher_request_complete(req, err); +} + +static int adiantum_crypt(struct skcipher_request *req, bool enc) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + const struct adiantum_tfm_ctx *tctx = crypto_skcipher_ctx(tfm); + struct adiantum_request_ctx *rctx = skcipher_request_ctx(req); + const unsigned int bulk_len = req->cryptlen - BLOCKCIPHER_BLOCK_SIZE; + unsigned int stream_len; + le128 digest; + int err; + + if (req->cryptlen < BLOCKCIPHER_BLOCK_SIZE) + return -EINVAL; + + rctx->enc = enc; + + /* + * First hash step + * enc: P_M = P_R + H_{K_H}(T, P_L) + * dec: C_M = C_R + H_{K_H}(T, C_L) + */ + adiantum_hash_header(req); + err = adiantum_hash_message(req, req->src, &digest); + if (err) + return err; + le128_add(&digest, &digest, &rctx->header_hash); + scatterwalk_map_and_copy(&rctx->rbuf.bignum, req->src, + bulk_len, BLOCKCIPHER_BLOCK_SIZE, 0); + le128_add(&rctx->rbuf.bignum, &rctx->rbuf.bignum, &digest); + + /* If encrypting, encrypt P_M with the block cipher to get C_M */ + if (enc) + crypto_cipher_encrypt_one(tctx->blockcipher, rctx->rbuf.bytes, + rctx->rbuf.bytes); + + /* Initialize the rest of the XChaCha IV (first part is C_M) */ + BUILD_BUG_ON(BLOCKCIPHER_BLOCK_SIZE != 16); + BUILD_BUG_ON(XCHACHA_IV_SIZE != 32); /* nonce || stream position */ + rctx->rbuf.words[4] = cpu_to_le32(1); + rctx->rbuf.words[5] = 0; + rctx->rbuf.words[6] = 0; + rctx->rbuf.words[7] = 0; + + /* + * XChaCha needs to be done on all the data except the last 16 bytes; + * for disk encryption that usually means 4080 or 496 bytes. But ChaCha + * implementations tend to be most efficient when passed a whole number + * of 64-byte ChaCha blocks, or sometimes even a multiple of 256 bytes. + * And here it doesn't matter whether the last 16 bytes are written to, + * as the second hash step will overwrite them. Thus, round the XChaCha + * length up to the next 64-byte boundary if possible. + */ + stream_len = bulk_len; + if (round_up(stream_len, CHACHA_BLOCK_SIZE) <= req->cryptlen) + stream_len = round_up(stream_len, CHACHA_BLOCK_SIZE); + + skcipher_request_set_tfm(&rctx->u.streamcipher_req, tctx->streamcipher); + skcipher_request_set_crypt(&rctx->u.streamcipher_req, req->src, + req->dst, stream_len, &rctx->rbuf); + skcipher_request_set_callback(&rctx->u.streamcipher_req, + req->base.flags, + adiantum_streamcipher_done, req); + return crypto_skcipher_encrypt(&rctx->u.streamcipher_req) ?: + adiantum_finish(req); +} + +static int adiantum_encrypt(struct skcipher_request *req) +{ + return adiantum_crypt(req, true); +} + +static int adiantum_decrypt(struct skcipher_request *req) +{ + return adiantum_crypt(req, false); +} + +static int adiantum_init_tfm(struct crypto_skcipher *tfm) +{ + struct skcipher_instance *inst = skcipher_alg_instance(tfm); + struct adiantum_instance_ctx *ictx = skcipher_instance_ctx(inst); + struct adiantum_tfm_ctx *tctx = crypto_skcipher_ctx(tfm); + struct crypto_skcipher *streamcipher; + struct crypto_cipher *blockcipher; + struct crypto_shash *hash; + unsigned int subreq_size; + int err; + + streamcipher = crypto_spawn_skcipher(&ictx->streamcipher_spawn); + if (IS_ERR(streamcipher)) + return PTR_ERR(streamcipher); + + blockcipher = crypto_spawn_cipher(&ictx->blockcipher_spawn); + if (IS_ERR(blockcipher)) { + err = PTR_ERR(blockcipher); + goto err_free_streamcipher; + } + + hash = crypto_spawn_shash(&ictx->hash_spawn); + if (IS_ERR(hash)) { + err = PTR_ERR(hash); + goto err_free_blockcipher; + } + + tctx->streamcipher = streamcipher; + tctx->blockcipher = blockcipher; + tctx->hash = hash; + + BUILD_BUG_ON(offsetofend(struct adiantum_request_ctx, u) != + sizeof(struct adiantum_request_ctx)); + subreq_size = max(FIELD_SIZEOF(struct adiantum_request_ctx, + u.hash_desc) + + crypto_shash_descsize(hash), + FIELD_SIZEOF(struct adiantum_request_ctx, + u.streamcipher_req) + + crypto_skcipher_reqsize(streamcipher)); + + crypto_skcipher_set_reqsize(tfm, + offsetof(struct adiantum_request_ctx, u) + + subreq_size); + return 0; + +err_free_blockcipher: + crypto_free_cipher(blockcipher); +err_free_streamcipher: + crypto_free_skcipher(streamcipher); + return err; +} + +static void adiantum_exit_tfm(struct crypto_skcipher *tfm) +{ + struct adiantum_tfm_ctx *tctx = crypto_skcipher_ctx(tfm); + + crypto_free_skcipher(tctx->streamcipher); + crypto_free_cipher(tctx->blockcipher); + crypto_free_shash(tctx->hash); +} + +static void adiantum_free_instance(struct skcipher_instance *inst) +{ + struct adiantum_instance_ctx *ictx = skcipher_instance_ctx(inst); + + crypto_drop_skcipher(&ictx->streamcipher_spawn); + crypto_drop_spawn(&ictx->blockcipher_spawn); + crypto_drop_shash(&ictx->hash_spawn); + kfree(inst); +} + +/* + * Check for a supported set of inner algorithms. + * See the comment at the beginning of this file. + */ +static bool adiantum_supported_algorithms(struct skcipher_alg *streamcipher_alg, + struct crypto_alg *blockcipher_alg, + struct shash_alg *hash_alg) +{ + if (strcmp(streamcipher_alg->base.cra_name, "xchacha12") != 0 && + strcmp(streamcipher_alg->base.cra_name, "xchacha20") != 0) + return false; + + if (blockcipher_alg->cra_cipher.cia_min_keysize > BLOCKCIPHER_KEY_SIZE || + blockcipher_alg->cra_cipher.cia_max_keysize < BLOCKCIPHER_KEY_SIZE) + return false; + if (blockcipher_alg->cra_blocksize != BLOCKCIPHER_BLOCK_SIZE) + return false; + + if (strcmp(hash_alg->base.cra_name, "nhpoly1305") != 0) + return false; + + return true; +} + +static int adiantum_create(struct crypto_template *tmpl, struct rtattr **tb) +{ + struct crypto_attr_type *algt; + const char *streamcipher_name; + const char *blockcipher_name; + const char *nhpoly1305_name; + struct skcipher_instance *inst; + struct adiantum_instance_ctx *ictx; + struct skcipher_alg *streamcipher_alg; + struct crypto_alg *blockcipher_alg; + struct crypto_alg *_hash_alg; + struct shash_alg *hash_alg; + int err; + + algt = crypto_get_attr_type(tb); + if (IS_ERR(algt)) + return PTR_ERR(algt); + + if ((algt->type ^ CRYPTO_ALG_TYPE_SKCIPHER) & algt->mask) + return -EINVAL; + + streamcipher_name = crypto_attr_alg_name(tb[1]); + if (IS_ERR(streamcipher_name)) + return PTR_ERR(streamcipher_name); + + blockcipher_name = crypto_attr_alg_name(tb[2]); + if (IS_ERR(blockcipher_name)) + return PTR_ERR(blockcipher_name); + + nhpoly1305_name = crypto_attr_alg_name(tb[3]); + if (nhpoly1305_name == ERR_PTR(-ENOENT)) + nhpoly1305_name = "nhpoly1305"; + if (IS_ERR(nhpoly1305_name)) + return PTR_ERR(nhpoly1305_name); + + inst = kzalloc(sizeof(*inst) + sizeof(*ictx), GFP_KERNEL); + if (!inst) + return -ENOMEM; + ictx = skcipher_instance_ctx(inst); + + /* Stream cipher, e.g. "xchacha12" */ + err = crypto_grab_skcipher(&ictx->streamcipher_spawn, streamcipher_name, + 0, crypto_requires_sync(algt->type, + algt->mask)); + if (err) + goto out_free_inst; + streamcipher_alg = crypto_spawn_skcipher_alg(&ictx->streamcipher_spawn); + + /* Block cipher, e.g. "aes" */ + err = crypto_grab_spawn(&ictx->blockcipher_spawn, blockcipher_name, + CRYPTO_ALG_TYPE_CIPHER, CRYPTO_ALG_TYPE_MASK); + if (err) + goto out_drop_streamcipher; + blockcipher_alg = ictx->blockcipher_spawn.alg; + + /* NHPoly1305 εA∆U hash function */ + _hash_alg = crypto_alg_mod_lookup(nhpoly1305_name, + CRYPTO_ALG_TYPE_SHASH, + CRYPTO_ALG_TYPE_MASK); + if (IS_ERR(_hash_alg)) { + err = PTR_ERR(_hash_alg); + goto out_drop_blockcipher; + } + hash_alg = __crypto_shash_alg(_hash_alg); + err = crypto_init_shash_spawn(&ictx->hash_spawn, hash_alg, + skcipher_crypto_instance(inst)); + if (err) { + crypto_mod_put(_hash_alg); + goto out_drop_blockcipher; + } + + /* Check the set of algorithms */ + if (!adiantum_supported_algorithms(streamcipher_alg, blockcipher_alg, + hash_alg)) { + pr_warn("Unsupported Adiantum instantiation: (%s,%s,%s)\n", + streamcipher_alg->base.cra_name, + blockcipher_alg->cra_name, hash_alg->base.cra_name); + err = -EINVAL; + goto out_drop_hash; + } + + /* Instance fields */ + + err = -ENAMETOOLONG; + if (snprintf(inst->alg.base.cra_name, CRYPTO_MAX_ALG_NAME, + "adiantum(%s,%s)", streamcipher_alg->base.cra_name, + blockcipher_alg->cra_name) >= CRYPTO_MAX_ALG_NAME) + goto out_drop_hash; + if (snprintf(inst->alg.base.cra_driver_name, CRYPTO_MAX_ALG_NAME, + "adiantum(%s,%s,%s)", + streamcipher_alg->base.cra_driver_name, + blockcipher_alg->cra_driver_name, + hash_alg->base.cra_driver_name) >= CRYPTO_MAX_ALG_NAME) + goto out_drop_hash; + + inst->alg.base.cra_blocksize = BLOCKCIPHER_BLOCK_SIZE; + inst->alg.base.cra_ctxsize = sizeof(struct adiantum_tfm_ctx); + inst->alg.base.cra_alignmask = streamcipher_alg->base.cra_alignmask | + hash_alg->base.cra_alignmask; + /* + * The block cipher is only invoked once per message, so for long + * messages (e.g. sectors for disk encryption) its performance doesn't + * matter as much as that of the stream cipher and hash function. Thus, + * weigh the block cipher's ->cra_priority less. + */ + inst->alg.base.cra_priority = (4 * streamcipher_alg->base.cra_priority + + 2 * hash_alg->base.cra_priority + + blockcipher_alg->cra_priority) / 7; + + inst->alg.setkey = adiantum_setkey; + inst->alg.encrypt = adiantum_encrypt; + inst->alg.decrypt = adiantum_decrypt; + inst->alg.init = adiantum_init_tfm; + inst->alg.exit = adiantum_exit_tfm; + inst->alg.min_keysize = crypto_skcipher_alg_min_keysize(streamcipher_alg); + inst->alg.max_keysize = crypto_skcipher_alg_max_keysize(streamcipher_alg); + inst->alg.ivsize = TWEAK_SIZE; + + inst->free = adiantum_free_instance; + + err = skcipher_register_instance(tmpl, inst); + if (err) + goto out_drop_hash; + + return 0; + +out_drop_hash: + crypto_drop_shash(&ictx->hash_spawn); +out_drop_blockcipher: + crypto_drop_spawn(&ictx->blockcipher_spawn); +out_drop_streamcipher: + crypto_drop_skcipher(&ictx->streamcipher_spawn); +out_free_inst: + kfree(inst); + return err; +} + +/* adiantum(streamcipher_name, blockcipher_name [, nhpoly1305_name]) */ +static struct crypto_template adiantum_tmpl = { + .name = "adiantum", + .create = adiantum_create, + .module = THIS_MODULE, +}; + +static int __init adiantum_module_init(void) +{ + return crypto_register_template(&adiantum_tmpl); +} + +static void __exit adiantum_module_exit(void) +{ + crypto_unregister_template(&adiantum_tmpl); +} + +module_init(adiantum_module_init); +module_exit(adiantum_module_exit); + +MODULE_DESCRIPTION("Adiantum length-preserving encryption mode"); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Eric Biggers "); +MODULE_ALIAS_CRYPTO("adiantum"); diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c index f7affe7cf0b47..99f2432c96fb2 100644 --- a/crypto/tcrypt.c +++ b/crypto/tcrypt.c @@ -1610,6 +1610,16 @@ static int do_test(const char *alg, u32 type, u32 mask, int m) speed_template_32); break; + case 219: + test_cipher_speed("adiantum(xchacha12,aes)", ENCRYPT, sec, NULL, + 0, speed_template_32); + test_cipher_speed("adiantum(xchacha12,aes)", DECRYPT, sec, NULL, + 0, speed_template_32); + test_cipher_speed("adiantum(xchacha20,aes)", ENCRYPT, sec, NULL, + 0, speed_template_32); + test_cipher_speed("adiantum(xchacha20,aes)", DECRYPT, sec, NULL, + 0, speed_template_32); + break; case 300: if (alg) { diff --git a/crypto/testmgr.c b/crypto/testmgr.c index ace2eef6d86bc..f35220933090d 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -2349,6 +2349,24 @@ static int alg_test_null(const struct alg_test_desc *desc, /* Please keep this list sorted by algorithm name. */ static const struct alg_test_desc alg_test_descs[] = { { + .alg = "adiantum(xchacha12,aes)", + .test = alg_test_skcipher, + .suite = { + .cipher = { + .enc = __VECS(adiantum_xchacha12_aes_enc_tv_template), + .dec = __VECS(adiantum_xchacha12_aes_dec_tv_template) + } + }, + }, { + .alg = "adiantum(xchacha20,aes)", + .test = alg_test_skcipher, + .suite = { + .cipher = { + .enc = __VECS(adiantum_xchacha20_aes_enc_tv_template), + .dec = __VECS(adiantum_xchacha20_aes_dec_tv_template) + } + }, + }, { .alg = "ansi_cprng", .test = alg_test_cprng, .suite = { diff --git a/crypto/testmgr.h b/crypto/testmgr.h index 97f49f2df88f3..50ed007959276 100644 --- a/crypto/testmgr.h +++ b/crypto/testmgr.h @@ -35457,6 +35457,941 @@ static const struct cipher_testvec xchacha12_tv_template[] = { }, }; +/* Adiantum test vectors from https://github.com/google/adiantum */ +static const struct cipher_testvec adiantum_xchacha12_aes_enc_tv_template[] = { + { + .key = "\x9e\xeb\xb2\x49\x3c\x1c\xf5\xf4" + "\x6a\x99\xc2\xc4\xdf\xb1\xf4\xdd" + "\x75\x20\x57\xea\x2c\x4f\xcd\xb2" + "\xa5\x3d\x7b\x49\x1e\xab\xfd\x0f", + .klen = 32, + .iv = "\xdf\x63\xd4\xab\xd2\x49\xf3\xd8" + "\x33\x81\x37\x60\x7d\xfa\x73\x08" + "\xd8\x49\x6d\x80\xe8\x2f\x62\x54" + "\xeb\x0e\xa9\x39\x5b\x45\x7f\x8a", + .input = "\x67\xc9\xf2\x30\x84\x41\x8e\x43" + "\xfb\xf3\xb3\x3e\x79\x36\x7f\xe8", + .result = "\x6d\x32\x86\x18\x67\x86\x0f\x3f" + "\x96\x7c\x9d\x28\x0d\x53\xec\x9f", + .ilen = 16, + .rlen = 16, + .also_non_np = 1, + .np = 2, + .tap = { 14, 2 }, + }, { + .key = "\x36\x2b\x57\x97\xf8\x5d\xcd\x99" + "\x5f\x1a\x5a\x44\x1d\x92\x0f\x27" + "\xcc\x16\xd7\x2b\x85\x63\x99\xd3" + "\xba\x96\xa1\xdb\xd2\x60\x68\xda", + .klen = 32, + .iv = "\xef\x58\x69\xb1\x2c\x5e\x9a\x47" + "\x24\xc1\xb1\x69\xe1\x12\x93\x8f" + "\x43\x3d\x6d\x00\xdb\x5e\xd8\xd9" + "\x12\x9a\xfe\xd9\xff\x2d\xaa\xc4", + .input = "\x5e\xa8\x68\x19\x85\x98\x12\x23" + "\x26\x0a\xcc\xdb\x0a\x04\xb9\xdf" + "\x4d\xb3\x48\x7b\xb0\xe3\xc8\x19" + "\x43\x5a\x46\x06\x94\x2d\xf2", + .result = "\xc7\xc6\xf1\x73\x8f\xc4\xff\x4a" + "\x39\xbe\x78\xbe\x8d\x28\xc8\x89" + "\x46\x63\xe7\x0c\x7d\x87\xe8\x4e" + "\xc9\x18\x7b\xbe\x18\x60\x50", + .ilen = 31, + .rlen = 31, + }, { + .key = "\xa5\x28\x24\x34\x1a\x3c\xd8\xf7" + "\x05\x91\x8f\xee\x85\x1f\x35\x7f" + "\x80\x3d\xfc\x9b\x94\xf6\xfc\x9e" + "\x19\x09\x00\xa9\x04\x31\x4f\x11", + .klen = 32, + .iv = "\xa1\xba\x49\x95\xff\x34\x6d\xb8" + "\xcd\x87\x5d\x5e\xfd\xea\x85\xdb" + "\x8a\x7b\x5e\xb2\x5d\x57\xdd\x62" + "\xac\xa9\x8c\x41\x42\x94\x75\xb7", + .input = "\x69\xb4\xe8\x8c\x37\xe8\x67\x82" + "\xf1\xec\x5d\x04\xe5\x14\x91\x13" + "\xdf\xf2\x87\x1b\x69\x81\x1d\x71" + "\x70\x9e\x9c\x3b\xde\x49\x70\x11" + "\xa0\xa3\xdb\x0d\x54\x4f\x66\x69" + "\xd7\xdb\x80\xa7\x70\x92\x68\xce" + "\x81\x04\x2c\xc6\xab\xae\xe5\x60" + "\x15\xe9\x6f\xef\xaa\x8f\xa7\xa7" + "\x63\x8f\xf2\xf0\x77\xf1\xa8\xea" + "\xe1\xb7\x1f\x9e\xab\x9e\x4b\x3f" + "\x07\x87\x5b\x6f\xcd\xa8\xaf\xb9" + "\xfa\x70\x0b\x52\xb8\xa8\xa7\x9e" + "\x07\x5f\xa6\x0e\xb3\x9b\x79\x13" + "\x79\xc3\x3e\x8d\x1c\x2c\x68\xc8" + "\x51\x1d\x3c\x7b\x7d\x79\x77\x2a" + "\x56\x65\xc5\x54\x23\x28\xb0\x03", + .result = "\x9e\x16\xab\xed\x4b\xa7\x42\x5a" + "\xc6\xfb\x4e\x76\xff\xbe\x03\xa0" + "\x0f\xe3\xad\xba\xe4\x98\x2b\x0e" + "\x21\x48\xa0\xb8\x65\x48\x27\x48" + "\x84\x54\x54\xb2\x9a\x94\x7b\xe6" + "\x4b\x29\xe9\xcf\x05\x91\x80\x1a" + "\x3a\xf3\x41\x96\x85\x1d\x9f\x74" + "\x51\x56\x63\xfa\x7c\x28\x85\x49" + "\xf7\x2f\xf9\xf2\x18\x46\xf5\x33" + "\x80\xa3\x3c\xce\xb2\x57\x93\xf5" + "\xae\xbd\xa9\xf5\x7b\x30\xc4\x93" + "\x66\xe0\x30\x77\x16\xe4\xa0\x31" + "\xba\x70\xbc\x68\x13\xf5\xb0\x9a" + "\xc1\xfc\x7e\xfe\x55\x80\x5c\x48" + "\x74\xa6\xaa\xa3\xac\xdc\xc2\xf5" + "\x8d\xde\x34\x86\x78\x60\x75\x8d", + .ilen = 128, + .rlen = 128, + .also_non_np = 1, + .np = 4, + .tap = { 104, 16, 4, 4 }, + }, { + .key = "\xd3\x81\x72\x18\x23\xff\x6f\x4a" + "\x25\x74\x29\x0d\x51\x8a\x0e\x13" + "\xc1\x53\x5d\x30\x8d\xee\x75\x0d" + "\x14\xd6\x69\xc9\x15\xa9\x0c\x60", + .klen = 32, + .iv = "\x65\x9b\xd4\xa8\x7d\x29\x1d\xf4" + "\xc4\xd6\x9b\x6a\x28\xab\x64\xe2" + "\x62\x81\x97\xc5\x81\xaa\xf9\x44" + "\xc1\x72\x59\x82\xaf\x16\xc8\x2c", + .input = "\xc7\x6b\x52\x6a\x10\xf0\xcc\x09" + "\xc1\x12\x1d\x6d\x21\xa6\x78\xf5" + "\x05\xa3\x69\x60\x91\x36\x98\x57" + "\xba\x0c\x14\xcc\xf3\x2d\x73\x03" + "\xc6\xb2\x5f\xc8\x16\x27\x37\x5d" + "\xd0\x0b\x87\xb2\x50\x94\x7b\x58" + "\x04\xf4\xe0\x7f\x6e\x57\x8e\xc9" + "\x41\x84\xc1\xb1\x7e\x4b\x91\x12" + "\x3a\x8b\x5d\x50\x82\x7b\xcb\xd9" + "\x9a\xd9\x4e\x18\x06\x23\x9e\xd4" + "\xa5\x20\x98\xef\xb5\xda\xe5\xc0" + "\x8a\x6a\x83\x77\x15\x84\x1e\xae" + "\x78\x94\x9d\xdf\xb7\xd1\xea\x67" + "\xaa\xb0\x14\x15\xfa\x67\x21\x84" + "\xd3\x41\x2a\xce\xba\x4b\x4a\xe8" + "\x95\x62\xa9\x55\xf0\x80\xad\xbd" + "\xab\xaf\xdd\x4f\xa5\x7c\x13\x36" + "\xed\x5e\x4f\x72\xad\x4b\xf1\xd0" + "\x88\x4e\xec\x2c\x88\x10\x5e\xea" + "\x12\xc0\x16\x01\x29\xa3\xa0\x55" + "\xaa\x68\xf3\xe9\x9d\x3b\x0d\x3b" + "\x6d\xec\xf8\xa0\x2d\xf0\x90\x8d" + "\x1c\xe2\x88\xd4\x24\x71\xf9\xb3" + "\xc1\x9f\xc5\xd6\x76\x70\xc5\x2e" + "\x9c\xac\xdb\x90\xbd\x83\x72\xba" + "\x6e\xb5\xa5\x53\x83\xa9\xa5\xbf" + "\x7d\x06\x0e\x3c\x2a\xd2\x04\xb5" + "\x1e\x19\x38\x09\x16\xd2\x82\x1f" + "\x75\x18\x56\xb8\x96\x0b\xa6\xf9" + "\xcf\x62\xd9\x32\x5d\xa9\xd7\x1d" + "\xec\xe4\xdf\x1b\xbe\xf1\x36\xee" + "\xe3\x7b\xb5\x2f\xee\xf8\x53\x3d" + "\x6a\xb7\x70\xa9\xfc\x9c\x57\x25" + "\xf2\x89\x10\xd3\xb8\xa8\x8c\x30" + "\xae\x23\x4f\x0e\x13\x66\x4f\xe1" + "\xb6\xc0\xe4\xf8\xef\x93\xbd\x6e" + "\x15\x85\x6b\xe3\x60\x81\x1d\x68" + "\xd7\x31\x87\x89\x09\xab\xd5\x96" + "\x1d\xf3\x6d\x67\x80\xca\x07\x31" + "\x5d\xa7\xe4\xfb\x3e\xf2\x9b\x33" + "\x52\x18\xc8\x30\xfe\x2d\xca\x1e" + "\x79\x92\x7a\x60\x5c\xb6\x58\x87" + "\xa4\x36\xa2\x67\x92\x8b\xa4\xb7" + "\xf1\x86\xdf\xdc\xc0\x7e\x8f\x63" + "\xd2\xa2\xdc\x78\xeb\x4f\xd8\x96" + "\x47\xca\xb8\x91\xf9\xf7\x94\x21" + "\x5f\x9a\x9f\x5b\xb8\x40\x41\x4b" + "\x66\x69\x6a\x72\xd0\xcb\x70\xb7" + "\x93\xb5\x37\x96\x05\x37\x4f\xe5" + "\x8c\xa7\x5a\x4e\x8b\xb7\x84\xea" + "\xc7\xfc\x19\x6e\x1f\x5a\xa1\xac" + "\x18\x7d\x52\x3b\xb3\x34\x62\x99" + "\xe4\x9e\x31\x04\x3f\xc0\x8d\x84" + "\x17\x7c\x25\x48\x52\x67\x11\x27" + "\x67\xbb\x5a\x85\xca\x56\xb2\x5c" + "\xe6\xec\xd5\x96\x3d\x15\xfc\xfb" + "\x22\x25\xf4\x13\xe5\x93\x4b\x9a" + "\x77\xf1\x52\x18\xfa\x16\x5e\x49" + "\x03\x45\xa8\x08\xfa\xb3\x41\x92" + "\x79\x50\x33\xca\xd0\xd7\x42\x55" + "\xc3\x9a\x0c\x4e\xd9\xa4\x3c\x86" + "\x80\x9f\x53\xd1\xa4\x2e\xd1\xbc" + "\xf1\x54\x6e\x93\xa4\x65\x99\x8e" + "\xdf\x29\xc0\x64\x63\x07\xbb\xea", + .result = "\x15\x97\xd0\x86\x18\x03\x9c\x51" + "\xc5\x11\x36\x62\x13\x92\xe6\x73" + "\x29\x79\xde\xa1\x00\x3e\x08\x64" + "\x17\x1a\xbc\xd5\xfe\x33\x0e\x0c" + "\x7c\x94\xa7\xc6\x3c\xbe\xac\xa2" + "\x89\xe6\xbc\xdf\x0c\x33\x27\x42" + "\x46\x73\x2f\xba\x4e\xa6\x46\x8f" + "\xe4\xee\x39\x63\x42\x65\xa3\x88" + "\x7a\xad\x33\x23\xa9\xa7\x20\x7f" + "\x0b\xe6\x6a\xc3\x60\xda\x9e\xb4" + "\xd6\x07\x8a\x77\x26\xd1\xab\x44" + "\x99\x55\x03\x5e\xed\x8d\x7b\xbd" + "\xc8\x21\xb7\x21\x30\x3f\xc0\xb5" + "\xc8\xec\x6c\x23\xa6\xa3\x6d\xf1" + "\x30\x0a\xd0\xa6\xa9\x28\x69\xae" + "\x2a\xe6\x54\xac\x82\x9d\x6a\x95" + "\x6f\x06\x44\xc5\x5a\x77\x6e\xec" + "\xf8\xf8\x63\xb2\xe6\xaa\xbd\x8e" + "\x0e\x8a\x62\x00\x03\xc8\x84\xdd" + "\x47\x4a\xc3\x55\xba\xb7\xe7\xdf" + "\x08\xbf\x62\xf5\xe8\xbc\xb6\x11" + "\xe4\xcb\xd0\x66\x74\x32\xcf\xd4" + "\xf8\x51\x80\x39\x14\x05\x12\xdb" + "\x87\x93\xe2\x26\x30\x9c\x3a\x21" + "\xe5\xd0\x38\x57\x80\x15\xe4\x08" + "\x58\x05\x49\x7d\xe6\x92\x77\x70" + "\xfb\x1e\x2d\x6a\x84\x00\xc8\x68" + "\xf7\x1a\xdd\xf0\x7b\x38\x1e\xd8" + "\x2c\x78\x78\x61\xcf\xe3\xde\x69" + "\x1f\xd5\x03\xd5\x1a\xb4\xcf\x03" + "\xc8\x7a\x70\x68\x35\xb4\xf6\xbe" + "\x90\x62\xb2\x28\x99\x86\xf5\x44" + "\x99\xeb\x31\xcf\xca\xdf\xd0\x21" + "\xd6\x60\xf7\x0f\x40\xb4\x80\xb7" + "\xab\xe1\x9b\x45\xba\x66\xda\xee" + "\xdd\x04\x12\x40\x98\xe1\x69\xe5" + "\x2b\x9c\x59\x80\xe7\x7b\xcc\x63" + "\xa6\xc0\x3a\xa9\xfe\x8a\xf9\x62" + "\x11\x34\x61\x94\x35\xfe\xf2\x99" + "\xfd\xee\x19\xea\x95\xb6\x12\xbf" + "\x1b\xdf\x02\x1a\xcc\x3e\x7e\x65" + "\x78\x74\x10\x50\x29\x63\x28\xea" + "\x6b\xab\xd4\x06\x4d\x15\x24\x31" + "\xc7\x0a\xc9\x16\xb6\x48\xf0\xbf" + "\x49\xdb\x68\x71\x31\x8f\x87\xe2" + "\x13\x05\x64\xd6\x22\x0c\xf8\x36" + "\x84\x24\x3e\x69\x5e\xb8\x9e\x16" + "\x73\x6c\x83\x1e\xe0\x9f\x9e\xba" + "\xe5\x59\x21\x33\x1b\xa9\x26\xc2" + "\xc7\xd9\x30\x73\xb6\xa6\x73\x82" + "\x19\xfa\x44\x4d\x40\x8b\x69\x04" + "\x94\x74\xea\x6e\xb3\x09\x47\x01" + "\x2a\xb9\x78\x34\x43\x11\xed\xd6" + "\x8c\x95\x65\x1b\x85\x67\xa5\x40" + "\xac\x9c\x05\x4b\x57\x4a\xa9\x96" + "\x0f\xdd\x4f\xa1\xe0\xcf\x6e\xc7" + "\x1b\xed\xa2\xb4\x56\x8c\x09\x6e" + "\xa6\x65\xd7\x55\x81\xb7\xed\x11" + "\x9b\x40\x75\xa8\x6b\x56\xaf\x16" + "\x8b\x3d\xf4\xcb\xfe\xd5\x1d\x3d" + "\x85\xc2\xc0\xde\x43\x39\x4a\x96" + "\xba\x88\x97\xc0\xd6\x00\x0e\x27" + "\x21\xb0\x21\x52\xba\xa7\x37\xaa" + "\xcc\xbf\x95\xa8\xf4\xd0\x91\xf6", + .ilen = 512, + .rlen = 512, + .also_non_np = 1, + .np = 2, + .tap = { 144, 368 }, + } +}; + +static const struct cipher_testvec adiantum_xchacha12_aes_dec_tv_template[] = { + { + .key = "\x9e\xeb\xb2\x49\x3c\x1c\xf5\xf4" + "\x6a\x99\xc2\xc4\xdf\xb1\xf4\xdd" + "\x75\x20\x57\xea\x2c\x4f\xcd\xb2" + "\xa5\x3d\x7b\x49\x1e\xab\xfd\x0f", + .klen = 32, + .iv = "\xdf\x63\xd4\xab\xd2\x49\xf3\xd8" + "\x33\x81\x37\x60\x7d\xfa\x73\x08" + "\xd8\x49\x6d\x80\xe8\x2f\x62\x54" + "\xeb\x0e\xa9\x39\x5b\x45\x7f\x8a", + .result = "\x67\xc9\xf2\x30\x84\x41\x8e\x43" + "\xfb\xf3\xb3\x3e\x79\x36\x7f\xe8", + .input = "\x6d\x32\x86\x18\x67\x86\x0f\x3f" + "\x96\x7c\x9d\x28\x0d\x53\xec\x9f", + .ilen = 16, + .rlen = 16, + .also_non_np = 1, + .np = 2, + .tap = { 14, 2 }, + }, { + .key = "\x36\x2b\x57\x97\xf8\x5d\xcd\x99" + "\x5f\x1a\x5a\x44\x1d\x92\x0f\x27" + "\xcc\x16\xd7\x2b\x85\x63\x99\xd3" + "\xba\x96\xa1\xdb\xd2\x60\x68\xda", + .klen = 32, + .iv = "\xef\x58\x69\xb1\x2c\x5e\x9a\x47" + "\x24\xc1\xb1\x69\xe1\x12\x93\x8f" + "\x43\x3d\x6d\x00\xdb\x5e\xd8\xd9" + "\x12\x9a\xfe\xd9\xff\x2d\xaa\xc4", + .result = "\x5e\xa8\x68\x19\x85\x98\x12\x23" + "\x26\x0a\xcc\xdb\x0a\x04\xb9\xdf" + "\x4d\xb3\x48\x7b\xb0\xe3\xc8\x19" + "\x43\x5a\x46\x06\x94\x2d\xf2", + .input = "\xc7\xc6\xf1\x73\x8f\xc4\xff\x4a" + "\x39\xbe\x78\xbe\x8d\x28\xc8\x89" + "\x46\x63\xe7\x0c\x7d\x87\xe8\x4e" + "\xc9\x18\x7b\xbe\x18\x60\x50", + .ilen = 31, + .rlen = 31, + }, { + .key = "\xa5\x28\x24\x34\x1a\x3c\xd8\xf7" + "\x05\x91\x8f\xee\x85\x1f\x35\x7f" + "\x80\x3d\xfc\x9b\x94\xf6\xfc\x9e" + "\x19\x09\x00\xa9\x04\x31\x4f\x11", + .klen = 32, + .iv = "\xa1\xba\x49\x95\xff\x34\x6d\xb8" + "\xcd\x87\x5d\x5e\xfd\xea\x85\xdb" + "\x8a\x7b\x5e\xb2\x5d\x57\xdd\x62" + "\xac\xa9\x8c\x41\x42\x94\x75\xb7", + .result = "\x69\xb4\xe8\x8c\x37\xe8\x67\x82" + "\xf1\xec\x5d\x04\xe5\x14\x91\x13" + "\xdf\xf2\x87\x1b\x69\x81\x1d\x71" + "\x70\x9e\x9c\x3b\xde\x49\x70\x11" + "\xa0\xa3\xdb\x0d\x54\x4f\x66\x69" + "\xd7\xdb\x80\xa7\x70\x92\x68\xce" + "\x81\x04\x2c\xc6\xab\xae\xe5\x60" + "\x15\xe9\x6f\xef\xaa\x8f\xa7\xa7" + "\x63\x8f\xf2\xf0\x77\xf1\xa8\xea" + "\xe1\xb7\x1f\x9e\xab\x9e\x4b\x3f" + "\x07\x87\x5b\x6f\xcd\xa8\xaf\xb9" + "\xfa\x70\x0b\x52\xb8\xa8\xa7\x9e" + "\x07\x5f\xa6\x0e\xb3\x9b\x79\x13" + "\x79\xc3\x3e\x8d\x1c\x2c\x68\xc8" + "\x51\x1d\x3c\x7b\x7d\x79\x77\x2a" + "\x56\x65\xc5\x54\x23\x28\xb0\x03", + .input = "\x9e\x16\xab\xed\x4b\xa7\x42\x5a" + "\xc6\xfb\x4e\x76\xff\xbe\x03\xa0" + "\x0f\xe3\xad\xba\xe4\x98\x2b\x0e" + "\x21\x48\xa0\xb8\x65\x48\x27\x48" + "\x84\x54\x54\xb2\x9a\x94\x7b\xe6" + "\x4b\x29\xe9\xcf\x05\x91\x80\x1a" + "\x3a\xf3\x41\x96\x85\x1d\x9f\x74" + "\x51\x56\x63\xfa\x7c\x28\x85\x49" + "\xf7\x2f\xf9\xf2\x18\x46\xf5\x33" + "\x80\xa3\x3c\xce\xb2\x57\x93\xf5" + "\xae\xbd\xa9\xf5\x7b\x30\xc4\x93" + "\x66\xe0\x30\x77\x16\xe4\xa0\x31" + "\xba\x70\xbc\x68\x13\xf5\xb0\x9a" + "\xc1\xfc\x7e\xfe\x55\x80\x5c\x48" + "\x74\xa6\xaa\xa3\xac\xdc\xc2\xf5" + "\x8d\xde\x34\x86\x78\x60\x75\x8d", + .ilen = 128, + .rlen = 128, + .also_non_np = 1, + .np = 4, + .tap = { 104, 16, 4, 4 }, + }, { + .key = "\xd3\x81\x72\x18\x23\xff\x6f\x4a" + "\x25\x74\x29\x0d\x51\x8a\x0e\x13" + "\xc1\x53\x5d\x30\x8d\xee\x75\x0d" + "\x14\xd6\x69\xc9\x15\xa9\x0c\x60", + .klen = 32, + .iv = "\x65\x9b\xd4\xa8\x7d\x29\x1d\xf4" + "\xc4\xd6\x9b\x6a\x28\xab\x64\xe2" + "\x62\x81\x97\xc5\x81\xaa\xf9\x44" + "\xc1\x72\x59\x82\xaf\x16\xc8\x2c", + .result = "\xc7\x6b\x52\x6a\x10\xf0\xcc\x09" + "\xc1\x12\x1d\x6d\x21\xa6\x78\xf5" + "\x05\xa3\x69\x60\x91\x36\x98\x57" + "\xba\x0c\x14\xcc\xf3\x2d\x73\x03" + "\xc6\xb2\x5f\xc8\x16\x27\x37\x5d" + "\xd0\x0b\x87\xb2\x50\x94\x7b\x58" + "\x04\xf4\xe0\x7f\x6e\x57\x8e\xc9" + "\x41\x84\xc1\xb1\x7e\x4b\x91\x12" + "\x3a\x8b\x5d\x50\x82\x7b\xcb\xd9" + "\x9a\xd9\x4e\x18\x06\x23\x9e\xd4" + "\xa5\x20\x98\xef\xb5\xda\xe5\xc0" + "\x8a\x6a\x83\x77\x15\x84\x1e\xae" + "\x78\x94\x9d\xdf\xb7\xd1\xea\x67" + "\xaa\xb0\x14\x15\xfa\x67\x21\x84" + "\xd3\x41\x2a\xce\xba\x4b\x4a\xe8" + "\x95\x62\xa9\x55\xf0\x80\xad\xbd" + "\xab\xaf\xdd\x4f\xa5\x7c\x13\x36" + "\xed\x5e\x4f\x72\xad\x4b\xf1\xd0" + "\x88\x4e\xec\x2c\x88\x10\x5e\xea" + "\x12\xc0\x16\x01\x29\xa3\xa0\x55" + "\xaa\x68\xf3\xe9\x9d\x3b\x0d\x3b" + "\x6d\xec\xf8\xa0\x2d\xf0\x90\x8d" + "\x1c\xe2\x88\xd4\x24\x71\xf9\xb3" + "\xc1\x9f\xc5\xd6\x76\x70\xc5\x2e" + "\x9c\xac\xdb\x90\xbd\x83\x72\xba" + "\x6e\xb5\xa5\x53\x83\xa9\xa5\xbf" + "\x7d\x06\x0e\x3c\x2a\xd2\x04\xb5" + "\x1e\x19\x38\x09\x16\xd2\x82\x1f" + "\x75\x18\x56\xb8\x96\x0b\xa6\xf9" + "\xcf\x62\xd9\x32\x5d\xa9\xd7\x1d" + "\xec\xe4\xdf\x1b\xbe\xf1\x36\xee" + "\xe3\x7b\xb5\x2f\xee\xf8\x53\x3d" + "\x6a\xb7\x70\xa9\xfc\x9c\x57\x25" + "\xf2\x89\x10\xd3\xb8\xa8\x8c\x30" + "\xae\x23\x4f\x0e\x13\x66\x4f\xe1" + "\xb6\xc0\xe4\xf8\xef\x93\xbd\x6e" + "\x15\x85\x6b\xe3\x60\x81\x1d\x68" + "\xd7\x31\x87\x89\x09\xab\xd5\x96" + "\x1d\xf3\x6d\x67\x80\xca\x07\x31" + "\x5d\xa7\xe4\xfb\x3e\xf2\x9b\x33" + "\x52\x18\xc8\x30\xfe\x2d\xca\x1e" + "\x79\x92\x7a\x60\x5c\xb6\x58\x87" + "\xa4\x36\xa2\x67\x92\x8b\xa4\xb7" + "\xf1\x86\xdf\xdc\xc0\x7e\x8f\x63" + "\xd2\xa2\xdc\x78\xeb\x4f\xd8\x96" + "\x47\xca\xb8\x91\xf9\xf7\x94\x21" + "\x5f\x9a\x9f\x5b\xb8\x40\x41\x4b" + "\x66\x69\x6a\x72\xd0\xcb\x70\xb7" + "\x93\xb5\x37\x96\x05\x37\x4f\xe5" + "\x8c\xa7\x5a\x4e\x8b\xb7\x84\xea" + "\xc7\xfc\x19\x6e\x1f\x5a\xa1\xac" + "\x18\x7d\x52\x3b\xb3\x34\x62\x99" + "\xe4\x9e\x31\x04\x3f\xc0\x8d\x84" + "\x17\x7c\x25\x48\x52\x67\x11\x27" + "\x67\xbb\x5a\x85\xca\x56\xb2\x5c" + "\xe6\xec\xd5\x96\x3d\x15\xfc\xfb" + "\x22\x25\xf4\x13\xe5\x93\x4b\x9a" + "\x77\xf1\x52\x18\xfa\x16\x5e\x49" + "\x03\x45\xa8\x08\xfa\xb3\x41\x92" + "\x79\x50\x33\xca\xd0\xd7\x42\x55" + "\xc3\x9a\x0c\x4e\xd9\xa4\x3c\x86" + "\x80\x9f\x53\xd1\xa4\x2e\xd1\xbc" + "\xf1\x54\x6e\x93\xa4\x65\x99\x8e" + "\xdf\x29\xc0\x64\x63\x07\xbb\xea", + .input = "\x15\x97\xd0\x86\x18\x03\x9c\x51" + "\xc5\x11\x36\x62\x13\x92\xe6\x73" + "\x29\x79\xde\xa1\x00\x3e\x08\x64" + "\x17\x1a\xbc\xd5\xfe\x33\x0e\x0c" + "\x7c\x94\xa7\xc6\x3c\xbe\xac\xa2" + "\x89\xe6\xbc\xdf\x0c\x33\x27\x42" + "\x46\x73\x2f\xba\x4e\xa6\x46\x8f" + "\xe4\xee\x39\x63\x42\x65\xa3\x88" + "\x7a\xad\x33\x23\xa9\xa7\x20\x7f" + "\x0b\xe6\x6a\xc3\x60\xda\x9e\xb4" + "\xd6\x07\x8a\x77\x26\xd1\xab\x44" + "\x99\x55\x03\x5e\xed\x8d\x7b\xbd" + "\xc8\x21\xb7\x21\x30\x3f\xc0\xb5" + "\xc8\xec\x6c\x23\xa6\xa3\x6d\xf1" + "\x30\x0a\xd0\xa6\xa9\x28\x69\xae" + "\x2a\xe6\x54\xac\x82\x9d\x6a\x95" + "\x6f\x06\x44\xc5\x5a\x77\x6e\xec" + "\xf8\xf8\x63\xb2\xe6\xaa\xbd\x8e" + "\x0e\x8a\x62\x00\x03\xc8\x84\xdd" + "\x47\x4a\xc3\x55\xba\xb7\xe7\xdf" + "\x08\xbf\x62\xf5\xe8\xbc\xb6\x11" + "\xe4\xcb\xd0\x66\x74\x32\xcf\xd4" + "\xf8\x51\x80\x39\x14\x05\x12\xdb" + "\x87\x93\xe2\x26\x30\x9c\x3a\x21" + "\xe5\xd0\x38\x57\x80\x15\xe4\x08" + "\x58\x05\x49\x7d\xe6\x92\x77\x70" + "\xfb\x1e\x2d\x6a\x84\x00\xc8\x68" + "\xf7\x1a\xdd\xf0\x7b\x38\x1e\xd8" + "\x2c\x78\x78\x61\xcf\xe3\xde\x69" + "\x1f\xd5\x03\xd5\x1a\xb4\xcf\x03" + "\xc8\x7a\x70\x68\x35\xb4\xf6\xbe" + "\x90\x62\xb2\x28\x99\x86\xf5\x44" + "\x99\xeb\x31\xcf\xca\xdf\xd0\x21" + "\xd6\x60\xf7\x0f\x40\xb4\x80\xb7" + "\xab\xe1\x9b\x45\xba\x66\xda\xee" + "\xdd\x04\x12\x40\x98\xe1\x69\xe5" + "\x2b\x9c\x59\x80\xe7\x7b\xcc\x63" + "\xa6\xc0\x3a\xa9\xfe\x8a\xf9\x62" + "\x11\x34\x61\x94\x35\xfe\xf2\x99" + "\xfd\xee\x19\xea\x95\xb6\x12\xbf" + "\x1b\xdf\x02\x1a\xcc\x3e\x7e\x65" + "\x78\x74\x10\x50\x29\x63\x28\xea" + "\x6b\xab\xd4\x06\x4d\x15\x24\x31" + "\xc7\x0a\xc9\x16\xb6\x48\xf0\xbf" + "\x49\xdb\x68\x71\x31\x8f\x87\xe2" + "\x13\x05\x64\xd6\x22\x0c\xf8\x36" + "\x84\x24\x3e\x69\x5e\xb8\x9e\x16" + "\x73\x6c\x83\x1e\xe0\x9f\x9e\xba" + "\xe5\x59\x21\x33\x1b\xa9\x26\xc2" + "\xc7\xd9\x30\x73\xb6\xa6\x73\x82" + "\x19\xfa\x44\x4d\x40\x8b\x69\x04" + "\x94\x74\xea\x6e\xb3\x09\x47\x01" + "\x2a\xb9\x78\x34\x43\x11\xed\xd6" + "\x8c\x95\x65\x1b\x85\x67\xa5\x40" + "\xac\x9c\x05\x4b\x57\x4a\xa9\x96" + "\x0f\xdd\x4f\xa1\xe0\xcf\x6e\xc7" + "\x1b\xed\xa2\xb4\x56\x8c\x09\x6e" + "\xa6\x65\xd7\x55\x81\xb7\xed\x11" + "\x9b\x40\x75\xa8\x6b\x56\xaf\x16" + "\x8b\x3d\xf4\xcb\xfe\xd5\x1d\x3d" + "\x85\xc2\xc0\xde\x43\x39\x4a\x96" + "\xba\x88\x97\xc0\xd6\x00\x0e\x27" + "\x21\xb0\x21\x52\xba\xa7\x37\xaa" + "\xcc\xbf\x95\xa8\xf4\xd0\x91\xf6", + .ilen = 512, + .rlen = 512, + .also_non_np = 1, + .np = 2, + .tap = { 144, 368 }, + } +}; + +/* Adiantum with XChaCha20 instead of XChaCha12 */ +/* Test vectors from https://github.com/google/adiantum */ +static const struct cipher_testvec adiantum_xchacha20_aes_enc_tv_template[] = { + { + .key = "\x9e\xeb\xb2\x49\x3c\x1c\xf5\xf4" + "\x6a\x99\xc2\xc4\xdf\xb1\xf4\xdd" + "\x75\x20\x57\xea\x2c\x4f\xcd\xb2" + "\xa5\x3d\x7b\x49\x1e\xab\xfd\x0f", + .klen = 32, + .iv = "\xdf\x63\xd4\xab\xd2\x49\xf3\xd8" + "\x33\x81\x37\x60\x7d\xfa\x73\x08" + "\xd8\x49\x6d\x80\xe8\x2f\x62\x54" + "\xeb\x0e\xa9\x39\x5b\x45\x7f\x8a", + .input = "\x67\xc9\xf2\x30\x84\x41\x8e\x43" + "\xfb\xf3\xb3\x3e\x79\x36\x7f\xe8", + .result = "\xf6\x78\x97\xd6\xaa\x94\x01\x27" + "\x2e\x4d\x83\xe0\x6e\x64\x9a\xdf", + .ilen = 16, + .rlen = 16, + .also_non_np = 1, + .np = 3, + .tap = { 5, 2, 9 }, + }, { + .key = "\x36\x2b\x57\x97\xf8\x5d\xcd\x99" + "\x5f\x1a\x5a\x44\x1d\x92\x0f\x27" + "\xcc\x16\xd7\x2b\x85\x63\x99\xd3" + "\xba\x96\xa1\xdb\xd2\x60\x68\xda", + .klen = 32, + .iv = "\xef\x58\x69\xb1\x2c\x5e\x9a\x47" + "\x24\xc1\xb1\x69\xe1\x12\x93\x8f" + "\x43\x3d\x6d\x00\xdb\x5e\xd8\xd9" + "\x12\x9a\xfe\xd9\xff\x2d\xaa\xc4", + .input = "\x5e\xa8\x68\x19\x85\x98\x12\x23" + "\x26\x0a\xcc\xdb\x0a\x04\xb9\xdf" + "\x4d\xb3\x48\x7b\xb0\xe3\xc8\x19" + "\x43\x5a\x46\x06\x94\x2d\xf2", + .result = "\x4b\xb8\x90\x10\xdf\x7f\x64\x08" + "\x0e\x14\x42\x5f\x00\x74\x09\x36" + "\x57\x72\xb5\xfd\xb5\x5d\xb8\x28" + "\x0c\x04\x91\x14\x91\xe9\x37", + .ilen = 31, + .rlen = 31, + .also_non_np = 1, + .np = 2, + .tap = { 16, 15 }, + }, { + .key = "\xa5\x28\x24\x34\x1a\x3c\xd8\xf7" + "\x05\x91\x8f\xee\x85\x1f\x35\x7f" + "\x80\x3d\xfc\x9b\x94\xf6\xfc\x9e" + "\x19\x09\x00\xa9\x04\x31\x4f\x11", + .klen = 32, + .iv = "\xa1\xba\x49\x95\xff\x34\x6d\xb8" + "\xcd\x87\x5d\x5e\xfd\xea\x85\xdb" + "\x8a\x7b\x5e\xb2\x5d\x57\xdd\x62" + "\xac\xa9\x8c\x41\x42\x94\x75\xb7", + .input = "\x69\xb4\xe8\x8c\x37\xe8\x67\x82" + "\xf1\xec\x5d\x04\xe5\x14\x91\x13" + "\xdf\xf2\x87\x1b\x69\x81\x1d\x71" + "\x70\x9e\x9c\x3b\xde\x49\x70\x11" + "\xa0\xa3\xdb\x0d\x54\x4f\x66\x69" + "\xd7\xdb\x80\xa7\x70\x92\x68\xce" + "\x81\x04\x2c\xc6\xab\xae\xe5\x60" + "\x15\xe9\x6f\xef\xaa\x8f\xa7\xa7" + "\x63\x8f\xf2\xf0\x77\xf1\xa8\xea" + "\xe1\xb7\x1f\x9e\xab\x9e\x4b\x3f" + "\x07\x87\x5b\x6f\xcd\xa8\xaf\xb9" + "\xfa\x70\x0b\x52\xb8\xa8\xa7\x9e" + "\x07\x5f\xa6\x0e\xb3\x9b\x79\x13" + "\x79\xc3\x3e\x8d\x1c\x2c\x68\xc8" + "\x51\x1d\x3c\x7b\x7d\x79\x77\x2a" + "\x56\x65\xc5\x54\x23\x28\xb0\x03", + .result = "\xb1\x8b\xa0\x05\x77\xa8\x4d\x59" + "\x1b\x8e\x21\xfc\x3a\x49\xfa\xd4" + "\xeb\x36\xf3\xc4\xdf\xdc\xae\x67" + "\x07\x3f\x70\x0e\xe9\x66\xf5\x0c" + "\x30\x4d\x66\xc9\xa4\x2f\x73\x9c" + "\x13\xc8\x49\x44\xcc\x0a\x90\x9d" + "\x7c\xdd\x19\x3f\xea\x72\x8d\x58" + "\xab\xe7\x09\x2c\xec\xb5\x44\xd2" + "\xca\xa6\x2d\x7a\x5c\x9c\x2b\x15" + "\xec\x2a\xa6\x69\x91\xf9\xf3\x13" + "\xf7\x72\xc1\xc1\x40\xd5\xe1\x94" + "\xf4\x29\xa1\x3e\x25\x02\xa8\x3e" + "\x94\xc1\x91\x14\xa1\x14\xcb\xbe" + "\x67\x4c\xb9\x38\xfe\xa7\xaa\x32" + "\x29\x62\x0d\xb2\xf6\x3c\x58\x57" + "\xc1\xd5\x5a\xbb\xd6\xa6\x2a\xe5", + .ilen = 128, + .rlen = 128, + .also_non_np = 1, + .np = 4, + .tap = { 112, 7, 8, 1 }, + }, { + .key = "\xd3\x81\x72\x18\x23\xff\x6f\x4a" + "\x25\x74\x29\x0d\x51\x8a\x0e\x13" + "\xc1\x53\x5d\x30\x8d\xee\x75\x0d" + "\x14\xd6\x69\xc9\x15\xa9\x0c\x60", + .klen = 32, + .iv = "\x65\x9b\xd4\xa8\x7d\x29\x1d\xf4" + "\xc4\xd6\x9b\x6a\x28\xab\x64\xe2" + "\x62\x81\x97\xc5\x81\xaa\xf9\x44" + "\xc1\x72\x59\x82\xaf\x16\xc8\x2c", + .input = "\xc7\x6b\x52\x6a\x10\xf0\xcc\x09" + "\xc1\x12\x1d\x6d\x21\xa6\x78\xf5" + "\x05\xa3\x69\x60\x91\x36\x98\x57" + "\xba\x0c\x14\xcc\xf3\x2d\x73\x03" + "\xc6\xb2\x5f\xc8\x16\x27\x37\x5d" + "\xd0\x0b\x87\xb2\x50\x94\x7b\x58" + "\x04\xf4\xe0\x7f\x6e\x57\x8e\xc9" + "\x41\x84\xc1\xb1\x7e\x4b\x91\x12" + "\x3a\x8b\x5d\x50\x82\x7b\xcb\xd9" + "\x9a\xd9\x4e\x18\x06\x23\x9e\xd4" + "\xa5\x20\x98\xef\xb5\xda\xe5\xc0" + "\x8a\x6a\x83\x77\x15\x84\x1e\xae" + "\x78\x94\x9d\xdf\xb7\xd1\xea\x67" + "\xaa\xb0\x14\x15\xfa\x67\x21\x84" + "\xd3\x41\x2a\xce\xba\x4b\x4a\xe8" + "\x95\x62\xa9\x55\xf0\x80\xad\xbd" + "\xab\xaf\xdd\x4f\xa5\x7c\x13\x36" + "\xed\x5e\x4f\x72\xad\x4b\xf1\xd0" + "\x88\x4e\xec\x2c\x88\x10\x5e\xea" + "\x12\xc0\x16\x01\x29\xa3\xa0\x55" + "\xaa\x68\xf3\xe9\x9d\x3b\x0d\x3b" + "\x6d\xec\xf8\xa0\x2d\xf0\x90\x8d" + "\x1c\xe2\x88\xd4\x24\x71\xf9\xb3" + "\xc1\x9f\xc5\xd6\x76\x70\xc5\x2e" + "\x9c\xac\xdb\x90\xbd\x83\x72\xba" + "\x6e\xb5\xa5\x53\x83\xa9\xa5\xbf" + "\x7d\x06\x0e\x3c\x2a\xd2\x04\xb5" + "\x1e\x19\x38\x09\x16\xd2\x82\x1f" + "\x75\x18\x56\xb8\x96\x0b\xa6\xf9" + "\xcf\x62\xd9\x32\x5d\xa9\xd7\x1d" + "\xec\xe4\xdf\x1b\xbe\xf1\x36\xee" + "\xe3\x7b\xb5\x2f\xee\xf8\x53\x3d" + "\x6a\xb7\x70\xa9\xfc\x9c\x57\x25" + "\xf2\x89\x10\xd3\xb8\xa8\x8c\x30" + "\xae\x23\x4f\x0e\x13\x66\x4f\xe1" + "\xb6\xc0\xe4\xf8\xef\x93\xbd\x6e" + "\x15\x85\x6b\xe3\x60\x81\x1d\x68" + "\xd7\x31\x87\x89\x09\xab\xd5\x96" + "\x1d\xf3\x6d\x67\x80\xca\x07\x31" + "\x5d\xa7\xe4\xfb\x3e\xf2\x9b\x33" + "\x52\x18\xc8\x30\xfe\x2d\xca\x1e" + "\x79\x92\x7a\x60\x5c\xb6\x58\x87" + "\xa4\x36\xa2\x67\x92\x8b\xa4\xb7" + "\xf1\x86\xdf\xdc\xc0\x7e\x8f\x63" + "\xd2\xa2\xdc\x78\xeb\x4f\xd8\x96" + "\x47\xca\xb8\x91\xf9\xf7\x94\x21" + "\x5f\x9a\x9f\x5b\xb8\x40\x41\x4b" + "\x66\x69\x6a\x72\xd0\xcb\x70\xb7" + "\x93\xb5\x37\x96\x05\x37\x4f\xe5" + "\x8c\xa7\x5a\x4e\x8b\xb7\x84\xea" + "\xc7\xfc\x19\x6e\x1f\x5a\xa1\xac" + "\x18\x7d\x52\x3b\xb3\x34\x62\x99" + "\xe4\x9e\x31\x04\x3f\xc0\x8d\x84" + "\x17\x7c\x25\x48\x52\x67\x11\x27" + "\x67\xbb\x5a\x85\xca\x56\xb2\x5c" + "\xe6\xec\xd5\x96\x3d\x15\xfc\xfb" + "\x22\x25\xf4\x13\xe5\x93\x4b\x9a" + "\x77\xf1\x52\x18\xfa\x16\x5e\x49" + "\x03\x45\xa8\x08\xfa\xb3\x41\x92" + "\x79\x50\x33\xca\xd0\xd7\x42\x55" + "\xc3\x9a\x0c\x4e\xd9\xa4\x3c\x86" + "\x80\x9f\x53\xd1\xa4\x2e\xd1\xbc" + "\xf1\x54\x6e\x93\xa4\x65\x99\x8e" + "\xdf\x29\xc0\x64\x63\x07\xbb\xea", + .result = "\xe0\x33\xf6\xe0\xb4\xa5\xdd\x2b" + "\xdd\xce\xfc\x12\x1e\xfc\x2d\xf2" + "\x8b\xc7\xeb\xc1\xc4\x2a\xe8\x44" + "\x0f\x3d\x97\x19\x2e\x6d\xa2\x38" + "\x9d\xa6\xaa\xe1\x96\xb9\x08\xe8" + "\x0b\x70\x48\x5c\xed\xb5\x9b\xcb" + "\x8b\x40\x88\x7e\x69\x73\xf7\x16" + "\x71\xbb\x5b\xfc\xa3\x47\x5d\xa6" + "\xae\x3a\x64\xc4\xe7\xb8\xa8\xe7" + "\xb1\x32\x19\xdb\xe3\x01\xb8\xf0" + "\xa4\x86\xb4\x4c\xc2\xde\x5c\xd2" + "\x6c\x77\xd2\xe8\x18\xb7\x0a\xc9" + "\x3d\x53\xb5\xc4\x5c\xf0\x8c\x06" + "\xdc\x90\xe0\x74\x47\x1b\x0b\xf6" + "\xd2\x71\x6b\xc4\xf1\x97\x00\x2d" + "\x63\x57\x44\x1f\x8c\xf4\xe6\x9b" + "\xe0\x7a\xdd\xec\x32\x73\x42\x32" + "\x7f\x35\x67\x60\x0d\xcf\x10\x52" + "\x61\x22\x53\x8d\x8e\xbb\x33\x76" + "\x59\xd9\x10\xce\xdf\xef\xc0\x41" + "\xd5\x33\x29\x6a\xda\x46\xa4\x51" + "\xf0\x99\x3d\x96\x31\xdd\xb5\xcb" + "\x3e\x2a\x1f\xc7\x5c\x79\xd3\xc5" + "\x20\xa1\xb1\x39\x1b\xc6\x0a\x70" + "\x26\x39\x95\x07\xad\x7a\xc9\x69" + "\xfe\x81\xc7\x88\x08\x38\xaf\xad" + "\x9e\x8d\xfb\xe8\x24\x0d\x22\xb8" + "\x0e\xed\xbe\x37\x53\x7c\xa6\xc6" + "\x78\x62\xec\xa3\x59\xd9\xc6\x9d" + "\xb8\x0e\x69\x77\x84\x2d\x6a\x4c" + "\xc5\xd9\xb2\xa0\x2b\xa8\x80\xcc" + "\xe9\x1e\x9c\x5a\xc4\xa1\xb2\x37" + "\x06\x9b\x30\x32\x67\xf7\xe7\xd2" + "\x42\xc7\xdf\x4e\xd4\xcb\xa0\x12" + "\x94\xa1\x34\x85\x93\x50\x4b\x0a" + "\x3c\x7d\x49\x25\x01\x41\x6b\x96" + "\xa9\x12\xbb\x0b\xc0\xd7\xd0\x93" + "\x1f\x70\x38\xb8\x21\xee\xf6\xa7" + "\xee\xeb\xe7\x81\xa4\x13\xb4\x87" + "\xfa\xc1\xb0\xb5\x37\x8b\x74\xa2" + "\x4e\xc7\xc2\xad\x3d\x62\x3f\xf8" + "\x34\x42\xe5\xae\x45\x13\x63\xfe" + "\xfc\x2a\x17\x46\x61\xa9\xd3\x1c" + "\x4c\xaf\xf0\x09\x62\x26\x66\x1e" + "\x74\xcf\xd6\x68\x3d\x7d\xd8\xb7" + "\xe7\xe6\xf8\xf0\x08\x20\xf7\x47" + "\x1c\x52\xaa\x0f\x3e\x21\xa3\xf2" + "\xbf\x2f\x95\x16\xa8\xc8\xc8\x8c" + "\x99\x0f\x5d\xfb\xfa\x2b\x58\x8a" + "\x7e\xd6\x74\x02\x60\xf0\xd0\x5b" + "\x65\xa8\xac\xea\x8d\x68\x46\x34" + "\x26\x9d\x4f\xb1\x9a\x8e\xc0\x1a" + "\xf1\xed\xc6\x7a\x83\xfd\x8a\x57" + "\xf2\xe6\xe4\xba\xfc\xc6\x3c\xad" + "\x5b\x19\x50\x2f\x3a\xcc\x06\x46" + "\x04\x51\x3f\x91\x97\xf0\xd2\x07" + "\xe7\x93\x89\x7e\xb5\x32\x0f\x03" + "\xe5\x58\x9e\x74\x72\xeb\xc2\x38" + "\x00\x0c\x91\x72\x69\xed\x7d\x6d" + "\xc8\x71\xf0\xec\xff\x80\xd9\x1c" + "\x9e\xd2\xfa\x15\xfc\x6c\x4e\xbc" + "\xb1\xa6\xbd\xbd\x70\x40\xca\x20" + "\xb8\x78\xd2\xa3\xc6\xf3\x79\x9c" + "\xc7\x27\xe1\x6a\x29\xad\xa4\x03", + .ilen = 512, + .rlen = 512, + } +}; + +static const struct cipher_testvec adiantum_xchacha20_aes_dec_tv_template[] = { + { + .key = "\x9e\xeb\xb2\x49\x3c\x1c\xf5\xf4" + "\x6a\x99\xc2\xc4\xdf\xb1\xf4\xdd" + "\x75\x20\x57\xea\x2c\x4f\xcd\xb2" + "\xa5\x3d\x7b\x49\x1e\xab\xfd\x0f", + .klen = 32, + .iv = "\xdf\x63\xd4\xab\xd2\x49\xf3\xd8" + "\x33\x81\x37\x60\x7d\xfa\x73\x08" + "\xd8\x49\x6d\x80\xe8\x2f\x62\x54" + "\xeb\x0e\xa9\x39\x5b\x45\x7f\x8a", + .result = "\x67\xc9\xf2\x30\x84\x41\x8e\x43" + "\xfb\xf3\xb3\x3e\x79\x36\x7f\xe8", + .input = "\xf6\x78\x97\xd6\xaa\x94\x01\x27" + "\x2e\x4d\x83\xe0\x6e\x64\x9a\xdf", + .ilen = 16, + .rlen = 16, + .also_non_np = 1, + .np = 3, + .tap = { 5, 2, 9 }, + }, { + .key = "\x36\x2b\x57\x97\xf8\x5d\xcd\x99" + "\x5f\x1a\x5a\x44\x1d\x92\x0f\x27" + "\xcc\x16\xd7\x2b\x85\x63\x99\xd3" + "\xba\x96\xa1\xdb\xd2\x60\x68\xda", + .klen = 32, + .iv = "\xef\x58\x69\xb1\x2c\x5e\x9a\x47" + "\x24\xc1\xb1\x69\xe1\x12\x93\x8f" + "\x43\x3d\x6d\x00\xdb\x5e\xd8\xd9" + "\x12\x9a\xfe\xd9\xff\x2d\xaa\xc4", + .result = "\x5e\xa8\x68\x19\x85\x98\x12\x23" + "\x26\x0a\xcc\xdb\x0a\x04\xb9\xdf" + "\x4d\xb3\x48\x7b\xb0\xe3\xc8\x19" + "\x43\x5a\x46\x06\x94\x2d\xf2", + .input = "\x4b\xb8\x90\x10\xdf\x7f\x64\x08" + "\x0e\x14\x42\x5f\x00\x74\x09\x36" + "\x57\x72\xb5\xfd\xb5\x5d\xb8\x28" + "\x0c\x04\x91\x14\x91\xe9\x37", + .ilen = 31, + .rlen = 31, + .also_non_np = 1, + .np = 2, + .tap = { 16, 15 }, + }, { + .key = "\xa5\x28\x24\x34\x1a\x3c\xd8\xf7" + "\x05\x91\x8f\xee\x85\x1f\x35\x7f" + "\x80\x3d\xfc\x9b\x94\xf6\xfc\x9e" + "\x19\x09\x00\xa9\x04\x31\x4f\x11", + .klen = 32, + .iv = "\xa1\xba\x49\x95\xff\x34\x6d\xb8" + "\xcd\x87\x5d\x5e\xfd\xea\x85\xdb" + "\x8a\x7b\x5e\xb2\x5d\x57\xdd\x62" + "\xac\xa9\x8c\x41\x42\x94\x75\xb7", + .result = "\x69\xb4\xe8\x8c\x37\xe8\x67\x82" + "\xf1\xec\x5d\x04\xe5\x14\x91\x13" + "\xdf\xf2\x87\x1b\x69\x81\x1d\x71" + "\x70\x9e\x9c\x3b\xde\x49\x70\x11" + "\xa0\xa3\xdb\x0d\x54\x4f\x66\x69" + "\xd7\xdb\x80\xa7\x70\x92\x68\xce" + "\x81\x04\x2c\xc6\xab\xae\xe5\x60" + "\x15\xe9\x6f\xef\xaa\x8f\xa7\xa7" + "\x63\x8f\xf2\xf0\x77\xf1\xa8\xea" + "\xe1\xb7\x1f\x9e\xab\x9e\x4b\x3f" + "\x07\x87\x5b\x6f\xcd\xa8\xaf\xb9" + "\xfa\x70\x0b\x52\xb8\xa8\xa7\x9e" + "\x07\x5f\xa6\x0e\xb3\x9b\x79\x13" + "\x79\xc3\x3e\x8d\x1c\x2c\x68\xc8" + "\x51\x1d\x3c\x7b\x7d\x79\x77\x2a" + "\x56\x65\xc5\x54\x23\x28\xb0\x03", + .input = "\xb1\x8b\xa0\x05\x77\xa8\x4d\x59" + "\x1b\x8e\x21\xfc\x3a\x49\xfa\xd4" + "\xeb\x36\xf3\xc4\xdf\xdc\xae\x67" + "\x07\x3f\x70\x0e\xe9\x66\xf5\x0c" + "\x30\x4d\x66\xc9\xa4\x2f\x73\x9c" + "\x13\xc8\x49\x44\xcc\x0a\x90\x9d" + "\x7c\xdd\x19\x3f\xea\x72\x8d\x58" + "\xab\xe7\x09\x2c\xec\xb5\x44\xd2" + "\xca\xa6\x2d\x7a\x5c\x9c\x2b\x15" + "\xec\x2a\xa6\x69\x91\xf9\xf3\x13" + "\xf7\x72\xc1\xc1\x40\xd5\xe1\x94" + "\xf4\x29\xa1\x3e\x25\x02\xa8\x3e" + "\x94\xc1\x91\x14\xa1\x14\xcb\xbe" + "\x67\x4c\xb9\x38\xfe\xa7\xaa\x32" + "\x29\x62\x0d\xb2\xf6\x3c\x58\x57" + "\xc1\xd5\x5a\xbb\xd6\xa6\x2a\xe5", + .ilen = 128, + .rlen = 128, + .also_non_np = 1, + .np = 4, + .tap = { 112, 7, 8, 1 }, + }, { + .key = "\xd3\x81\x72\x18\x23\xff\x6f\x4a" + "\x25\x74\x29\x0d\x51\x8a\x0e\x13" + "\xc1\x53\x5d\x30\x8d\xee\x75\x0d" + "\x14\xd6\x69\xc9\x15\xa9\x0c\x60", + .klen = 32, + .iv = "\x65\x9b\xd4\xa8\x7d\x29\x1d\xf4" + "\xc4\xd6\x9b\x6a\x28\xab\x64\xe2" + "\x62\x81\x97\xc5\x81\xaa\xf9\x44" + "\xc1\x72\x59\x82\xaf\x16\xc8\x2c", + .result = "\xc7\x6b\x52\x6a\x10\xf0\xcc\x09" + "\xc1\x12\x1d\x6d\x21\xa6\x78\xf5" + "\x05\xa3\x69\x60\x91\x36\x98\x57" + "\xba\x0c\x14\xcc\xf3\x2d\x73\x03" + "\xc6\xb2\x5f\xc8\x16\x27\x37\x5d" + "\xd0\x0b\x87\xb2\x50\x94\x7b\x58" + "\x04\xf4\xe0\x7f\x6e\x57\x8e\xc9" + "\x41\x84\xc1\xb1\x7e\x4b\x91\x12" + "\x3a\x8b\x5d\x50\x82\x7b\xcb\xd9" + "\x9a\xd9\x4e\x18\x06\x23\x9e\xd4" + "\xa5\x20\x98\xef\xb5\xda\xe5\xc0" + "\x8a\x6a\x83\x77\x15\x84\x1e\xae" + "\x78\x94\x9d\xdf\xb7\xd1\xea\x67" + "\xaa\xb0\x14\x15\xfa\x67\x21\x84" + "\xd3\x41\x2a\xce\xba\x4b\x4a\xe8" + "\x95\x62\xa9\x55\xf0\x80\xad\xbd" + "\xab\xaf\xdd\x4f\xa5\x7c\x13\x36" + "\xed\x5e\x4f\x72\xad\x4b\xf1\xd0" + "\x88\x4e\xec\x2c\x88\x10\x5e\xea" + "\x12\xc0\x16\x01\x29\xa3\xa0\x55" + "\xaa\x68\xf3\xe9\x9d\x3b\x0d\x3b" + "\x6d\xec\xf8\xa0\x2d\xf0\x90\x8d" + "\x1c\xe2\x88\xd4\x24\x71\xf9\xb3" + "\xc1\x9f\xc5\xd6\x76\x70\xc5\x2e" + "\x9c\xac\xdb\x90\xbd\x83\x72\xba" + "\x6e\xb5\xa5\x53\x83\xa9\xa5\xbf" + "\x7d\x06\x0e\x3c\x2a\xd2\x04\xb5" + "\x1e\x19\x38\x09\x16\xd2\x82\x1f" + "\x75\x18\x56\xb8\x96\x0b\xa6\xf9" + "\xcf\x62\xd9\x32\x5d\xa9\xd7\x1d" + "\xec\xe4\xdf\x1b\xbe\xf1\x36\xee" + "\xe3\x7b\xb5\x2f\xee\xf8\x53\x3d" + "\x6a\xb7\x70\xa9\xfc\x9c\x57\x25" + "\xf2\x89\x10\xd3\xb8\xa8\x8c\x30" + "\xae\x23\x4f\x0e\x13\x66\x4f\xe1" + "\xb6\xc0\xe4\xf8\xef\x93\xbd\x6e" + "\x15\x85\x6b\xe3\x60\x81\x1d\x68" + "\xd7\x31\x87\x89\x09\xab\xd5\x96" + "\x1d\xf3\x6d\x67\x80\xca\x07\x31" + "\x5d\xa7\xe4\xfb\x3e\xf2\x9b\x33" + "\x52\x18\xc8\x30\xfe\x2d\xca\x1e" + "\x79\x92\x7a\x60\x5c\xb6\x58\x87" + "\xa4\x36\xa2\x67\x92\x8b\xa4\xb7" + "\xf1\x86\xdf\xdc\xc0\x7e\x8f\x63" + "\xd2\xa2\xdc\x78\xeb\x4f\xd8\x96" + "\x47\xca\xb8\x91\xf9\xf7\x94\x21" + "\x5f\x9a\x9f\x5b\xb8\x40\x41\x4b" + "\x66\x69\x6a\x72\xd0\xcb\x70\xb7" + "\x93\xb5\x37\x96\x05\x37\x4f\xe5" + "\x8c\xa7\x5a\x4e\x8b\xb7\x84\xea" + "\xc7\xfc\x19\x6e\x1f\x5a\xa1\xac" + "\x18\x7d\x52\x3b\xb3\x34\x62\x99" + "\xe4\x9e\x31\x04\x3f\xc0\x8d\x84" + "\x17\x7c\x25\x48\x52\x67\x11\x27" + "\x67\xbb\x5a\x85\xca\x56\xb2\x5c" + "\xe6\xec\xd5\x96\x3d\x15\xfc\xfb" + "\x22\x25\xf4\x13\xe5\x93\x4b\x9a" + "\x77\xf1\x52\x18\xfa\x16\x5e\x49" + "\x03\x45\xa8\x08\xfa\xb3\x41\x92" + "\x79\x50\x33\xca\xd0\xd7\x42\x55" + "\xc3\x9a\x0c\x4e\xd9\xa4\x3c\x86" + "\x80\x9f\x53\xd1\xa4\x2e\xd1\xbc" + "\xf1\x54\x6e\x93\xa4\x65\x99\x8e" + "\xdf\x29\xc0\x64\x63\x07\xbb\xea", + .input = "\xe0\x33\xf6\xe0\xb4\xa5\xdd\x2b" + "\xdd\xce\xfc\x12\x1e\xfc\x2d\xf2" + "\x8b\xc7\xeb\xc1\xc4\x2a\xe8\x44" + "\x0f\x3d\x97\x19\x2e\x6d\xa2\x38" + "\x9d\xa6\xaa\xe1\x96\xb9\x08\xe8" + "\x0b\x70\x48\x5c\xed\xb5\x9b\xcb" + "\x8b\x40\x88\x7e\x69\x73\xf7\x16" + "\x71\xbb\x5b\xfc\xa3\x47\x5d\xa6" + "\xae\x3a\x64\xc4\xe7\xb8\xa8\xe7" + "\xb1\x32\x19\xdb\xe3\x01\xb8\xf0" + "\xa4\x86\xb4\x4c\xc2\xde\x5c\xd2" + "\x6c\x77\xd2\xe8\x18\xb7\x0a\xc9" + "\x3d\x53\xb5\xc4\x5c\xf0\x8c\x06" + "\xdc\x90\xe0\x74\x47\x1b\x0b\xf6" + "\xd2\x71\x6b\xc4\xf1\x97\x00\x2d" + "\x63\x57\x44\x1f\x8c\xf4\xe6\x9b" + "\xe0\x7a\xdd\xec\x32\x73\x42\x32" + "\x7f\x35\x67\x60\x0d\xcf\x10\x52" + "\x61\x22\x53\x8d\x8e\xbb\x33\x76" + "\x59\xd9\x10\xce\xdf\xef\xc0\x41" + "\xd5\x33\x29\x6a\xda\x46\xa4\x51" + "\xf0\x99\x3d\x96\x31\xdd\xb5\xcb" + "\x3e\x2a\x1f\xc7\x5c\x79\xd3\xc5" + "\x20\xa1\xb1\x39\x1b\xc6\x0a\x70" + "\x26\x39\x95\x07\xad\x7a\xc9\x69" + "\xfe\x81\xc7\x88\x08\x38\xaf\xad" + "\x9e\x8d\xfb\xe8\x24\x0d\x22\xb8" + "\x0e\xed\xbe\x37\x53\x7c\xa6\xc6" + "\x78\x62\xec\xa3\x59\xd9\xc6\x9d" + "\xb8\x0e\x69\x77\x84\x2d\x6a\x4c" + "\xc5\xd9\xb2\xa0\x2b\xa8\x80\xcc" + "\xe9\x1e\x9c\x5a\xc4\xa1\xb2\x37" + "\x06\x9b\x30\x32\x67\xf7\xe7\xd2" + "\x42\xc7\xdf\x4e\xd4\xcb\xa0\x12" + "\x94\xa1\x34\x85\x93\x50\x4b\x0a" + "\x3c\x7d\x49\x25\x01\x41\x6b\x96" + "\xa9\x12\xbb\x0b\xc0\xd7\xd0\x93" + "\x1f\x70\x38\xb8\x21\xee\xf6\xa7" + "\xee\xeb\xe7\x81\xa4\x13\xb4\x87" + "\xfa\xc1\xb0\xb5\x37\x8b\x74\xa2" + "\x4e\xc7\xc2\xad\x3d\x62\x3f\xf8" + "\x34\x42\xe5\xae\x45\x13\x63\xfe" + "\xfc\x2a\x17\x46\x61\xa9\xd3\x1c" + "\x4c\xaf\xf0\x09\x62\x26\x66\x1e" + "\x74\xcf\xd6\x68\x3d\x7d\xd8\xb7" + "\xe7\xe6\xf8\xf0\x08\x20\xf7\x47" + "\x1c\x52\xaa\x0f\x3e\x21\xa3\xf2" + "\xbf\x2f\x95\x16\xa8\xc8\xc8\x8c" + "\x99\x0f\x5d\xfb\xfa\x2b\x58\x8a" + "\x7e\xd6\x74\x02\x60\xf0\xd0\x5b" + "\x65\xa8\xac\xea\x8d\x68\x46\x34" + "\x26\x9d\x4f\xb1\x9a\x8e\xc0\x1a" + "\xf1\xed\xc6\x7a\x83\xfd\x8a\x57" + "\xf2\xe6\xe4\xba\xfc\xc6\x3c\xad" + "\x5b\x19\x50\x2f\x3a\xcc\x06\x46" + "\x04\x51\x3f\x91\x97\xf0\xd2\x07" + "\xe7\x93\x89\x7e\xb5\x32\x0f\x03" + "\xe5\x58\x9e\x74\x72\xeb\xc2\x38" + "\x00\x0c\x91\x72\x69\xed\x7d\x6d" + "\xc8\x71\xf0\xec\xff\x80\xd9\x1c" + "\x9e\xd2\xfa\x15\xfc\x6c\x4e\xbc" + "\xb1\xa6\xbd\xbd\x70\x40\xca\x20" + "\xb8\x78\xd2\xa3\xc6\xf3\x79\x9c" + "\xc7\x27\xe1\x6a\x29\xad\xa4\x03", + .ilen = 512, + .rlen = 512, + } +}; + /* * CTS (Cipher Text Stealing) mode tests */ -- GitLab From 34ed7fcd93eedf490bc87812985b6b22e8b4d435 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 26 Nov 2018 11:27:37 -0800 Subject: [PATCH 1965/2269] BACKPORT, FROMGIT: fscrypt: add Adiantum support Add support for the Adiantum encryption mode to fscrypt. Adiantum is a tweakable, length-preserving encryption mode with security provably reducible to that of XChaCha12 and AES-256, subject to a security bound. It's also a true wide-block mode, unlike XTS. See the paper "Adiantum: length-preserving encryption for entry-level processors" (https://eprint.iacr.org/2018/720.pdf) for more details. Also see commit 059c2a4d8e16 ("crypto: adiantum - add Adiantum support"). On sufficiently long messages, Adiantum's bottlenecks are XChaCha12 and the NH hash function. These algorithms are fast even on processors without dedicated crypto instructions. Adiantum makes it feasible to enable storage encryption on low-end mobile devices that lack AES instructions; currently such devices are unencrypted. On ARM Cortex-A7, on 4096-byte messages Adiantum encryption is about 4 times faster than AES-256-XTS encryption; decryption is about 5 times faster. In fscrypt, Adiantum is suitable for encrypting both file contents and names. With filenames, it fixes a known weakness: when two filenames in a directory share a common prefix of >= 16 bytes, with CTS-CBC their encrypted filenames share a common prefix too, leaking information. Adiantum does not have this problem. Since Adiantum also accepts long tweaks (IVs), it's also safe to use the master key directly for Adiantum encryption rather than deriving per-file keys, provided that the per-file nonce is included in the IVs and the master key isn't used for any other encryption mode. This configuration saves memory and improves performance. A new fscrypt policy flag is added to allow users to opt-in to this configuration. Signed-off-by: Eric Biggers Signed-off-by: Theodore Ts'o (cherry picked from commit 889645b87e96cecbdf7d76ab86447d1f1c6b41d3 https://git.kernel.org/pub/scm/linux/kernel/git/tytso/fscrypt.git master) Conflicts: Documentation/filesystems/fscrypt.rst include/uapi/linux/fs.h Bug: 112008522 Test: For regression testing, built the kernel for x86_64 KVM and ran the encryption xfstests using kvm-xfstests: kvm-xfstests -c ext4,f2fs -g encrypt Tests for the Adiantum mode and "direct key" specifically aren't yet included in xfstests, but I also tried it manually with the following (run in the kvm-xfstests test appliance): cd / umount /vdc &> /dev/null mkfs.f2fs -O encrypt -f /dev/vdc mount /vdc cd /vdc rm -rf edir mkdir edir . ~/xfstests/common/encrypt KEYCTL_PROG=keyctl FSTYP=fscrypt _new_session_keyring k=$(_generate_encryption_key) xfs_io -c "set_encpolicy -c 9 -n 9 -f 0x4 $k" edir/ cp -a /usr edir/ diff -r /usr edir/usr/ dmesg should show that Adiantum is being used: fscrypt: Adiantum using implementation "adiantum(xchacha12-generic,aes-aesni,nhpoly1305-generic)" Change-Id: I29ffaa7ef9cbd23d2f6ed428814c607227241ce9 Signed-off-by: Eric Biggers --- Documentation/filesystems/fscrypt.rst | 641 ++++++++++++++++++++++++++ fs/crypto/crypto.c | 28 +- fs/crypto/fname.c | 22 +- fs/crypto/fscrypt_private.h | 67 ++- fs/crypto/keyinfo.c | 351 ++++++++++---- fs/crypto/policy.c | 5 +- include/uapi/linux/fs.h | 8 +- 7 files changed, 1009 insertions(+), 113 deletions(-) create mode 100644 Documentation/filesystems/fscrypt.rst diff --git a/Documentation/filesystems/fscrypt.rst b/Documentation/filesystems/fscrypt.rst new file mode 100644 index 0000000000000..3a7b60521b94a --- /dev/null +++ b/Documentation/filesystems/fscrypt.rst @@ -0,0 +1,641 @@ +===================================== +Filesystem-level encryption (fscrypt) +===================================== + +Introduction +============ + +fscrypt is a library which filesystems can hook into to support +transparent encryption of files and directories. + +Note: "fscrypt" in this document refers to the kernel-level portion, +implemented in ``fs/crypto/``, as opposed to the userspace tool +`fscrypt `_. This document only +covers the kernel-level portion. For command-line examples of how to +use encryption, see the documentation for the userspace tool `fscrypt +`_. Also, it is recommended to use +the fscrypt userspace tool, or other existing userspace tools such as +`fscryptctl `_ or `Android's key +management system +`_, over +using the kernel's API directly. Using existing tools reduces the +chance of introducing your own security bugs. (Nevertheless, for +completeness this documentation covers the kernel's API anyway.) + +Unlike dm-crypt, fscrypt operates at the filesystem level rather than +at the block device level. This allows it to encrypt different files +with different keys and to have unencrypted files on the same +filesystem. This is useful for multi-user systems where each user's +data-at-rest needs to be cryptographically isolated from the others. +However, except for filenames, fscrypt does not encrypt filesystem +metadata. + +Unlike eCryptfs, which is a stacked filesystem, fscrypt is integrated +directly into supported filesystems --- currently ext4, F2FS, and +UBIFS. This allows encrypted files to be read and written without +caching both the decrypted and encrypted pages in the pagecache, +thereby nearly halving the memory used and bringing it in line with +unencrypted files. Similarly, half as many dentries and inodes are +needed. eCryptfs also limits encrypted filenames to 143 bytes, +causing application compatibility issues; fscrypt allows the full 255 +bytes (NAME_MAX). Finally, unlike eCryptfs, the fscrypt API can be +used by unprivileged users, with no need to mount anything. + +fscrypt does not support encrypting files in-place. Instead, it +supports marking an empty directory as encrypted. Then, after +userspace provides the key, all regular files, directories, and +symbolic links created in that directory tree are transparently +encrypted. + +Threat model +============ + +Offline attacks +--------------- + +Provided that userspace chooses a strong encryption key, fscrypt +protects the confidentiality of file contents and filenames in the +event of a single point-in-time permanent offline compromise of the +block device content. fscrypt does not protect the confidentiality of +non-filename metadata, e.g. file sizes, file permissions, file +timestamps, and extended attributes. Also, the existence and location +of holes (unallocated blocks which logically contain all zeroes) in +files is not protected. + +fscrypt is not guaranteed to protect confidentiality or authenticity +if an attacker is able to manipulate the filesystem offline prior to +an authorized user later accessing the filesystem. + +Online attacks +-------------- + +fscrypt (and storage encryption in general) can only provide limited +protection, if any at all, against online attacks. In detail: + +fscrypt is only resistant to side-channel attacks, such as timing or +electromagnetic attacks, to the extent that the underlying Linux +Cryptographic API algorithms are. If a vulnerable algorithm is used, +such as a table-based implementation of AES, it may be possible for an +attacker to mount a side channel attack against the online system. +Side channel attacks may also be mounted against applications +consuming decrypted data. + +After an encryption key has been provided, fscrypt is not designed to +hide the plaintext file contents or filenames from other users on the +same system, regardless of the visibility of the keyring key. +Instead, existing access control mechanisms such as file mode bits, +POSIX ACLs, LSMs, or mount namespaces should be used for this purpose. +Also note that as long as the encryption keys are *anywhere* in +memory, an online attacker can necessarily compromise them by mounting +a physical attack or by exploiting any kernel security vulnerability +which provides an arbitrary memory read primitive. + +While it is ostensibly possible to "evict" keys from the system, +recently accessed encrypted files will remain accessible at least +until the filesystem is unmounted or the VFS caches are dropped, e.g. +using ``echo 2 > /proc/sys/vm/drop_caches``. Even after that, if the +RAM is compromised before being powered off, it will likely still be +possible to recover portions of the plaintext file contents, if not +some of the encryption keys as well. (Since Linux v4.12, all +in-kernel keys related to fscrypt are sanitized before being freed. +However, userspace would need to do its part as well.) + +Currently, fscrypt does not prevent a user from maliciously providing +an incorrect key for another user's existing encrypted files. A +protection against this is planned. + +Key hierarchy +============= + +Master Keys +----------- + +Each encrypted directory tree is protected by a *master key*. Master +keys can be up to 64 bytes long, and must be at least as long as the +greater of the key length needed by the contents and filenames +encryption modes being used. For example, if AES-256-XTS is used for +contents encryption, the master key must be 64 bytes (512 bits). Note +that the XTS mode is defined to require a key twice as long as that +required by the underlying block cipher. + +To "unlock" an encrypted directory tree, userspace must provide the +appropriate master key. There can be any number of master keys, each +of which protects any number of directory trees on any number of +filesystems. + +Userspace should generate master keys either using a cryptographically +secure random number generator, or by using a KDF (Key Derivation +Function). Note that whenever a KDF is used to "stretch" a +lower-entropy secret such as a passphrase, it is critical that a KDF +designed for this purpose be used, such as scrypt, PBKDF2, or Argon2. + +Per-file keys +------------- + +Since each master key can protect many files, it is necessary to +"tweak" the encryption of each file so that the same plaintext in two +files doesn't map to the same ciphertext, or vice versa. In most +cases, fscrypt does this by deriving per-file keys. When a new +encrypted inode (regular file, directory, or symlink) is created, +fscrypt randomly generates a 16-byte nonce and stores it in the +inode's encryption xattr. Then, it uses a KDF (Key Derivation +Function) to derive the file's key from the master key and nonce. + +The Adiantum encryption mode (see `Encryption modes and usage`_) is +special, since it accepts longer IVs and is suitable for both contents +and filenames encryption. For it, a "direct key" option is offered +where the file's nonce is included in the IVs and the master key is +used for encryption directly. This improves performance; however, +users must not use the same master key for any other encryption mode. + +Below, the KDF and design considerations are described in more detail. + +The current KDF works by encrypting the master key with AES-128-ECB, +using the file's nonce as the AES key. The output is used as the +derived key. If the output is longer than needed, then it is +truncated to the needed length. + +Note: this KDF meets the primary security requirement, which is to +produce unique derived keys that preserve the entropy of the master +key, assuming that the master key is already a good pseudorandom key. +However, it is nonstandard and has some problems such as being +reversible, so it is generally considered to be a mistake! It may be +replaced with HKDF or another more standard KDF in the future. + +Key derivation was chosen over key wrapping because wrapped keys would +require larger xattrs which would be less likely to fit in-line in the +filesystem's inode table, and there didn't appear to be any +significant advantages to key wrapping. In particular, currently +there is no requirement to support unlocking a file with multiple +alternative master keys or to support rotating master keys. Instead, +the master keys may be wrapped in userspace, e.g. as is done by the +`fscrypt `_ tool. + +Including the inode number in the IVs was considered. However, it was +rejected as it would have prevented ext4 filesystems from being +resized, and by itself still wouldn't have been sufficient to prevent +the same key from being directly reused for both XTS and CTS-CBC. + +Encryption modes and usage +========================== + +fscrypt allows one encryption mode to be specified for file contents +and one encryption mode to be specified for filenames. Different +directory trees are permitted to use different encryption modes. +Currently, the following pairs of encryption modes are supported: + +- AES-256-XTS for contents and AES-256-CTS-CBC for filenames +- AES-128-CBC for contents and AES-128-CTS-CBC for filenames +- Adiantum for both contents and filenames + +If unsure, you should use the (AES-256-XTS, AES-256-CTS-CBC) pair. + +AES-128-CBC was added only for low-powered embedded devices with +crypto accelerators such as CAAM or CESA that do not support XTS. + +Adiantum is a (primarily) stream cipher-based mode that is fast even +on CPUs without dedicated crypto instructions. It's also a true +wide-block mode, unlike XTS. It can also eliminate the need to derive +per-file keys. However, it depends on the security of two primitives, +XChaCha12 and AES-256, rather than just one. See the paper +"Adiantum: length-preserving encryption for entry-level processors" +(https://eprint.iacr.org/2018/720.pdf) for more details. To use +Adiantum, CONFIG_CRYPTO_ADIANTUM must be enabled. Also, fast +implementations of ChaCha and NHPoly1305 should be enabled, e.g. +CONFIG_CRYPTO_CHACHA20_NEON and CONFIG_CRYPTO_NHPOLY1305_NEON for ARM. + +New encryption modes can be added relatively easily, without changes +to individual filesystems. However, authenticated encryption (AE) +modes are not currently supported because of the difficulty of dealing +with ciphertext expansion. + +Contents encryption +------------------- + +For file contents, each filesystem block is encrypted independently. +Currently, only the case where the filesystem block size is equal to +the system's page size (usually 4096 bytes) is supported. + +Each block's IV is set to the logical block number within the file as +a little endian number, except that: + +- With CBC mode encryption, ESSIV is also used. Specifically, each IV + is encrypted with AES-256 where the AES-256 key is the SHA-256 hash + of the file's data encryption key. + +- In the "direct key" configuration (FS_POLICY_FLAG_DIRECT_KEY set in + the fscrypt_policy), the file's nonce is also appended to the IV. + Currently this is only allowed with the Adiantum encryption mode. + +Filenames encryption +-------------------- + +For filenames, each full filename is encrypted at once. Because of +the requirements to retain support for efficient directory lookups and +filenames of up to 255 bytes, the same IV is used for every filename +in a directory. + +However, each encrypted directory still uses a unique key; or +alternatively (for the "direct key" configuration) has the file's +nonce included in the IVs. Thus, IV reuse is limited to within a +single directory. + +With CTS-CBC, the IV reuse means that when the plaintext filenames +share a common prefix at least as long as the cipher block size (16 +bytes for AES), the corresponding encrypted filenames will also share +a common prefix. This is undesirable. Adiantum does not have this +weakness, as it is a wide-block encryption mode. + +All supported filenames encryption modes accept any plaintext length +>= 16 bytes; cipher block alignment is not required. However, +filenames shorter than 16 bytes are NUL-padded to 16 bytes before +being encrypted. In addition, to reduce leakage of filename lengths +via their ciphertexts, all filenames are NUL-padded to the next 4, 8, +16, or 32-byte boundary (configurable). 32 is recommended since this +provides the best confidentiality, at the cost of making directory +entries consume slightly more space. Note that since NUL (``\0``) is +not otherwise a valid character in filenames, the padding will never +produce duplicate plaintexts. + +Symbolic link targets are considered a type of filename and are +encrypted in the same way as filenames in directory entries, except +that IV reuse is not a problem as each symlink has its own inode. + +User API +======== + +Setting an encryption policy +---------------------------- + +The FS_IOC_SET_ENCRYPTION_POLICY ioctl sets an encryption policy on an +empty directory or verifies that a directory or regular file already +has the specified encryption policy. It takes in a pointer to a +:c:type:`struct fscrypt_policy`, defined as follows:: + + #define FS_KEY_DESCRIPTOR_SIZE 8 + + struct fscrypt_policy { + __u8 version; + __u8 contents_encryption_mode; + __u8 filenames_encryption_mode; + __u8 flags; + __u8 master_key_descriptor[FS_KEY_DESCRIPTOR_SIZE]; + }; + +This structure must be initialized as follows: + +- ``version`` must be 0. + +- ``contents_encryption_mode`` and ``filenames_encryption_mode`` must + be set to constants from ```` which identify the + encryption modes to use. If unsure, use + FS_ENCRYPTION_MODE_AES_256_XTS (1) for ``contents_encryption_mode`` + and FS_ENCRYPTION_MODE_AES_256_CTS (4) for + ``filenames_encryption_mode``. + +- ``flags`` must contain a value from ```` which + identifies the amount of NUL-padding to use when encrypting + filenames. If unsure, use FS_POLICY_FLAGS_PAD_32 (0x3). + In addition, if the chosen encryption modes are both + FS_ENCRYPTION_MODE_ADIANTUM, this can contain + FS_POLICY_FLAG_DIRECT_KEY to specify that the master key should be + used directly, without key derivation. + +- ``master_key_descriptor`` specifies how to find the master key in + the keyring; see `Adding keys`_. It is up to userspace to choose a + unique ``master_key_descriptor`` for each master key. The e4crypt + and fscrypt tools use the first 8 bytes of + ``SHA-512(SHA-512(master_key))``, but this particular scheme is not + required. Also, the master key need not be in the keyring yet when + FS_IOC_SET_ENCRYPTION_POLICY is executed. However, it must be added + before any files can be created in the encrypted directory. + +If the file is not yet encrypted, then FS_IOC_SET_ENCRYPTION_POLICY +verifies that the file is an empty directory. If so, the specified +encryption policy is assigned to the directory, turning it into an +encrypted directory. After that, and after providing the +corresponding master key as described in `Adding keys`_, all regular +files, directories (recursively), and symlinks created in the +directory will be encrypted, inheriting the same encryption policy. +The filenames in the directory's entries will be encrypted as well. + +Alternatively, if the file is already encrypted, then +FS_IOC_SET_ENCRYPTION_POLICY validates that the specified encryption +policy exactly matches the actual one. If they match, then the ioctl +returns 0. Otherwise, it fails with EEXIST. This works on both +regular files and directories, including nonempty directories. + +Note that the ext4 filesystem does not allow the root directory to be +encrypted, even if it is empty. Users who want to encrypt an entire +filesystem with one key should consider using dm-crypt instead. + +FS_IOC_SET_ENCRYPTION_POLICY can fail with the following errors: + +- ``EACCES``: the file is not owned by the process's uid, nor does the + process have the CAP_FOWNER capability in a namespace with the file + owner's uid mapped +- ``EEXIST``: the file is already encrypted with an encryption policy + different from the one specified +- ``EINVAL``: an invalid encryption policy was specified (invalid + version, mode(s), or flags) +- ``ENOTDIR``: the file is unencrypted and is a regular file, not a + directory +- ``ENOTEMPTY``: the file is unencrypted and is a nonempty directory +- ``ENOTTY``: this type of filesystem does not implement encryption +- ``EOPNOTSUPP``: the kernel was not configured with encryption + support for this filesystem, or the filesystem superblock has not + had encryption enabled on it. (For example, to use encryption on an + ext4 filesystem, CONFIG_EXT4_ENCRYPTION must be enabled in the + kernel config, and the superblock must have had the "encrypt" + feature flag enabled using ``tune2fs -O encrypt`` or ``mkfs.ext4 -O + encrypt``.) +- ``EPERM``: this directory may not be encrypted, e.g. because it is + the root directory of an ext4 filesystem +- ``EROFS``: the filesystem is readonly + +Getting an encryption policy +---------------------------- + +The FS_IOC_GET_ENCRYPTION_POLICY ioctl retrieves the :c:type:`struct +fscrypt_policy`, if any, for a directory or regular file. See above +for the struct definition. No additional permissions are required +beyond the ability to open the file. + +FS_IOC_GET_ENCRYPTION_POLICY can fail with the following errors: + +- ``EINVAL``: the file is encrypted, but it uses an unrecognized + encryption context format +- ``ENODATA``: the file is not encrypted +- ``ENOTTY``: this type of filesystem does not implement encryption +- ``EOPNOTSUPP``: the kernel was not configured with encryption + support for this filesystem + +Note: if you only need to know whether a file is encrypted or not, on +most filesystems it is also possible to use the FS_IOC_GETFLAGS ioctl +and check for FS_ENCRYPT_FL, or to use the statx() system call and +check for STATX_ATTR_ENCRYPTED in stx_attributes. + +Getting the per-filesystem salt +------------------------------- + +Some filesystems, such as ext4 and F2FS, also support the deprecated +ioctl FS_IOC_GET_ENCRYPTION_PWSALT. This ioctl retrieves a randomly +generated 16-byte value stored in the filesystem superblock. This +value is intended to used as a salt when deriving an encryption key +from a passphrase or other low-entropy user credential. + +FS_IOC_GET_ENCRYPTION_PWSALT is deprecated. Instead, prefer to +generate and manage any needed salt(s) in userspace. + +Adding keys +----------- + +To provide a master key, userspace must add it to an appropriate +keyring using the add_key() system call (see: +``Documentation/security/keys/core.rst``). The key type must be +"logon"; keys of this type are kept in kernel memory and cannot be +read back by userspace. The key description must be "fscrypt:" +followed by the 16-character lower case hex representation of the +``master_key_descriptor`` that was set in the encryption policy. The +key payload must conform to the following structure:: + + #define FS_MAX_KEY_SIZE 64 + + struct fscrypt_key { + u32 mode; + u8 raw[FS_MAX_KEY_SIZE]; + u32 size; + }; + +``mode`` is ignored; just set it to 0. The actual key is provided in +``raw`` with ``size`` indicating its size in bytes. That is, the +bytes ``raw[0..size-1]`` (inclusive) are the actual key. + +The key description prefix "fscrypt:" may alternatively be replaced +with a filesystem-specific prefix such as "ext4:". However, the +filesystem-specific prefixes are deprecated and should not be used in +new programs. + +There are several different types of keyrings in which encryption keys +may be placed, such as a session keyring, a user session keyring, or a +user keyring. Each key must be placed in a keyring that is "attached" +to all processes that might need to access files encrypted with it, in +the sense that request_key() will find the key. Generally, if only +processes belonging to a specific user need to access a given +encrypted directory and no session keyring has been installed, then +that directory's key should be placed in that user's user session +keyring or user keyring. Otherwise, a session keyring should be +installed if needed, and the key should be linked into that session +keyring, or in a keyring linked into that session keyring. + +Note: introducing the complex visibility semantics of keyrings here +was arguably a mistake --- especially given that by design, after any +process successfully opens an encrypted file (thereby setting up the +per-file key), possessing the keyring key is not actually required for +any process to read/write the file until its in-memory inode is +evicted. In the future there probably should be a way to provide keys +directly to the filesystem instead, which would make the intended +semantics clearer. + +Access semantics +================ + +With the key +------------ + +With the encryption key, encrypted regular files, directories, and +symlinks behave very similarly to their unencrypted counterparts --- +after all, the encryption is intended to be transparent. However, +astute users may notice some differences in behavior: + +- Unencrypted files, or files encrypted with a different encryption + policy (i.e. different key, modes, or flags), cannot be renamed or + linked into an encrypted directory; see `Encryption policy + enforcement`_. Attempts to do so will fail with EPERM. However, + encrypted files can be renamed within an encrypted directory, or + into an unencrypted directory. + +- Direct I/O is not supported on encrypted files. Attempts to use + direct I/O on such files will fall back to buffered I/O. + +- The fallocate operations FALLOC_FL_COLLAPSE_RANGE, + FALLOC_FL_INSERT_RANGE, and FALLOC_FL_ZERO_RANGE are not supported + on encrypted files and will fail with EOPNOTSUPP. + +- Online defragmentation of encrypted files is not supported. The + EXT4_IOC_MOVE_EXT and F2FS_IOC_MOVE_RANGE ioctls will fail with + EOPNOTSUPP. + +- The ext4 filesystem does not support data journaling with encrypted + regular files. It will fall back to ordered data mode instead. + +- DAX (Direct Access) is not supported on encrypted files. + +- The st_size of an encrypted symlink will not necessarily give the + length of the symlink target as required by POSIX. It will actually + give the length of the ciphertext, which will be slightly longer + than the plaintext due to NUL-padding and an extra 2-byte overhead. + +- The maximum length of an encrypted symlink is 2 bytes shorter than + the maximum length of an unencrypted symlink. For example, on an + EXT4 filesystem with a 4K block size, unencrypted symlinks can be up + to 4095 bytes long, while encrypted symlinks can only be up to 4093 + bytes long (both lengths excluding the terminating null). + +Note that mmap *is* supported. This is possible because the pagecache +for an encrypted file contains the plaintext, not the ciphertext. + +Without the key +--------------- + +Some filesystem operations may be performed on encrypted regular +files, directories, and symlinks even before their encryption key has +been provided: + +- File metadata may be read, e.g. using stat(). + +- Directories may be listed, in which case the filenames will be + listed in an encoded form derived from their ciphertext. The + current encoding algorithm is described in `Filename hashing and + encoding`_. The algorithm is subject to change, but it is + guaranteed that the presented filenames will be no longer than + NAME_MAX bytes, will not contain the ``/`` or ``\0`` characters, and + will uniquely identify directory entries. + + The ``.`` and ``..`` directory entries are special. They are always + present and are not encrypted or encoded. + +- Files may be deleted. That is, nondirectory files may be deleted + with unlink() as usual, and empty directories may be deleted with + rmdir() as usual. Therefore, ``rm`` and ``rm -r`` will work as + expected. + +- Symlink targets may be read and followed, but they will be presented + in encrypted form, similar to filenames in directories. Hence, they + are unlikely to point to anywhere useful. + +Without the key, regular files cannot be opened or truncated. +Attempts to do so will fail with ENOKEY. This implies that any +regular file operations that require a file descriptor, such as +read(), write(), mmap(), fallocate(), and ioctl(), are also forbidden. + +Also without the key, files of any type (including directories) cannot +be created or linked into an encrypted directory, nor can a name in an +encrypted directory be the source or target of a rename, nor can an +O_TMPFILE temporary file be created in an encrypted directory. All +such operations will fail with ENOKEY. + +It is not currently possible to backup and restore encrypted files +without the encryption key. This would require special APIs which +have not yet been implemented. + +Encryption policy enforcement +============================= + +After an encryption policy has been set on a directory, all regular +files, directories, and symbolic links created in that directory +(recursively) will inherit that encryption policy. Special files --- +that is, named pipes, device nodes, and UNIX domain sockets --- will +not be encrypted. + +Except for those special files, it is forbidden to have unencrypted +files, or files encrypted with a different encryption policy, in an +encrypted directory tree. Attempts to link or rename such a file into +an encrypted directory will fail with EPERM. This is also enforced +during ->lookup() to provide limited protection against offline +attacks that try to disable or downgrade encryption in known locations +where applications may later write sensitive data. It is recommended +that systems implementing a form of "verified boot" take advantage of +this by validating all top-level encryption policies prior to access. + +Implementation details +====================== + +Encryption context +------------------ + +An encryption policy is represented on-disk by a :c:type:`struct +fscrypt_context`. It is up to individual filesystems to decide where +to store it, but normally it would be stored in a hidden extended +attribute. It should *not* be exposed by the xattr-related system +calls such as getxattr() and setxattr() because of the special +semantics of the encryption xattr. (In particular, there would be +much confusion if an encryption policy were to be added to or removed +from anything other than an empty directory.) The struct is defined +as follows:: + + #define FS_KEY_DESCRIPTOR_SIZE 8 + #define FS_KEY_DERIVATION_NONCE_SIZE 16 + + struct fscrypt_context { + u8 format; + u8 contents_encryption_mode; + u8 filenames_encryption_mode; + u8 flags; + u8 master_key_descriptor[FS_KEY_DESCRIPTOR_SIZE]; + u8 nonce[FS_KEY_DERIVATION_NONCE_SIZE]; + }; + +Note that :c:type:`struct fscrypt_context` contains the same +information as :c:type:`struct fscrypt_policy` (see `Setting an +encryption policy`_), except that :c:type:`struct fscrypt_context` +also contains a nonce. The nonce is randomly generated by the kernel +and is used to derive the inode's encryption key as described in +`Per-file keys`_. + +Data path changes +----------------- + +For the read path (->readpage()) of regular files, filesystems can +read the ciphertext into the page cache and decrypt it in-place. The +page lock must be held until decryption has finished, to prevent the +page from becoming visible to userspace prematurely. + +For the write path (->writepage()) of regular files, filesystems +cannot encrypt data in-place in the page cache, since the cached +plaintext must be preserved. Instead, filesystems must encrypt into a +temporary buffer or "bounce page", then write out the temporary +buffer. Some filesystems, such as UBIFS, already use temporary +buffers regardless of encryption. Other filesystems, such as ext4 and +F2FS, have to allocate bounce pages specially for encryption. + +Filename hashing and encoding +----------------------------- + +Modern filesystems accelerate directory lookups by using indexed +directories. An indexed directory is organized as a tree keyed by +filename hashes. When a ->lookup() is requested, the filesystem +normally hashes the filename being looked up so that it can quickly +find the corresponding directory entry, if any. + +With encryption, lookups must be supported and efficient both with and +without the encryption key. Clearly, it would not work to hash the +plaintext filenames, since the plaintext filenames are unavailable +without the key. (Hashing the plaintext filenames would also make it +impossible for the filesystem's fsck tool to optimize encrypted +directories.) Instead, filesystems hash the ciphertext filenames, +i.e. the bytes actually stored on-disk in the directory entries. When +asked to do a ->lookup() with the key, the filesystem just encrypts +the user-supplied name to get the ciphertext. + +Lookups without the key are more complicated. The raw ciphertext may +contain the ``\0`` and ``/`` characters, which are illegal in +filenames. Therefore, readdir() must base64-encode the ciphertext for +presentation. For most filenames, this works fine; on ->lookup(), the +filesystem just base64-decodes the user-supplied name to get back to +the raw ciphertext. + +However, for very long filenames, base64 encoding would cause the +filename length to exceed NAME_MAX. To prevent this, readdir() +actually presents long filenames in an abbreviated form which encodes +a strong "hash" of the ciphertext filename, along with the optional +filesystem-specific hash(es) needed for directory lookups. This +allows the filesystem to still, with a high degree of confidence, map +the filename given in ->lookup() back to a particular directory entry +that was previously listed by readdir(). See :c:type:`struct +fscrypt_digested_name` in the source for more details. + +Note that the precise way that filenames are presented to userspace +without the key is subject to change in the future. It is only meant +as a way to temporarily present valid filenames so that commands like +``rm -r`` work as expected on encrypted directories. diff --git a/fs/crypto/crypto.c b/fs/crypto/crypto.c index 0f46cf550907f..4dc788e3bc96b 100644 --- a/fs/crypto/crypto.c +++ b/fs/crypto/crypto.c @@ -133,15 +133,25 @@ struct fscrypt_ctx *fscrypt_get_ctx(const struct inode *inode, gfp_t gfp_flags) } EXPORT_SYMBOL(fscrypt_get_ctx); +void fscrypt_generate_iv(union fscrypt_iv *iv, u64 lblk_num, + const struct fscrypt_info *ci) +{ + memset(iv, 0, ci->ci_mode->ivsize); + iv->lblk_num = cpu_to_le64(lblk_num); + + if (ci->ci_flags & FS_POLICY_FLAG_DIRECT_KEY) + memcpy(iv->nonce, ci->ci_nonce, FS_KEY_DERIVATION_NONCE_SIZE); + + if (ci->ci_essiv_tfm != NULL) + crypto_cipher_encrypt_one(ci->ci_essiv_tfm, iv->raw, iv->raw); +} + int fscrypt_do_page_crypto(const struct inode *inode, fscrypt_direction_t rw, u64 lblk_num, struct page *src_page, struct page *dest_page, unsigned int len, unsigned int offs, gfp_t gfp_flags) { - struct { - __le64 index; - u8 padding[FS_IV_SIZE - sizeof(__le64)]; - } iv; + union fscrypt_iv iv; struct skcipher_request *req = NULL; DECLARE_CRYPTO_WAIT(wait); struct scatterlist dst, src; @@ -151,15 +161,7 @@ int fscrypt_do_page_crypto(const struct inode *inode, fscrypt_direction_t rw, BUG_ON(len == 0); - BUILD_BUG_ON(sizeof(iv) != FS_IV_SIZE); - BUILD_BUG_ON(AES_BLOCK_SIZE != FS_IV_SIZE); - iv.index = cpu_to_le64(lblk_num); - memset(iv.padding, 0, sizeof(iv.padding)); - - if (ci->ci_essiv_tfm != NULL) { - crypto_cipher_encrypt_one(ci->ci_essiv_tfm, (u8 *)&iv, - (u8 *)&iv); - } + fscrypt_generate_iv(&iv, lblk_num, ci); req = skcipher_request_alloc(tfm, gfp_flags); if (!req) diff --git a/fs/crypto/fname.c b/fs/crypto/fname.c index d7a0f682ca122..7ff40a73dbece 100644 --- a/fs/crypto/fname.c +++ b/fs/crypto/fname.c @@ -40,10 +40,11 @@ int fname_encrypt(struct inode *inode, const struct qstr *iname, { struct skcipher_request *req = NULL; DECLARE_CRYPTO_WAIT(wait); - struct crypto_skcipher *tfm = inode->i_crypt_info->ci_ctfm; - int res = 0; - char iv[FS_CRYPTO_BLOCK_SIZE]; + struct fscrypt_info *ci = inode->i_crypt_info; + struct crypto_skcipher *tfm = ci->ci_ctfm; + union fscrypt_iv iv; struct scatterlist sg; + int res; /* * Copy the filename to the output buffer for encrypting in-place and @@ -55,7 +56,7 @@ int fname_encrypt(struct inode *inode, const struct qstr *iname, memset(out + iname->len, 0, olen - iname->len); /* Initialize the IV */ - memset(iv, 0, FS_CRYPTO_BLOCK_SIZE); + fscrypt_generate_iv(&iv, 0, ci); /* Set up the encryption request */ req = skcipher_request_alloc(tfm, GFP_NOFS); @@ -65,7 +66,7 @@ int fname_encrypt(struct inode *inode, const struct qstr *iname, CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP, crypto_req_done, &wait); sg_init_one(&sg, out, olen); - skcipher_request_set_crypt(req, &sg, &sg, olen, iv); + skcipher_request_set_crypt(req, &sg, &sg, olen, &iv); /* Do the encryption */ res = crypto_wait_req(crypto_skcipher_encrypt(req), &wait); @@ -94,9 +95,10 @@ static int fname_decrypt(struct inode *inode, struct skcipher_request *req = NULL; DECLARE_CRYPTO_WAIT(wait); struct scatterlist src_sg, dst_sg; - struct crypto_skcipher *tfm = inode->i_crypt_info->ci_ctfm; - int res = 0; - char iv[FS_CRYPTO_BLOCK_SIZE]; + struct fscrypt_info *ci = inode->i_crypt_info; + struct crypto_skcipher *tfm = ci->ci_ctfm; + union fscrypt_iv iv; + int res; /* Allocate request */ req = skcipher_request_alloc(tfm, GFP_NOFS); @@ -107,12 +109,12 @@ static int fname_decrypt(struct inode *inode, crypto_req_done, &wait); /* Initialize IV */ - memset(iv, 0, FS_CRYPTO_BLOCK_SIZE); + fscrypt_generate_iv(&iv, 0, ci); /* Create decryption request */ sg_init_one(&src_sg, iname->name, iname->len); sg_init_one(&dst_sg, oname->name, oname->len); - skcipher_request_set_crypt(req, &src_sg, &dst_sg, iname->len, iv); + skcipher_request_set_crypt(req, &src_sg, &dst_sg, iname->len, &iv); res = crypto_wait_req(crypto_skcipher_decrypt(req), &wait); skcipher_request_free(req); if (res < 0) { diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h index 79debfc9cef91..7424f851eb5cf 100644 --- a/fs/crypto/fscrypt_private.h +++ b/fs/crypto/fscrypt_private.h @@ -17,7 +17,6 @@ #include /* Encryption parameters */ -#define FS_IV_SIZE 16 #define FS_KEY_DERIVATION_NONCE_SIZE 16 /** @@ -52,16 +51,42 @@ struct fscrypt_symlink_data { } __packed; /* - * A pointer to this structure is stored in the file system's in-core - * representation of an inode. + * fscrypt_info - the "encryption key" for an inode + * + * When an encrypted file's key is made available, an instance of this struct is + * allocated and stored in ->i_crypt_info. Once created, it remains until the + * inode is evicted. */ struct fscrypt_info { + + /* The actual crypto transform used for encryption and decryption */ + struct crypto_skcipher *ci_ctfm; + + /* + * Cipher for ESSIV IV generation. Only set for CBC contents + * encryption, otherwise is NULL. + */ + struct crypto_cipher *ci_essiv_tfm; + + /* + * Encryption mode used for this inode. It corresponds to either + * ci_data_mode or ci_filename_mode, depending on the inode type. + */ + struct fscrypt_mode *ci_mode; + + /* + * If non-NULL, then this inode uses a master key directly rather than a + * derived key, and ci_ctfm will equal ci_master_key->mk_ctfm. + * Otherwise, this inode uses a derived key. + */ + struct fscrypt_master_key *ci_master_key; + + /* fields from the fscrypt_context */ u8 ci_data_mode; u8 ci_filename_mode; u8 ci_flags; - struct crypto_skcipher *ci_ctfm; - struct crypto_cipher *ci_essiv_tfm; - u8 ci_master_key[FS_KEY_DESCRIPTOR_SIZE]; + u8 ci_master_key_descriptor[FS_KEY_DESCRIPTOR_SIZE]; + u8 ci_nonce[FS_KEY_DERIVATION_NONCE_SIZE]; }; typedef enum { @@ -83,6 +108,10 @@ static inline bool fscrypt_valid_enc_modes(u32 contents_mode, filenames_mode == FS_ENCRYPTION_MODE_AES_256_CTS) return true; + if (contents_mode == FS_ENCRYPTION_MODE_ADIANTUM && + filenames_mode == FS_ENCRYPTION_MODE_ADIANTUM) + return true; + return false; } @@ -107,6 +136,22 @@ fscrypt_msg(struct super_block *sb, const char *level, const char *fmt, ...); #define fscrypt_err(sb, fmt, ...) \ fscrypt_msg(sb, KERN_ERR, fmt, ##__VA_ARGS__) +#define FSCRYPT_MAX_IV_SIZE 32 + +union fscrypt_iv { + struct { + /* logical block number within the file */ + __le64 lblk_num; + + /* per-file nonce; only set in DIRECT_KEY mode */ + u8 nonce[FS_KEY_DERIVATION_NONCE_SIZE]; + }; + u8 raw[FSCRYPT_MAX_IV_SIZE]; +}; + +void fscrypt_generate_iv(union fscrypt_iv *iv, u64 lblk_num, + const struct fscrypt_info *ci); + /* fname.c */ extern int fname_encrypt(struct inode *inode, const struct qstr *iname, u8 *out, unsigned int olen); @@ -115,6 +160,16 @@ extern bool fscrypt_fname_encrypted_size(const struct inode *inode, u32 *encrypted_len_ret); /* keyinfo.c */ + +struct fscrypt_mode { + const char *friendly_name; + const char *cipher_str; + int keysize; + int ivsize; + bool logged_impl_name; + bool needs_essiv; +}; + extern void __exit fscrypt_essiv_cleanup(void); #endif /* _FSCRYPT_PRIVATE_H */ diff --git a/fs/crypto/keyinfo.c b/fs/crypto/keyinfo.c index 7874c9bb2fc53..1e11a683f63df 100644 --- a/fs/crypto/keyinfo.c +++ b/fs/crypto/keyinfo.c @@ -10,15 +10,21 @@ */ #include +#include #include #include #include +#include #include #include #include "fscrypt_private.h" static struct crypto_shash *essiv_hash_tfm; +/* Table of keys referenced by FS_POLICY_FLAG_DIRECT_KEY policies */ +static DEFINE_HASHTABLE(fscrypt_master_keys, 6); /* 6 bits = 64 buckets */ +static DEFINE_SPINLOCK(fscrypt_master_keys_lock); + /* * Key derivation function. This generates the derived key by encrypting the * master key with AES-128-ECB using the inode's nonce as the AES key. @@ -123,56 +129,37 @@ invalid: return ERR_PTR(-ENOKEY); } -/* Find the master key, then derive the inode's actual encryption key */ -static int find_and_derive_key(const struct inode *inode, - const struct fscrypt_context *ctx, - u8 *derived_key, unsigned int derived_keysize) -{ - struct key *key; - const struct fscrypt_key *payload; - int err; - - key = find_and_lock_process_key(FS_KEY_DESC_PREFIX, - ctx->master_key_descriptor, - derived_keysize, &payload); - if (key == ERR_PTR(-ENOKEY) && inode->i_sb->s_cop->key_prefix) { - key = find_and_lock_process_key(inode->i_sb->s_cop->key_prefix, - ctx->master_key_descriptor, - derived_keysize, &payload); - } - if (IS_ERR(key)) - return PTR_ERR(key); - err = derive_key_aes(payload->raw, ctx, derived_key, derived_keysize); - up_read(&key->sem); - key_put(key); - return err; -} - -static struct fscrypt_mode { - const char *friendly_name; - const char *cipher_str; - int keysize; - bool logged_impl_name; -} available_modes[] = { +static struct fscrypt_mode available_modes[] = { [FS_ENCRYPTION_MODE_AES_256_XTS] = { .friendly_name = "AES-256-XTS", .cipher_str = "xts(aes)", .keysize = 64, + .ivsize = 16, }, [FS_ENCRYPTION_MODE_AES_256_CTS] = { .friendly_name = "AES-256-CTS-CBC", .cipher_str = "cts(cbc(aes))", .keysize = 32, + .ivsize = 16, }, [FS_ENCRYPTION_MODE_AES_128_CBC] = { .friendly_name = "AES-128-CBC", .cipher_str = "cbc(aes)", .keysize = 16, + .ivsize = 16, + .needs_essiv = true, }, [FS_ENCRYPTION_MODE_AES_128_CTS] = { .friendly_name = "AES-128-CTS-CBC", .cipher_str = "cts(cbc(aes))", .keysize = 16, + .ivsize = 16, + }, + [FS_ENCRYPTION_MODE_ADIANTUM] = { + .friendly_name = "Adiantum", + .cipher_str = "adiantum(xchacha12,aes)", + .keysize = 32, + .ivsize = 32, }, }; @@ -198,14 +185,196 @@ select_encryption_mode(const struct fscrypt_info *ci, const struct inode *inode) return ERR_PTR(-EINVAL); } -static void put_crypt_info(struct fscrypt_info *ci) +/* Find the master key, then derive the inode's actual encryption key */ +static int find_and_derive_key(const struct inode *inode, + const struct fscrypt_context *ctx, + u8 *derived_key, const struct fscrypt_mode *mode) { - if (!ci) + struct key *key; + const struct fscrypt_key *payload; + int err; + + key = find_and_lock_process_key(FS_KEY_DESC_PREFIX, + ctx->master_key_descriptor, + mode->keysize, &payload); + if (key == ERR_PTR(-ENOKEY) && inode->i_sb->s_cop->key_prefix) { + key = find_and_lock_process_key(inode->i_sb->s_cop->key_prefix, + ctx->master_key_descriptor, + mode->keysize, &payload); + } + if (IS_ERR(key)) + return PTR_ERR(key); + + if (ctx->flags & FS_POLICY_FLAG_DIRECT_KEY) { + if (mode->ivsize < offsetofend(union fscrypt_iv, nonce)) { + fscrypt_warn(inode->i_sb, + "direct key mode not allowed with %s", + mode->friendly_name); + err = -EINVAL; + } else if (ctx->contents_encryption_mode != + ctx->filenames_encryption_mode) { + fscrypt_warn(inode->i_sb, + "direct key mode not allowed with different contents and filenames modes"); + err = -EINVAL; + } else { + memcpy(derived_key, payload->raw, mode->keysize); + err = 0; + } + } else { + err = derive_key_aes(payload->raw, ctx, derived_key, + mode->keysize); + } + up_read(&key->sem); + key_put(key); + return err; +} + +/* Allocate and key a symmetric cipher object for the given encryption mode */ +static struct crypto_skcipher * +allocate_skcipher_for_mode(struct fscrypt_mode *mode, const u8 *raw_key, + const struct inode *inode) +{ + struct crypto_skcipher *tfm; + int err; + + tfm = crypto_alloc_skcipher(mode->cipher_str, 0, 0); + if (IS_ERR(tfm)) { + fscrypt_warn(inode->i_sb, + "error allocating '%s' transform for inode %lu: %ld", + mode->cipher_str, inode->i_ino, PTR_ERR(tfm)); + return tfm; + } + if (unlikely(!mode->logged_impl_name)) { + /* + * fscrypt performance can vary greatly depending on which + * crypto algorithm implementation is used. Help people debug + * performance problems by logging the ->cra_driver_name the + * first time a mode is used. Note that multiple threads can + * race here, but it doesn't really matter. + */ + mode->logged_impl_name = true; + pr_info("fscrypt: %s using implementation \"%s\"\n", + mode->friendly_name, + crypto_skcipher_alg(tfm)->base.cra_driver_name); + } + crypto_skcipher_set_flags(tfm, CRYPTO_TFM_REQ_WEAK_KEY); + err = crypto_skcipher_setkey(tfm, raw_key, mode->keysize); + if (err) + goto err_free_tfm; + + return tfm; + +err_free_tfm: + crypto_free_skcipher(tfm); + return ERR_PTR(err); +} + +/* Master key referenced by FS_POLICY_FLAG_DIRECT_KEY policy */ +struct fscrypt_master_key { + struct hlist_node mk_node; + refcount_t mk_refcount; + const struct fscrypt_mode *mk_mode; + struct crypto_skcipher *mk_ctfm; + u8 mk_descriptor[FS_KEY_DESCRIPTOR_SIZE]; + u8 mk_raw[FS_MAX_KEY_SIZE]; +}; + +static void free_master_key(struct fscrypt_master_key *mk) +{ + if (mk) { + crypto_free_skcipher(mk->mk_ctfm); + kzfree(mk); + } +} + +static void put_master_key(struct fscrypt_master_key *mk) +{ + if (!refcount_dec_and_lock(&mk->mk_refcount, &fscrypt_master_keys_lock)) return; + hash_del(&mk->mk_node); + spin_unlock(&fscrypt_master_keys_lock); - crypto_free_skcipher(ci->ci_ctfm); - crypto_free_cipher(ci->ci_essiv_tfm); - kmem_cache_free(fscrypt_info_cachep, ci); + free_master_key(mk); +} + +/* + * Find/insert the given master key into the fscrypt_master_keys table. If + * found, it is returned with elevated refcount, and 'to_insert' is freed if + * non-NULL. If not found, 'to_insert' is inserted and returned if it's + * non-NULL; otherwise NULL is returned. + */ +static struct fscrypt_master_key * +find_or_insert_master_key(struct fscrypt_master_key *to_insert, + const u8 *raw_key, const struct fscrypt_mode *mode, + const struct fscrypt_info *ci) +{ + unsigned long hash_key; + struct fscrypt_master_key *mk; + + /* + * Careful: to avoid potentially leaking secret key bytes via timing + * information, we must key the hash table by descriptor rather than by + * raw key, and use crypto_memneq() when comparing raw keys. + */ + + BUILD_BUG_ON(sizeof(hash_key) > FS_KEY_DESCRIPTOR_SIZE); + memcpy(&hash_key, ci->ci_master_key_descriptor, sizeof(hash_key)); + + spin_lock(&fscrypt_master_keys_lock); + hash_for_each_possible(fscrypt_master_keys, mk, mk_node, hash_key) { + if (memcmp(ci->ci_master_key_descriptor, mk->mk_descriptor, + FS_KEY_DESCRIPTOR_SIZE) != 0) + continue; + if (mode != mk->mk_mode) + continue; + if (crypto_memneq(raw_key, mk->mk_raw, mode->keysize)) + continue; + /* using existing tfm with same (descriptor, mode, raw_key) */ + refcount_inc(&mk->mk_refcount); + spin_unlock(&fscrypt_master_keys_lock); + free_master_key(to_insert); + return mk; + } + if (to_insert) + hash_add(fscrypt_master_keys, &to_insert->mk_node, hash_key); + spin_unlock(&fscrypt_master_keys_lock); + return to_insert; +} + +/* Prepare to encrypt directly using the master key in the given mode */ +static struct fscrypt_master_key * +fscrypt_get_master_key(const struct fscrypt_info *ci, struct fscrypt_mode *mode, + const u8 *raw_key, const struct inode *inode) +{ + struct fscrypt_master_key *mk; + int err; + + /* Is there already a tfm for this key? */ + mk = find_or_insert_master_key(NULL, raw_key, mode, ci); + if (mk) + return mk; + + /* Nope, allocate one. */ + mk = kzalloc(sizeof(*mk), GFP_NOFS); + if (!mk) + return ERR_PTR(-ENOMEM); + refcount_set(&mk->mk_refcount, 1); + mk->mk_mode = mode; + mk->mk_ctfm = allocate_skcipher_for_mode(mode, raw_key, inode); + if (IS_ERR(mk->mk_ctfm)) { + err = PTR_ERR(mk->mk_ctfm); + mk->mk_ctfm = NULL; + goto err_free_mk; + } + memcpy(mk->mk_descriptor, ci->ci_master_key_descriptor, + FS_KEY_DESCRIPTOR_SIZE); + memcpy(mk->mk_raw, raw_key, mode->keysize); + + return find_or_insert_master_key(mk, raw_key, mode, ci); + +err_free_mk: + free_master_key(mk); + return ERR_PTR(err); } static int derive_essiv_salt(const u8 *key, int keysize, u8 *salt) @@ -275,11 +444,67 @@ void __exit fscrypt_essiv_cleanup(void) crypto_free_shash(essiv_hash_tfm); } +/* + * Given the encryption mode and key (normally the derived key, but for + * FS_POLICY_FLAG_DIRECT_KEY mode it's the master key), set up the inode's + * symmetric cipher transform object(s). + */ +static int setup_crypto_transform(struct fscrypt_info *ci, + struct fscrypt_mode *mode, + const u8 *raw_key, const struct inode *inode) +{ + struct fscrypt_master_key *mk; + struct crypto_skcipher *ctfm; + int err; + + if (ci->ci_flags & FS_POLICY_FLAG_DIRECT_KEY) { + mk = fscrypt_get_master_key(ci, mode, raw_key, inode); + if (IS_ERR(mk)) + return PTR_ERR(mk); + ctfm = mk->mk_ctfm; + } else { + mk = NULL; + ctfm = allocate_skcipher_for_mode(mode, raw_key, inode); + if (IS_ERR(ctfm)) + return PTR_ERR(ctfm); + } + ci->ci_master_key = mk; + ci->ci_ctfm = ctfm; + + if (mode->needs_essiv) { + /* ESSIV implies 16-byte IVs which implies !DIRECT_KEY */ + WARN_ON(mode->ivsize != AES_BLOCK_SIZE); + WARN_ON(ci->ci_flags & FS_POLICY_FLAG_DIRECT_KEY); + + err = init_essiv_generator(ci, raw_key, mode->keysize); + if (err) { + fscrypt_warn(inode->i_sb, + "error initializing ESSIV generator for inode %lu: %d", + inode->i_ino, err); + return err; + } + } + return 0; +} + +static void put_crypt_info(struct fscrypt_info *ci) +{ + if (!ci) + return; + + if (ci->ci_master_key) { + put_master_key(ci->ci_master_key); + } else { + crypto_free_skcipher(ci->ci_ctfm); + crypto_free_cipher(ci->ci_essiv_tfm); + } + kmem_cache_free(fscrypt_info_cachep, ci); +} + int fscrypt_get_encryption_info(struct inode *inode) { struct fscrypt_info *crypt_info; struct fscrypt_context ctx; - struct crypto_skcipher *ctfm; struct fscrypt_mode *mode; u8 *raw_key = NULL; int res; @@ -312,74 +537,42 @@ int fscrypt_get_encryption_info(struct inode *inode) if (ctx.flags & ~FS_POLICY_FLAGS_VALID) return -EINVAL; - crypt_info = kmem_cache_alloc(fscrypt_info_cachep, GFP_NOFS); + crypt_info = kmem_cache_zalloc(fscrypt_info_cachep, GFP_NOFS); if (!crypt_info) return -ENOMEM; crypt_info->ci_flags = ctx.flags; crypt_info->ci_data_mode = ctx.contents_encryption_mode; crypt_info->ci_filename_mode = ctx.filenames_encryption_mode; - crypt_info->ci_ctfm = NULL; - crypt_info->ci_essiv_tfm = NULL; - memcpy(crypt_info->ci_master_key, ctx.master_key_descriptor, - sizeof(crypt_info->ci_master_key)); + memcpy(crypt_info->ci_master_key_descriptor, ctx.master_key_descriptor, + FS_KEY_DESCRIPTOR_SIZE); + memcpy(crypt_info->ci_nonce, ctx.nonce, FS_KEY_DERIVATION_NONCE_SIZE); mode = select_encryption_mode(crypt_info, inode); if (IS_ERR(mode)) { res = PTR_ERR(mode); goto out; } + WARN_ON(mode->ivsize > FSCRYPT_MAX_IV_SIZE); + crypt_info->ci_mode = mode; /* - * This cannot be a stack buffer because it is passed to the scatterlist - * crypto API as part of key derivation. + * This cannot be a stack buffer because it may be passed to the + * scatterlist crypto API as part of key derivation. */ res = -ENOMEM; raw_key = kmalloc(mode->keysize, GFP_NOFS); if (!raw_key) goto out; - res = find_and_derive_key(inode, &ctx, raw_key, mode->keysize); + res = find_and_derive_key(inode, &ctx, raw_key, mode); if (res) goto out; - ctfm = crypto_alloc_skcipher(mode->cipher_str, 0, 0); - if (IS_ERR(ctfm)) { - res = PTR_ERR(ctfm); - fscrypt_warn(inode->i_sb, - "error allocating '%s' transform for inode %lu: %d", - mode->cipher_str, inode->i_ino, res); - goto out; - } - if (unlikely(!mode->logged_impl_name)) { - /* - * fscrypt performance can vary greatly depending on which - * crypto algorithm implementation is used. Help people debug - * performance problems by logging the ->cra_driver_name the - * first time a mode is used. Note that multiple threads can - * race here, but it doesn't really matter. - */ - mode->logged_impl_name = true; - pr_info("fscrypt: %s using implementation \"%s\"\n", - mode->friendly_name, - crypto_skcipher_alg(ctfm)->base.cra_driver_name); - } - crypt_info->ci_ctfm = ctfm; - crypto_skcipher_set_flags(ctfm, CRYPTO_TFM_REQ_WEAK_KEY); - res = crypto_skcipher_setkey(ctfm, raw_key, mode->keysize); + res = setup_crypto_transform(crypt_info, mode, raw_key, inode); if (res) goto out; - if (S_ISREG(inode->i_mode) && - crypt_info->ci_data_mode == FS_ENCRYPTION_MODE_AES_128_CBC) { - res = init_essiv_generator(crypt_info, raw_key, mode->keysize); - if (res) { - fscrypt_warn(inode->i_sb, - "error initializing ESSIV generator for inode %lu: %d", - inode->i_ino, res); - goto out; - } - } if (cmpxchg(&inode->i_crypt_info, NULL, crypt_info) == NULL) crypt_info = NULL; out: diff --git a/fs/crypto/policy.c b/fs/crypto/policy.c index c6d431a5cce93..f490de921ce82 100644 --- a/fs/crypto/policy.c +++ b/fs/crypto/policy.c @@ -199,7 +199,8 @@ int fscrypt_has_permitted_context(struct inode *parent, struct inode *child) child_ci = child->i_crypt_info; if (parent_ci && child_ci) { - return memcmp(parent_ci->ci_master_key, child_ci->ci_master_key, + return memcmp(parent_ci->ci_master_key_descriptor, + child_ci->ci_master_key_descriptor, FS_KEY_DESCRIPTOR_SIZE) == 0 && (parent_ci->ci_data_mode == child_ci->ci_data_mode) && (parent_ci->ci_filename_mode == @@ -254,7 +255,7 @@ int fscrypt_inherit_context(struct inode *parent, struct inode *child, ctx.contents_encryption_mode = ci->ci_data_mode; ctx.filenames_encryption_mode = ci->ci_filename_mode; ctx.flags = ci->ci_flags; - memcpy(ctx.master_key_descriptor, ci->ci_master_key, + memcpy(ctx.master_key_descriptor, ci->ci_master_key_descriptor, FS_KEY_DESCRIPTOR_SIZE); get_random_bytes(ctx.nonce, FS_KEY_DERIVATION_NONCE_SIZE); BUILD_BUG_ON(sizeof(ctx) != FSCRYPT_SET_CONTEXT_MAX_SIZE); diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h index 971e82aec6d0a..e4d0261080dbc 100644 --- a/include/uapi/linux/fs.h +++ b/include/uapi/linux/fs.h @@ -265,7 +265,8 @@ struct fsxattr { #define FS_POLICY_FLAGS_PAD_16 0x02 #define FS_POLICY_FLAGS_PAD_32 0x03 #define FS_POLICY_FLAGS_PAD_MASK 0x03 -#define FS_POLICY_FLAGS_VALID 0x03 +#define FS_POLICY_FLAG_DIRECT_KEY 0x04 /* use master key directly */ +#define FS_POLICY_FLAGS_VALID 0x07 /* Encryption algorithms */ #define FS_ENCRYPTION_MODE_INVALID 0 @@ -275,8 +276,9 @@ struct fsxattr { #define FS_ENCRYPTION_MODE_AES_256_CTS 4 #define FS_ENCRYPTION_MODE_AES_128_CBC 5 #define FS_ENCRYPTION_MODE_AES_128_CTS 6 -#define FS_ENCRYPTION_MODE_SPECK128_256_XTS 7 -#define FS_ENCRYPTION_MODE_SPECK128_256_CTS 8 +#define FS_ENCRYPTION_MODE_SPECK128_256_XTS 7 /* Removed, do not use. */ +#define FS_ENCRYPTION_MODE_SPECK128_256_CTS 8 /* Removed, do not use. */ +#define FS_ENCRYPTION_MODE_ADIANTUM 9 struct fscrypt_policy { __u8 version; -- GitLab From 23bc51816857fa84a6581f967e8e134404d2344f Mon Sep 17 00:00:00 2001 From: Alistair Strachan Date: Mon, 3 Dec 2018 12:53:02 -0800 Subject: [PATCH 1966/2269] ANDROID: cuttlefish_defconfig: Enable CONFIG_CRYPTO_ADIANTUM Bug: 120439617 Change-Id: I0a192256b4bbf101221a3bb31c4f288581bbddc5 Signed-off-by: Alistair Strachan --- arch/arm64/configs/cuttlefish_defconfig | 1 + arch/x86/configs/x86_64_cuttlefish_defconfig | 1 + 2 files changed, 2 insertions(+) diff --git a/arch/arm64/configs/cuttlefish_defconfig b/arch/arm64/configs/cuttlefish_defconfig index a5c6506f4bbb3..9198b76f1d656 100644 --- a/arch/arm64/configs/cuttlefish_defconfig +++ b/arch/arm64/configs/cuttlefish_defconfig @@ -422,6 +422,7 @@ CONFIG_SECURITY_NETWORK=y CONFIG_LSM_MMAP_MIN_ADDR=65536 CONFIG_HARDENED_USERCOPY=y CONFIG_SECURITY_SELINUX=y +CONFIG_CRYPTO_ADIANTUM=y CONFIG_CRYPTO_SHA512=y CONFIG_CRYPTO_LZ4=y CONFIG_CRYPTO_ZSTD=y diff --git a/arch/x86/configs/x86_64_cuttlefish_defconfig b/arch/x86/configs/x86_64_cuttlefish_defconfig index 6425593f7aec9..1ffe21526f506 100644 --- a/arch/x86/configs/x86_64_cuttlefish_defconfig +++ b/arch/x86/configs/x86_64_cuttlefish_defconfig @@ -452,6 +452,7 @@ CONFIG_SECURITY_SELINUX=y CONFIG_SECURITY_SELINUX_CHECKREQPROT_VALUE=1 CONFIG_CRYPTO_RSA=y # CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set +CONFIG_CRYPTO_ADIANTUM=y CONFIG_CRYPTO_SHA512=y CONFIG_CRYPTO_LZ4=y CONFIG_CRYPTO_ZSTD=y -- GitLab From 4ceafbb5cc3df7390bd3255cfe8fec6f4271dcbb Mon Sep 17 00:00:00 2001 From: Alistair Strachan Date: Fri, 7 Dec 2018 15:36:53 -0800 Subject: [PATCH 1967/2269] Fix merge issue from 4.14.85. In d11d7f1ccfb1 ("Merge 4.14.85 into android-4.14"), the changes for 9d844b0e6692 ("kbuild: allow to use GCC toolchain not in Clang search path") were not merged in. Restore the needed bits. Change-Id: Ib3c3ae91d5b82b736499052a53f0de2b246027fa Signed-off-by: Alistair Strachan --- Makefile | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 8787400b021b1..e479314fed78b 100644 --- a/Makefile +++ b/Makefile @@ -485,7 +485,9 @@ CLANG_TARGET := --target=$(notdir $(CLANG_TRIPLE:%-=%)) ifeq ($(shell $(srctree)/scripts/clang-android.sh $(CC) $(CLANG_TARGET)), y) $(error "Clang with Android --target detected. Did you specify CLANG_TRIPLE?") endif -GCC_TOOLCHAIN := $(realpath $(dir $(shell which $(LD)))/..) +GCC_TOOLCHAIN_DIR := $(dir $(shell which $(LD))) +CLANG_PREFIX := --prefix=$(GCC_TOOLCHAIN_DIR) +GCC_TOOLCHAIN := $(realpath $(GCC_TOOLCHAIN_DIR)/..) endif ifneq ($(GCC_TOOLCHAIN),) CLANG_GCC_TC := --gcc-toolchain=$(GCC_TOOLCHAIN) -- GitLab From f1d799a76aaea917c8d88ae97d7d179ebb39b08d Mon Sep 17 00:00:00 2001 From: Alistair Strachan Date: Fri, 7 Dec 2018 11:34:16 -0800 Subject: [PATCH 1968/2269] ANDROID: Move from clang r328903 to r346389b. Bug: 120439617 Bug: 120503084 Change-Id: I21bb183cac03753d1ba719a69305e2199c3f3227 Signed-off-by: Alistair Strachan --- build.config.cuttlefish.aarch64 | 2 +- build.config.cuttlefish.x86_64 | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/build.config.cuttlefish.aarch64 b/build.config.cuttlefish.aarch64 index d2dcc966ee68b..bea1b25269bff 100644 --- a/build.config.cuttlefish.aarch64 +++ b/build.config.cuttlefish.aarch64 @@ -6,7 +6,7 @@ DEFCONFIG=cuttlefish_defconfig EXTRA_CMDS='' KERNEL_DIR=common POST_DEFCONFIG_CMDS="check_defconfig" -CLANG_PREBUILT_BIN=prebuilts-master/clang/host/linux-x86/clang-r328903/bin +CLANG_PREBUILT_BIN=prebuilts-master/clang/host/linux-x86/clang-r346389b/bin LINUX_GCC_CROSS_COMPILE_PREBUILTS_BIN=prebuilts/gcc/linux-x86/aarch64/aarch64-linux-android-4.9/bin FILES=" arch/arm64/boot/Image.gz diff --git a/build.config.cuttlefish.x86_64 b/build.config.cuttlefish.x86_64 index 694ed56a5f47c..90161b456ad20 100644 --- a/build.config.cuttlefish.x86_64 +++ b/build.config.cuttlefish.x86_64 @@ -6,7 +6,7 @@ DEFCONFIG=x86_64_cuttlefish_defconfig EXTRA_CMDS='' KERNEL_DIR=common POST_DEFCONFIG_CMDS="check_defconfig" -CLANG_PREBUILT_BIN=prebuilts-master/clang/host/linux-x86/clang-r328903/bin +CLANG_PREBUILT_BIN=prebuilts-master/clang/host/linux-x86/clang-r346389b/bin LINUX_GCC_CROSS_COMPILE_PREBUILTS_BIN=prebuilts/gcc/linux-x86/x86/x86_64-linux-android-4.9/bin FILES=" arch/x86/boot/bzImage -- GitLab From 049053b7aee6b306adcc7633b4fe97ade9c9094a Mon Sep 17 00:00:00 2001 From: Alistair Strachan Date: Fri, 7 Dec 2018 11:50:07 -0800 Subject: [PATCH 1969/2269] ANDROID: cuttlefish_defconfig: Enable CONFIG_ARM64_LSE_ATOMICS Enabling this was previously blocked by a lack of support for this feature in clang, but that problem has been resolved in a newer version of the compiler. Bug: 120439617 Change-Id: I0f5fd2439c5a71ee0988648970576b46b2c4d20b Signed-off-by: Alistair Strachan --- arch/arm64/configs/cuttlefish_defconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/configs/cuttlefish_defconfig b/arch/arm64/configs/cuttlefish_defconfig index 9198b76f1d656..188317af108ec 100644 --- a/arch/arm64/configs/cuttlefish_defconfig +++ b/arch/arm64/configs/cuttlefish_defconfig @@ -56,6 +56,7 @@ CONFIG_SWP_EMULATION=y CONFIG_CP15_BARRIER_EMULATION=y CONFIG_SETEND_EMULATION=y CONFIG_ARM64_SW_TTBR0_PAN=y +CONFIG_ARM64_LSE_ATOMICS=y CONFIG_RANDOMIZE_BASE=y CONFIG_CMDLINE="console=ttyAMA0" CONFIG_CMDLINE_EXTEND=y -- GitLab From f5ec9987bfbd1004b643e2c914306dc79ce40912 Mon Sep 17 00:00:00 2001 From: Xiongfeng Wang Date: Thu, 11 Jan 2018 17:22:29 +0800 Subject: [PATCH 1970/2269] Kbuild: suppress packed-not-aligned warning for default setting only commit 321cb0308a9e76841394b4bbab6a1107cfedbae0 upstream. gcc-8 reports many -Wpacked-not-aligned warnings. The below are some examples. ./include/linux/ceph/msgr.h:67:1: warning: alignment 1 of 'struct ceph_entity_addr' is less than 8 [-Wpacked-not-aligned] } __attribute__ ((packed)); ./include/linux/ceph/msgr.h:67:1: warning: alignment 1 of 'struct ceph_entity_addr' is less than 8 [-Wpacked-not-aligned] } __attribute__ ((packed)); ./include/linux/ceph/msgr.h:67:1: warning: alignment 1 of 'struct ceph_entity_addr' is less than 8 [-Wpacked-not-aligned] } __attribute__ ((packed)); This patch suppresses this kind of warnings for default setting. Signed-off-by: Xiongfeng Wang Signed-off-by: Masahiro Yamada Signed-off-by: Greg Kroah-Hartman --- scripts/Makefile.extrawarn | 3 +++ 1 file changed, 3 insertions(+) diff --git a/scripts/Makefile.extrawarn b/scripts/Makefile.extrawarn index c6ebf4239e640..8d5357053f865 100644 --- a/scripts/Makefile.extrawarn +++ b/scripts/Makefile.extrawarn @@ -11,6 +11,8 @@ # are not supported by all versions of the compiler # ========================================================================== +KBUILD_CFLAGS += $(call cc-disable-warning, packed-not-aligned) + ifeq ("$(origin W)", "command line") export KBUILD_ENABLE_EXTRA_GCC_CHECKS := $(W) endif @@ -26,6 +28,7 @@ warning-1 += -Wold-style-definition warning-1 += $(call cc-option, -Wmissing-include-dirs) warning-1 += $(call cc-option, -Wunused-but-set-variable) warning-1 += $(call cc-option, -Wunused-const-variable) +warning-1 += $(call cc-option, -Wpacked-not-aligned) warning-1 += $(call cc-disable-warning, missing-field-initializers) warning-1 += $(call cc-disable-warning, sign-compare) -- GitLab From 9f8d10971a2395359751487c517bf33e5d8f9e43 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Fri, 31 Aug 2018 07:47:28 +1000 Subject: [PATCH 1971/2269] disable stringop truncation warnings for now commit 217c3e0196758662aa0429863b09d1c13da1c5d6 upstream. They are too noisy Signed-off-by: Stephen Rothwell Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- Makefile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Makefile b/Makefile index 572bd98d23444..77ffc1dc4e79b 100644 --- a/Makefile +++ b/Makefile @@ -803,6 +803,9 @@ KBUILD_CFLAGS += $(call cc-option,-Wdeclaration-after-statement,) # disable pointer signed / unsigned warnings in gcc 4.0 KBUILD_CFLAGS += $(call cc-disable-warning, pointer-sign) +# disable stringop warnings in gcc 8+ +KBUILD_CFLAGS += $(call cc-disable-warning, stringop-truncation) + # disable invalid "can't wrap" optimizations for signed / pointers KBUILD_CFLAGS += $(call cc-option,-fno-strict-overflow) -- GitLab From 337f2837c5b29c1587dd7efc4befc9f6f84a2746 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 30 Nov 2018 12:13:15 -0800 Subject: [PATCH 1972/2269] test_hexdump: use memcpy instead of strncpy commit b1286ed7158e9b62787508066283ab0b8850b518 upstream. New versions of gcc reasonably warn about the odd pattern of strncpy(p, q, strlen(q)); which really doesn't make sense: the strncpy() ends up being just a slow and odd way to write memcpy() in this case. Apparently there was a patch for this floating around earlier, but it got lost. Acked-again-by: Andy Shevchenko Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- lib/test_hexdump.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/test_hexdump.c b/lib/test_hexdump.c index 3f415d8101f32..1c3c513add77f 100644 --- a/lib/test_hexdump.c +++ b/lib/test_hexdump.c @@ -81,7 +81,7 @@ static void __init test_hexdump_prepare_test(size_t len, int rowsize, const char *q = *result++; size_t amount = strlen(q); - strncpy(p, q, amount); + memcpy(p, q, amount); p += amount; *p++ = ' '; -- GitLab From af882cb0bcb5573014e2203dbfd67e2737bb10d3 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sun, 1 Jul 2018 13:57:16 -0700 Subject: [PATCH 1973/2269] kobject: Replace strncpy with memcpy commit 77d2a24b6107bd9b3bf2403a65c1428a9da83dd0 upstream. gcc 8.1.0 complains: lib/kobject.c:128:3: warning: 'strncpy' output truncated before terminating nul copying as many bytes from a string as its length [-Wstringop-truncation] lib/kobject.c: In function 'kobject_get_path': lib/kobject.c:125:13: note: length computed here Using strncpy() is indeed less than perfect since the length of data to be copied has already been determined with strlen(). Replace strncpy() with memcpy() to address the warning and optimize the code a little. Signed-off-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- lib/kobject.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/kobject.c b/lib/kobject.c index 34f847252c021..bbbb067de8ecd 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -127,7 +127,7 @@ static void fill_kobj_path(struct kobject *kobj, char *path, int length) int cur = strlen(kobject_name(parent)); /* back up enough to print this name with '/' */ length -= cur; - strncpy(path + length, kobject_name(parent), cur); + memcpy(path + length, kobject_name(parent), cur); *(path + --length) = '/'; } -- GitLab From 23df63002205f17d307af118fe0025f6e49d389a Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 27 Jun 2018 14:59:00 +0200 Subject: [PATCH 1974/2269] ALSA: intel_hdmi: Use strlcpy() instead of strncpy() commit c288248f5b26cd5563112fcdc077bf44964a942d upstream. hdmi_lpe_audio_probe() copies the pcm name string via strncpy(), but as a gcc8 warning suggests, it misses a NUL terminator, and unlikely the expected result. Use the proper one, strlcpy() instead. Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/x86/intel_hdmi_audio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/x86/intel_hdmi_audio.c b/sound/x86/intel_hdmi_audio.c index 697872d8308e2..8b7abbd691164 100644 --- a/sound/x86/intel_hdmi_audio.c +++ b/sound/x86/intel_hdmi_audio.c @@ -1839,7 +1839,7 @@ static int hdmi_lpe_audio_probe(struct platform_device *pdev) /* setup private data which can be retrieved when required */ pcm->private_data = ctx; pcm->info_flags = 0; - strncpy(pcm->name, card->shortname, strlen(card->shortname)); + strlcpy(pcm->name, card->shortname, strlen(card->shortname)); /* setup the ops for playabck */ snd_pcm_set_ops(pcm, SNDRV_PCM_STREAM_PLAYBACK, &had_pcm_ops); -- GitLab From f5028606c29248d39c8fba26730517c7c624cdaa Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 30 Nov 2018 14:45:01 -0800 Subject: [PATCH 1975/2269] unifdef: use memcpy instead of strncpy commit 38c7b224ce22c25fed04007839edf974bd13439d upstream. New versions of gcc reasonably warn about the odd pattern of strncpy(p, q, strlen(q)); which really doesn't make sense: the strncpy() ends up being just a slow and odd way to write memcpy() in this case. There was a comment about _why_ the code used strncpy - to avoid the terminating NUL byte, but memcpy does the same and avoids the warning. Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- scripts/unifdef.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/unifdef.c b/scripts/unifdef.c index 7493c0ee51cc9..db00e3e30a59d 100644 --- a/scripts/unifdef.c +++ b/scripts/unifdef.c @@ -395,7 +395,7 @@ usage(void) * When we have processed a group that starts off with a known-false * #if/#elif sequence (which has therefore been deleted) followed by a * #elif that we don't understand and therefore must keep, we edit the - * latter into a #if to keep the nesting correct. We use strncpy() to + * latter into a #if to keep the nesting correct. We use memcpy() to * overwrite the 4 byte token "elif" with "if " without a '\0' byte. * * When we find a true #elif in a group, the following block will @@ -450,7 +450,7 @@ static void Idrop (void) { Fdrop(); ignoreon(); } static void Itrue (void) { Ftrue(); ignoreon(); } static void Ifalse(void) { Ffalse(); ignoreon(); } /* modify this line */ -static void Mpass (void) { strncpy(keyword, "if ", 4); Pelif(); } +static void Mpass (void) { memcpy(keyword, "if ", 4); Pelif(); } static void Mtrue (void) { keywordedit("else"); state(IS_TRUE_MIDDLE); } static void Melif (void) { keywordedit("endif"); state(IS_FALSE_TRAILER); } static void Melse (void) { keywordedit("endif"); state(IS_FALSE_ELSE); } -- GitLab From e26457da0cdb5498724fd46279057d49c95cbba6 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sun, 1 Jul 2018 13:57:13 -0700 Subject: [PATCH 1976/2269] kernfs: Replace strncpy with memcpy commit 166126c1e54d927c2e8efa2702d420e0ce301fd9 upstream. gcc 8.1.0 complains: fs/kernfs/symlink.c:91:3: warning: 'strncpy' output truncated before terminating nul copying as many bytes from a string as its length fs/kernfs/symlink.c: In function 'kernfs_iop_get_link': fs/kernfs/symlink.c:88:14: note: length computed here Using strncpy() is indeed less than perfect since the length of data to be copied has already been determined with strlen(). Replace strncpy() with memcpy() to address the warning and optimize the code a little. Signed-off-by: Guenter Roeck Acked-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- fs/kernfs/symlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/kernfs/symlink.c b/fs/kernfs/symlink.c index 08ccabd7047f3..5145ae2f0572e 100644 --- a/fs/kernfs/symlink.c +++ b/fs/kernfs/symlink.c @@ -88,7 +88,7 @@ static int kernfs_get_target_path(struct kernfs_node *parent, int slen = strlen(kn->name); len -= slen; - strncpy(s + len, kn->name, slen); + memcpy(s + len, kn->name, slen); if (len) s[--len] = '/'; -- GitLab From bb1d0ac3ef02d0e3e893b75933e556e86c915cfe Mon Sep 17 00:00:00 2001 From: Sultan Alsawaf Date: Wed, 6 Jun 2018 15:56:54 -0700 Subject: [PATCH 1977/2269] ip_tunnel: Fix name string concatenate in __ip_tunnel_create() commit 000ade8016400d93b4d7c89970d96b8c14773d45 upstream. By passing a limit of 2 bytes to strncat, strncat is limited to writing fewer bytes than what it's supposed to append to the name here. Since the bounds are checked on the line above this, just remove the string bounds checks entirely since they're unneeded. Signed-off-by: Sultan Alsawaf Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ip_tunnel.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 72eee34092ae4..fabc299cb875f 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -261,8 +261,8 @@ static struct net_device *__ip_tunnel_create(struct net *net, } else { if (strlen(ops->kind) > (IFNAMSIZ - 3)) goto failed; - strlcpy(name, ops->kind, IFNAMSIZ); - strncat(name, "%d", 2); + strcpy(name, ops->kind); + strcat(name, "%d"); } ASSERT_RTNL(); -- GitLab From 4e4b875eaf1e615cdaa789c22222565405349145 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 5 Sep 2017 09:47:26 +0200 Subject: [PATCH 1978/2269] drm: gma500: fix logic error commit 67a3b63a54cbe18944191f43d644686731cf30c7 upstream. gcc-8 points out a condition that almost certainly doesn't do what the author had in mind: drivers/gpu/drm/gma500/mdfld_intel_display.c: In function 'mdfldWaitForPipeEnable': drivers/gpu/drm/gma500/mdfld_intel_display.c:102:37: error: bitwise comparison always evaluates to false [-Werror=tautological-compare] This changes it to a simple bit mask operation to check whether the bit is set. Fixes: 026abc333205 ("gma500: initial medfield merge") Signed-off-by: Arnd Bergmann Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20170905074741.435324-1-arnd@arndb.de Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/gma500/mdfld_intel_display.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/gma500/mdfld_intel_display.c b/drivers/gpu/drm/gma500/mdfld_intel_display.c index 531e4450c0009..5c066448be5b7 100644 --- a/drivers/gpu/drm/gma500/mdfld_intel_display.c +++ b/drivers/gpu/drm/gma500/mdfld_intel_display.c @@ -99,7 +99,7 @@ void mdfldWaitForPipeEnable(struct drm_device *dev, int pipe) /* Wait for for the pipe enable to take effect. */ for (count = 0; count < COUNT_MAX; count++) { temp = REG_READ(map->conf); - if ((temp & PIPEACONF_PIPE_STATE) == 1) + if (temp & PIPEACONF_PIPE_STATE) break; } } -- GitLab From 94d6d9e5f3c2532c1f13521a58ac9f120f30c4ba Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 4 Dec 2017 15:47:00 +0100 Subject: [PATCH 1979/2269] scsi: bfa: convert to strlcpy/strlcat commit 8c5a50e8e7ad812a62f7ccf28d9a5e74fddf3000 upstream. The bfa driver has a number of real issues with string termination that gcc-8 now points out: drivers/scsi/bfa/bfad_bsg.c: In function 'bfad_iocmd_port_get_attr': drivers/scsi/bfa/bfad_bsg.c:320:9: error: argument to 'sizeof' in 'strncpy' call is the same expression as the source; did you mean to use the size of the destination? [-Werror=sizeof-pointer-memaccess] drivers/scsi/bfa/bfa_fcs.c: In function 'bfa_fcs_fabric_psymb_init': drivers/scsi/bfa/bfa_fcs.c:775:9: error: argument to 'sizeof' in 'strncat' call is the same expression as the source; did you mean to use the size of the destination? [-Werror=sizeof-pointer-memaccess] drivers/scsi/bfa/bfa_fcs.c:781:9: error: argument to 'sizeof' in 'strncat' call is the same expression as the source; did you mean to use the size of the destination? [-Werror=sizeof-pointer-memaccess] drivers/scsi/bfa/bfa_fcs.c:788:9: error: argument to 'sizeof' in 'strncat' call is the same expression as the source; did you mean to use the size of the destination? [-Werror=sizeof-pointer-memaccess] drivers/scsi/bfa/bfa_fcs.c:801:10: error: argument to 'sizeof' in 'strncat' call is the same expression as the source; did you mean to use the size of the destination? [-Werror=sizeof-pointer-memaccess] drivers/scsi/bfa/bfa_fcs.c:808:10: error: argument to 'sizeof' in 'strncat' call is the same expression as the source; did you mean to use the size of the destination? [-Werror=sizeof-pointer-memaccess] drivers/scsi/bfa/bfa_fcs.c: In function 'bfa_fcs_fabric_nsymb_init': drivers/scsi/bfa/bfa_fcs.c:837:10: error: argument to 'sizeof' in 'strncat' call is the same expression as the source; did you mean to use the size of the destination? [-Werror=sizeof-pointer-memaccess] drivers/scsi/bfa/bfa_fcs.c:844:10: error: argument to 'sizeof' in 'strncat' call is the same expression as the source; did you mean to use the size of the destination? [-Werror=sizeof-pointer-memaccess] drivers/scsi/bfa/bfa_fcs.c:852:10: error: argument to 'sizeof' in 'strncat' call is the same expression as the source; did you mean to use the size of the destination? [-Werror=sizeof-pointer-memaccess] drivers/scsi/bfa/bfa_fcs.c: In function 'bfa_fcs_fabric_psymb_init': drivers/scsi/bfa/bfa_fcs.c:778:2: error: 'strncat' output may be truncated copying 10 bytes from a string of length 63 [-Werror=stringop-truncation] drivers/scsi/bfa/bfa_fcs.c:784:2: error: 'strncat' output may be truncated copying 30 bytes from a string of length 63 [-Werror=stringop-truncation] drivers/scsi/bfa/bfa_fcs.c:803:3: error: 'strncat' output may be truncated copying 44 bytes from a string of length 63 [-Werror=stringop-truncation] drivers/scsi/bfa/bfa_fcs.c:811:3: error: 'strncat' output may be truncated copying 16 bytes from a string of length 63 [-Werror=stringop-truncation] drivers/scsi/bfa/bfa_fcs.c: In function 'bfa_fcs_fabric_nsymb_init': drivers/scsi/bfa/bfa_fcs.c:840:2: error: 'strncat' output may be truncated copying 10 bytes from a string of length 63 [-Werror=stringop-truncation] drivers/scsi/bfa/bfa_fcs.c:847:2: error: 'strncat' output may be truncated copying 30 bytes from a string of length 63 [-Werror=stringop-truncation] drivers/scsi/bfa/bfa_fcs_lport.c: In function 'bfa_fcs_fdmi_get_hbaattr': drivers/scsi/bfa/bfa_fcs_lport.c:2657:10: error: argument to 'sizeof' in 'strncat' call is the same expression as the source; did you mean to use the size of the destination? [-Werror=sizeof-pointer-memaccess] drivers/scsi/bfa/bfa_fcs_lport.c:2659:11: error: argument to 'sizeof' in 'strncat' call is the same expression as the source; did you mean to use the size of the destination? [-Werror=sizeof-pointer-memaccess] drivers/scsi/bfa/bfa_fcs_lport.c: In function 'bfa_fcs_lport_ms_gmal_response': drivers/scsi/bfa/bfa_fcs_lport.c:3232:5: error: 'strncpy' output may be truncated copying 16 bytes from a string of length 247 [-Werror=stringop-truncation] drivers/scsi/bfa/bfa_fcs_lport.c: In function 'bfa_fcs_lport_ns_send_rspn_id': drivers/scsi/bfa/bfa_fcs_lport.c:4670:3: error: 'strncpy' output truncated before terminating nul copying as many bytes from a string as its length [-Werror=stringop-truncation] drivers/scsi/bfa/bfa_fcs_lport.c:4682:3: error: 'strncat' output truncated before terminating nul copying as many bytes from a string as its length [-Werror=stringop-truncation] drivers/scsi/bfa/bfa_fcs_lport.c: In function 'bfa_fcs_lport_ns_util_send_rspn_id': drivers/scsi/bfa/bfa_fcs_lport.c:5206:3: error: 'strncpy' output truncated before terminating nul copying as many bytes from a string as its length [-Werror=stringop-truncation] drivers/scsi/bfa/bfa_fcs_lport.c:5215:3: error: 'strncat' output truncated before terminating nul copying as many bytes from a string as its length [-Werror=stringop-truncation] drivers/scsi/bfa/bfa_fcs_lport.c: In function 'bfa_fcs_fdmi_get_portattr': drivers/scsi/bfa/bfa_fcs_lport.c:2751:2: error: 'strncpy' specified bound 128 equals destination size [-Werror=stringop-truncation] drivers/scsi/bfa/bfa_fcbuild.c: In function 'fc_rspnid_build': drivers/scsi/bfa/bfa_fcbuild.c:1254:2: error: 'strncpy' output truncated before terminating nul copying as many bytes from a string as its length [-Werror=stringop-truncation] drivers/scsi/bfa/bfa_fcbuild.c:1253:25: note: length computed here drivers/scsi/bfa/bfa_fcbuild.c: In function 'fc_rsnn_nn_build': drivers/scsi/bfa/bfa_fcbuild.c:1275:2: error: 'strncpy' output truncated before terminating nul copying as many bytes from a string as its length [-Werror=stringop-truncation] In most cases, this can be addressed by correctly calling strlcpy and strlcat instead of strncpy/strncat, with the size of the destination buffer as the last argument. For consistency, I'm changing the other callers of strncpy() in this driver the same way. Signed-off-by: Arnd Bergmann Reviewed-by: Johannes Thumshirn Acked-by: Sudarsana Kalluru Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/bfa/bfa_fcbuild.c | 8 ++-- drivers/scsi/bfa/bfa_fcs.c | 78 ++++++++++++++++---------------- drivers/scsi/bfa/bfa_fcs_lport.c | 62 ++++++++++--------------- drivers/scsi/bfa/bfa_ioc.c | 2 +- drivers/scsi/bfa/bfa_svc.c | 4 +- drivers/scsi/bfa/bfad.c | 20 ++++---- drivers/scsi/bfa/bfad_attr.c | 2 +- drivers/scsi/bfa/bfad_bsg.c | 6 +-- 8 files changed, 84 insertions(+), 98 deletions(-) diff --git a/drivers/scsi/bfa/bfa_fcbuild.c b/drivers/scsi/bfa/bfa_fcbuild.c index b8dadc9cc9935..d3b00a475aebb 100644 --- a/drivers/scsi/bfa/bfa_fcbuild.c +++ b/drivers/scsi/bfa/bfa_fcbuild.c @@ -1250,8 +1250,8 @@ fc_rspnid_build(struct fchs_s *fchs, void *pyld, u32 s_id, u16 ox_id, memset(rspnid, 0, sizeof(struct fcgs_rspnid_req_s)); rspnid->dap = s_id; - rspnid->spn_len = (u8) strlen((char *)name); - strncpy((char *)rspnid->spn, (char *)name, rspnid->spn_len); + strlcpy(rspnid->spn, name, sizeof(rspnid->spn)); + rspnid->spn_len = (u8) strlen(rspnid->spn); return sizeof(struct fcgs_rspnid_req_s) + sizeof(struct ct_hdr_s); } @@ -1271,8 +1271,8 @@ fc_rsnn_nn_build(struct fchs_s *fchs, void *pyld, u32 s_id, memset(rsnn_nn, 0, sizeof(struct fcgs_rsnn_nn_req_s)); rsnn_nn->node_name = node_name; - rsnn_nn->snn_len = (u8) strlen((char *)name); - strncpy((char *)rsnn_nn->snn, (char *)name, rsnn_nn->snn_len); + strlcpy(rsnn_nn->snn, name, sizeof(rsnn_nn->snn)); + rsnn_nn->snn_len = (u8) strlen(rsnn_nn->snn); return sizeof(struct fcgs_rsnn_nn_req_s) + sizeof(struct ct_hdr_s); } diff --git a/drivers/scsi/bfa/bfa_fcs.c b/drivers/scsi/bfa/bfa_fcs.c index 4aa61e20e82d7..932feb0ed4da1 100644 --- a/drivers/scsi/bfa/bfa_fcs.c +++ b/drivers/scsi/bfa/bfa_fcs.c @@ -769,23 +769,23 @@ bfa_fcs_fabric_psymb_init(struct bfa_fcs_fabric_s *fabric) bfa_ioc_get_adapter_model(&fabric->fcs->bfa->ioc, model); /* Model name/number */ - strncpy((char *)&port_cfg->sym_name, model, - BFA_FCS_PORT_SYMBNAME_MODEL_SZ); - strncat((char *)&port_cfg->sym_name, BFA_FCS_PORT_SYMBNAME_SEPARATOR, - sizeof(BFA_FCS_PORT_SYMBNAME_SEPARATOR)); + strlcpy(port_cfg->sym_name.symname, model, + BFA_SYMNAME_MAXLEN); + strlcat(port_cfg->sym_name.symname, BFA_FCS_PORT_SYMBNAME_SEPARATOR, + BFA_SYMNAME_MAXLEN); /* Driver Version */ - strncat((char *)&port_cfg->sym_name, (char *)driver_info->version, - BFA_FCS_PORT_SYMBNAME_VERSION_SZ); - strncat((char *)&port_cfg->sym_name, BFA_FCS_PORT_SYMBNAME_SEPARATOR, - sizeof(BFA_FCS_PORT_SYMBNAME_SEPARATOR)); + strlcat(port_cfg->sym_name.symname, driver_info->version, + BFA_SYMNAME_MAXLEN); + strlcat(port_cfg->sym_name.symname, BFA_FCS_PORT_SYMBNAME_SEPARATOR, + BFA_SYMNAME_MAXLEN); /* Host machine name */ - strncat((char *)&port_cfg->sym_name, - (char *)driver_info->host_machine_name, - BFA_FCS_PORT_SYMBNAME_MACHINENAME_SZ); - strncat((char *)&port_cfg->sym_name, BFA_FCS_PORT_SYMBNAME_SEPARATOR, - sizeof(BFA_FCS_PORT_SYMBNAME_SEPARATOR)); + strlcat(port_cfg->sym_name.symname, + driver_info->host_machine_name, + BFA_SYMNAME_MAXLEN); + strlcat(port_cfg->sym_name.symname, BFA_FCS_PORT_SYMBNAME_SEPARATOR, + BFA_SYMNAME_MAXLEN); /* * Host OS Info : @@ -793,24 +793,24 @@ bfa_fcs_fabric_psymb_init(struct bfa_fcs_fabric_s *fabric) * OS name string and instead copy the entire OS info string (64 bytes). */ if (driver_info->host_os_patch[0] == '\0') { - strncat((char *)&port_cfg->sym_name, - (char *)driver_info->host_os_name, - BFA_FCS_OS_STR_LEN); - strncat((char *)&port_cfg->sym_name, + strlcat(port_cfg->sym_name.symname, + driver_info->host_os_name, + BFA_SYMNAME_MAXLEN); + strlcat(port_cfg->sym_name.symname, BFA_FCS_PORT_SYMBNAME_SEPARATOR, - sizeof(BFA_FCS_PORT_SYMBNAME_SEPARATOR)); + BFA_SYMNAME_MAXLEN); } else { - strncat((char *)&port_cfg->sym_name, - (char *)driver_info->host_os_name, - BFA_FCS_PORT_SYMBNAME_OSINFO_SZ); - strncat((char *)&port_cfg->sym_name, + strlcat(port_cfg->sym_name.symname, + driver_info->host_os_name, + BFA_SYMNAME_MAXLEN); + strlcat(port_cfg->sym_name.symname, BFA_FCS_PORT_SYMBNAME_SEPARATOR, - sizeof(BFA_FCS_PORT_SYMBNAME_SEPARATOR)); + BFA_SYMNAME_MAXLEN); /* Append host OS Patch Info */ - strncat((char *)&port_cfg->sym_name, - (char *)driver_info->host_os_patch, - BFA_FCS_PORT_SYMBNAME_OSPATCH_SZ); + strlcat(port_cfg->sym_name.symname, + driver_info->host_os_patch, + BFA_SYMNAME_MAXLEN); } /* null terminate */ @@ -830,26 +830,26 @@ bfa_fcs_fabric_nsymb_init(struct bfa_fcs_fabric_s *fabric) bfa_ioc_get_adapter_model(&fabric->fcs->bfa->ioc, model); /* Model name/number */ - strncpy((char *)&port_cfg->node_sym_name, model, - BFA_FCS_PORT_SYMBNAME_MODEL_SZ); - strncat((char *)&port_cfg->node_sym_name, + strlcpy(port_cfg->node_sym_name.symname, model, + BFA_SYMNAME_MAXLEN); + strlcat(port_cfg->node_sym_name.symname, BFA_FCS_PORT_SYMBNAME_SEPARATOR, - sizeof(BFA_FCS_PORT_SYMBNAME_SEPARATOR)); + BFA_SYMNAME_MAXLEN); /* Driver Version */ - strncat((char *)&port_cfg->node_sym_name, (char *)driver_info->version, - BFA_FCS_PORT_SYMBNAME_VERSION_SZ); - strncat((char *)&port_cfg->node_sym_name, + strlcat(port_cfg->node_sym_name.symname, (char *)driver_info->version, + BFA_SYMNAME_MAXLEN); + strlcat(port_cfg->node_sym_name.symname, BFA_FCS_PORT_SYMBNAME_SEPARATOR, - sizeof(BFA_FCS_PORT_SYMBNAME_SEPARATOR)); + BFA_SYMNAME_MAXLEN); /* Host machine name */ - strncat((char *)&port_cfg->node_sym_name, - (char *)driver_info->host_machine_name, - BFA_FCS_PORT_SYMBNAME_MACHINENAME_SZ); - strncat((char *)&port_cfg->node_sym_name, + strlcat(port_cfg->node_sym_name.symname, + driver_info->host_machine_name, + BFA_SYMNAME_MAXLEN); + strlcat(port_cfg->node_sym_name.symname, BFA_FCS_PORT_SYMBNAME_SEPARATOR, - sizeof(BFA_FCS_PORT_SYMBNAME_SEPARATOR)); + BFA_SYMNAME_MAXLEN); /* null terminate */ port_cfg->node_sym_name.symname[BFA_SYMNAME_MAXLEN - 1] = 0; diff --git a/drivers/scsi/bfa/bfa_fcs_lport.c b/drivers/scsi/bfa/bfa_fcs_lport.c index 638c0a2857f79..b4f2c1d8742e8 100644 --- a/drivers/scsi/bfa/bfa_fcs_lport.c +++ b/drivers/scsi/bfa/bfa_fcs_lport.c @@ -2642,10 +2642,10 @@ bfa_fcs_fdmi_get_hbaattr(struct bfa_fcs_lport_fdmi_s *fdmi, bfa_ioc_get_adapter_fw_ver(&port->fcs->bfa->ioc, hba_attr->fw_version); - strncpy(hba_attr->driver_version, (char *)driver_info->version, + strlcpy(hba_attr->driver_version, (char *)driver_info->version, sizeof(hba_attr->driver_version)); - strncpy(hba_attr->os_name, driver_info->host_os_name, + strlcpy(hba_attr->os_name, driver_info->host_os_name, sizeof(hba_attr->os_name)); /* @@ -2653,23 +2653,23 @@ bfa_fcs_fdmi_get_hbaattr(struct bfa_fcs_lport_fdmi_s *fdmi, * to the os name along with a separator */ if (driver_info->host_os_patch[0] != '\0') { - strncat(hba_attr->os_name, BFA_FCS_PORT_SYMBNAME_SEPARATOR, - sizeof(BFA_FCS_PORT_SYMBNAME_SEPARATOR)); - strncat(hba_attr->os_name, driver_info->host_os_patch, - sizeof(driver_info->host_os_patch)); + strlcat(hba_attr->os_name, BFA_FCS_PORT_SYMBNAME_SEPARATOR, + sizeof(hba_attr->os_name)); + strlcat(hba_attr->os_name, driver_info->host_os_patch, + sizeof(hba_attr->os_name)); } /* Retrieve the max frame size from the port attr */ bfa_fcs_fdmi_get_portattr(fdmi, &fcs_port_attr); hba_attr->max_ct_pyld = fcs_port_attr.max_frm_size; - strncpy(hba_attr->node_sym_name.symname, + strlcpy(hba_attr->node_sym_name.symname, port->port_cfg.node_sym_name.symname, BFA_SYMNAME_MAXLEN); strcpy(hba_attr->vendor_info, "QLogic"); hba_attr->num_ports = cpu_to_be32(bfa_ioc_get_nports(&port->fcs->bfa->ioc)); hba_attr->fabric_name = port->fabric->lps->pr_nwwn; - strncpy(hba_attr->bios_ver, hba_attr->option_rom_ver, BFA_VERSION_LEN); + strlcpy(hba_attr->bios_ver, hba_attr->option_rom_ver, BFA_VERSION_LEN); } @@ -2736,20 +2736,20 @@ bfa_fcs_fdmi_get_portattr(struct bfa_fcs_lport_fdmi_s *fdmi, /* * OS device Name */ - strncpy(port_attr->os_device_name, (char *)driver_info->os_device_name, + strlcpy(port_attr->os_device_name, driver_info->os_device_name, sizeof(port_attr->os_device_name)); /* * Host name */ - strncpy(port_attr->host_name, (char *)driver_info->host_machine_name, + strlcpy(port_attr->host_name, driver_info->host_machine_name, sizeof(port_attr->host_name)); port_attr->node_name = bfa_fcs_lport_get_nwwn(port); port_attr->port_name = bfa_fcs_lport_get_pwwn(port); - strncpy(port_attr->port_sym_name.symname, - (char *)&bfa_fcs_lport_get_psym_name(port), BFA_SYMNAME_MAXLEN); + strlcpy(port_attr->port_sym_name.symname, + bfa_fcs_lport_get_psym_name(port).symname, BFA_SYMNAME_MAXLEN); bfa_fcs_lport_get_attr(port, &lport_attr); port_attr->port_type = cpu_to_be32(lport_attr.port_type); port_attr->scos = pport_attr.cos_supported; @@ -3229,7 +3229,7 @@ bfa_fcs_lport_ms_gmal_response(void *fcsarg, struct bfa_fcxp_s *fcxp, rsp_str[gmal_entry->len-1] = 0; /* copy IP Address to fabric */ - strncpy(bfa_fcs_lport_get_fabric_ipaddr(port), + strlcpy(bfa_fcs_lport_get_fabric_ipaddr(port), gmal_entry->ip_addr, BFA_FCS_FABRIC_IPADDR_SZ); break; @@ -4667,21 +4667,13 @@ bfa_fcs_lport_ns_send_rspn_id(void *ns_cbarg, struct bfa_fcxp_s *fcxp_alloced) * to that of the base port. */ - strncpy((char *)psymbl, - (char *) & - (bfa_fcs_lport_get_psym_name + strlcpy(symbl, + (char *)&(bfa_fcs_lport_get_psym_name (bfa_fcs_get_base_port(port->fcs))), - strlen((char *) & - bfa_fcs_lport_get_psym_name(bfa_fcs_get_base_port - (port->fcs)))); - - /* Ensure we have a null terminating string. */ - ((char *)psymbl)[strlen((char *) & - bfa_fcs_lport_get_psym_name(bfa_fcs_get_base_port - (port->fcs)))] = 0; - strncat((char *)psymbl, - (char *) &(bfa_fcs_lport_get_psym_name(port)), - strlen((char *) &bfa_fcs_lport_get_psym_name(port))); + sizeof(symbl)); + + strlcat(symbl, (char *)&(bfa_fcs_lport_get_psym_name(port)), + sizeof(symbl)); } else { psymbl = (u8 *) &(bfa_fcs_lport_get_psym_name(port)); } @@ -5173,7 +5165,6 @@ bfa_fcs_lport_ns_util_send_rspn_id(void *cbarg, struct bfa_fcxp_s *fcxp_alloced) struct fchs_s fchs; struct bfa_fcxp_s *fcxp; u8 symbl[256]; - u8 *psymbl = &symbl[0]; int len; /* Avoid sending RSPN in the following states. */ @@ -5203,22 +5194,17 @@ bfa_fcs_lport_ns_util_send_rspn_id(void *cbarg, struct bfa_fcxp_s *fcxp_alloced) * For Vports, we append the vport's port symbolic name * to that of the base port. */ - strncpy((char *)psymbl, (char *)&(bfa_fcs_lport_get_psym_name + strlcpy(symbl, (char *)&(bfa_fcs_lport_get_psym_name (bfa_fcs_get_base_port(port->fcs))), - strlen((char *)&bfa_fcs_lport_get_psym_name( - bfa_fcs_get_base_port(port->fcs)))); - - /* Ensure we have a null terminating string. */ - ((char *)psymbl)[strlen((char *)&bfa_fcs_lport_get_psym_name( - bfa_fcs_get_base_port(port->fcs)))] = 0; + sizeof(symbl)); - strncat((char *)psymbl, + strlcat(symbl, (char *)&(bfa_fcs_lport_get_psym_name(port)), - strlen((char *)&bfa_fcs_lport_get_psym_name(port))); + sizeof(symbl)); } len = fc_rspnid_build(&fchs, bfa_fcxp_get_reqbuf(fcxp), - bfa_fcs_lport_get_fcid(port), 0, psymbl); + bfa_fcs_lport_get_fcid(port), 0, symbl); bfa_fcxp_send(fcxp, NULL, port->fabric->vf_id, port->lp_tag, BFA_FALSE, FC_CLASS_3, len, &fchs, NULL, NULL, FC_MAX_PDUSZ, 0); diff --git a/drivers/scsi/bfa/bfa_ioc.c b/drivers/scsi/bfa/bfa_ioc.c index 256f4afaccf96..a1a183ece0936 100644 --- a/drivers/scsi/bfa/bfa_ioc.c +++ b/drivers/scsi/bfa/bfa_ioc.c @@ -2803,7 +2803,7 @@ void bfa_ioc_get_adapter_manufacturer(struct bfa_ioc_s *ioc, char *manufacturer) { memset((void *)manufacturer, 0, BFA_ADAPTER_MFG_NAME_LEN); - strncpy(manufacturer, BFA_MFG_NAME, BFA_ADAPTER_MFG_NAME_LEN); + strlcpy(manufacturer, BFA_MFG_NAME, BFA_ADAPTER_MFG_NAME_LEN); } void diff --git a/drivers/scsi/bfa/bfa_svc.c b/drivers/scsi/bfa/bfa_svc.c index e640223bab3c6..7356fdec79f56 100644 --- a/drivers/scsi/bfa/bfa_svc.c +++ b/drivers/scsi/bfa/bfa_svc.c @@ -350,8 +350,8 @@ bfa_plog_str(struct bfa_plog_s *plog, enum bfa_plog_mid mid, lp.eid = event; lp.log_type = BFA_PL_LOG_TYPE_STRING; lp.misc = misc; - strncpy(lp.log_entry.string_log, log_str, - BFA_PL_STRING_LOG_SZ - 1); + strlcpy(lp.log_entry.string_log, log_str, + BFA_PL_STRING_LOG_SZ); lp.log_entry.string_log[BFA_PL_STRING_LOG_SZ - 1] = '\0'; bfa_plog_add(plog, &lp); } diff --git a/drivers/scsi/bfa/bfad.c b/drivers/scsi/bfa/bfad.c index 5caf5f3ff6422..ae37010af50fe 100644 --- a/drivers/scsi/bfa/bfad.c +++ b/drivers/scsi/bfa/bfad.c @@ -983,20 +983,20 @@ bfad_start_ops(struct bfad_s *bfad) { /* Fill the driver_info info to fcs*/ memset(&driver_info, 0, sizeof(driver_info)); - strncpy(driver_info.version, BFAD_DRIVER_VERSION, - sizeof(driver_info.version) - 1); + strlcpy(driver_info.version, BFAD_DRIVER_VERSION, + sizeof(driver_info.version)); if (host_name) - strncpy(driver_info.host_machine_name, host_name, - sizeof(driver_info.host_machine_name) - 1); + strlcpy(driver_info.host_machine_name, host_name, + sizeof(driver_info.host_machine_name)); if (os_name) - strncpy(driver_info.host_os_name, os_name, - sizeof(driver_info.host_os_name) - 1); + strlcpy(driver_info.host_os_name, os_name, + sizeof(driver_info.host_os_name)); if (os_patch) - strncpy(driver_info.host_os_patch, os_patch, - sizeof(driver_info.host_os_patch) - 1); + strlcpy(driver_info.host_os_patch, os_patch, + sizeof(driver_info.host_os_patch)); - strncpy(driver_info.os_device_name, bfad->pci_name, - sizeof(driver_info.os_device_name) - 1); + strlcpy(driver_info.os_device_name, bfad->pci_name, + sizeof(driver_info.os_device_name)); /* FCS driver info init */ spin_lock_irqsave(&bfad->bfad_lock, flags); diff --git a/drivers/scsi/bfa/bfad_attr.c b/drivers/scsi/bfa/bfad_attr.c index 13db3b7bc8737..d0a504af5b4f8 100644 --- a/drivers/scsi/bfa/bfad_attr.c +++ b/drivers/scsi/bfa/bfad_attr.c @@ -843,7 +843,7 @@ bfad_im_symbolic_name_show(struct device *dev, struct device_attribute *attr, char symname[BFA_SYMNAME_MAXLEN]; bfa_fcs_lport_get_attr(&bfad->bfa_fcs.fabric.bport, &port_attr); - strncpy(symname, port_attr.port_cfg.sym_name.symname, + strlcpy(symname, port_attr.port_cfg.sym_name.symname, BFA_SYMNAME_MAXLEN); return snprintf(buf, PAGE_SIZE, "%s\n", symname); } diff --git a/drivers/scsi/bfa/bfad_bsg.c b/drivers/scsi/bfa/bfad_bsg.c index 1aa46d0763a07..9081a5f93aae4 100644 --- a/drivers/scsi/bfa/bfad_bsg.c +++ b/drivers/scsi/bfa/bfad_bsg.c @@ -127,7 +127,7 @@ bfad_iocmd_ioc_get_attr(struct bfad_s *bfad, void *cmd) /* fill in driver attr info */ strcpy(iocmd->ioc_attr.driver_attr.driver, BFAD_DRIVER_NAME); - strncpy(iocmd->ioc_attr.driver_attr.driver_ver, + strlcpy(iocmd->ioc_attr.driver_attr.driver_ver, BFAD_DRIVER_VERSION, BFA_VERSION_LEN); strcpy(iocmd->ioc_attr.driver_attr.fw_ver, iocmd->ioc_attr.adapter_attr.fw_ver); @@ -315,9 +315,9 @@ bfad_iocmd_port_get_attr(struct bfad_s *bfad, void *cmd) iocmd->attr.port_type = port_attr.port_type; iocmd->attr.loopback = port_attr.loopback; iocmd->attr.authfail = port_attr.authfail; - strncpy(iocmd->attr.port_symname.symname, + strlcpy(iocmd->attr.port_symname.symname, port_attr.port_cfg.sym_name.symname, - sizeof(port_attr.port_cfg.sym_name.symname)); + sizeof(iocmd->attr.port_symname.symname)); iocmd->status = BFA_STATUS_OK; return 0; -- GitLab From 46762a64c0b03fc9cab3406bf2b3bbea043cf6e8 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 5 Sep 2017 09:33:32 +0200 Subject: [PATCH 1980/2269] staging: rts5208: fix gcc-8 logic error warning commit 58930cced012adb01bc78b3687049b17ef44d0a3 upstream. As gcc-8 points out, the bit mask check makes no sense here: drivers/staging/rts5208/sd.c: In function 'ext_sd_send_cmd_get_rsp': drivers/staging/rts5208/sd.c:4130:25: error: bitwise comparison always evaluates to true [-Werror=tautological-compare] However, the code is even more bogus, as we have already checked for the SD_RSP_TYPE_R0 case earlier in the function and returned success. As seen in the mmc/sd driver core, SD_RSP_TYPE_R0 means "no response" anyway, so checking for a particular response would not help either. This just removes the nonsensical code to get rid of the warning. Signed-off-by: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rts5208/sd.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/staging/rts5208/sd.c b/drivers/staging/rts5208/sd.c index d98d5fe25a17d..0421dd9277a88 100644 --- a/drivers/staging/rts5208/sd.c +++ b/drivers/staging/rts5208/sd.c @@ -4125,12 +4125,6 @@ RTY_SEND_CMD: rtsx_trace(chip); return STATUS_FAIL; } - - } else if (rsp_type == SD_RSP_TYPE_R0) { - if ((ptr[3] & 0x1E) != 0x03) { - rtsx_trace(chip); - return STATUS_FAIL; - } } } } -- GitLab From 184adf40d184ff2761923e7f2f64a12702843ad8 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 2 Feb 2018 15:59:40 +0100 Subject: [PATCH 1981/2269] kdb: use memmove instead of overlapping memcpy commit 2cf2f0d5b91fd1b06a6ae260462fc7945ea84add upstream. gcc discovered that the memcpy() arguments in kdbnearsym() overlap, so we should really use memmove(), which is defined to handle that correctly: In function 'memcpy', inlined from 'kdbnearsym' at /git/arm-soc/kernel/debug/kdb/kdb_support.c:132:4: /git/arm-soc/include/linux/string.h:353:9: error: '__builtin_memcpy' accessing 792 bytes at offsets 0 and 8 overlaps 784 bytes at offset 8 [-Werror=restrict] return __builtin_memcpy(p, q, size); Signed-off-by: Arnd Bergmann Signed-off-by: Jason Wessel Signed-off-by: Greg Kroah-Hartman --- kernel/debug/kdb/kdb_support.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/debug/kdb/kdb_support.c b/kernel/debug/kdb/kdb_support.c index 014f6fbb38327..b14b0925c1848 100644 --- a/kernel/debug/kdb/kdb_support.c +++ b/kernel/debug/kdb/kdb_support.c @@ -129,13 +129,13 @@ int kdbnearsym(unsigned long addr, kdb_symtab_t *symtab) } if (i >= ARRAY_SIZE(kdb_name_table)) { debug_kfree(kdb_name_table[0]); - memcpy(kdb_name_table, kdb_name_table+1, + memmove(kdb_name_table, kdb_name_table+1, sizeof(kdb_name_table[0]) * (ARRAY_SIZE(kdb_name_table)-1)); } else { debug_kfree(knt1); knt1 = kdb_name_table[i]; - memcpy(kdb_name_table+i, kdb_name_table+i+1, + memmove(kdb_name_table+i, kdb_name_table+i+1, sizeof(kdb_name_table[0]) * (ARRAY_SIZE(kdb_name_table)-i-1)); } -- GitLab From 48c11537ee6d6731d1e24343150e41057f593a8b Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 14 Nov 2018 10:17:01 -0800 Subject: [PATCH 1982/2269] iser: set sector for ambiguous mr status errors commit 24c3456c8d5ee6fc1933ca40f7b4406130682668 upstream. If for some reason we failed to query the mr status, we need to make sure to provide sufficient information for an ambiguous error (guard error on sector 0). Fixes: 0a7a08ad6f5f ("IB/iser: Implement check_protection") Cc: Reported-by: Dan Carpenter Signed-off-by: Sagi Grimberg Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/ulp/iser/iser_verbs.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index 55a73b0ed4c66..e28a5d713d1a9 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -1108,7 +1108,9 @@ u8 iser_check_task_pi_status(struct iscsi_iser_task *iser_task, IB_MR_CHECK_SIG_STATUS, &mr_status); if (ret) { pr_err("ib_check_mr_status failed, ret %d\n", ret); - goto err; + /* Not a lot we can do, return ambiguous guard error */ + *sector = 0; + return 0x1; } if (mr_status.fail_status & IB_MR_CHECK_SIG_STATUS) { @@ -1136,9 +1138,6 @@ u8 iser_check_task_pi_status(struct iscsi_iser_task *iser_task, } return 0; -err: - /* Not alot we can do here, return ambiguous guard error */ - return 0x1; } void iser_err_comp(struct ib_wc *wc, const char *type) -- GitLab From 8d3ab1ccc063e618ef1a2711e9da9ce9e7d27396 Mon Sep 17 00:00:00 2001 From: Andrea Parri Date: Thu, 22 Nov 2018 17:10:31 +0100 Subject: [PATCH 1983/2269] uprobes: Fix handle_swbp() vs. unregister() + register() race once more commit 09d3f015d1e1b4fee7e9bbdcf54201d239393391 upstream. Commit: 142b18ddc8143 ("uprobes: Fix handle_swbp() vs unregister() + register() race") added the UPROBE_COPY_INSN flag, and corresponding smp_wmb() and smp_rmb() memory barriers, to ensure that handle_swbp() uses fully-initialized uprobes only. However, the smp_rmb() is mis-placed: this barrier should be placed after handle_swbp() has tested for the flag, thus guaranteeing that (program-order) subsequent loads from the uprobe can see the initial stores performed by prepare_uprobe(). Move the smp_rmb() accordingly. Also amend the comments associated to the two memory barriers to indicate their actual locations. Signed-off-by: Andrea Parri Acked-by: Oleg Nesterov Cc: Alexander Shishkin Cc: Andrew Morton Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Namhyung Kim Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: stable@kernel.org Fixes: 142b18ddc8143 ("uprobes: Fix handle_swbp() vs unregister() + register() race") Link: http://lkml.kernel.org/r/20181122161031.15179-1-andrea.parri@amarulasolutions.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- kernel/events/uprobes.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 267f6ef91d970..01941cffa9c2f 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -616,7 +616,7 @@ static int prepare_uprobe(struct uprobe *uprobe, struct file *file, BUG_ON((uprobe->offset & ~PAGE_MASK) + UPROBE_SWBP_INSN_SIZE > PAGE_SIZE); - smp_wmb(); /* pairs with rmb() in find_active_uprobe() */ + smp_wmb(); /* pairs with the smp_rmb() in handle_swbp() */ set_bit(UPROBE_COPY_INSN, &uprobe->flags); out: @@ -1910,10 +1910,18 @@ static void handle_swbp(struct pt_regs *regs) * After we hit the bp, _unregister + _register can install the * new and not-yet-analyzed uprobe at the same address, restart. */ - smp_rmb(); /* pairs with wmb() in install_breakpoint() */ if (unlikely(!test_bit(UPROBE_COPY_INSN, &uprobe->flags))) goto out; + /* + * Pairs with the smp_wmb() in prepare_uprobe(). + * + * Guarantees that if we see the UPROBE_COPY_INSN bit set, then + * we must also see the stores to &uprobe->arch performed by the + * prepare_uprobe() call. + */ + smp_rmb(); + /* Tracing handlers use ->utask to communicate with fetch methods */ if (!get_utask()) goto out; -- GitLab From 9aab50aec56772ff6e1a2dead6097cf2921fe944 Mon Sep 17 00:00:00 2001 From: Mathias Kresin Date: Mon, 26 Nov 2018 11:25:40 +0100 Subject: [PATCH 1984/2269] MIPS: ralink: Fix mt7620 nd_sd pinmux commit 7d35baa4e9ec4b717bc0e58a39cdb6a1c50f5465 upstream. In case the nd_sd group is set to the sd-card function, Pins 45 + 46 are configured as GPIOs. If they are blocked by the sd function, they can't be used as GPIOs. Reported-by: Kristian Evensen Signed-off-by: Mathias Kresin Signed-off-by: Paul Burton Fixes: f576fb6a0700 ("MIPS: ralink: cleanup the soc specific pinmux data") Patchwork: https://patchwork.linux-mips.org/patch/21220/ Cc: John Crispin Cc: Ralf Baechle Cc: James Hogan Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Cc: stable@vger.kernel.org # v3.18+ Signed-off-by: Greg Kroah-Hartman --- arch/mips/ralink/mt7620.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/ralink/mt7620.c b/arch/mips/ralink/mt7620.c index 41b71c4352c25..c1ce6f43642bc 100644 --- a/arch/mips/ralink/mt7620.c +++ b/arch/mips/ralink/mt7620.c @@ -84,7 +84,7 @@ static struct rt2880_pmx_func pcie_rst_grp[] = { }; static struct rt2880_pmx_func nd_sd_grp[] = { FUNC("nand", MT7620_GPIO_MODE_NAND, 45, 15), - FUNC("sd", MT7620_GPIO_MODE_SD, 45, 15) + FUNC("sd", MT7620_GPIO_MODE_SD, 47, 13) }; static struct rt2880_pmx_group mt7620a_pinmux_data[] = { -- GitLab From d3c0999433352dacda6da37c237388bfa9bae1c2 Mon Sep 17 00:00:00 2001 From: "Dmitry V. Levin" Date: Wed, 21 Nov 2018 22:14:39 +0300 Subject: [PATCH 1985/2269] mips: fix mips_get_syscall_arg o32 check commit c50cbd85cd7027d32ac5945bb60217936b4f7eaf upstream. When checking for TIF_32BIT_REGS flag, mips_get_syscall_arg() should use the task specified as its argument instead of the current task. This potentially affects all syscall_get_arguments() users who specify tasks different from the current. Fixes: c0ff3c53d4f99 ("MIPS: Enable HAVE_ARCH_TRACEHOOK.") Signed-off-by: Dmitry V. Levin Signed-off-by: Paul Burton Patchwork: https://patchwork.linux-mips.org/patch/21185/ Cc: Elvira Khabirova Cc: Ralf Baechle Cc: James Hogan Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Cc: stable@vger.kernel.org # v3.13+ Signed-off-by: Greg Kroah-Hartman --- arch/mips/include/asm/syscall.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/include/asm/syscall.h b/arch/mips/include/asm/syscall.h index 7c713025b23f6..53ee82b1efa75 100644 --- a/arch/mips/include/asm/syscall.h +++ b/arch/mips/include/asm/syscall.h @@ -51,7 +51,7 @@ static inline unsigned long mips_get_syscall_arg(unsigned long *arg, #ifdef CONFIG_64BIT case 4: case 5: case 6: case 7: #ifdef CONFIG_MIPS32_O32 - if (test_thread_flag(TIF_32BIT_REGS)) + if (test_tsk_thread_flag(task, TIF_32BIT_REGS)) return get_user(*arg, (int *)usp + n); else #endif -- GitLab From 53f6341a937be3dad213f5f4b2e188012c024ed2 Mon Sep 17 00:00:00 2001 From: Michael Guralnik Date: Wed, 21 Nov 2018 15:03:54 +0200 Subject: [PATCH 1986/2269] IB/mlx5: Avoid load failure due to unknown link width commit db7a691a1551a748cb92d9c89c6b190ea87e28d5 upstream. If the firmware reports a connection width that is not 1x, 4x, 8x or 12x it causes the driver to fail during initialization. To prevent this failure every time a new width is introduced to the RDMA stack, we will set a default 4x width for these widths which ar unknown to the driver. This is needed to allow to run old kernels with new firmware. Cc: # 4.1 Fixes: 1b5daf11b015 ("IB/mlx5: Avoid using the MAD_IFC command under ISSI > 0 mode") Signed-off-by: Michael Guralnik Reviewed-by: Majd Dibbiny Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/mlx5/main.c | 29 +++++++++++------------------ 1 file changed, 11 insertions(+), 18 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index c3a4f5d92391f..13a92062e9ca5 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -869,31 +869,26 @@ enum mlx5_ib_width { MLX5_IB_WIDTH_12X = 1 << 4 }; -static int translate_active_width(struct ib_device *ibdev, u8 active_width, +static void translate_active_width(struct ib_device *ibdev, u8 active_width, u8 *ib_width) { struct mlx5_ib_dev *dev = to_mdev(ibdev); - int err = 0; - if (active_width & MLX5_IB_WIDTH_1X) { + if (active_width & MLX5_IB_WIDTH_1X) *ib_width = IB_WIDTH_1X; - } else if (active_width & MLX5_IB_WIDTH_2X) { - mlx5_ib_dbg(dev, "active_width %d is not supported by IB spec\n", - (int)active_width); - err = -EINVAL; - } else if (active_width & MLX5_IB_WIDTH_4X) { + else if (active_width & MLX5_IB_WIDTH_4X) *ib_width = IB_WIDTH_4X; - } else if (active_width & MLX5_IB_WIDTH_8X) { + else if (active_width & MLX5_IB_WIDTH_8X) *ib_width = IB_WIDTH_8X; - } else if (active_width & MLX5_IB_WIDTH_12X) { + else if (active_width & MLX5_IB_WIDTH_12X) *ib_width = IB_WIDTH_12X; - } else { - mlx5_ib_dbg(dev, "Invalid active_width %d\n", + else { + mlx5_ib_dbg(dev, "Invalid active_width %d, setting width to default value: 4x\n", (int)active_width); - err = -EINVAL; + *ib_width = IB_WIDTH_4X; } - return err; + return; } static int mlx5_mtu_to_ib_mtu(int mtu) @@ -1001,10 +996,8 @@ static int mlx5_query_hca_port(struct ib_device *ibdev, u8 port, if (err) goto out; - err = translate_active_width(ibdev, ib_link_width_oper, - &props->active_width); - if (err) - goto out; + translate_active_width(ibdev, ib_link_width_oper, &props->active_width); + err = mlx5_query_port_ib_proto_oper(mdev, &props->active_speed, port); if (err) goto out; -- GitLab From 37b927309286a4e4f9bf13aa2f7dbdd001637346 Mon Sep 17 00:00:00 2001 From: Sam Bobroff Date: Mon, 5 Nov 2018 16:57:47 +1100 Subject: [PATCH 1987/2269] drm/ast: Fix incorrect free on ioregs commit dc25ab067645eabd037f1a23d49a666f9e0b8c68 upstream. If the platform has no IO space, ioregs is placed next to the already allocated regs. In this case, it should not be separately freed. This prevents a kernel warning from __vunmap "Trying to vfree() nonexistent vm area" when unloading the driver. Fixes: 0dd68309b9c5 ("drm/ast: Try to use MMIO registers when PIO isn't supported") Signed-off-by: Sam Bobroff Cc: Signed-off-by: Dave Airlie Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/ast/ast_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/ast/ast_main.c b/drivers/gpu/drm/ast/ast_main.c index dac355812adcb..373700c05a00f 100644 --- a/drivers/gpu/drm/ast/ast_main.c +++ b/drivers/gpu/drm/ast/ast_main.c @@ -583,7 +583,8 @@ void ast_driver_unload(struct drm_device *dev) drm_mode_config_cleanup(dev); ast_mm_fini(ast); - pci_iounmap(dev->pdev, ast->ioregs); + if (ast->ioregs != ast->regs + AST_IO_MM_OFFSET) + pci_iounmap(dev->pdev, ast->ioregs); pci_iounmap(dev->pdev, ast->regs); kfree(ast); } -- GitLab From 4e97ad08e1e3f4e9dbd5884b1dd7c4fdbe038eb1 Mon Sep 17 00:00:00 2001 From: Sergio Correia Date: Thu, 22 Nov 2018 02:33:29 -0300 Subject: [PATCH 1988/2269] drm: set is_master to 0 upon drm_new_set_master() failure commit 23a336b34258aba3b50ea6863cca4e81b5ef6384 upstream. When drm_new_set_master() fails, set is_master to 0, to prevent a possible NULL pointer deref. Here is a problematic flow: we check is_master in drm_is_current_master(), then proceed to call drm_lease_owner() passing master. If we do not restore is_master status when drm_new_set_master() fails, we may have a situation in which is_master will be 1 and master itself, NULL, leading to the deref of a NULL pointer in drm_lease_owner(). This fixes the following OOPS, observed on an ArchLinux running a 4.19.2 kernel: [ 97.804282] BUG: unable to handle kernel NULL pointer dereference at 0000000000000080 [ 97.807224] PGD 0 P4D 0 [ 97.807224] Oops: 0000 [#1] PREEMPT SMP NOPTI [ 97.807224] CPU: 0 PID: 1348 Comm: xfwm4 Tainted: P OE 4.19.2-arch1-1-ARCH #1 [ 97.807224] Hardware name: To Be Filled By O.E.M. To Be Filled By O.E.M./AB350 Pro4, BIOS P5.10 10/16/2018 [ 97.807224] RIP: 0010:drm_lease_owner+0xd/0x20 [drm] [ 97.807224] Code: 83 c4 18 5b 5d c3 b8 ea ff ff ff eb e2 b8 ed ff ff ff eb db e8 b4 ca 68 fb 0f 1f 40 00 0f 1f 44 00 00 48 89 f8 eb 03 48 89 d0 <48> 8b 90 80 00 00 00 48 85 d2 75 f1 c3 66 0f 1f 44 00 00 0f 1f 44 [ 97.807224] RSP: 0018:ffffb8cf08e07bb0 EFLAGS: 00010202 [ 97.807224] RAX: 0000000000000000 RBX: ffff9cf0f2586c00 RCX: ffff9cf0f2586c88 [ 97.807224] RDX: ffff9cf0ddbd8000 RSI: 0000000000000000 RDI: 0000000000000000 [ 97.807224] RBP: ffff9cf1040e9800 R08: 0000000000000000 R09: 0000000000000000 [ 97.807224] R10: ffffdeb30fd5d680 R11: ffffdeb30f5d6808 R12: ffff9cf1040e9888 [ 97.807224] R13: 0000000000000000 R14: dead000000000200 R15: ffff9cf0f2586cc8 [ 97.807224] FS: 00007f4145513180(0000) GS:ffff9cf10ea00000(0000) knlGS:0000000000000000 [ 97.807224] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 97.807224] CR2: 0000000000000080 CR3: 00000003d7548000 CR4: 00000000003406f0 [ 97.807224] Call Trace: [ 97.807224] drm_is_current_master+0x1a/0x30 [drm] [ 97.807224] drm_master_release+0x3e/0x130 [drm] [ 97.807224] drm_file_free.part.0+0x2be/0x2d0 [drm] [ 97.807224] drm_open+0x1ba/0x1e0 [drm] [ 97.807224] drm_stub_open+0xaf/0xe0 [drm] [ 97.807224] chrdev_open+0xa3/0x1b0 [ 97.807224] ? cdev_put.part.0+0x20/0x20 [ 97.807224] do_dentry_open+0x132/0x340 [ 97.807224] path_openat+0x2d1/0x14e0 [ 97.807224] ? mem_cgroup_commit_charge+0x7a/0x520 [ 97.807224] do_filp_open+0x93/0x100 [ 97.807224] ? __check_object_size+0x102/0x189 [ 97.807224] ? _raw_spin_unlock+0x16/0x30 [ 97.807224] do_sys_open+0x186/0x210 [ 97.807224] do_syscall_64+0x5b/0x170 [ 97.807224] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 97.807224] RIP: 0033:0x7f4147b07976 [ 97.807224] Code: 89 54 24 08 e8 7b f4 ff ff 8b 74 24 0c 48 8b 3c 24 41 89 c0 44 8b 54 24 08 b8 01 01 00 00 89 f2 48 89 fe bf 9c ff ff ff 0f 05 <48> 3d 00 f0 ff ff 77 30 44 89 c7 89 44 24 08 e8 a6 f4 ff ff 8b 44 [ 97.807224] RSP: 002b:00007ffcced96ca0 EFLAGS: 00000293 ORIG_RAX: 0000000000000101 [ 97.807224] RAX: ffffffffffffffda RBX: 00005619d5037f80 RCX: 00007f4147b07976 [ 97.807224] RDX: 0000000000000002 RSI: 00005619d46b969c RDI: 00000000ffffff9c [ 98.040039] RBP: 0000000000000024 R08: 0000000000000000 R09: 0000000000000000 [ 98.040039] R10: 0000000000000000 R11: 0000000000000293 R12: 0000000000000024 [ 98.040039] R13: 0000000000000012 R14: 00005619d5035950 R15: 0000000000000012 [ 98.040039] Modules linked in: nct6775 hwmon_vid algif_skcipher af_alg nls_iso8859_1 nls_cp437 vfat fat uvcvideo videobuf2_vmalloc videobuf2_memops videobuf2_v4l2 videobuf2_common arc4 videodev media snd_usb_audio snd_hda_codec_hdmi snd_usbmidi_lib snd_rawmidi snd_seq_device mousedev input_leds iwlmvm mac80211 snd_hda_codec_realtek snd_hda_codec_generic snd_hda_intel snd_hda_codec edac_mce_amd kvm_amd snd_hda_core kvm iwlwifi snd_hwdep r8169 wmi_bmof cfg80211 snd_pcm irqbypass snd_timer snd libphy soundcore pinctrl_amd rfkill pcspkr sp5100_tco evdev gpio_amdpt k10temp mac_hid i2c_piix4 wmi pcc_cpufreq acpi_cpufreq vboxnetflt(OE) vboxnetadp(OE) vboxpci(OE) vboxdrv(OE) msr sg crypto_user ip_tables x_tables ext4 crc32c_generic crc16 mbcache jbd2 fscrypto uas usb_storage dm_crypt hid_generic usbhid hid [ 98.040039] dm_mod raid1 md_mod sd_mod crct10dif_pclmul crc32_pclmul crc32c_intel ghash_clmulni_intel pcbc ahci libahci aesni_intel aes_x86_64 libata crypto_simd cryptd glue_helper ccp xhci_pci rng_core scsi_mod xhci_hcd nvidia_drm(POE) drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops drm agpgart nvidia_uvm(POE) nvidia_modeset(POE) nvidia(POE) ipmi_devintf ipmi_msghandler [ 98.040039] CR2: 0000000000000080 [ 98.040039] ---[ end trace 3b65093b6fe62b2f ]--- [ 98.040039] RIP: 0010:drm_lease_owner+0xd/0x20 [drm] [ 98.040039] Code: 83 c4 18 5b 5d c3 b8 ea ff ff ff eb e2 b8 ed ff ff ff eb db e8 b4 ca 68 fb 0f 1f 40 00 0f 1f 44 00 00 48 89 f8 eb 03 48 89 d0 <48> 8b 90 80 00 00 00 48 85 d2 75 f1 c3 66 0f 1f 44 00 00 0f 1f 44 [ 98.040039] RSP: 0018:ffffb8cf08e07bb0 EFLAGS: 00010202 [ 98.040039] RAX: 0000000000000000 RBX: ffff9cf0f2586c00 RCX: ffff9cf0f2586c88 [ 98.040039] RDX: ffff9cf0ddbd8000 RSI: 0000000000000000 RDI: 0000000000000000 [ 98.040039] RBP: ffff9cf1040e9800 R08: 0000000000000000 R09: 0000000000000000 [ 98.040039] R10: ffffdeb30fd5d680 R11: ffffdeb30f5d6808 R12: ffff9cf1040e9888 [ 98.040039] R13: 0000000000000000 R14: dead000000000200 R15: ffff9cf0f2586cc8 [ 98.040039] FS: 00007f4145513180(0000) GS:ffff9cf10ea00000(0000) knlGS:0000000000000000 [ 98.040039] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 98.040039] CR2: 0000000000000080 CR3: 00000003d7548000 CR4: 00000000003406f0 Signed-off-by: Sergio Correia Cc: stable@vger.kernel.org Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20181122053329.2692-1-sergio@correia.cc Signed-off-by: Sean Paul Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_auth.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/drm_auth.c b/drivers/gpu/drm/drm_auth.c index 7ff697389d744..fe85d041d0baa 100644 --- a/drivers/gpu/drm/drm_auth.c +++ b/drivers/gpu/drm/drm_auth.c @@ -133,6 +133,7 @@ static int drm_new_set_master(struct drm_device *dev, struct drm_file *fpriv) lockdep_assert_held_once(&dev->master_mutex); + WARN_ON(fpriv->is_master); old_master = fpriv->master; fpriv->master = drm_master_create(dev); if (!fpriv->master) { @@ -161,6 +162,7 @@ out_err: /* drop references and restore old master on failure */ drm_master_put(&fpriv->master); fpriv->master = old_master; + fpriv->is_master = 0; return ret; } -- GitLab From 9576f5215818f54c299eb7880e396ad75fc777fc Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Sat, 24 Nov 2018 14:12:38 -0500 Subject: [PATCH 1989/2269] drm/meson: Enable fast_io in meson_dw_hdmi_regmap_config commit 995b278e4723b26f8ebf0e7c119286d16c712747 upstream. Seeing as we use this registermap in the context of our IRQ handlers, we need to be using spinlocks for reading/writing registers so that we can still read them from IRQ handlers without having to grab any mutexes and accidentally sleep. We don't currently do this, as pointed out by lockdep: [ 18.403770] BUG: sleeping function called from invalid context at kernel/locking/mutex.c:908 [ 18.406744] in_atomic(): 1, irqs_disabled(): 128, pid: 68, name: kworker/u17:0 [ 18.413864] INFO: lockdep is turned off. [ 18.417675] irq event stamp: 12 [ 18.420778] hardirqs last enabled at (11): [] _raw_spin_unlock_irq+0x2c/0x60 [ 18.429510] hardirqs last disabled at (12): [] __schedule+0xc4/0xa60 [ 18.437345] softirqs last enabled at (0): [] copy_process.isra.4.part.5+0x4d8/0x1c50 [ 18.446684] softirqs last disabled at (0): [<0000000000000000>] (null) [ 18.453979] CPU: 0 PID: 68 Comm: kworker/u17:0 Tainted: G W O 4.20.0-rc3Lyude-Test+ #9 [ 18.469839] Hardware name: amlogic khadas-vim2/khadas-vim2, BIOS 2018.07-rc2-armbian 09/11/2018 [ 18.480037] Workqueue: hci0 hci_power_on [bluetooth] [ 18.487138] Call trace: [ 18.494192] dump_backtrace+0x0/0x1b8 [ 18.501280] show_stack+0x14/0x20 [ 18.508361] dump_stack+0xbc/0xf4 [ 18.515427] ___might_sleep+0x140/0x1d8 [ 18.522515] __might_sleep+0x50/0x88 [ 18.529582] __mutex_lock+0x60/0x870 [ 18.536621] mutex_lock_nested+0x1c/0x28 [ 18.543660] regmap_lock_mutex+0x10/0x18 [ 18.550696] regmap_read+0x38/0x70 [ 18.557727] dw_hdmi_hardirq+0x58/0x138 [dw_hdmi] [ 18.564804] __handle_irq_event_percpu+0xac/0x410 [ 18.571891] handle_irq_event_percpu+0x34/0x88 [ 18.578982] handle_irq_event+0x48/0x78 [ 18.586051] handle_fasteoi_irq+0xac/0x160 [ 18.593061] generic_handle_irq+0x24/0x38 [ 18.599989] __handle_domain_irq+0x60/0xb8 [ 18.606857] gic_handle_irq+0x50/0xa0 [ 18.613659] el1_irq+0xb4/0x130 [ 18.620394] debug_lockdep_rcu_enabled+0x2c/0x30 [ 18.627111] schedule+0x38/0xa0 [ 18.633781] schedule_timeout+0x3a8/0x510 [ 18.640389] wait_for_common+0x15c/0x180 [ 18.646905] wait_for_completion+0x14/0x20 [ 18.653319] mmc_wait_for_req_done+0x28/0x168 [ 18.659693] mmc_wait_for_req+0xa8/0xe8 [ 18.665978] mmc_wait_for_cmd+0x64/0x98 [ 18.672180] mmc_io_rw_direct_host+0x94/0x130 [ 18.678385] mmc_io_rw_direct+0x10/0x18 [ 18.684516] sdio_enable_func+0xe8/0x1d0 [ 18.690627] btsdio_open+0x24/0xc0 [btsdio] [ 18.696821] hci_dev_do_open+0x64/0x598 [bluetooth] [ 18.703025] hci_power_on+0x50/0x270 [bluetooth] [ 18.709163] process_one_work+0x2a0/0x6e0 [ 18.715252] worker_thread+0x40/0x448 [ 18.721310] kthread+0x12c/0x130 [ 18.727326] ret_from_fork+0x10/0x1c [ 18.735555] ------------[ cut here ]------------ [ 18.741430] do not call blocking ops when !TASK_RUNNING; state=2 set at [<000000006265ec59>] wait_for_common+0x140/0x180 [ 18.752417] WARNING: CPU: 0 PID: 68 at kernel/sched/core.c:6096 __might_sleep+0x7c/0x88 [ 18.760553] Modules linked in: dm_mirror dm_region_hash dm_log dm_mod btsdio bluetooth snd_soc_hdmi_codec dw_hdmi_i2s_audio ecdh_generic brcmfmac brcmutil cfg80211 rfkill ir_nec_decoder meson_dw_hdmi(O) dw_hdmi rc_geekbox meson_rng meson_ir ao_cec rng_core rc_core cec leds_pwm efivars nfsd ip_tables x_tables crc32_generic f2fs uas meson_gxbb_wdt pwm_meson efivarfs ipv6 [ 18.799469] CPU: 0 PID: 68 Comm: kworker/u17:0 Tainted: G W O 4.20.0-rc3Lyude-Test+ #9 [ 18.808858] Hardware name: amlogic khadas-vim2/khadas-vim2, BIOS 2018.07-rc2-armbian 09/11/2018 [ 18.818045] Workqueue: hci0 hci_power_on [bluetooth] [ 18.824088] pstate: 80000085 (Nzcv daIf -PAN -UAO) [ 18.829891] pc : __might_sleep+0x7c/0x88 [ 18.835722] lr : __might_sleep+0x7c/0x88 [ 18.841256] sp : ffff000008003cb0 [ 18.846751] x29: ffff000008003cb0 x28: 0000000000000000 [ 18.852269] x27: ffff00000938e000 x26: ffff800010283000 [ 18.857726] x25: ffff800010353280 x24: ffff00000868ef50 [ 18.863166] x23: 0000000000000000 x22: 0000000000000000 [ 18.868551] x21: 0000000000000000 x20: 000000000000038c [ 18.873850] x19: ffff000008cd08c0 x18: 0000000000000010 [ 18.879081] x17: ffff000008a68cb0 x16: 0000000000000000 [ 18.884197] x15: 0000000000aaaaaa x14: 0e200e200e200e20 [ 18.889239] x13: 0000000000000001 x12: 00000000ffffffff [ 18.894261] x11: ffff000008adfa48 x10: 0000000000000001 [ 18.899517] x9 : ffff0000092a0158 x8 : 0000000000000000 [ 18.904674] x7 : ffff00000812136c x6 : 0000000000000000 [ 18.909895] x5 : 0000000000000000 x4 : 0000000000000001 [ 18.915080] x3 : 0000000000000007 x2 : 0000000000000007 [ 18.920269] x1 : 99ab8e9ebb6c8500 x0 : 0000000000000000 [ 18.925443] Call trace: [ 18.929904] __might_sleep+0x7c/0x88 [ 18.934311] __mutex_lock+0x60/0x870 [ 18.938687] mutex_lock_nested+0x1c/0x28 [ 18.943076] regmap_lock_mutex+0x10/0x18 [ 18.947453] regmap_read+0x38/0x70 [ 18.951842] dw_hdmi_hardirq+0x58/0x138 [dw_hdmi] [ 18.956269] __handle_irq_event_percpu+0xac/0x410 [ 18.960712] handle_irq_event_percpu+0x34/0x88 [ 18.965176] handle_irq_event+0x48/0x78 [ 18.969612] handle_fasteoi_irq+0xac/0x160 [ 18.974058] generic_handle_irq+0x24/0x38 [ 18.978501] __handle_domain_irq+0x60/0xb8 [ 18.982938] gic_handle_irq+0x50/0xa0 [ 18.987351] el1_irq+0xb4/0x130 [ 18.991734] debug_lockdep_rcu_enabled+0x2c/0x30 [ 18.996180] schedule+0x38/0xa0 [ 19.000609] schedule_timeout+0x3a8/0x510 [ 19.005064] wait_for_common+0x15c/0x180 [ 19.009513] wait_for_completion+0x14/0x20 [ 19.013951] mmc_wait_for_req_done+0x28/0x168 [ 19.018402] mmc_wait_for_req+0xa8/0xe8 [ 19.022809] mmc_wait_for_cmd+0x64/0x98 [ 19.027177] mmc_io_rw_direct_host+0x94/0x130 [ 19.031563] mmc_io_rw_direct+0x10/0x18 [ 19.035922] sdio_enable_func+0xe8/0x1d0 [ 19.040294] btsdio_open+0x24/0xc0 [btsdio] [ 19.044742] hci_dev_do_open+0x64/0x598 [bluetooth] [ 19.049228] hci_power_on+0x50/0x270 [bluetooth] [ 19.053687] process_one_work+0x2a0/0x6e0 [ 19.058143] worker_thread+0x40/0x448 [ 19.062608] kthread+0x12c/0x130 [ 19.067064] ret_from_fork+0x10/0x1c [ 19.071513] irq event stamp: 12 [ 19.075937] hardirqs last enabled at (11): [] _raw_spin_unlock_irq+0x2c/0x60 [ 19.083560] hardirqs last disabled at (12): [] __schedule+0xc4/0xa60 [ 19.091401] softirqs last enabled at (0): [] copy_process.isra.4.part.5+0x4d8/0x1c50 [ 19.100801] softirqs last disabled at (0): [<0000000000000000>] (null) [ 19.108135] ---[ end trace 38c4920787b88c75 ]--- So, fix this by enabling the fast_io option in our regmap config so that regmap uses spinlocks for locking instead of mutexes. Signed-off-by: Lyude Paul Fixes: 3f68be7d8e96 ("drm/meson: Add support for HDMI encoder and DW-HDMI bridge + PHY") Cc: Daniel Vetter Cc: Neil Armstrong Cc: Carlo Caione Cc: Kevin Hilman Cc: dri-devel@lists.freedesktop.org Cc: linux-amlogic@lists.infradead.org Cc: linux-arm-kernel@lists.infradead.org Cc: # v4.12+ Acked-by: Neil Armstrong Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20181124191238.28276-1-lyude@redhat.com Signed-off-by: Sean Paul Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/meson/meson_dw_hdmi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/meson/meson_dw_hdmi.c b/drivers/gpu/drm/meson/meson_dw_hdmi.c index cef414466f9fe..e5f7a7cf48fa8 100644 --- a/drivers/gpu/drm/meson/meson_dw_hdmi.c +++ b/drivers/gpu/drm/meson/meson_dw_hdmi.c @@ -697,6 +697,7 @@ static const struct regmap_config meson_dw_hdmi_regmap_config = { .reg_read = meson_dw_hdmi_reg_read, .reg_write = meson_dw_hdmi_reg_write, .max_register = 0x10000, + .fast_io = true, }; static bool meson_hdmi_connector_is_available(struct device *dev) -- GitLab From a88a1b3c215beee3f78ee9b8981518381fa073d6 Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Sat, 24 Nov 2018 20:21:17 -0500 Subject: [PATCH 1990/2269] drm/meson: Fix OOB memory accesses in meson_viu_set_osd_lut() commit 97b2a3180a559a33852ac0cd77904166069484fd upstream. Currently on driver bringup with KASAN enabled, meson triggers an OOB memory access as shown below: [ 117.904528] ================================================================== [ 117.904560] BUG: KASAN: global-out-of-bounds in meson_viu_set_osd_lut+0x7a0/0x890 [ 117.904588] Read of size 4 at addr ffff20000a63ce24 by task systemd-udevd/498 [ 117.904601] [ 118.083372] CPU: 4 PID: 498 Comm: systemd-udevd Not tainted 4.20.0-rc3Lyude-Test+ #20 [ 118.091143] Hardware name: amlogic khadas-vim2/khadas-vim2, BIOS 2018.07-rc2-armbian 09/11/2018 [ 118.099768] Call trace: [ 118.102181] dump_backtrace+0x0/0x3e8 [ 118.105796] show_stack+0x14/0x20 [ 118.109083] dump_stack+0x130/0x1c4 [ 118.112539] print_address_description+0x60/0x25c [ 118.117214] kasan_report+0x1b4/0x368 [ 118.120851] __asan_report_load4_noabort+0x18/0x20 [ 118.125566] meson_viu_set_osd_lut+0x7a0/0x890 [ 118.129953] meson_viu_init+0x10c/0x290 [ 118.133741] meson_drv_bind_master+0x474/0x748 [ 118.138141] meson_drv_bind+0x10/0x18 [ 118.141760] try_to_bring_up_master+0x3d8/0x768 [ 118.146249] component_add+0x214/0x570 [ 118.149978] meson_dw_hdmi_probe+0x18/0x20 [meson_dw_hdmi] [ 118.155404] platform_drv_probe+0x98/0x138 [ 118.159455] really_probe+0x2a0/0xa70 [ 118.163070] driver_probe_device+0x1b4/0x2d8 [ 118.167299] __driver_attach+0x200/0x280 [ 118.171189] bus_for_each_dev+0x10c/0x1a8 [ 118.175144] driver_attach+0x38/0x50 [ 118.178681] bus_add_driver+0x330/0x608 [ 118.182471] driver_register+0x140/0x388 [ 118.186361] __platform_driver_register+0xc8/0x108 [ 118.191117] meson_dw_hdmi_platform_driver_init+0x1c/0x1000 [meson_dw_hdmi] [ 118.198022] do_one_initcall+0x12c/0x3bc [ 118.201883] do_init_module+0x1fc/0x638 [ 118.205673] load_module+0x4b4c/0x6808 [ 118.209387] __se_sys_init_module+0x2e8/0x3c0 [ 118.213699] __arm64_sys_init_module+0x68/0x98 [ 118.218100] el0_svc_common+0x104/0x210 [ 118.221893] el0_svc_handler+0x48/0xb8 [ 118.225594] el0_svc+0x8/0xc [ 118.228429] [ 118.229887] The buggy address belongs to the variable: [ 118.235007] eotf_33_linear_mapping+0x84/0xc0 [ 118.239301] [ 118.240752] Memory state around the buggy address: [ 118.245522] ffff20000a63cd00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 118.252695] ffff20000a63cd80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 118.259850] >ffff20000a63ce00: 00 00 00 00 04 fa fa fa fa fa fa fa 00 00 00 00 [ 118.267000] ^ [ 118.271222] ffff20000a63ce80: 00 fa fa fa fa fa fa fa 00 00 00 00 00 00 00 00 [ 118.278393] ffff20000a63cf00: 00 00 00 00 00 00 00 00 00 00 00 00 04 fa fa fa [ 118.285542] ================================================================== [ 118.292699] Disabling lock debugging due to kernel taint It seems that when looping through the OSD EOTF LUT maps, we use the same max iterator for OETF: 20. This is wrong though, since 20*2 is 40, which means that we'll stop out of bounds on the EOTF maps. But, this whole thing is already confusing enough to read through as-is, so let's just replace all of the hardcoded sizes with OSD_(OETF/EOTF)_LUT_SIZE / 2. Signed-off-by: Lyude Paul Fixes: bbbe775ec5b5 ("drm: Add support for Amlogic Meson Graphic Controller") Cc: Neil Armstrong Cc: Maxime Ripard Cc: Carlo Caione Cc: Kevin Hilman Cc: dri-devel@lists.freedesktop.org Cc: linux-amlogic@lists.infradead.org Cc: linux-arm-kernel@lists.infradead.org Cc: # v4.10+ Acked-by: Neil Armstrong Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20181125012117.31915-1-lyude@redhat.com Signed-off-by: Sean Paul Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/meson/meson_viu.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/meson/meson_viu.c b/drivers/gpu/drm/meson/meson_viu.c index 6bcfa527c1801..26a0857878bfd 100644 --- a/drivers/gpu/drm/meson/meson_viu.c +++ b/drivers/gpu/drm/meson/meson_viu.c @@ -184,18 +184,18 @@ void meson_viu_set_osd_lut(struct meson_drm *priv, enum viu_lut_sel_e lut_sel, if (lut_sel == VIU_LUT_OSD_OETF) { writel(0, priv->io_base + _REG(addr_port)); - for (i = 0; i < 20; i++) + for (i = 0; i < (OSD_OETF_LUT_SIZE / 2); i++) writel(r_map[i * 2] | (r_map[i * 2 + 1] << 16), priv->io_base + _REG(data_port)); writel(r_map[OSD_OETF_LUT_SIZE - 1] | (g_map[0] << 16), priv->io_base + _REG(data_port)); - for (i = 0; i < 20; i++) + for (i = 0; i < (OSD_OETF_LUT_SIZE / 2); i++) writel(g_map[i * 2 + 1] | (g_map[i * 2 + 2] << 16), priv->io_base + _REG(data_port)); - for (i = 0; i < 20; i++) + for (i = 0; i < (OSD_OETF_LUT_SIZE / 2); i++) writel(b_map[i * 2] | (b_map[i * 2 + 1] << 16), priv->io_base + _REG(data_port)); @@ -211,18 +211,18 @@ void meson_viu_set_osd_lut(struct meson_drm *priv, enum viu_lut_sel_e lut_sel, } else if (lut_sel == VIU_LUT_OSD_EOTF) { writel(0, priv->io_base + _REG(addr_port)); - for (i = 0; i < 20; i++) + for (i = 0; i < (OSD_EOTF_LUT_SIZE / 2); i++) writel(r_map[i * 2] | (r_map[i * 2 + 1] << 16), priv->io_base + _REG(data_port)); writel(r_map[OSD_EOTF_LUT_SIZE - 1] | (g_map[0] << 16), priv->io_base + _REG(data_port)); - for (i = 0; i < 20; i++) + for (i = 0; i < (OSD_EOTF_LUT_SIZE / 2); i++) writel(g_map[i * 2 + 1] | (g_map[i * 2 + 2] << 16), priv->io_base + _REG(data_port)); - for (i = 0; i < 20; i++) + for (i = 0; i < (OSD_EOTF_LUT_SIZE / 2); i++) writel(b_map[i * 2] | (b_map[i * 2 + 1] << 16), priv->io_base + _REG(data_port)); -- GitLab From 82c5a8c0debac552750a00b4fc7551c89c7b34b8 Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Fri, 30 Nov 2018 14:09:25 -0800 Subject: [PATCH 1991/2269] userfaultfd: use ENOENT instead of EFAULT if the atomic copy user fails commit 9e368259ad988356c4c95150fafd1a06af095d98 upstream. Patch series "userfaultfd shmem updates". Jann found two bugs in the userfaultfd shmem MAP_SHARED backend: the lack of the VM_MAYWRITE check and the lack of i_size checks. Then looking into the above we also fixed the MAP_PRIVATE case. Hugh by source review also found a data loss source if UFFDIO_COPY is used on shmem MAP_SHARED PROT_READ mappings (the production usages incidentally run with PROT_READ|PROT_WRITE, so the data loss couldn't happen in those production usages like with QEMU). The whole patchset is marked for stable. We verified QEMU postcopy live migration with guest running on shmem MAP_PRIVATE run as well as before after the fix of shmem MAP_PRIVATE. Regardless if it's shmem or hugetlbfs or MAP_PRIVATE or MAP_SHARED, QEMU unconditionally invokes a punch hole if the guest mapping is filebacked and a MADV_DONTNEED too (needed to get rid of the MAP_PRIVATE COWs and for the anon backend). This patch (of 5): We internally used EFAULT to communicate with the caller, switch to ENOENT, so EFAULT can be used as a non internal retval. Link: http://lkml.kernel.org/r/20181126173452.26955-2-aarcange@redhat.com Fixes: 4c27fe4c4c84 ("userfaultfd: shmem: add shmem_mcopy_atomic_pte for userfaultfd support") Signed-off-by: Andrea Arcangeli Reviewed-by: Mike Rapoport Reviewed-by: Hugh Dickins Cc: Mike Kravetz Cc: Jann Horn Cc: Peter Xu Cc: "Dr. David Alan Gilbert" Cc: Cc: stable@vger.kernel.org Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/hugetlb.c | 2 +- mm/shmem.c | 2 +- mm/userfaultfd.c | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index f46040aed2da1..224cdd953a792 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -4037,7 +4037,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, /* fallback to copy_from_user outside mmap_sem */ if (unlikely(ret)) { - ret = -EFAULT; + ret = -ENOENT; *pagep = page; /* don't free the page */ goto out; diff --git a/mm/shmem.c b/mm/shmem.c index ab7ff0aeae2d3..9f856ecda73b9 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2266,7 +2266,7 @@ static int shmem_mfill_atomic_pte(struct mm_struct *dst_mm, *pagep = page; shmem_inode_unacct_blocks(inode, 1); /* don't free the page */ - return -EFAULT; + return -ENOENT; } } else { /* mfill_zeropage_atomic */ clear_highpage(page); diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index 81192701964d3..c63c0fc5ecfad 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -49,7 +49,7 @@ static int mcopy_atomic_pte(struct mm_struct *dst_mm, /* fallback to copy_from_user outside mmap_sem */ if (unlikely(ret)) { - ret = -EFAULT; + ret = -ENOENT; *pagep = page; /* don't free the page */ goto out; @@ -275,7 +275,7 @@ retry: cond_resched(); - if (unlikely(err == -EFAULT)) { + if (unlikely(err == -ENOENT)) { up_read(&dst_mm->mmap_sem); BUG_ON(!page); @@ -521,7 +521,7 @@ retry: src_addr, &page, zeropage); cond_resched(); - if (unlikely(err == -EFAULT)) { + if (unlikely(err == -ENOENT)) { void *page_kaddr; up_read(&dst_mm->mmap_sem); -- GitLab From 683b47330c498a7bb90b1e863f63c9b5035dbba9 Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Fri, 30 Nov 2018 14:09:28 -0800 Subject: [PATCH 1992/2269] userfaultfd: shmem: allocate anonymous memory for MAP_PRIVATE shmem commit 5b51072e97d587186c2f5390c8c9c1fb7e179505 upstream. Userfaultfd did not create private memory when UFFDIO_COPY was invoked on a MAP_PRIVATE shmem mapping. Instead it wrote to the shmem file, even when that had not been opened for writing. Though, fortunately, that could only happen where there was a hole in the file. Fix the shmem-backed implementation of UFFDIO_COPY to create private memory for MAP_PRIVATE mappings. The hugetlbfs-backed implementation was already correct. This change is visible to userland, if userfaultfd has been used in unintended ways: so it introduces a small risk of incompatibility, but is necessary in order to respect file permissions. An app that uses UFFDIO_COPY for anything like postcopy live migration won't notice the difference, and in fact it'll run faster because there will be no copy-on-write and memory waste in the tmpfs pagecache anymore. Userfaults on MAP_PRIVATE shmem keep triggering only on file holes like before. The real zeropage can also be built on a MAP_PRIVATE shmem mapping through UFFDIO_ZEROPAGE and that's safe because the zeropage pte is never dirty, in turn even an mprotect upgrading the vma permission from PROT_READ to PROT_READ|PROT_WRITE won't make the zeropage pte writable. Link: http://lkml.kernel.org/r/20181126173452.26955-3-aarcange@redhat.com Fixes: 4c27fe4c4c84 ("userfaultfd: shmem: add shmem_mcopy_atomic_pte for userfaultfd support") Signed-off-by: Andrea Arcangeli Reported-by: Mike Rapoport Reviewed-by: Hugh Dickins Cc: Cc: "Dr. David Alan Gilbert" Cc: Jann Horn Cc: Mike Kravetz Cc: Peter Xu Cc: stable@vger.kernel.org Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/userfaultfd.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index c63c0fc5ecfad..cb82e50becf7a 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -381,7 +381,17 @@ static __always_inline ssize_t mfill_atomic_pte(struct mm_struct *dst_mm, { ssize_t err; - if (vma_is_anonymous(dst_vma)) { + /* + * The normal page fault path for a shmem will invoke the + * fault, fill the hole in the file and COW it right away. The + * result generates plain anonymous memory. So when we are + * asked to fill an hole in a MAP_PRIVATE shmem mapping, we'll + * generate anonymous memory directly without actually filling + * the hole. For the MAP_PRIVATE case the robustness check + * only happens in the pagetable (to verify it's still none) + * and not in the radix tree. + */ + if (!(dst_vma->vm_flags & VM_SHARED)) { if (!zeropage) err = mcopy_atomic_pte(dst_mm, dst_pmd, dst_vma, dst_addr, src_addr, page); @@ -480,7 +490,8 @@ retry: * dst_vma. */ err = -ENOMEM; - if (vma_is_anonymous(dst_vma) && unlikely(anon_vma_prepare(dst_vma))) + if (!(dst_vma->vm_flags & VM_SHARED) && + unlikely(anon_vma_prepare(dst_vma))) goto out_unlock; while (src_addr < src_start + len) { -- GitLab From af3edb30cddfcc1eace505af10d08826a9522dbe Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Fri, 30 Nov 2018 14:09:37 -0800 Subject: [PATCH 1993/2269] userfaultfd: shmem: add i_size checks commit e2a50c1f64145a04959df2442305d57307e5395a upstream. With MAP_SHARED: recheck the i_size after taking the PT lock, to serialize against truncate with the PT lock. Delete the page from the pagecache if the i_size_read check fails. With MAP_PRIVATE: check the i_size after the PT lock before mapping anonymous memory or zeropages into the MAP_PRIVATE shmem mapping. A mostly irrelevant cleanup: like we do the delete_from_page_cache() pagecache removal after dropping the PT lock, the PT lock is a spinlock so drop it before the sleepable page lock. Link: http://lkml.kernel.org/r/20181126173452.26955-5-aarcange@redhat.com Fixes: 4c27fe4c4c84 ("userfaultfd: shmem: add shmem_mcopy_atomic_pte for userfaultfd support") Signed-off-by: Andrea Arcangeli Reviewed-by: Mike Rapoport Reviewed-by: Hugh Dickins Reported-by: Jann Horn Cc: Cc: "Dr. David Alan Gilbert" Cc: Mike Kravetz Cc: Peter Xu Cc: stable@vger.kernel.org Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/shmem.c | 18 ++++++++++++++++-- mm/userfaultfd.c | 26 ++++++++++++++++++++++++-- 2 files changed, 40 insertions(+), 4 deletions(-) diff --git a/mm/shmem.c b/mm/shmem.c index 9f856ecda73b9..581e7baf04782 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2244,6 +2244,7 @@ static int shmem_mfill_atomic_pte(struct mm_struct *dst_mm, struct page *page; pte_t _dst_pte, *dst_pte; int ret; + pgoff_t offset, max_off; ret = -ENOMEM; if (!shmem_inode_acct_block(inode, 1)) @@ -2281,6 +2282,12 @@ static int shmem_mfill_atomic_pte(struct mm_struct *dst_mm, __SetPageSwapBacked(page); __SetPageUptodate(page); + ret = -EFAULT; + offset = linear_page_index(dst_vma, dst_addr); + max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); + if (unlikely(offset >= max_off)) + goto out_release; + ret = mem_cgroup_try_charge(page, dst_mm, gfp, &memcg, false); if (ret) goto out_release; @@ -2299,8 +2306,14 @@ static int shmem_mfill_atomic_pte(struct mm_struct *dst_mm, if (dst_vma->vm_flags & VM_WRITE) _dst_pte = pte_mkwrite(pte_mkdirty(_dst_pte)); - ret = -EEXIST; dst_pte = pte_offset_map_lock(dst_mm, dst_pmd, dst_addr, &ptl); + + ret = -EFAULT; + max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); + if (unlikely(offset >= max_off)) + goto out_release_uncharge_unlock; + + ret = -EEXIST; if (!pte_none(*dst_pte)) goto out_release_uncharge_unlock; @@ -2318,13 +2331,14 @@ static int shmem_mfill_atomic_pte(struct mm_struct *dst_mm, /* No need to invalidate - it was non-present before */ update_mmu_cache(dst_vma, dst_addr, dst_pte); - unlock_page(page); pte_unmap_unlock(dst_pte, ptl); + unlock_page(page); ret = 0; out: return ret; out_release_uncharge_unlock: pte_unmap_unlock(dst_pte, ptl); + delete_from_page_cache(page); out_release_uncharge: mem_cgroup_cancel_charge(page, memcg, false); out_release: diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index cb82e50becf7a..8da1cd8cea890 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -34,6 +34,8 @@ static int mcopy_atomic_pte(struct mm_struct *dst_mm, void *page_kaddr; int ret; struct page *page; + pgoff_t offset, max_off; + struct inode *inode; if (!*pagep) { ret = -ENOMEM; @@ -74,8 +76,17 @@ static int mcopy_atomic_pte(struct mm_struct *dst_mm, if (dst_vma->vm_flags & VM_WRITE) _dst_pte = pte_mkwrite(pte_mkdirty(_dst_pte)); - ret = -EEXIST; dst_pte = pte_offset_map_lock(dst_mm, dst_pmd, dst_addr, &ptl); + if (dst_vma->vm_file) { + /* the shmem MAP_PRIVATE case requires checking the i_size */ + inode = dst_vma->vm_file->f_inode; + offset = linear_page_index(dst_vma, dst_addr); + max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); + ret = -EFAULT; + if (unlikely(offset >= max_off)) + goto out_release_uncharge_unlock; + } + ret = -EEXIST; if (!pte_none(*dst_pte)) goto out_release_uncharge_unlock; @@ -109,11 +120,22 @@ static int mfill_zeropage_pte(struct mm_struct *dst_mm, pte_t _dst_pte, *dst_pte; spinlock_t *ptl; int ret; + pgoff_t offset, max_off; + struct inode *inode; _dst_pte = pte_mkspecial(pfn_pte(my_zero_pfn(dst_addr), dst_vma->vm_page_prot)); - ret = -EEXIST; dst_pte = pte_offset_map_lock(dst_mm, dst_pmd, dst_addr, &ptl); + if (dst_vma->vm_file) { + /* the shmem MAP_PRIVATE case requires checking the i_size */ + inode = dst_vma->vm_file->f_inode; + offset = linear_page_index(dst_vma, dst_addr); + max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); + ret = -EFAULT; + if (unlikely(offset >= max_off)) + goto out_unlock; + } + ret = -EEXIST; if (!pte_none(*dst_pte)) goto out_unlock; set_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte); -- GitLab From 46466e23bc1bc262c28a020df982233c309ed958 Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Fri, 30 Nov 2018 14:09:43 -0800 Subject: [PATCH 1994/2269] userfaultfd: shmem: UFFDIO_COPY: set the page dirty if VM_WRITE is not set commit dcf7fe9d89763a28e0f43975b422ff141fe79e43 upstream. Set the page dirty if VM_WRITE is not set because in such case the pte won't be marked dirty and the page would be reclaimed without writepage (i.e. swapout in the shmem case). This was found by source review. Most apps (certainly including QEMU) only use UFFDIO_COPY on PROT_READ|PROT_WRITE mappings or the app can't modify the memory in the first place. This is for correctness and it could help the non cooperative use case to avoid unexpected data loss. Link: http://lkml.kernel.org/r/20181126173452.26955-6-aarcange@redhat.com Reviewed-by: Hugh Dickins Cc: stable@vger.kernel.org Fixes: 4c27fe4c4c84 ("userfaultfd: shmem: add shmem_mcopy_atomic_pte for userfaultfd support") Reported-by: Hugh Dickins Signed-off-by: Andrea Arcangeli Cc: "Dr. David Alan Gilbert" Cc: Jann Horn Cc: Mike Kravetz Cc: Mike Rapoport Cc: Peter Xu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/shmem.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/mm/shmem.c b/mm/shmem.c index 581e7baf04782..6c10f1d922517 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2305,6 +2305,16 @@ static int shmem_mfill_atomic_pte(struct mm_struct *dst_mm, _dst_pte = mk_pte(page, dst_vma->vm_page_prot); if (dst_vma->vm_flags & VM_WRITE) _dst_pte = pte_mkwrite(pte_mkdirty(_dst_pte)); + else { + /* + * We don't set the pte dirty if the vma has no + * VM_WRITE permission, so mark the page dirty or it + * could be freed from under us. We could do it + * unconditionally before unlock_page(), but doing it + * only if VM_WRITE is not set is faster. + */ + set_page_dirty(page); + } dst_pte = pte_offset_map_lock(dst_mm, dst_pmd, dst_addr, &ptl); @@ -2338,6 +2348,7 @@ out: return ret; out_release_uncharge_unlock: pte_unmap_unlock(dst_pte, ptl); + ClearPageDirty(page); delete_from_page_cache(page); out_release_uncharge: mem_cgroup_cancel_charge(page, memcg, false); -- GitLab From 6ea54af90988bf869e57265723c33af623b4abec Mon Sep 17 00:00:00 2001 From: Martin Wilck Date: Mon, 27 Nov 2017 23:47:35 +0100 Subject: [PATCH 1995/2269] scsi: scsi_devinfo: cleanly zero-pad devinfo strings commit 81df022b688d43d2a3667518b2f755d384397910 upstream. Cleanly fill memory for "vendor" and "model" with 0-bytes for the "compatible" case rather than adding only a single 0 byte. This simplifies the devinfo code a a bit, and avoids mistakes in other places of the code (not in current upstream, but we had one such mistake in the SUSE kernel). [mkp: applied by hand and added braces] Signed-off-by: Martin Wilck Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/scsi_devinfo.c | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/drivers/scsi/scsi_devinfo.c b/drivers/scsi/scsi_devinfo.c index ea947a7c25967..6b594bc7d94a0 100644 --- a/drivers/scsi/scsi_devinfo.c +++ b/drivers/scsi/scsi_devinfo.c @@ -34,7 +34,6 @@ struct scsi_dev_info_list_table { }; -static const char spaces[] = " "; /* 16 of them */ static unsigned scsi_default_dev_flags; static LIST_HEAD(scsi_dev_info_list); static char scsi_dev_flags[256]; @@ -296,20 +295,13 @@ static void scsi_strcpy_devinfo(char *name, char *to, size_t to_length, size_t from_length; from_length = strlen(from); - strncpy(to, from, min(to_length, from_length)); - if (from_length < to_length) { - if (compatible) { - /* - * NUL terminate the string if it is short. - */ - to[from_length] = '\0'; - } else { - /* - * space pad the string if it is short. - */ - strncpy(&to[from_length], spaces, - to_length - from_length); - } + /* this zero-pads the destination */ + strncpy(to, from, to_length); + if (from_length < to_length && !compatible) { + /* + * space pad the string if it is short. + */ + memset(&to[from_length], ' ', to_length - from_length); } if (from_length > to_length) printk(KERN_WARNING "%s: %s string '%s' is too long\n", -- GitLab From 705a2810a314ccf06fe6190ea555e586975f9b28 Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Fri, 30 Nov 2018 14:09:32 -0800 Subject: [PATCH 1996/2269] userfaultfd: shmem/hugetlbfs: only allow to register VM_MAYWRITE vmas commit 29ec90660d68bbdd69507c1c8b4e33aa299278b1 upstream. After the VMA to register the uffd onto is found, check that it has VM_MAYWRITE set before allowing registration. This way we inherit all common code checks before allowing to fill file holes in shmem and hugetlbfs with UFFDIO_COPY. The userfaultfd memory model is not applicable for readonly files unless it's a MAP_PRIVATE. Link: http://lkml.kernel.org/r/20181126173452.26955-4-aarcange@redhat.com Fixes: ff62a3421044 ("hugetlb: implement memfd sealing") Signed-off-by: Andrea Arcangeli Reviewed-by: Mike Rapoport Reviewed-by: Hugh Dickins Reported-by: Jann Horn Fixes: 4c27fe4c4c84 ("userfaultfd: shmem: add shmem_mcopy_atomic_pte for userfaultfd support") Cc: Cc: "Dr. David Alan Gilbert" Cc: Mike Kravetz Cc: Peter Xu Cc: stable@vger.kernel.org Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/userfaultfd.c | 15 +++++++++++++++ mm/userfaultfd.c | 15 ++++++--------- 2 files changed, 21 insertions(+), 9 deletions(-) diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 3eda623e4cb4d..f92e1f2fc8460 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -1362,6 +1362,19 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx, ret = -EINVAL; if (!vma_can_userfault(cur)) goto out_unlock; + + /* + * UFFDIO_COPY will fill file holes even without + * PROT_WRITE. This check enforces that if this is a + * MAP_SHARED, the process has write permission to the backing + * file. If VM_MAYWRITE is set it also enforces that on a + * MAP_SHARED vma: there is no F_WRITE_SEAL and no further + * F_WRITE_SEAL can be taken until the vma is destroyed. + */ + ret = -EPERM; + if (unlikely(!(cur->vm_flags & VM_MAYWRITE))) + goto out_unlock; + /* * If this vma contains ending address, and huge pages * check alignment. @@ -1407,6 +1420,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx, BUG_ON(!vma_can_userfault(vma)); BUG_ON(vma->vm_userfaultfd_ctx.ctx && vma->vm_userfaultfd_ctx.ctx != ctx); + WARN_ON(!(vma->vm_flags & VM_MAYWRITE)); /* * Nothing to do: this vma is already registered into this @@ -1553,6 +1567,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx, cond_resched(); BUG_ON(!vma_can_userfault(vma)); + WARN_ON(!(vma->vm_flags & VM_MAYWRITE)); /* * Nothing to do: this vma is already registered into this diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index 8da1cd8cea890..5d70fdbd8bc01 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -228,8 +228,9 @@ retry: if (!dst_vma || !is_vm_hugetlb_page(dst_vma)) goto out_unlock; /* - * Only allow __mcopy_atomic_hugetlb on userfaultfd - * registered ranges. + * Check the vma is registered in uffd, this is + * required to enforce the VM_MAYWRITE check done at + * uffd registration time. */ if (!dst_vma->vm_userfaultfd_ctx.ctx) goto out_unlock; @@ -472,13 +473,9 @@ retry: if (!dst_vma) goto out_unlock; /* - * Be strict and only allow __mcopy_atomic on userfaultfd - * registered ranges to prevent userland errors going - * unnoticed. As far as the VM consistency is concerned, it - * would be perfectly safe to remove this check, but there's - * no useful usage for __mcopy_atomic ouside of userfaultfd - * registered ranges. This is after all why these are ioctls - * belonging to the userfaultfd and not syscalls. + * Check the vma is registered in uffd, this is required to + * enforce the VM_MAYWRITE check done at uffd registration + * time. */ if (!dst_vma->vm_userfaultfd_ctx.ctx) goto out_unlock; -- GitLab From 6bf8c9fc962e5586397c9a3bbed9b806b1cdab55 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 26 Jul 2018 14:58:03 +0200 Subject: [PATCH 1997/2269] ALSA: trident: Suppress gcc string warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit d6b340d7cb33c816ef4abe8143764ec5ab14a5cc upstream. The meddlesome gcc warns about the possible shortname string in trident driver code: sound/pci/trident/trident.c: In function ‘snd_trident_probe’: sound/pci/trident/trident.c:126:2: warning: ‘strcat’ accessing 17 or more bytes at offsets 36 and 20 may overlap 1 byte at offset 36 [-Wrestrict] strcat(card->shortname, card->driver); It happens since gcc calculates the possible string size from card->driver, but this can't be true since we did set the string just before that, and they are much shorter. For shutting it up, use the exactly same string set to card->driver for strcat() to card->shortname, too. Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/trident/trident.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pci/trident/trident.c b/sound/pci/trident/trident.c index cedf13b64803a..2f18b1cdc2cd4 100644 --- a/sound/pci/trident/trident.c +++ b/sound/pci/trident/trident.c @@ -123,7 +123,7 @@ static int snd_trident_probe(struct pci_dev *pci, } else { strcpy(card->shortname, "Trident "); } - strcat(card->shortname, card->driver); + strcat(card->shortname, str); sprintf(card->longname, "%s PCI Audio at 0x%lx, irq %d", card->shortname, trident->port, trident->irq); -- GitLab From a293ef5e92b523f899d5d58bd9bfc26f2ea1ee3e Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Mon, 10 Sep 2018 16:20:14 -0700 Subject: [PATCH 1998/2269] kgdboc: Fix restrict error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 2dd453168643d9475028cd867c57e65956a0f7f9 upstream. There's an error when compiled with restrict: drivers/tty/serial/kgdboc.c: In function ‘configure_kgdboc’: drivers/tty/serial/kgdboc.c:137:2: error: ‘strcpy’ source argument is the same as destination [-Werror=restrict] strcpy(config, opt); ^~~~~~~~~~~~~~~~~~~ As the error implies, this is from trying to use config as both source and destination. Drop the call to the function where config is the argument since nothing else happens in the function. Signed-off-by: Laura Abbott Reviewed-by: Daniel Thompson Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/kgdboc.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/tty/serial/kgdboc.c b/drivers/tty/serial/kgdboc.c index 2db68dfe497df..da020eea7bb57 100644 --- a/drivers/tty/serial/kgdboc.c +++ b/drivers/tty/serial/kgdboc.c @@ -162,15 +162,13 @@ static int configure_kgdboc(void) { struct tty_driver *p; int tty_line = 0; - int err; + int err = -ENODEV; char *cptr = config; struct console *cons; - err = kgdboc_option_setup(config); - if (err || !strlen(config) || isspace(config[0])) + if (!strlen(config) || isspace(config[0])) goto noconfig; - err = -ENODEV; kgdboc_io_ops.is_console = 0; kgdb_tty_driver = NULL; -- GitLab From e8693a9829d1135ff6da551023de0c707394733c Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Wed, 19 Sep 2018 18:59:01 -0700 Subject: [PATCH 1999/2269] kgdboc: Fix warning with module build commit 1cd25cbb2fedbc777f3a8c3cb1ba69b645aeaa64 upstream. After 2dd453168643 ("kgdboc: Fix restrict error"), kgdboc_option_setup is now only used when built in, resulting in a warning when compiled as a module: drivers/tty/serial/kgdboc.c:134:12: warning: 'kgdboc_option_setup' defined but not used [-Wunused-function] static int kgdboc_option_setup(char *opt) ^~~~~~~~~~~~~~~~~~~ Move the function under the appropriate ifdef for builtin only. Fixes: 2dd453168643 ("kgdboc: Fix restrict error") Reported-by: Stephen Rothwell Signed-off-by: Laura Abbott Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/kgdboc.c | 37 +++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/drivers/tty/serial/kgdboc.c b/drivers/tty/serial/kgdboc.c index da020eea7bb57..c448225ef5ca2 100644 --- a/drivers/tty/serial/kgdboc.c +++ b/drivers/tty/serial/kgdboc.c @@ -131,24 +131,6 @@ static void kgdboc_unregister_kbd(void) #define kgdboc_restore_input() #endif /* ! CONFIG_KDB_KEYBOARD */ -static int kgdboc_option_setup(char *opt) -{ - if (!opt) { - pr_err("kgdboc: config string not provided\n"); - return -EINVAL; - } - - if (strlen(opt) >= MAX_CONFIG_LEN) { - printk(KERN_ERR "kgdboc: config string too long\n"); - return -ENOSPC; - } - strcpy(config, opt); - - return 0; -} - -__setup("kgdboc=", kgdboc_option_setup); - static void cleanup_kgdboc(void) { if (kgdb_unregister_nmi_console()) @@ -316,6 +298,25 @@ static struct kgdb_io kgdboc_io_ops = { }; #ifdef CONFIG_KGDB_SERIAL_CONSOLE +static int kgdboc_option_setup(char *opt) +{ + if (!opt) { + pr_err("config string not provided\n"); + return -EINVAL; + } + + if (strlen(opt) >= MAX_CONFIG_LEN) { + pr_err("config string too long\n"); + return -ENOSPC; + } + strcpy(config, opt); + + return 0; +} + +__setup("kgdboc=", kgdboc_option_setup); + + /* This is only available if kgdboc is a built in for early debugging */ static int __init kgdboc_early_init(char *opt) { -- GitLab From b002e9dc214f0f917ce1b4df753c1da0b8a9ac84 Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Mon, 12 Nov 2018 12:23:14 +0000 Subject: [PATCH 2000/2269] svm: Add mutex_lock to protect apic_access_page_done on AMD systems commit 30510387a5e45bfcf8190e03ec7aa15b295828e2 upstream. There is a race condition when accessing kvm->arch.apic_access_page_done. Due to it, x86_set_memory_region will fail when creating the second vcpu for a svm guest. Add a mutex_lock to serialize the accesses to apic_access_page_done. This lock is also used by vmx for the same purpose. Signed-off-by: Wei Wang Signed-off-by: Amadeusz Juskowiak Signed-off-by: Julian Stecklina Signed-off-by: Suravee Suthikulpanit Reviewed-by: Joerg Roedel Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/svm.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 17f08db345479..4dc79d1398100 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1399,20 +1399,23 @@ static u64 *avic_get_physical_id_entry(struct kvm_vcpu *vcpu, static int avic_init_access_page(struct kvm_vcpu *vcpu) { struct kvm *kvm = vcpu->kvm; - int ret; + int ret = 0; + mutex_lock(&kvm->slots_lock); if (kvm->arch.apic_access_page_done) - return 0; + goto out; - ret = x86_set_memory_region(kvm, - APIC_ACCESS_PAGE_PRIVATE_MEMSLOT, - APIC_DEFAULT_PHYS_BASE, - PAGE_SIZE); + ret = __x86_set_memory_region(kvm, + APIC_ACCESS_PAGE_PRIVATE_MEMSLOT, + APIC_DEFAULT_PHYS_BASE, + PAGE_SIZE); if (ret) - return ret; + goto out; kvm->arch.apic_access_page_done = true; - return 0; +out: + mutex_unlock(&kvm->slots_lock); + return ret; } static int avic_init_backing_page(struct kvm_vcpu *vcpu) -- GitLab From 84ee7f89237f096a6fd2906bb7d909d3ba8a0432 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 27 Aug 2018 10:21:47 +0200 Subject: [PATCH 2001/2269] drm/msm: fix OF child-node lookup commit f9a7082327e26f54067a49cac2316d31e0cc8ba7 upstream. Use the new of_get_compatible_child() helper to lookup the legacy pwrlevels child node instead of using of_find_compatible_node(), which searches the entire tree from a given start node and thus can return an unrelated (i.e. non-child) node. This also addresses a potential use-after-free (e.g. after probe deferral) as the tree-wide helper drops a reference to its first argument (i.e. the probed device's node). While at it, also fix the related child-node reference leak. Fixes: e2af8b6b0ca1 ("drm/msm: gpu: Use OPP tables if we can") Cc: stable # 4.12 Cc: Jordan Crouse Cc: Rob Clark Cc: David Airlie Signed-off-by: Johan Hovold Signed-off-by: Rob Herring [ johan: backport to 4.14 ] Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/msm/adreno/adreno_device.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/msm/adreno/adreno_device.c b/drivers/gpu/drm/msm/adreno/adreno_device.c index c75c4df4bc395..2c379774d3f2f 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_device.c +++ b/drivers/gpu/drm/msm/adreno/adreno_device.c @@ -223,8 +223,7 @@ static int adreno_get_legacy_pwrlevels(struct device *dev) struct device_node *child, *node; int ret; - node = of_find_compatible_node(dev->of_node, NULL, - "qcom,gpu-pwrlevels"); + node = of_get_compatible_child(dev->of_node, "qcom,gpu-pwrlevels"); if (!node) { dev_err(dev, "Could not find the GPU powerlevels\n"); return -ENXIO; @@ -245,6 +244,8 @@ static int adreno_get_legacy_pwrlevels(struct device *dev) dev_pm_opp_add(dev, val, 0); } + of_node_put(node); + return 0; } -- GitLab From cf980cebed0313ef417ffa845fd92b7f00184e5e Mon Sep 17 00:00:00 2001 From: Cameron Gutman Date: Thu, 29 Nov 2018 10:09:33 -0800 Subject: [PATCH 2002/2269] Input: xpad - quirk all PDP Xbox One gamepads commit a6754fae1e66e9a40fed406290d7ca3f2b4d227c upstream. Since we continue to find tons of new variants [0,1,2,3,4,5,6] that need the PDP quirk, let's just quirk all devices from PDP. [0]: https://github.com/paroj/xpad/pull/104 [1]: https://github.com/paroj/xpad/pull/105 [2]: https://github.com/paroj/xpad/pull/108 [3]: https://github.com/paroj/xpad/pull/109 [4]: https://github.com/paroj/xpad/pull/112 [5]: https://github.com/paroj/xpad/pull/115 [6]: https://github.com/paroj/xpad/pull/116 Fixes: e5c9c6a885fa ("Input: xpad - add support for PDP Xbox One controllers") Cc: stable@vger.kernel.org Signed-off-by: Cameron Gutman Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/joystick/xpad.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index 2e52015634f98..f55dcdf99bc59 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -483,18 +483,18 @@ static const u8 xboxone_hori_init[] = { }; /* - * This packet is required for some of the PDP pads to start + * This packet is required for most (all?) of the PDP pads to start * sending input reports. These pads include: (0x0e6f:0x02ab), - * (0x0e6f:0x02a4). + * (0x0e6f:0x02a4), (0x0e6f:0x02a6). */ static const u8 xboxone_pdp_init1[] = { 0x0a, 0x20, 0x00, 0x03, 0x00, 0x01, 0x14 }; /* - * This packet is required for some of the PDP pads to start + * This packet is required for most (all?) of the PDP pads to start * sending input reports. These pads include: (0x0e6f:0x02ab), - * (0x0e6f:0x02a4). + * (0x0e6f:0x02a4), (0x0e6f:0x02a6). */ static const u8 xboxone_pdp_init2[] = { 0x06, 0x20, 0x00, 0x02, 0x01, 0x00 @@ -530,12 +530,8 @@ static const struct xboxone_init_packet xboxone_init_packets[] = { XBOXONE_INIT_PKT(0x0e6f, 0x0165, xboxone_hori_init), XBOXONE_INIT_PKT(0x0f0d, 0x0067, xboxone_hori_init), XBOXONE_INIT_PKT(0x0000, 0x0000, xboxone_fw2015_init), - XBOXONE_INIT_PKT(0x0e6f, 0x02ab, xboxone_pdp_init1), - XBOXONE_INIT_PKT(0x0e6f, 0x02ab, xboxone_pdp_init2), - XBOXONE_INIT_PKT(0x0e6f, 0x02a4, xboxone_pdp_init1), - XBOXONE_INIT_PKT(0x0e6f, 0x02a4, xboxone_pdp_init2), - XBOXONE_INIT_PKT(0x0e6f, 0x02a6, xboxone_pdp_init1), - XBOXONE_INIT_PKT(0x0e6f, 0x02a6, xboxone_pdp_init2), + XBOXONE_INIT_PKT(0x0e6f, 0x0000, xboxone_pdp_init1), + XBOXONE_INIT_PKT(0x0e6f, 0x0000, xboxone_pdp_init2), XBOXONE_INIT_PKT(0x24c6, 0x541a, xboxone_rumblebegin_init), XBOXONE_INIT_PKT(0x24c6, 0x542a, xboxone_rumblebegin_init), XBOXONE_INIT_PKT(0x24c6, 0x543a, xboxone_rumblebegin_init), -- GitLab From 6dc574d0cac6e71c404ea031a9b59b9e817e05b2 Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Sat, 24 Nov 2018 23:28:10 -0800 Subject: [PATCH 2003/2269] Input: synaptics - add PNP ID for ThinkPad P50 to SMBus commit 9df39bedbf292680655c6a947c77d6562c693d4a upstream. Noticed the other day the trackpoint felt different on my P50, then realized it was because rmi4 wasn't loading for this machine automatically. Suspend/resume, hibernate, and everything else seem to work perfectly fine on here. Signed-off-by: Lyude Paul Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/mouse/synaptics.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c index 6f36e2d01e2e9..65c9095eb5177 100644 --- a/drivers/input/mouse/synaptics.c +++ b/drivers/input/mouse/synaptics.c @@ -170,6 +170,7 @@ static const char * const smbus_pnp_ids[] = { "LEN0048", /* X1 Carbon 3 */ "LEN0046", /* X250 */ "LEN004a", /* W541 */ + "LEN005b", /* P50 */ "LEN0071", /* T480 */ "LEN0072", /* X1 Carbon Gen 5 (2017) - Elan/ALPS trackpoint */ "LEN0073", /* X1 Carbon G5 (Elantech) */ -- GitLab From 58a3a4e82dea339ef011ff2510c11e21bc53e418 Mon Sep 17 00:00:00 2001 From: Christian Hoff Date: Mon, 12 Nov 2018 11:11:29 -0800 Subject: [PATCH 2004/2269] Input: matrix_keypad - check for errors from of_get_named_gpio() commit d55bda1b3e7c5a87f10da54fdda866a9a9cef30b upstream. "of_get_named_gpio()" returns a negative error value if it fails and drivers should check for this. This missing check was now added to the matrix_keypad driver. In my case "of_get_named_gpio()" returned -EPROBE_DEFER because the referenced GPIOs belong to an I/O expander, which was not yet probed at the point in time when the matrix_keypad driver was loading. Because the driver did not check for errors from the "of_get_named_gpio()" routine, it was assuming that "-EPROBE_DEFER" is actually a GPIO number and continued as usual, which led to further errors like this later on: WARNING: CPU: 3 PID: 167 at drivers/gpio/gpiolib.c:114 gpio_to_desc+0xc8/0xd0 invalid GPIO -517 Note that the "GPIO number" -517 in the error message above is actually "-EPROBE_DEFER". As part of the patch a misleading error message "no platform data defined" was also removed. This does not lead to information loss because the other error paths in matrix_keypad_parse_dt() already print an error. Signed-off-by: Christian Hoff Suggested-by: Sebastian Reichel Reviewed-by: Sebastian Reichel Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/keyboard/matrix_keypad.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/drivers/input/keyboard/matrix_keypad.c b/drivers/input/keyboard/matrix_keypad.c index 41614c1859182..782dda68d93ae 100644 --- a/drivers/input/keyboard/matrix_keypad.c +++ b/drivers/input/keyboard/matrix_keypad.c @@ -407,7 +407,7 @@ matrix_keypad_parse_dt(struct device *dev) struct matrix_keypad_platform_data *pdata; struct device_node *np = dev->of_node; unsigned int *gpios; - int i, nrow, ncol; + int ret, i, nrow, ncol; if (!np) { dev_err(dev, "device lacks DT data\n"); @@ -452,12 +452,19 @@ matrix_keypad_parse_dt(struct device *dev) return ERR_PTR(-ENOMEM); } - for (i = 0; i < pdata->num_row_gpios; i++) - gpios[i] = of_get_named_gpio(np, "row-gpios", i); + for (i = 0; i < nrow; i++) { + ret = of_get_named_gpio(np, "row-gpios", i); + if (ret < 0) + return ERR_PTR(ret); + gpios[i] = ret; + } - for (i = 0; i < pdata->num_col_gpios; i++) - gpios[pdata->num_row_gpios + i] = - of_get_named_gpio(np, "col-gpios", i); + for (i = 0; i < ncol; i++) { + ret = of_get_named_gpio(np, "col-gpios", i); + if (ret < 0) + return ERR_PTR(ret); + gpios[nrow + i] = ret; + } pdata->row_gpios = gpios; pdata->col_gpios = &gpios[pdata->num_row_gpios]; @@ -484,10 +491,8 @@ static int matrix_keypad_probe(struct platform_device *pdev) pdata = dev_get_platdata(&pdev->dev); if (!pdata) { pdata = matrix_keypad_parse_dt(&pdev->dev); - if (IS_ERR(pdata)) { - dev_err(&pdev->dev, "no platform data defined\n"); + if (IS_ERR(pdata)) return PTR_ERR(pdata); - } } else if (!pdata->keymap_data) { dev_err(&pdev->dev, "no keymap data defined\n"); return -EINVAL; -- GitLab From 4121029c4311c4e6298fc601bcb6b7dd2167b415 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Mon, 12 Nov 2018 11:23:39 -0800 Subject: [PATCH 2005/2269] Input: cros_ec_keyb - fix button/switch capability reports commit ac5722c1643a2fb75224c79b578214956d34f989 upstream. The cros_ec_keyb_bs array lists buttons and switches together, expecting that its users will match the appropriate type and bit fields. But cros_ec_keyb_register_bs() only checks the 'bit' field, which causes misreported input capabilities in some cases. For example, tablets (e.g., Scarlet -- a.k.a. Acer Chromebook Tab 10) were reporting a SW_LID capability, because EC_MKBP_POWER_BUTTON and EC_MKBP_LID_OPEN happen to share the same bit. (This has comedic effect on a tablet, in which a power-management daemon then thinks this "lid" is closed, and so puts the system to sleep as soon as it boots!) To fix this, check both the 'ev_type' and 'bit' fields before reporting the capability. Tested with a lid (Kevin / Samsung Chromebook Plus) and without a lid (Scarlet / Acer Chromebook Tab 10). This error got introduced when porting the feature from the downstream Chromium OS kernel to be upstreamed. Fixes: cdd7950e7aa4 ("input: cros_ec_keyb: Add non-matrix buttons and switches") Cc: Signed-off-by: Brian Norris Reviewed-by: Heiko Stuebner Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/keyboard/cros_ec_keyb.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/input/keyboard/cros_ec_keyb.c b/drivers/input/keyboard/cros_ec_keyb.c index 79eb29550c348..0993b3f12df6a 100644 --- a/drivers/input/keyboard/cros_ec_keyb.c +++ b/drivers/input/keyboard/cros_ec_keyb.c @@ -506,7 +506,8 @@ static int cros_ec_keyb_register_bs(struct cros_ec_keyb *ckdev) for (i = 0; i < ARRAY_SIZE(cros_ec_keyb_bs); i++) { const struct cros_ec_bs_map *map = &cros_ec_keyb_bs[i]; - if (buttons & BIT(map->bit)) + if ((map->ev_type == EV_KEY && (buttons & BIT(map->bit))) || + (map->ev_type == EV_SW && (switches & BIT(map->bit)))) input_set_capability(idev, map->ev_type, map->code); } -- GitLab From 1151c098512f148f409a9e6ec25b486ba3a312bf Mon Sep 17 00:00:00 2001 From: Patrick Gaskin Date: Mon, 12 Nov 2018 11:12:24 -0800 Subject: [PATCH 2006/2269] Input: elan_i2c - add ELAN0620 to the ACPI table commit 3ed64da3b790be7c63601e8ca6341b7dff74a660 upstream. Add ELAN0620 to the ACPI table to support the elan touchpad in the Lenovo IdeaPad 130-15IKB. Signed-off-by: Patrick Gaskin Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/mouse/elan_i2c_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/mouse/elan_i2c_core.c b/drivers/input/mouse/elan_i2c_core.c index 766d30a7b0857..e56223c6df8f4 100644 --- a/drivers/input/mouse/elan_i2c_core.c +++ b/drivers/input/mouse/elan_i2c_core.c @@ -1264,6 +1264,7 @@ static const struct acpi_device_id elan_acpi_id[] = { { "ELAN0618", 0 }, { "ELAN061C", 0 }, { "ELAN061D", 0 }, + { "ELAN0620", 0 }, { "ELAN0622", 0 }, { "ELAN1000", 0 }, { } -- GitLab From ec2c95c56ba7689fd295acd6d43696846b38f4e1 Mon Sep 17 00:00:00 2001 From: Noah Westervelt Date: Thu, 29 Nov 2018 10:10:35 -0800 Subject: [PATCH 2007/2269] Input: elan_i2c - add ACPI ID for Lenovo IdeaPad 330-15ARR commit ad33429cd02565c28404bb16ae7a4c2bdfda6626 upstream. Add ELAN061E to the ACPI table to support Elan touchpad found in Lenovo IdeaPad 330-15ARR. Signed-off-by: Noah Westervelt Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/mouse/elan_i2c_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/mouse/elan_i2c_core.c b/drivers/input/mouse/elan_i2c_core.c index e56223c6df8f4..5e20be31cd734 100644 --- a/drivers/input/mouse/elan_i2c_core.c +++ b/drivers/input/mouse/elan_i2c_core.c @@ -1264,6 +1264,7 @@ static const struct acpi_device_id elan_acpi_id[] = { { "ELAN0618", 0 }, { "ELAN061C", 0 }, { "ELAN061D", 0 }, + { "ELAN061E", 0 }, { "ELAN0620", 0 }, { "ELAN0622", 0 }, { "ELAN1000", 0 }, -- GitLab From 61ffe9b932037aabc00d51bc9095f4a20ea90d56 Mon Sep 17 00:00:00 2001 From: Adam Wong Date: Thu, 29 Nov 2018 10:04:35 -0800 Subject: [PATCH 2008/2269] Input: elan_i2c - add support for ELAN0621 touchpad commit bf87ade0dd7f8cf19dac4d3161d5e86abe0c062b upstream. Added the ability to detect the ELAN0621 touchpad found in some Lenovo laptops. Signed-off-by: Adam Wong Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/mouse/elan_i2c_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/mouse/elan_i2c_core.c b/drivers/input/mouse/elan_i2c_core.c index 5e20be31cd734..368871a398a5d 100644 --- a/drivers/input/mouse/elan_i2c_core.c +++ b/drivers/input/mouse/elan_i2c_core.c @@ -1266,6 +1266,7 @@ static const struct acpi_device_id elan_acpi_id[] = { { "ELAN061D", 0 }, { "ELAN061E", 0 }, { "ELAN0620", 0 }, + { "ELAN0621", 0 }, { "ELAN0622", 0 }, { "ELAN1000", 0 }, { } -- GitLab From 1aadbb4a325b81989c9dd454c44b846d590cda49 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Fri, 23 Nov 2018 09:06:36 +0800 Subject: [PATCH 2009/2269] btrfs: tree-checker: Don't check max block group size as current max chunk size limit is unreliable commit 10950929e994c5ecee149ff0873388d3c98f12b5 upstream. [BUG] A completely valid btrfs will refuse to mount, with error message like: BTRFS critical (device sdb2): corrupt leaf: root=2 block=239681536 slot=172 \ bg_start=12018974720 bg_len=10888413184, invalid block group size, \ have 10888413184 expect (0, 10737418240] This has been reported several times as the 4.19 kernel is now being used. The filesystem refuses to mount, but is otherwise ok and booting 4.18 is a workaround. Btrfs check returns no error, and all kernels used on this fs is later than 2011, which should all have the 10G size limit commit. [CAUSE] For a 12 devices btrfs, we could allocate a chunk larger than 10G due to stripe stripe bump up. __btrfs_alloc_chunk() |- max_stripe_size = 1G |- max_chunk_size = 10G |- data_stripe = 11 |- if (1G * 11 > 10G) { stripe_size = 976128930; stripe_size = round_up(976128930, SZ_16M) = 989855744 However the final stripe_size (989855744) * 11 = 10888413184, which is still larger than 10G. [FIX] For the comprehensive check, we need to do the full check at chunk read time, and rely on bg <-> chunk mapping to do the check. We could just skip the length check for now. Fixes: fce466eab7ac ("btrfs: tree-checker: Verify block_group_item") Cc: stable@vger.kernel.org # v4.19+ Reported-by: Wang Yugui Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/tree-checker.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index f206aec1525d4..ebc882281b395 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -348,13 +348,11 @@ static int check_block_group_item(struct btrfs_fs_info *fs_info, /* * Here we don't really care about alignment since extent allocator can - * handle it. We care more about the size, as if one block group is - * larger than maximum size, it's must be some obvious corruption. + * handle it. We care more about the size. */ - if (key->offset > BTRFS_MAX_DATA_CHUNK_SIZE || key->offset == 0) { + if (key->offset == 0) { block_group_err(fs_info, leaf, slot, - "invalid block group size, have %llu expect (0, %llu]", - key->offset, BTRFS_MAX_DATA_CHUNK_SIZE); + "invalid block group size 0"); return -EUCLEAN; } -- GitLab From 335cbe4df8b0d14cd43f207932b33eb8c756beb2 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Tue, 6 Nov 2018 16:40:20 +0200 Subject: [PATCH 2010/2269] btrfs: Always try all copies when reading extent buffers commit f8397d69daef06d358430d3054662fb597e37c00 upstream. When a metadata read is served the endio routine btree_readpage_end_io_hook is called which eventually runs the tree-checker. If tree-checker fails to validate the read eb then it sets EXTENT_BUFFER_CORRUPT flag. This leads to btree_read_extent_buffer_pages wrongly assuming that all available copies of this extent buffer are wrong and failing prematurely. Fix this modify btree_read_extent_buffer_pages to read all copies of the data. This failure was exhibitted in xfstests btrfs/124 which would spuriously fail its balance operations. The reason was that when balance was run following re-introduction of the missing raid1 disk __btrfs_map_block would map the read request to stripe 0, which corresponded to devid 2 (the disk which is being removed in the test): item 2 key (FIRST_CHUNK_TREE CHUNK_ITEM 3553624064) itemoff 15975 itemsize 112 length 1073741824 owner 2 stripe_len 65536 type DATA|RAID1 io_align 65536 io_width 65536 sector_size 4096 num_stripes 2 sub_stripes 1 stripe 0 devid 2 offset 2156920832 dev_uuid 8466c350-ed0c-4c3b-b17d-6379b445d5c8 stripe 1 devid 1 offset 3553624064 dev_uuid 1265d8db-5596-477e-af03-df08eb38d2ca This caused read requests for a checksum item that to be routed to the stale disk which triggered the aforementioned logic involving EXTENT_BUFFER_CORRUPT flag. This then triggered cascading failures of the balance operation. Fixes: a826d6dcb32d ("Btrfs: check items for correctness as we search") CC: stable@vger.kernel.org # 4.4+ Suggested-by: Qu Wenruo Reviewed-by: Qu Wenruo Signed-off-by: Nikolay Borisov Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/disk-io.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index e42673477c25e..858d5812eb8f3 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -451,9 +451,9 @@ static int btree_read_extent_buffer_pages(struct btrfs_fs_info *fs_info, int mirror_num = 0; int failed_mirror = 0; - clear_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags); io_tree = &BTRFS_I(fs_info->btree_inode)->io_tree; while (1) { + clear_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags); ret = read_extent_buffer_pages(io_tree, eb, WAIT_COMPLETE, btree_get_extent, mirror_num); if (!ret) { @@ -464,14 +464,6 @@ static int btree_read_extent_buffer_pages(struct btrfs_fs_info *fs_info, ret = -EIO; } - /* - * This buffer's crc is fine, but its contents are corrupted, so - * there is no reason to read the other copies, they won't be - * any less wrong. - */ - if (test_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags)) - break; - num_copies = btrfs_num_copies(fs_info, eb->start, eb->len); if (num_copies == 1) -- GitLab From f4069a6cf6faf910a90f469b72e27bc29e4beccd Mon Sep 17 00:00:00 2001 From: Kevin Hilman Date: Fri, 30 Nov 2018 15:51:56 +0300 Subject: [PATCH 2011/2269] ARC: change defconfig defaults to ARCv2 commit b7cc40c32a8bfa6f2581a71747f6a7d491fe43ba upstream. Change the default defconfig (used with 'make defconfig') to the ARCv2 nsim_hs_defconfig, and also switch the default Kconfig ISA selection to ARCv2. This allows several default defconfigs (e.g. make defconfig, make allnoconfig, make tinyconfig) to all work with ARCv2 by default. Note since we change default architecture from ARCompact to ARCv2 it's required to explicitly mention architecture type in ARCompact defconfigs otherwise ARCv2 will be implied and binaries will be generated for ARCv2. Cc: # 4.4.x Signed-off-by: Kevin Hilman Signed-off-by: Alexey Brodkin Signed-off-by: Vineet Gupta Signed-off-by: Greg Kroah-Hartman --- arch/arc/Kconfig | 2 +- arch/arc/Makefile | 2 +- arch/arc/configs/axs101_defconfig | 1 + arch/arc/configs/nps_defconfig | 1 + arch/arc/configs/nsim_700_defconfig | 1 + arch/arc/configs/nsimosci_defconfig | 1 + arch/arc/configs/tb10x_defconfig | 1 + 7 files changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index 8ff0660906803..9d06c9478a0dd 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -109,7 +109,7 @@ endmenu choice prompt "ARC Instruction Set" - default ISA_ARCOMPACT + default ISA_ARCV2 config ISA_ARCOMPACT bool "ARCompact ISA" diff --git a/arch/arc/Makefile b/arch/arc/Makefile index 7c6c977820225..2917f56f0ea43 100644 --- a/arch/arc/Makefile +++ b/arch/arc/Makefile @@ -6,7 +6,7 @@ # published by the Free Software Foundation. # -KBUILD_DEFCONFIG := nsim_700_defconfig +KBUILD_DEFCONFIG := nsim_hs_defconfig cflags-y += -fno-common -pipe -fno-builtin -mmedium-calls -D__linux__ cflags-$(CONFIG_ISA_ARCOMPACT) += -mA7 diff --git a/arch/arc/configs/axs101_defconfig b/arch/arc/configs/axs101_defconfig index ece78630d7112..77df85de11218 100644 --- a/arch/arc/configs/axs101_defconfig +++ b/arch/arc/configs/axs101_defconfig @@ -15,6 +15,7 @@ CONFIG_PERF_EVENTS=y # CONFIG_VM_EVENT_COUNTERS is not set # CONFIG_SLUB_DEBUG is not set # CONFIG_COMPAT_BRK is not set +CONFIG_ISA_ARCOMPACT=y CONFIG_MODULES=y CONFIG_MODULE_FORCE_LOAD=y CONFIG_MODULE_UNLOAD=y diff --git a/arch/arc/configs/nps_defconfig b/arch/arc/configs/nps_defconfig index 7c9c706ae7f66..ba337d3d3a17e 100644 --- a/arch/arc/configs/nps_defconfig +++ b/arch/arc/configs/nps_defconfig @@ -15,6 +15,7 @@ CONFIG_SYSCTL_SYSCALL=y CONFIG_EMBEDDED=y CONFIG_PERF_EVENTS=y # CONFIG_COMPAT_BRK is not set +CONFIG_ISA_ARCOMPACT=y CONFIG_KPROBES=y CONFIG_MODULES=y CONFIG_MODULE_FORCE_LOAD=y diff --git a/arch/arc/configs/nsim_700_defconfig b/arch/arc/configs/nsim_700_defconfig index b1a78222699c6..cdb06417d3d9a 100644 --- a/arch/arc/configs/nsim_700_defconfig +++ b/arch/arc/configs/nsim_700_defconfig @@ -16,6 +16,7 @@ CONFIG_EMBEDDED=y CONFIG_PERF_EVENTS=y # CONFIG_SLUB_DEBUG is not set # CONFIG_COMPAT_BRK is not set +CONFIG_ISA_ARCOMPACT=y CONFIG_KPROBES=y CONFIG_MODULES=y # CONFIG_LBDAF is not set diff --git a/arch/arc/configs/nsimosci_defconfig b/arch/arc/configs/nsimosci_defconfig index 14377b8234f79..8cbcbc1853591 100644 --- a/arch/arc/configs/nsimosci_defconfig +++ b/arch/arc/configs/nsimosci_defconfig @@ -16,6 +16,7 @@ CONFIG_EMBEDDED=y CONFIG_PERF_EVENTS=y # CONFIG_SLUB_DEBUG is not set # CONFIG_COMPAT_BRK is not set +CONFIG_ISA_ARCOMPACT=y CONFIG_KPROBES=y CONFIG_MODULES=y # CONFIG_LBDAF is not set diff --git a/arch/arc/configs/tb10x_defconfig b/arch/arc/configs/tb10x_defconfig index f301825493950..0130e29eeca17 100644 --- a/arch/arc/configs/tb10x_defconfig +++ b/arch/arc/configs/tb10x_defconfig @@ -19,6 +19,7 @@ CONFIG_KALLSYMS_ALL=y # CONFIG_AIO is not set CONFIG_EMBEDDED=y # CONFIG_COMPAT_BRK is not set +CONFIG_ISA_ARCOMPACT=y CONFIG_SLAB=y CONFIG_MODULES=y CONFIG_MODULE_FORCE_LOAD=y -- GitLab From 097c13e1b78d48c1a3a75dbfabc10736863056a7 Mon Sep 17 00:00:00 2001 From: Alexey Brodkin Date: Tue, 20 Nov 2018 13:30:19 +0300 Subject: [PATCH 2012/2269] arc: [devboards] Add support of NFSv3 ACL commit 6b04114f6fae5e84d33404c2970b1949c032546e upstream. By default NFSv3 doesn't support ACL (Access Control Lists) which might be quite convenient to have so that mounted NFS behaves exactly as any other local file-system. In particular missing support of ACL makes umask useless. This among other thigs fixes Glibc's "nptl/tst-umask1". Signed-off-by: Alexey Brodkin Cc: Cupertino Miranda Cc: stable@vger.kernel.org #4.14+ Signed-off-by: Vineet Gupta Signed-off-by: Greg Kroah-Hartman --- arch/arc/configs/axs101_defconfig | 1 + arch/arc/configs/axs103_defconfig | 1 + arch/arc/configs/axs103_smp_defconfig | 1 + arch/arc/configs/hsdk_defconfig | 1 + arch/arc/configs/nps_defconfig | 1 + arch/arc/configs/nsimosci_defconfig | 1 + arch/arc/configs/nsimosci_hs_defconfig | 1 + arch/arc/configs/nsimosci_hs_smp_defconfig | 1 + arch/arc/configs/vdk_hs38_defconfig | 1 + arch/arc/configs/vdk_hs38_smp_defconfig | 1 + 10 files changed, 10 insertions(+) diff --git a/arch/arc/configs/axs101_defconfig b/arch/arc/configs/axs101_defconfig index 77df85de11218..5d5ba2104ba75 100644 --- a/arch/arc/configs/axs101_defconfig +++ b/arch/arc/configs/axs101_defconfig @@ -99,6 +99,7 @@ CONFIG_VFAT_FS=y CONFIG_NTFS_FS=y CONFIG_TMPFS=y CONFIG_NFS_FS=y +CONFIG_NFS_V3_ACL=y CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=y # CONFIG_ENABLE_WARN_DEPRECATED is not set diff --git a/arch/arc/configs/axs103_defconfig b/arch/arc/configs/axs103_defconfig index 240c9251a7d4a..0874db2d48a83 100644 --- a/arch/arc/configs/axs103_defconfig +++ b/arch/arc/configs/axs103_defconfig @@ -97,6 +97,7 @@ CONFIG_VFAT_FS=y CONFIG_NTFS_FS=y CONFIG_TMPFS=y CONFIG_NFS_FS=y +CONFIG_NFS_V3_ACL=y CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=y # CONFIG_ENABLE_WARN_DEPRECATED is not set diff --git a/arch/arc/configs/axs103_smp_defconfig b/arch/arc/configs/axs103_smp_defconfig index af54b96abee04..cf5df0e1cb082 100644 --- a/arch/arc/configs/axs103_smp_defconfig +++ b/arch/arc/configs/axs103_smp_defconfig @@ -100,6 +100,7 @@ CONFIG_VFAT_FS=y CONFIG_NTFS_FS=y CONFIG_TMPFS=y CONFIG_NFS_FS=y +CONFIG_NFS_V3_ACL=y CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=y # CONFIG_ENABLE_WARN_DEPRECATED is not set diff --git a/arch/arc/configs/hsdk_defconfig b/arch/arc/configs/hsdk_defconfig index 762b1fcd93dc1..083560e9e5713 100644 --- a/arch/arc/configs/hsdk_defconfig +++ b/arch/arc/configs/hsdk_defconfig @@ -66,6 +66,7 @@ CONFIG_EXT3_FS=y CONFIG_VFAT_FS=y CONFIG_TMPFS=y CONFIG_NFS_FS=y +CONFIG_NFS_V3_ACL=y CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=y # CONFIG_ENABLE_WARN_DEPRECATED is not set diff --git a/arch/arc/configs/nps_defconfig b/arch/arc/configs/nps_defconfig index ba337d3d3a17e..9121c6ba15d0d 100644 --- a/arch/arc/configs/nps_defconfig +++ b/arch/arc/configs/nps_defconfig @@ -75,6 +75,7 @@ CONFIG_PROC_KCORE=y CONFIG_TMPFS=y # CONFIG_MISC_FILESYSTEMS is not set CONFIG_NFS_FS=y +CONFIG_NFS_V3_ACL=y CONFIG_ROOT_NFS=y CONFIG_DEBUG_INFO=y # CONFIG_ENABLE_WARN_DEPRECATED is not set diff --git a/arch/arc/configs/nsimosci_defconfig b/arch/arc/configs/nsimosci_defconfig index 8cbcbc1853591..c4577bd9196c6 100644 --- a/arch/arc/configs/nsimosci_defconfig +++ b/arch/arc/configs/nsimosci_defconfig @@ -70,5 +70,6 @@ CONFIG_EXT2_FS_XATTR=y CONFIG_TMPFS=y # CONFIG_MISC_FILESYSTEMS is not set CONFIG_NFS_FS=y +CONFIG_NFS_V3_ACL=y # CONFIG_ENABLE_WARN_DEPRECATED is not set # CONFIG_ENABLE_MUST_CHECK is not set diff --git a/arch/arc/configs/nsimosci_hs_defconfig b/arch/arc/configs/nsimosci_hs_defconfig index 7e61c923a3cdd..b20692c82d3cb 100644 --- a/arch/arc/configs/nsimosci_hs_defconfig +++ b/arch/arc/configs/nsimosci_hs_defconfig @@ -68,5 +68,6 @@ CONFIG_EXT2_FS_XATTR=y CONFIG_TMPFS=y # CONFIG_MISC_FILESYSTEMS is not set CONFIG_NFS_FS=y +CONFIG_NFS_V3_ACL=y # CONFIG_ENABLE_WARN_DEPRECATED is not set # CONFIG_ENABLE_MUST_CHECK is not set diff --git a/arch/arc/configs/nsimosci_hs_smp_defconfig b/arch/arc/configs/nsimosci_hs_smp_defconfig index 299fbe8003b28..5ad4949af6d0f 100644 --- a/arch/arc/configs/nsimosci_hs_smp_defconfig +++ b/arch/arc/configs/nsimosci_hs_smp_defconfig @@ -79,6 +79,7 @@ CONFIG_EXT2_FS_XATTR=y CONFIG_TMPFS=y # CONFIG_MISC_FILESYSTEMS is not set CONFIG_NFS_FS=y +CONFIG_NFS_V3_ACL=y # CONFIG_ENABLE_WARN_DEPRECATED is not set # CONFIG_ENABLE_MUST_CHECK is not set CONFIG_FTRACE=y diff --git a/arch/arc/configs/vdk_hs38_defconfig b/arch/arc/configs/vdk_hs38_defconfig index 4fcf4f2503f61..4587c9af5afee 100644 --- a/arch/arc/configs/vdk_hs38_defconfig +++ b/arch/arc/configs/vdk_hs38_defconfig @@ -88,6 +88,7 @@ CONFIG_NTFS_FS=y CONFIG_TMPFS=y CONFIG_JFFS2_FS=y CONFIG_NFS_FS=y +CONFIG_NFS_V3_ACL=y CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=y # CONFIG_ENABLE_WARN_DEPRECATED is not set diff --git a/arch/arc/configs/vdk_hs38_smp_defconfig b/arch/arc/configs/vdk_hs38_smp_defconfig index 7b71464f6c2f2..1855aa995bc98 100644 --- a/arch/arc/configs/vdk_hs38_smp_defconfig +++ b/arch/arc/configs/vdk_hs38_smp_defconfig @@ -92,6 +92,7 @@ CONFIG_NTFS_FS=y CONFIG_TMPFS=y CONFIG_JFFS2_FS=y CONFIG_NFS_FS=y +CONFIG_NFS_V3_ACL=y CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=y # CONFIG_ENABLE_WARN_DEPRECATED is not set -- GitLab From 99bfa7bc16c3b5c4bf06d3885e535060dd5cfeef Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 16 Nov 2018 13:43:17 +0100 Subject: [PATCH 2013/2269] udf: Allow mounting volumes with incorrect identification strings commit b54e41f5efcb4316b2f30b30c2535cc194270373 upstream. Commit c26f6c615788 ("udf: Fix conversion of 'dstring' fields to UTF8") started to be more strict when checking whether converted strings are properly formatted. Sudip reports that there are DVDs where the volume identification string is actually too long - UDF reports: [ 632.309320] UDF-fs: incorrect dstring lengths (32/32) during mount and fails the mount. This is mostly harmless failure as we don't need volume identification (and even less volume set identification) for anything. So just truncate the volume identification string if it is too long and replace it with 'Invalid' if we just cannot convert it for other reasons. This keeps slightly incorrect media still mountable. CC: stable@vger.kernel.org Fixes: c26f6c615788 ("udf: Fix conversion of 'dstring' fields to UTF8") Reported-and-tested-by: Sudip Mukherjee Signed-off-by: Jan Kara Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- fs/udf/super.c | 16 ++++++++++------ fs/udf/unicode.c | 14 +++++++++++--- 2 files changed, 21 insertions(+), 9 deletions(-) diff --git a/fs/udf/super.c b/fs/udf/super.c index 9b0d6562d0a10..242d960df9a17 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -922,16 +922,20 @@ static int udf_load_pvoldesc(struct super_block *sb, sector_t block) } ret = udf_dstrCS0toUTF8(outstr, 31, pvoldesc->volIdent, 32); - if (ret < 0) - goto out_bh; - - strncpy(UDF_SB(sb)->s_volume_ident, outstr, ret); + if (ret < 0) { + strcpy(UDF_SB(sb)->s_volume_ident, "InvalidName"); + pr_warn("incorrect volume identification, setting to " + "'InvalidName'\n"); + } else { + strncpy(UDF_SB(sb)->s_volume_ident, outstr, ret); + } udf_debug("volIdent[] = '%s'\n", UDF_SB(sb)->s_volume_ident); ret = udf_dstrCS0toUTF8(outstr, 127, pvoldesc->volSetIdent, 128); - if (ret < 0) + if (ret < 0) { + ret = 0; goto out_bh; - + } outstr[ret] = 0; udf_debug("volSetIdent[] = '%s'\n", outstr); diff --git a/fs/udf/unicode.c b/fs/udf/unicode.c index 3a3be23689b35..61a1738895b7a 100644 --- a/fs/udf/unicode.c +++ b/fs/udf/unicode.c @@ -341,6 +341,11 @@ try_again: return u_len; } +/* + * Convert CS0 dstring to output charset. Warning: This function may truncate + * input string if it is too long as it is used for informational strings only + * and it is better to truncate the string than to refuse mounting a media. + */ int udf_dstrCS0toUTF8(uint8_t *utf_o, int o_len, const uint8_t *ocu_i, int i_len) { @@ -349,9 +354,12 @@ int udf_dstrCS0toUTF8(uint8_t *utf_o, int o_len, if (i_len > 0) { s_len = ocu_i[i_len - 1]; if (s_len >= i_len) { - pr_err("incorrect dstring lengths (%d/%d)\n", - s_len, i_len); - return -EINVAL; + pr_warn("incorrect dstring lengths (%d/%d)," + " truncating\n", s_len, i_len); + s_len = i_len - 1; + /* 2-byte encoding? Need to round properly... */ + if (ocu_i[0] == 16) + s_len -= (s_len - 1) & 2; } } -- GitLab From b92cee8097d99ec641521975b5fa6eb907ab5361 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sun, 29 Oct 2017 01:50:06 +0900 Subject: [PATCH 2014/2269] reset: make device_reset_optional() really optional commit 1554bbd4ad401b7f0f916c0891874111c10befe5 upstream. Commit bb475230b8e5 ("reset: make optional functions really optional") converted *_get_optional* functions, but device_reset_optional() was left behind. Convert it in the same way. Signed-off-by: Masahiro Yamada Signed-off-by: Philipp Zabel Cc: Dinh Nguyen Signed-off-by: Greg Kroah-Hartman --- drivers/reset/core.c | 9 +++++---- include/linux/reset.h | 28 +++++++++++++--------------- 2 files changed, 18 insertions(+), 19 deletions(-) diff --git a/drivers/reset/core.c b/drivers/reset/core.c index 1d21c6f7d56cb..da4292e9de978 100644 --- a/drivers/reset/core.c +++ b/drivers/reset/core.c @@ -566,17 +566,18 @@ EXPORT_SYMBOL_GPL(__devm_reset_control_get); * device_reset - find reset controller associated with the device * and perform reset * @dev: device to be reset by the controller + * @optional: whether it is optional to reset the device * - * Convenience wrapper for reset_control_get() and reset_control_reset(). + * Convenience wrapper for __reset_control_get() and reset_control_reset(). * This is useful for the common case of devices with single, dedicated reset * lines. */ -int device_reset(struct device *dev) +int __device_reset(struct device *dev, bool optional) { struct reset_control *rstc; int ret; - rstc = reset_control_get(dev, NULL); + rstc = __reset_control_get(dev, NULL, 0, 0, optional); if (IS_ERR(rstc)) return PTR_ERR(rstc); @@ -586,7 +587,7 @@ int device_reset(struct device *dev) return ret; } -EXPORT_SYMBOL_GPL(device_reset); +EXPORT_SYMBOL_GPL(__device_reset); /** * APIs to manage an array of reset controls. diff --git a/include/linux/reset.h b/include/linux/reset.h index 4c7871ddf3c65..b681019fc04cd 100644 --- a/include/linux/reset.h +++ b/include/linux/reset.h @@ -20,22 +20,16 @@ struct reset_control *__reset_control_get(struct device *dev, const char *id, int index, bool shared, bool optional); void reset_control_put(struct reset_control *rstc); +int __device_reset(struct device *dev, bool optional); struct reset_control *__devm_reset_control_get(struct device *dev, const char *id, int index, bool shared, bool optional); -int __must_check device_reset(struct device *dev); - struct reset_control *devm_reset_control_array_get(struct device *dev, bool shared, bool optional); struct reset_control *of_reset_control_array_get(struct device_node *np, bool shared, bool optional); -static inline int device_reset_optional(struct device *dev) -{ - return device_reset(dev); -} - #else static inline int reset_control_reset(struct reset_control *rstc) @@ -62,15 +56,9 @@ static inline void reset_control_put(struct reset_control *rstc) { } -static inline int __must_check device_reset(struct device *dev) +static inline int __device_reset(struct device *dev, bool optional) { - WARN_ON(1); - return -ENOTSUPP; -} - -static inline int device_reset_optional(struct device *dev) -{ - return -ENOTSUPP; + return optional ? 0 : -ENOTSUPP; } static inline struct reset_control *__of_reset_control_get( @@ -109,6 +97,16 @@ of_reset_control_array_get(struct device_node *np, bool shared, bool optional) #endif /* CONFIG_RESET_CONTROLLER */ +static inline int __must_check device_reset(struct device *dev) +{ + return __device_reset(dev, false); +} + +static inline int device_reset_optional(struct device *dev) +{ + return __device_reset(dev, true); +} + /** * reset_control_get_exclusive - Lookup and obtain an exclusive reference * to a reset controller. -- GitLab From d2e29b46a9cf4e733e147442a20dbba376decc26 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sun, 29 Oct 2017 01:50:07 +0900 Subject: [PATCH 2015/2269] reset: remove remaining WARN_ON() in commit bb6c7768385b200063a14d6615cc1246c3d00760 upstream. Commit bb475230b8e5 ("reset: make optional functions really optional") gave a new meaning to _get_optional variants. The differentiation by WARN_ON() is not needed any more. We already have inconsistency about this; (devm_)reset_control_get_exclusive() has WARN_ON() check, but of_reset_control_get_exclusive() does not. Signed-off-by: Masahiro Yamada Signed-off-by: Philipp Zabel Cc: Dinh Nguyen Signed-off-by: Greg Kroah-Hartman --- include/linux/reset.h | 6 ------ 1 file changed, 6 deletions(-) diff --git a/include/linux/reset.h b/include/linux/reset.h index b681019fc04cd..ed6fb02907977 100644 --- a/include/linux/reset.h +++ b/include/linux/reset.h @@ -125,9 +125,6 @@ static inline int device_reset_optional(struct device *dev) static inline struct reset_control * __must_check reset_control_get_exclusive(struct device *dev, const char *id) { -#ifndef CONFIG_RESET_CONTROLLER - WARN_ON(1); -#endif return __reset_control_get(dev, id, 0, false, false); } @@ -273,9 +270,6 @@ static inline struct reset_control * __must_check devm_reset_control_get_exclusive(struct device *dev, const char *id) { -#ifndef CONFIG_RESET_CONTROLLER - WARN_ON(1); -#endif return __devm_reset_control_get(dev, id, 0, false, false); } -- GitLab From 60720df8bf43e8ca2ce1a17936904a52129c8471 Mon Sep 17 00:00:00 2001 From: Pavel Tikhomirov Date: Fri, 30 Nov 2018 14:09:00 -0800 Subject: [PATCH 2016/2269] mm: cleancache: fix corruption on missed inode invalidation commit 6ff38bd40230af35e446239396e5fc8ebd6a5248 upstream. If all pages are deleted from the mapping by memory reclaim and also moved to the cleancache: __delete_from_page_cache (no shadow case) unaccount_page_cache_page cleancache_put_page page_cache_delete mapping->nrpages -= nr (nrpages becomes 0) We don't clean the cleancache for an inode after final file truncation (removal). truncate_inode_pages_final check (nrpages || nrexceptional) is false no truncate_inode_pages no cleancache_invalidate_inode(mapping) These way when reading the new file created with same inode we may get these trash leftover pages from cleancache and see wrong data instead of the contents of the new file. Fix it by always doing truncate_inode_pages which is already ready for nrpages == 0 && nrexceptional == 0 case and just invalidates inode. [akpm@linux-foundation.org: add comment, per Jan] Link: http://lkml.kernel.org/r/20181112095734.17979-1-ptikhomirov@virtuozzo.com Fixes: commit 91b0abe36a7b ("mm + fs: store shadow entries in page cache") Signed-off-by: Pavel Tikhomirov Reviewed-by: Vasily Averin Reviewed-by: Andrey Ryabinin Reviewed-by: Jan Kara Cc: Johannes Weiner Cc: Mel Gorman Cc: Matthew Wilcox Cc: Andi Kleen Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Vasily Averin Signed-off-by: Greg Kroah-Hartman --- mm/truncate.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/mm/truncate.c b/mm/truncate.c index 2330223841fbb..d3a2737cc1885 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -471,9 +471,13 @@ void truncate_inode_pages_final(struct address_space *mapping) */ spin_lock_irq(&mapping->tree_lock); spin_unlock_irq(&mapping->tree_lock); - - truncate_inode_pages(mapping, 0); } + + /* + * Cleancache needs notification even if there are no pages or shadow + * entries. + */ + truncate_inode_pages(mapping, 0); } EXPORT_SYMBOL(truncate_inode_pages_final); -- GitLab From 402519439ccb9410faf6287bf98f99817bd0c354 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Thu, 19 Oct 2017 19:05:44 +0200 Subject: [PATCH 2017/2269] thermal/drivers/hisi: Remove the multiple sensors support commit ff4ec2997df8fe7cc40513dbe5f86d9f88fb6be7 upstream. By essence, the tsensor does not really support multiple sensor at the same time. It allows to set a sensor and use it to get the temperature, another sensor could be switched but with a delay of 3-5ms. It is difficult to read simultaneously several sensors without a big delay. Today, just one sensor is used, it is not necessary to deal with multiple sensors in the code. Remove them and if it is needed in the future add them on top of a code which will be clean up in the meantime. Signed-off-by: Daniel Lezcano Reviewed-by: Leo Yan Acked-by: Wangtao (Kevin, Kirin) Signed-off-by: Eduardo Valentin Signed-off-by: Rafael David Tinoco Signed-off-by: Greg Kroah-Hartman --- drivers/thermal/hisi_thermal.c | 75 +++++++++------------------------- 1 file changed, 19 insertions(+), 56 deletions(-) diff --git a/drivers/thermal/hisi_thermal.c b/drivers/thermal/hisi_thermal.c index 6d8906d654763..aecbfe9fc8f23 100644 --- a/drivers/thermal/hisi_thermal.c +++ b/drivers/thermal/hisi_thermal.c @@ -40,6 +40,7 @@ #define HISI_TEMP_STEP (784) #define HISI_MAX_SENSORS 4 +#define HISI_DEFAULT_SENSOR 2 struct hisi_thermal_sensor { struct hisi_thermal_data *thermal; @@ -54,9 +55,8 @@ struct hisi_thermal_data { struct mutex thermal_lock; /* protects register data */ struct platform_device *pdev; struct clk *clk; - struct hisi_thermal_sensor sensors[HISI_MAX_SENSORS]; - - int irq, irq_bind_sensor; + struct hisi_thermal_sensor sensors; + int irq; bool irq_enabled; void __iomem *regs; @@ -133,7 +133,7 @@ static void hisi_thermal_enable_bind_irq_sensor mutex_lock(&data->thermal_lock); - sensor = &data->sensors[data->irq_bind_sensor]; + sensor = &data->sensors; /* setting the hdak time */ writel(0x0, data->regs + TEMP0_CFG); @@ -181,31 +181,8 @@ static int hisi_thermal_get_temp(void *_sensor, int *temp) struct hisi_thermal_sensor *sensor = _sensor; struct hisi_thermal_data *data = sensor->thermal; - int sensor_id = -1, i; - long max_temp = 0; - *temp = hisi_thermal_get_sensor_temp(data, sensor); - sensor->sensor_temp = *temp; - - for (i = 0; i < HISI_MAX_SENSORS; i++) { - if (!data->sensors[i].tzd) - continue; - - if (data->sensors[i].sensor_temp >= max_temp) { - max_temp = data->sensors[i].sensor_temp; - sensor_id = i; - } - } - - /* If no sensor has been enabled, then skip to enable irq */ - if (sensor_id == -1) - return 0; - - mutex_lock(&data->thermal_lock); - data->irq_bind_sensor = sensor_id; - mutex_unlock(&data->thermal_lock); - dev_dbg(&data->pdev->dev, "id=%d, irq=%d, temp=%d, thres=%d\n", sensor->id, data->irq_enabled, *temp, sensor->thres_temp); /* @@ -218,7 +195,7 @@ static int hisi_thermal_get_temp(void *_sensor, int *temp) return 0; } - if (max_temp < sensor->thres_temp) { + if (*temp < sensor->thres_temp) { data->irq_enabled = true; hisi_thermal_enable_bind_irq_sensor(data); enable_irq(data->irq); @@ -245,22 +222,16 @@ static irqreturn_t hisi_thermal_alarm_irq_thread(int irq, void *dev) { struct hisi_thermal_data *data = dev; struct hisi_thermal_sensor *sensor; - int i; mutex_lock(&data->thermal_lock); - sensor = &data->sensors[data->irq_bind_sensor]; + sensor = &data->sensors; dev_crit(&data->pdev->dev, "THERMAL ALARM: T > %d\n", sensor->thres_temp); mutex_unlock(&data->thermal_lock); - for (i = 0; i < HISI_MAX_SENSORS; i++) { - if (!data->sensors[i].tzd) - continue; - - thermal_zone_device_update(data->sensors[i].tzd, - THERMAL_EVENT_UNSPECIFIED); - } + thermal_zone_device_update(data->sensors.tzd, + THERMAL_EVENT_UNSPECIFIED); return IRQ_HANDLED; } @@ -317,7 +288,6 @@ static int hisi_thermal_probe(struct platform_device *pdev) { struct hisi_thermal_data *data; struct resource *res; - int i; int ret; data = devm_kzalloc(&pdev->dev, sizeof(*data), GFP_KERNEL); @@ -359,14 +329,13 @@ static int hisi_thermal_probe(struct platform_device *pdev) hisi_thermal_enable_bind_irq_sensor(data); data->irq_enabled = true; - for (i = 0; i < HISI_MAX_SENSORS; ++i) { - ret = hisi_thermal_register_sensor(pdev, data, - &data->sensors[i], i); - if (ret) - dev_err(&pdev->dev, - "failed to register thermal sensor: %d\n", ret); - else - hisi_thermal_toggle_sensor(&data->sensors[i], true); + ret = hisi_thermal_register_sensor(pdev, data, + &data->sensors, + HISI_DEFAULT_SENSOR); + if (ret) { + dev_err(&pdev->dev, "failed to register thermal sensor: %d\n", + ret); + return ret; } ret = devm_request_threaded_irq(&pdev->dev, data->irq, @@ -378,6 +347,8 @@ static int hisi_thermal_probe(struct platform_device *pdev) return ret; } + hisi_thermal_toggle_sensor(&data->sensors, true); + enable_irq(data->irq); return 0; @@ -386,17 +357,9 @@ static int hisi_thermal_probe(struct platform_device *pdev) static int hisi_thermal_remove(struct platform_device *pdev) { struct hisi_thermal_data *data = platform_get_drvdata(pdev); - int i; - - for (i = 0; i < HISI_MAX_SENSORS; i++) { - struct hisi_thermal_sensor *sensor = &data->sensors[i]; - - if (!sensor->tzd) - continue; - - hisi_thermal_toggle_sensor(sensor, false); - } + struct hisi_thermal_sensor *sensor = &data->sensors; + hisi_thermal_toggle_sensor(sensor, false); hisi_thermal_disable_sensor(data); clk_disable_unprepare(data->clk); -- GitLab From e29649e49c81b1a1a78174f70c2e1ddf41f7832f Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Thu, 19 Oct 2017 19:05:48 +0200 Subject: [PATCH 2018/2269] thermal/drivers/hisi: Remove pointless lock commit 2d4fa7b4c6f8080ced2e8237c9f46fb1fc110d64 upstream. The threaded interrupt inspect the sensors structure to look in the temp threshold field, but this field is read-only in all the code, except in the probe function before the threaded interrupt is set. In other words there is not race window in the threaded interrupt when reading the field value. Signed-off-by: Daniel Lezcano Reviewed-by: Leo Yan Signed-off-by: Eduardo Valentin Signed-off-by: Rafael David Tinoco Signed-off-by: Greg Kroah-Hartman --- drivers/thermal/hisi_thermal.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/thermal/hisi_thermal.c b/drivers/thermal/hisi_thermal.c index aecbfe9fc8f23..0c2966c285b15 100644 --- a/drivers/thermal/hisi_thermal.c +++ b/drivers/thermal/hisi_thermal.c @@ -221,14 +221,10 @@ static irqreturn_t hisi_thermal_alarm_irq(int irq, void *dev) static irqreturn_t hisi_thermal_alarm_irq_thread(int irq, void *dev) { struct hisi_thermal_data *data = dev; - struct hisi_thermal_sensor *sensor; - - mutex_lock(&data->thermal_lock); - sensor = &data->sensors; + struct hisi_thermal_sensor *sensor = &data->sensors; dev_crit(&data->pdev->dev, "THERMAL ALARM: T > %d\n", sensor->thres_temp); - mutex_unlock(&data->thermal_lock); thermal_zone_device_update(data->sensors.tzd, THERMAL_EVENT_UNSPECIFIED); -- GitLab From f7105bd03755272a0e434ddc54e3a78dae2ed0c6 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Thu, 19 Oct 2017 19:05:49 +0200 Subject: [PATCH 2019/2269] thermal/drivers/hisi: Encapsulate register writes into helpers commit 1e11b014271ceccb5ea04ae58f4829ac8209a86d upstream. Hopefully, the function name can help to clarify the semantic of the operations when writing in the register. Signed-off-by: Daniel Lezcano Signed-off-by: Eduardo Valentin Signed-off-by: Rafael David Tinoco Signed-off-by: Greg Kroah-Hartman --- drivers/thermal/hisi_thermal.c | 92 ++++++++++++++++++++++++++-------- 1 file changed, 70 insertions(+), 22 deletions(-) diff --git a/drivers/thermal/hisi_thermal.c b/drivers/thermal/hisi_thermal.c index 0c2966c285b15..a803043f37ae9 100644 --- a/drivers/thermal/hisi_thermal.c +++ b/drivers/thermal/hisi_thermal.c @@ -26,6 +26,7 @@ #include "thermal_core.h" +#define TEMP0_LAG (0x0) #define TEMP0_TH (0x4) #define TEMP0_RST_TH (0x8) #define TEMP0_CFG (0xC) @@ -96,6 +97,56 @@ static inline long hisi_thermal_round_temp(int temp) hisi_thermal_temp_to_step(temp)); } +static inline void hisi_thermal_set_lag(void __iomem *addr, int value) +{ + writel(value, addr + TEMP0_LAG); +} + +static inline void hisi_thermal_alarm_clear(void __iomem *addr, int value) +{ + writel(value, addr + TEMP0_INT_CLR); +} + +static inline void hisi_thermal_alarm_enable(void __iomem *addr, int value) +{ + writel(value, addr + TEMP0_INT_EN); +} + +static inline void hisi_thermal_alarm_set(void __iomem *addr, int temp) +{ + writel(hisi_thermal_temp_to_step(temp) | 0x0FFFFFF00, addr + TEMP0_TH); +} + +static inline void hisi_thermal_reset_set(void __iomem *addr, int temp) +{ + writel(hisi_thermal_temp_to_step(temp), addr + TEMP0_RST_TH); +} + +static inline void hisi_thermal_reset_enable(void __iomem *addr, int value) +{ + writel(value, addr + TEMP0_RST_MSK); +} + +static inline void hisi_thermal_enable(void __iomem *addr, int value) +{ + writel(value, addr + TEMP0_EN); +} + +static inline void hisi_thermal_sensor_select(void __iomem *addr, int sensor) +{ + writel((sensor << 12), addr + TEMP0_CFG); +} + +static inline int hisi_thermal_get_temperature(void __iomem *addr) +{ + return hisi_thermal_step_to_temp(readl(addr + TEMP0_VALUE)); +} + +static inline void hisi_thermal_hdak_set(void __iomem *addr, int value) +{ + writel(value, addr + TEMP0_CFG); +} + static long hisi_thermal_get_sensor_temp(struct hisi_thermal_data *data, struct hisi_thermal_sensor *sensor) { @@ -104,22 +155,21 @@ static long hisi_thermal_get_sensor_temp(struct hisi_thermal_data *data, mutex_lock(&data->thermal_lock); /* disable interrupt */ - writel(0x0, data->regs + TEMP0_INT_EN); - writel(0x1, data->regs + TEMP0_INT_CLR); + hisi_thermal_alarm_enable(data->regs, 0); + hisi_thermal_alarm_clear(data->regs, 1); /* disable module firstly */ - writel(0x0, data->regs + TEMP0_EN); + hisi_thermal_enable(data->regs, 0); /* select sensor id */ - writel((sensor->id << 12), data->regs + TEMP0_CFG); + hisi_thermal_sensor_select(data->regs, sensor->id); /* enable module */ - writel(0x1, data->regs + TEMP0_EN); + hisi_thermal_enable(data->regs, 1); usleep_range(3000, 5000); - val = readl(data->regs + TEMP0_VALUE); - val = hisi_thermal_step_to_temp(val); + val = hisi_thermal_get_temperature(data->regs); mutex_unlock(&data->thermal_lock); @@ -136,28 +186,26 @@ static void hisi_thermal_enable_bind_irq_sensor sensor = &data->sensors; /* setting the hdak time */ - writel(0x0, data->regs + TEMP0_CFG); + hisi_thermal_hdak_set(data->regs, 0); /* disable module firstly */ - writel(0x0, data->regs + TEMP0_RST_MSK); - writel(0x0, data->regs + TEMP0_EN); + hisi_thermal_reset_enable(data->regs, 0); + hisi_thermal_enable(data->regs, 0); /* select sensor id */ - writel((sensor->id << 12), data->regs + TEMP0_CFG); + hisi_thermal_sensor_select(data->regs, sensor->id); /* enable for interrupt */ - writel(hisi_thermal_temp_to_step(sensor->thres_temp) | 0x0FFFFFF00, - data->regs + TEMP0_TH); + hisi_thermal_alarm_set(data->regs, sensor->thres_temp); - writel(hisi_thermal_temp_to_step(HISI_TEMP_RESET), - data->regs + TEMP0_RST_TH); + hisi_thermal_reset_set(data->regs, HISI_TEMP_RESET); /* enable module */ - writel(0x1, data->regs + TEMP0_RST_MSK); - writel(0x1, data->regs + TEMP0_EN); + hisi_thermal_reset_enable(data->regs, 1); + hisi_thermal_enable(data->regs, 1); - writel(0x0, data->regs + TEMP0_INT_CLR); - writel(0x1, data->regs + TEMP0_INT_EN); + hisi_thermal_alarm_clear(data->regs, 0); + hisi_thermal_alarm_enable(data->regs, 1); usleep_range(3000, 5000); @@ -169,9 +217,9 @@ static void hisi_thermal_disable_sensor(struct hisi_thermal_data *data) mutex_lock(&data->thermal_lock); /* disable sensor module */ - writel(0x0, data->regs + TEMP0_INT_EN); - writel(0x0, data->regs + TEMP0_RST_MSK); - writel(0x0, data->regs + TEMP0_EN); + hisi_thermal_enable(data->regs, 0); + hisi_thermal_alarm_enable(data->regs, 0); + hisi_thermal_reset_enable(data->regs, 0); mutex_unlock(&data->thermal_lock); } -- GitLab From aa6e035f259a5a4402a204565b7b2fb26bbeaaf2 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Thu, 19 Oct 2017 19:05:50 +0200 Subject: [PATCH 2020/2269] thermal/drivers/hisi: Fix configuration register setting commit b424315a287c70eeb5f920f84c92492bd2f5658e upstream. The TEMP0_CFG configuration register contains different field to set up the temperature controller. However in the code, nothing prevents a setup to overwrite the previous one: eg. writing the hdak value overwrites the sensor selection, the sensor selection overwrites the hdak value. In order to prevent such thing, use a regmap-like mechanism by reading the value before, set the corresponding bits and write the result. Signed-off-by: Daniel Lezcano Signed-off-by: Eduardo Valentin Signed-off-by: Rafael David Tinoco Signed-off-by: Greg Kroah-Hartman --- drivers/thermal/hisi_thermal.c | 34 +++++++++++++++++++++++++++++----- 1 file changed, 29 insertions(+), 5 deletions(-) diff --git a/drivers/thermal/hisi_thermal.c b/drivers/thermal/hisi_thermal.c index a803043f37ae9..31b52ebf426f1 100644 --- a/drivers/thermal/hisi_thermal.c +++ b/drivers/thermal/hisi_thermal.c @@ -30,6 +30,8 @@ #define TEMP0_TH (0x4) #define TEMP0_RST_TH (0x8) #define TEMP0_CFG (0xC) +#define TEMP0_CFG_SS_MSK (0xF000) +#define TEMP0_CFG_HDAK_MSK (0x30) #define TEMP0_EN (0x10) #define TEMP0_INT_EN (0x14) #define TEMP0_INT_CLR (0x18) @@ -132,19 +134,41 @@ static inline void hisi_thermal_enable(void __iomem *addr, int value) writel(value, addr + TEMP0_EN); } -static inline void hisi_thermal_sensor_select(void __iomem *addr, int sensor) +static inline int hisi_thermal_get_temperature(void __iomem *addr) { - writel((sensor << 12), addr + TEMP0_CFG); + return hisi_thermal_step_to_temp(readl(addr + TEMP0_VALUE)); } -static inline int hisi_thermal_get_temperature(void __iomem *addr) +/* + * Temperature configuration register - Sensor selection + * + * Bits [19:12] + * + * 0x0: local sensor (default) + * 0x1: remote sensor 1 (ACPU cluster 1) + * 0x2: remote sensor 2 (ACPU cluster 0) + * 0x3: remote sensor 3 (G3D) + */ +static inline void hisi_thermal_sensor_select(void __iomem *addr, int sensor) { - return hisi_thermal_step_to_temp(readl(addr + TEMP0_VALUE)); + writel((readl(addr + TEMP0_CFG) & ~TEMP0_CFG_SS_MSK) | + (sensor << 12), addr + TEMP0_CFG); } +/* + * Temperature configuration register - Hdak conversion polling interval + * + * Bits [5:4] + * + * 0x0 : 0.768 ms + * 0x1 : 6.144 ms + * 0x2 : 49.152 ms + * 0x3 : 393.216 ms + */ static inline void hisi_thermal_hdak_set(void __iomem *addr, int value) { - writel(value, addr + TEMP0_CFG); + writel((readl(addr + TEMP0_CFG) & ~TEMP0_CFG_HDAK_MSK) | + (value << 4), addr + TEMP0_CFG); } static long hisi_thermal_get_sensor_temp(struct hisi_thermal_data *data, -- GitLab From dd9989eda8327d9894f8f2c4812ca7dd085fad51 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Thu, 19 Oct 2017 19:05:51 +0200 Subject: [PATCH 2021/2269] thermal/drivers/hisi: Remove costly sensor inspection commit 10d7e9a9181f4637640f388d334c6740c1b5d0e8 upstream. The sensor is all setup, bind, resetted, acked, etc... every single second. That was the way to workaround a problem with the interrupt bouncing again and again. With the following changes, we fix all in one: - Do the setup, one time, at probe time - Add the IRQF_ONESHOT, ack the interrupt in the threaded handler - Remove the interrupt handler - Set the correct value for the LAG register - Remove all the irq_enabled stuff in the code as the interruption handling is fixed - Remove the 3ms delay - Reorder the initialization routine to be in the right order It ends up to a nicer code and more efficient, the 3-5ms delay is removed from the get_temp() path. Signed-off-by: Daniel Lezcano Reviewed-by: Leo Yan Tested-by: Leo Yan Signed-off-by: Eduardo Valentin Signed-off-by: Rafael David Tinoco Signed-off-by: Greg Kroah-Hartman --- drivers/thermal/hisi_thermal.c | 203 +++++++++++++++------------------ 1 file changed, 93 insertions(+), 110 deletions(-) diff --git a/drivers/thermal/hisi_thermal.c b/drivers/thermal/hisi_thermal.c index 31b52ebf426f1..7c5d4647cb5e9 100644 --- a/drivers/thermal/hisi_thermal.c +++ b/drivers/thermal/hisi_thermal.c @@ -41,6 +41,7 @@ #define HISI_TEMP_BASE (-60000) #define HISI_TEMP_RESET (100000) #define HISI_TEMP_STEP (784) +#define HISI_TEMP_LAG (3500) #define HISI_MAX_SENSORS 4 #define HISI_DEFAULT_SENSOR 2 @@ -60,8 +61,6 @@ struct hisi_thermal_data { struct clk *clk; struct hisi_thermal_sensor sensors; int irq; - bool irq_enabled; - void __iomem *regs; }; @@ -99,9 +98,40 @@ static inline long hisi_thermal_round_temp(int temp) hisi_thermal_temp_to_step(temp)); } +/* + * The lag register contains 5 bits encoding the temperature in steps. + * + * Each time the temperature crosses the threshold boundary, an + * interrupt is raised. It could be when the temperature is going + * above the threshold or below. However, if the temperature is + * fluctuating around this value due to the load, we can receive + * several interrupts which may not desired. + * + * We can setup a temperature representing the delta between the + * threshold and the current temperature when the temperature is + * decreasing. + * + * For instance: the lag register is 5°C, the threshold is 65°C, when + * the temperature reaches 65°C an interrupt is raised and when the + * temperature decrease to 65°C - 5°C another interrupt is raised. + * + * A very short lag can lead to an interrupt storm, a long lag + * increase the latency to react to the temperature changes. In our + * case, that is not really a problem as we are polling the + * temperature. + * + * [0:4] : lag register + * + * The temperature is coded in steps, cf. HISI_TEMP_STEP. + * + * Min : 0x00 : 0.0 °C + * Max : 0x1F : 24.3 °C + * + * The 'value' parameter is in milliCelsius. + */ static inline void hisi_thermal_set_lag(void __iomem *addr, int value) { - writel(value, addr + TEMP0_LAG); + writel((value / HISI_TEMP_STEP) & 0x1F, addr + TEMP0_LAG); } static inline void hisi_thermal_alarm_clear(void __iomem *addr, int value) @@ -171,71 +201,6 @@ static inline void hisi_thermal_hdak_set(void __iomem *addr, int value) (value << 4), addr + TEMP0_CFG); } -static long hisi_thermal_get_sensor_temp(struct hisi_thermal_data *data, - struct hisi_thermal_sensor *sensor) -{ - long val; - - mutex_lock(&data->thermal_lock); - - /* disable interrupt */ - hisi_thermal_alarm_enable(data->regs, 0); - hisi_thermal_alarm_clear(data->regs, 1); - - /* disable module firstly */ - hisi_thermal_enable(data->regs, 0); - - /* select sensor id */ - hisi_thermal_sensor_select(data->regs, sensor->id); - - /* enable module */ - hisi_thermal_enable(data->regs, 1); - - usleep_range(3000, 5000); - - val = hisi_thermal_get_temperature(data->regs); - - mutex_unlock(&data->thermal_lock); - - return val; -} - -static void hisi_thermal_enable_bind_irq_sensor - (struct hisi_thermal_data *data) -{ - struct hisi_thermal_sensor *sensor; - - mutex_lock(&data->thermal_lock); - - sensor = &data->sensors; - - /* setting the hdak time */ - hisi_thermal_hdak_set(data->regs, 0); - - /* disable module firstly */ - hisi_thermal_reset_enable(data->regs, 0); - hisi_thermal_enable(data->regs, 0); - - /* select sensor id */ - hisi_thermal_sensor_select(data->regs, sensor->id); - - /* enable for interrupt */ - hisi_thermal_alarm_set(data->regs, sensor->thres_temp); - - hisi_thermal_reset_set(data->regs, HISI_TEMP_RESET); - - /* enable module */ - hisi_thermal_reset_enable(data->regs, 1); - hisi_thermal_enable(data->regs, 1); - - hisi_thermal_alarm_clear(data->regs, 0); - hisi_thermal_alarm_enable(data->regs, 1); - - usleep_range(3000, 5000); - - mutex_unlock(&data->thermal_lock); -} - static void hisi_thermal_disable_sensor(struct hisi_thermal_data *data) { mutex_lock(&data->thermal_lock); @@ -253,25 +218,10 @@ static int hisi_thermal_get_temp(void *_sensor, int *temp) struct hisi_thermal_sensor *sensor = _sensor; struct hisi_thermal_data *data = sensor->thermal; - *temp = hisi_thermal_get_sensor_temp(data, sensor); - - dev_dbg(&data->pdev->dev, "id=%d, irq=%d, temp=%d, thres=%d\n", - sensor->id, data->irq_enabled, *temp, sensor->thres_temp); - /* - * Bind irq to sensor for two cases: - * Reenable alarm IRQ if temperature below threshold; - * if irq has been enabled, always set it; - */ - if (data->irq_enabled) { - hisi_thermal_enable_bind_irq_sensor(data); - return 0; - } + *temp = hisi_thermal_get_temperature(data->regs); - if (*temp < sensor->thres_temp) { - data->irq_enabled = true; - hisi_thermal_enable_bind_irq_sensor(data); - enable_irq(data->irq); - } + dev_dbg(&data->pdev->dev, "id=%d, temp=%d, thres=%d\n", + sensor->id, *temp, sensor->thres_temp); return 0; } @@ -280,26 +230,27 @@ static const struct thermal_zone_of_device_ops hisi_of_thermal_ops = { .get_temp = hisi_thermal_get_temp, }; -static irqreturn_t hisi_thermal_alarm_irq(int irq, void *dev) +static irqreturn_t hisi_thermal_alarm_irq_thread(int irq, void *dev) { struct hisi_thermal_data *data = dev; + struct hisi_thermal_sensor *sensor = &data->sensors; + int temp; - disable_irq_nosync(irq); - data->irq_enabled = false; + hisi_thermal_alarm_clear(data->regs, 1); - return IRQ_WAKE_THREAD; -} + temp = hisi_thermal_get_temperature(data->regs); -static irqreturn_t hisi_thermal_alarm_irq_thread(int irq, void *dev) -{ - struct hisi_thermal_data *data = dev; - struct hisi_thermal_sensor *sensor = &data->sensors; + if (temp >= sensor->thres_temp) { + dev_crit(&data->pdev->dev, "THERMAL ALARM: %d > %d\n", + temp, sensor->thres_temp); - dev_crit(&data->pdev->dev, "THERMAL ALARM: T > %d\n", - sensor->thres_temp); + thermal_zone_device_update(data->sensors.tzd, + THERMAL_EVENT_UNSPECIFIED); - thermal_zone_device_update(data->sensors.tzd, - THERMAL_EVENT_UNSPECIFIED); + } else if (temp < sensor->thres_temp) { + dev_crit(&data->pdev->dev, "THERMAL ALARM stopped: %d < %d\n", + temp, sensor->thres_temp); + } return IRQ_HANDLED; } @@ -352,6 +303,40 @@ static void hisi_thermal_toggle_sensor(struct hisi_thermal_sensor *sensor, on ? THERMAL_DEVICE_ENABLED : THERMAL_DEVICE_DISABLED); } +static int hisi_thermal_setup(struct hisi_thermal_data *data) +{ + struct hisi_thermal_sensor *sensor; + + sensor = &data->sensors; + + /* disable module firstly */ + hisi_thermal_reset_enable(data->regs, 0); + hisi_thermal_enable(data->regs, 0); + + /* select sensor id */ + hisi_thermal_sensor_select(data->regs, sensor->id); + + /* setting the hdak time */ + hisi_thermal_hdak_set(data->regs, 0); + + /* setting lag value between current temp and the threshold */ + hisi_thermal_set_lag(data->regs, HISI_TEMP_LAG); + + /* enable for interrupt */ + hisi_thermal_alarm_set(data->regs, sensor->thres_temp); + + hisi_thermal_reset_set(data->regs, HISI_TEMP_RESET); + + /* enable module */ + hisi_thermal_reset_enable(data->regs, 1); + hisi_thermal_enable(data->regs, 1); + + hisi_thermal_alarm_clear(data->regs, 0); + hisi_thermal_alarm_enable(data->regs, 1); + + return 0; +} + static int hisi_thermal_probe(struct platform_device *pdev) { struct hisi_thermal_data *data; @@ -394,9 +379,6 @@ static int hisi_thermal_probe(struct platform_device *pdev) return ret; } - hisi_thermal_enable_bind_irq_sensor(data); - data->irq_enabled = true; - ret = hisi_thermal_register_sensor(pdev, data, &data->sensors, HISI_DEFAULT_SENSOR); @@ -406,10 +388,15 @@ static int hisi_thermal_probe(struct platform_device *pdev) return ret; } - ret = devm_request_threaded_irq(&pdev->dev, data->irq, - hisi_thermal_alarm_irq, + ret = hisi_thermal_setup(data); + if (ret) { + dev_err(&pdev->dev, "Failed to setup the sensor: %d\n", ret); + return ret; + } + + ret = devm_request_threaded_irq(&pdev->dev, data->irq, NULL, hisi_thermal_alarm_irq_thread, - 0, "hisi_thermal", data); + IRQF_ONESHOT, "hisi_thermal", data); if (ret < 0) { dev_err(&pdev->dev, "failed to request alarm irq: %d\n", ret); return ret; @@ -417,8 +404,6 @@ static int hisi_thermal_probe(struct platform_device *pdev) hisi_thermal_toggle_sensor(&data->sensors, true); - enable_irq(data->irq); - return 0; } @@ -440,7 +425,6 @@ static int hisi_thermal_suspend(struct device *dev) struct hisi_thermal_data *data = dev_get_drvdata(dev); hisi_thermal_disable_sensor(data); - data->irq_enabled = false; clk_disable_unprepare(data->clk); @@ -456,8 +440,7 @@ static int hisi_thermal_resume(struct device *dev) if (ret) return ret; - data->irq_enabled = true; - hisi_thermal_enable_bind_irq_sensor(data); + hisi_thermal_setup(data); return 0; } -- GitLab From c29f9010a35604047f96a7e9d6cbabfa36d996d1 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Tue, 30 Oct 2018 17:48:25 +0000 Subject: [PATCH 2022/2269] mm: hide incomplete nr_indirectly_reclaimable in /proc/zoneinfo [fixed differently upstream, this is a work-around to resolve it for 4.14.y] Yongqin reported that /proc/zoneinfo format is broken in 4.14 due to commit 7aaf77272358 ("mm: don't show nr_indirectly_reclaimable in /proc/vmstat") Node 0, zone DMA per-node stats nr_inactive_anon 403 nr_active_anon 89123 nr_inactive_file 128887 nr_active_file 47377 nr_unevictable 2053 nr_slab_reclaimable 7510 nr_slab_unreclaimable 10775 nr_isolated_anon 0 nr_isolated_file 0 <...> nr_vmscan_write 0 nr_vmscan_immediate_reclaim 0 nr_dirtied 6022 nr_written 5985 74240 ^^^^^^^^^^ pages free 131656 The problem is caused by the nr_indirectly_reclaimable counter, which is hidden from the /proc/vmstat, but not from the /proc/zoneinfo. Let's fix this inconsistency and hide the counter from /proc/zoneinfo exactly as from /proc/vmstat. BTW, in 4.19+ the counter has been renamed and exported by the commit b29940c1abd7 ("mm: rename and change semantics of nr_indirectly_reclaimable_bytes"), so there is no such a problem anymore. Cc: # 4.14.x-4.18.x Fixes: 7aaf77272358 ("mm: don't show nr_indirectly_reclaimable in /proc/vmstat") Reported-by: Yongqin Liu Signed-off-by: Roman Gushchin Cc: Vlastimil Babka Cc: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- mm/vmstat.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/mm/vmstat.c b/mm/vmstat.c index 527ae727d547e..6389e876c7a74 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1500,6 +1500,10 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat, if (is_zone_first_populated(pgdat, zone)) { seq_printf(m, "\n per-node stats"); for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) { + /* Skip hidden vmstat items. */ + if (*vmstat_text[i + NR_VM_ZONE_STAT_ITEMS + + NR_VM_NUMA_STAT_ITEMS] == '\0') + continue; seq_printf(m, "\n %-12s %lu", vmstat_text[i + NR_VM_ZONE_STAT_ITEMS + NR_VM_NUMA_STAT_ITEMS], -- GitLab From 02dc68829e5f2a9b48829725f332200ac3b051c2 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 2 Feb 2018 16:44:47 +0100 Subject: [PATCH 2023/2269] net: qed: use correct strncpy() size commit 11f711081af0eb54190dc0de96ba4a9cd494666b upstream. passing the strlen() of the source string as the destination length is pointless, and gcc-8 now warns about it: drivers/net/ethernet/qlogic/qed/qed_debug.c: In function 'qed_grc_dump': include/linux/string.h:253: error: 'strncpy' specified bound depends on the length of the source argument [-Werror=stringop-overflow=] This changes qed_grc_dump_big_ram() to instead uses the length of the destination buffer, and use strscpy() to guarantee nul-termination. Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/qlogic/qed/qed_debug.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_debug.c b/drivers/net/ethernet/qlogic/qed/qed_debug.c index 03c3cf77aaff6..99f32202a85c9 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_debug.c +++ b/drivers/net/ethernet/qlogic/qed/qed_debug.c @@ -3590,10 +3590,8 @@ static u32 qed_grc_dump_big_ram(struct qed_hwfn *p_hwfn, total_blocks = big_ram->num_of_blocks[dev_data->chip_id]; ram_size = total_blocks * BIG_RAM_BLOCK_SIZE_DWORDS; - strncpy(type_name, big_ram->instance_name, - strlen(big_ram->instance_name)); - strncpy(mem_name, big_ram->instance_name, - strlen(big_ram->instance_name)); + strscpy(type_name, big_ram->instance_name, sizeof(type_name)); + strscpy(mem_name, big_ram->instance_name, sizeof(mem_name)); /* Dump memory header */ offset += qed_grc_dump_mem_hdr(p_hwfn, -- GitLab From 88b58409f82e98d28f90328fa9ab98e0de479a0c Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Fri, 19 Oct 2018 12:08:22 +0800 Subject: [PATCH 2024/2269] tipc: use destination length for copy string MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 29e270fc32192e7729057963ae7120663856c93e upstream. Got below warning with gcc 8.2 compiler. net/tipc/topsrv.c: In function ‘tipc_topsrv_start’: net/tipc/topsrv.c:660:2: warning: ‘strncpy’ specified bound depends on the length of the source argument [-Wstringop-overflow=] strncpy(srv->name, name, strlen(name) + 1); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ net/tipc/topsrv.c:660:27: note: length computed here strncpy(srv->name, name, strlen(name) + 1); ^~~~~~~~~~~~ So change it to correct length and use strscpy. Signed-off-by: Guoqing Jiang Acked-by: Ying Xue Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/tipc/subscr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index be3d9e3183dcb..959c9aea3d1a4 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -375,7 +375,7 @@ int tipc_topsrv_start(struct net *net) topsrv->tipc_conn_new = tipc_subscrb_connect_cb; topsrv->tipc_conn_release = tipc_subscrb_release_cb; - strncpy(topsrv->name, name, strlen(name) + 1); + strscpy(topsrv->name, name, sizeof(topsrv->name)); tn->topsrv = topsrv; atomic_set(&tn->subscription_count, 0); -- GitLab From ca48e5e30b75a28c12c43c7428c95735e4885e6b Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 8 Dec 2018 13:03:41 +0100 Subject: [PATCH 2025/2269] Linux 4.14.87 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 77ffc1dc4e79b..322484348f3e6 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 86 +SUBLEVEL = 87 EXTRAVERSION = NAME = Petit Gorille -- GitLab From 13b8d9fdf84486d7bf3be24eee78fa01d232a574 Mon Sep 17 00:00:00 2001 From: Connor O'Brien Date: Fri, 5 Oct 2018 19:20:54 -0700 Subject: [PATCH 2026/2269] ANDROID: cpufreq: times: add /proc/uid_concurrent_{active,policy}_time In order to model the energy used by the cpu, we need to expose cpu time information to userspace. We can use this information to blame uids for the energy they are using. This patch adds 2 files: /proc/uid_concurrent_active_time outputs the time each uid spent running with 1 to num_possible_cpus online and not idle. For instance, if 4 cores are online and active for 50ms and the uid is running on one of the cores, the uid should be blamed for 1/4 of the base energy used by the cpu when 1 or more cores is active. /proc/uid_concurrent_policy_time outputs the time each uid spent running on group of cpu's with the same policy with 1 to num_related_cpus online and not idle. For instance, if 2 cores that are a part of the same policy are online and active for 50ms and the uid is running on one of the cores, the uid should be blamed for 1/2 of the energy that is used everytime one or more cpus in the same policy is active. This patch is based on commit c89e69136fec ("ANDROID: cpufreq: uid_concurrent_active_time") and commit 989212536842 ("ANDROID: cpufreq: uid_concurrent_policy_time") in the android-msm-wahoo-4.4 kernel by Marissa Wall. Bug: 111216804 Test: cat files on hikey960 and confirm output is reasonable Change-Id: I1a342361af5c04ecee58d1ab667c91c1bce42445 Signed-off-by: Connor O'Brien --- .../ABI/testing/procfs-concurrent_time | 16 ++ drivers/cpufreq/cpufreq_times.c | 190 +++++++++++++++++- 2 files changed, 204 insertions(+), 2 deletions(-) create mode 100644 Documentation/ABI/testing/procfs-concurrent_time diff --git a/Documentation/ABI/testing/procfs-concurrent_time b/Documentation/ABI/testing/procfs-concurrent_time new file mode 100644 index 0000000000000..55b414289b406 --- /dev/null +++ b/Documentation/ABI/testing/procfs-concurrent_time @@ -0,0 +1,16 @@ +What: /proc/uid_concurrent_active_time +Date: December 2018 +Contact: Connor O'Brien +Description: + The /proc/uid_concurrent_active_time file displays aggregated cputime + numbers for each uid, broken down by the total number of cores that were + active while the uid's task was running. + +What: /proc/uid_concurrent_policy_time +Date: December 2018 +Contact: Connor O'Brien +Description: + The /proc/uid_concurrent_policy_time file displays aggregated cputime + numbers for each uid, broken down based on the cpufreq policy + of the core used by the uid's task and the number of cores associated + with that policy that were active while the uid's task was running. diff --git a/drivers/cpufreq/cpufreq_times.c b/drivers/cpufreq/cpufreq_times.c index a43eeee30e8e1..aaded60c9a21c 100644 --- a/drivers/cpufreq/cpufreq_times.c +++ b/drivers/cpufreq/cpufreq_times.c @@ -32,11 +32,17 @@ static DECLARE_HASHTABLE(uid_hash_table, UID_HASH_BITS); static DEFINE_SPINLOCK(task_time_in_state_lock); /* task->time_in_state */ static DEFINE_SPINLOCK(uid_lock); /* uid_hash_table */ +struct concurrent_times { + atomic64_t active[NR_CPUS]; + atomic64_t policy[NR_CPUS]; +}; + struct uid_entry { uid_t uid; unsigned int max_state; struct hlist_node hash; struct rcu_head rcu; + struct concurrent_times *concurrent_times; u64 time_in_state[0]; }; @@ -87,6 +93,7 @@ static struct uid_entry *find_uid_entry_locked(uid_t uid) static struct uid_entry *find_or_register_uid_locked(uid_t uid) { struct uid_entry *uid_entry, *temp; + struct concurrent_times *times; unsigned int max_state = READ_ONCE(next_offset); size_t alloc_size = sizeof(*uid_entry) + max_state * sizeof(uid_entry->time_in_state[0]); @@ -115,9 +122,15 @@ static struct uid_entry *find_or_register_uid_locked(uid_t uid) uid_entry = kzalloc(alloc_size, GFP_ATOMIC); if (!uid_entry) return NULL; + times = kzalloc(sizeof(*times), GFP_ATOMIC); + if (!times) { + kfree(uid_entry); + return NULL; + } uid_entry->uid = uid; uid_entry->max_state = max_state; + uid_entry->concurrent_times = times; hash_add_rcu(uid_hash_table, &uid_entry->hash, uid); @@ -232,6 +245,86 @@ static int uid_time_in_state_seq_show(struct seq_file *m, void *v) return 0; } +static int concurrent_time_seq_show(struct seq_file *m, void *v, + atomic64_t *(*get_times)(struct concurrent_times *)) +{ + struct uid_entry *uid_entry; + int i, num_possible_cpus = num_possible_cpus(); + + rcu_read_lock(); + + hlist_for_each_entry_rcu(uid_entry, (struct hlist_head *)v, hash) { + atomic64_t *times = get_times(uid_entry->concurrent_times); + + seq_put_decimal_ull(m, "", (u64)uid_entry->uid); + seq_putc(m, ':'); + + for (i = 0; i < num_possible_cpus; ++i) { + u64 time = nsec_to_clock_t(atomic64_read(×[i])); + + seq_put_decimal_ull(m, " ", time); + } + seq_putc(m, '\n'); + } + + rcu_read_unlock(); + + return 0; +} + +static inline atomic64_t *get_active_times(struct concurrent_times *times) +{ + return times->active; +} + +static int concurrent_active_time_seq_show(struct seq_file *m, void *v) +{ + if (v == uid_hash_table) { + seq_put_decimal_ull(m, "cpus: ", num_possible_cpus()); + seq_putc(m, '\n'); + } + + return concurrent_time_seq_show(m, v, get_active_times); +} + +static inline atomic64_t *get_policy_times(struct concurrent_times *times) +{ + return times->policy; +} + +static int concurrent_policy_time_seq_show(struct seq_file *m, void *v) +{ + int i; + struct cpu_freqs *freqs, *last_freqs = NULL; + + if (v == uid_hash_table) { + int cnt = 0; + + for_each_possible_cpu(i) { + freqs = all_freqs[i]; + if (!freqs) + continue; + if (freqs != last_freqs) { + if (last_freqs) { + seq_put_decimal_ull(m, ": ", cnt); + seq_putc(m, ' '); + cnt = 0; + } + seq_put_decimal_ull(m, "policy", i); + + last_freqs = freqs; + } + cnt++; + } + if (last_freqs) { + seq_put_decimal_ull(m, ": ", cnt); + seq_putc(m, '\n'); + } + } + + return concurrent_time_seq_show(m, v, get_policy_times); +} + void cpufreq_task_times_init(struct task_struct *p) { unsigned long flags; @@ -326,11 +419,16 @@ void cpufreq_acct_update_power(struct task_struct *p, u64 cputime) { unsigned long flags; unsigned int state; + unsigned int active_cpu_cnt = 0; + unsigned int policy_cpu_cnt = 0; + unsigned int policy_first_cpu; struct uid_entry *uid_entry; struct cpu_freqs *freqs = all_freqs[task_cpu(p)]; + struct cpufreq_policy *policy; uid_t uid = from_kuid_munged(current_user_ns(), task_uid(p)); + int cpu = 0; - if (!freqs || p->flags & PF_EXITING) + if (!freqs || is_idle_task(p) || p->flags & PF_EXITING) return; state = freqs->offset + READ_ONCE(freqs->last_index); @@ -346,6 +444,42 @@ void cpufreq_acct_update_power(struct task_struct *p, u64 cputime) if (uid_entry && state < uid_entry->max_state) uid_entry->time_in_state[state] += cputime; spin_unlock_irqrestore(&uid_lock, flags); + + rcu_read_lock(); + uid_entry = find_uid_entry_rcu(uid); + if (!uid_entry) { + rcu_read_unlock(); + return; + } + + for_each_possible_cpu(cpu) + if (!idle_cpu(cpu)) + ++active_cpu_cnt; + + atomic64_add(cputime, + &uid_entry->concurrent_times->active[active_cpu_cnt - 1]); + + policy = cpufreq_cpu_get(task_cpu(p)); + if (!policy) { + /* + * This CPU may have just come up and not have a cpufreq policy + * yet. + */ + rcu_read_unlock(); + return; + } + + for_each_cpu(cpu, policy->related_cpus) + if (!idle_cpu(cpu)) + ++policy_cpu_cnt; + + policy_first_cpu = cpumask_first(policy->related_cpus); + cpufreq_cpu_put(policy); + + atomic64_add(cputime, + &uid_entry->concurrent_times->policy[policy_first_cpu + + policy_cpu_cnt - 1]); + rcu_read_unlock(); } void cpufreq_times_create_policy(struct cpufreq_policy *policy) @@ -387,6 +521,14 @@ void cpufreq_times_create_policy(struct cpufreq_policy *policy) all_freqs[cpu] = freqs; } +static void uid_entry_reclaim(struct rcu_head *rcu) +{ + struct uid_entry *uid_entry = container_of(rcu, struct uid_entry, rcu); + + kfree(uid_entry->concurrent_times); + kfree(uid_entry); +} + void cpufreq_task_times_remove_uids(uid_t uid_start, uid_t uid_end) { struct uid_entry *uid_entry; @@ -400,7 +542,7 @@ void cpufreq_task_times_remove_uids(uid_t uid_start, uid_t uid_end) hash, uid_start) { if (uid_start == uid_entry->uid) { hash_del_rcu(&uid_entry->hash); - kfree_rcu(uid_entry, rcu); + call_rcu(&uid_entry->rcu, uid_entry_reclaim); } } } @@ -453,11 +595,55 @@ static const struct file_operations uid_time_in_state_fops = { .release = seq_release, }; +static const struct seq_operations concurrent_active_time_seq_ops = { + .start = uid_seq_start, + .next = uid_seq_next, + .stop = uid_seq_stop, + .show = concurrent_active_time_seq_show, +}; + +static int concurrent_active_time_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &concurrent_active_time_seq_ops); +} + +static const struct file_operations concurrent_active_time_fops = { + .open = concurrent_active_time_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + +static const struct seq_operations concurrent_policy_time_seq_ops = { + .start = uid_seq_start, + .next = uid_seq_next, + .stop = uid_seq_stop, + .show = concurrent_policy_time_seq_show, +}; + +static int concurrent_policy_time_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &concurrent_policy_time_seq_ops); +} + +static const struct file_operations concurrent_policy_time_fops = { + .open = concurrent_policy_time_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + static int __init cpufreq_times_init(void) { proc_create_data("uid_time_in_state", 0444, NULL, &uid_time_in_state_fops, NULL); + proc_create_data("uid_concurrent_active_time", 0444, NULL, + &concurrent_active_time_fops, NULL); + + proc_create_data("uid_concurrent_policy_time", 0444, NULL, + &concurrent_policy_time_fops, NULL); + return 0; } -- GitLab From ccd0de1304be3a8863d97872fbf00b8b3b6c7801 Mon Sep 17 00:00:00 2001 From: Cody Schuffelen Date: Tue, 20 Nov 2018 19:14:49 -0800 Subject: [PATCH 2027/2269] FROMGIT, BACKPORT: mac80211-next: rtnetlink wifi simulation device This device takes over an existing network device and produces a new one that appears like a wireless connection, returning enough canned responses to nl80211 to satisfy a standard connection manager. If necessary, it can also be set up one step removed from an existing network device, such as through a vlan/80211Q or macvlan connection to not disrupt the existing network interface. To use it to wrap a bare ethernet connection: ip link add link eth0 name wlan0 type virt_wifi You may have to rename or otherwise hide the eth0 from your connection manager, as the original network link will become unusuable and only the wireless wrapper will be functional. This can also be combined with vlan or macvlan links on top of eth0 to share the network between distinct links, but that requires support outside the machine for accepting vlan-tagged packets or packets from multiple MAC addresses. This is being used for Google's Remote Android Virtual Device project, which runs Android devices in virtual machines. The standard network interfaces provided inside the virtual machines are all ethernet. However, Android is not interested in ethernet devices and would rather connect to a wireless interface. This patch allows the virtual machine guest to treat one of its network connections as wireless rather than ethernet, satisfying Android's network connection requirements. We believe this is a generally useful driver for simulating wireless network connections in other environments where a wireless connection is desired by some userspace process but is not available. This is distinct from other testing efforts such as mac80211_hwsim by being a cfg80211 device instead of mac80211 device, allowing straight pass-through on the data plane instead of forcing packaging of ethernet data into mac80211 frames. Signed-off-by: A. Cody Schuffelen Acked-by: Alistair Strachan Acked-by: Greg Hartman Acked-by: Tristan Muntsinger [make it a tristate] Signed-off-by: Johannes Berg (cherry picked from commit c7cdba31ed8b87526db978976392802d3f93110c) [astrachan: taken from mac80211-next/master] [astrachan: removed 'extack' from call to netdev_upper_dev_link()] Bug: 120682817 Test: boot tested cuttlefish and enabled wifi, saw AndroidWifi Change-Id: I726ec28617574c0217d937da049089f0ab8e0da8 Signed-off-by: Alistair Strachan --- drivers/net/wireless/Kconfig | 7 + drivers/net/wireless/Makefile | 2 + drivers/net/wireless/virt_wifi.c | 632 +++++++++++++++++++++++++++++++ 3 files changed, 641 insertions(+) create mode 100644 drivers/net/wireless/virt_wifi.c diff --git a/drivers/net/wireless/Kconfig b/drivers/net/wireless/Kconfig index 166920ae23f8d..8c456a66ac3bb 100644 --- a/drivers/net/wireless/Kconfig +++ b/drivers/net/wireless/Kconfig @@ -114,4 +114,11 @@ config USB_NET_RNDIS_WLAN If you choose to build a module, it'll be called rndis_wlan. +config VIRT_WIFI + tristate "Wifi wrapper for ethernet drivers" + depends on CFG80211 + ---help--- + This option adds support for ethernet connections to appear as if they + are wifi connections through a special rtnetlink device. + endif # WLAN diff --git a/drivers/net/wireless/Makefile b/drivers/net/wireless/Makefile index 7fc96306712ac..6cfe74515c959 100644 --- a/drivers/net/wireless/Makefile +++ b/drivers/net/wireless/Makefile @@ -27,3 +27,5 @@ obj-$(CONFIG_PCMCIA_WL3501) += wl3501_cs.o obj-$(CONFIG_USB_NET_RNDIS_WLAN) += rndis_wlan.o obj-$(CONFIG_MAC80211_HWSIM) += mac80211_hwsim.o + +obj-$(CONFIG_VIRT_WIFI) += virt_wifi.o diff --git a/drivers/net/wireless/virt_wifi.c b/drivers/net/wireless/virt_wifi.c new file mode 100644 index 0000000000000..2e21ffee9b0e4 --- /dev/null +++ b/drivers/net/wireless/virt_wifi.c @@ -0,0 +1,632 @@ +// SPDX-License-Identifier: GPL-2.0 +/* drivers/net/wireless/virt_wifi.c + * + * A fake implementation of cfg80211_ops that can be tacked on to an ethernet + * net_device to make it appear as a wireless connection. + * + * Copyright (C) 2018 Google, Inc. + * + * Author: schuffelen@google.com + */ + +#include +#include +#include +#include + +#include +#include +#include +#include + +static struct wiphy *common_wiphy; + +struct virt_wifi_wiphy_priv { + struct delayed_work scan_result; + struct cfg80211_scan_request *scan_request; + bool being_deleted; +}; + +static struct ieee80211_channel channel_2ghz = { + .band = NL80211_BAND_2GHZ, + .center_freq = 2432, + .hw_value = 2432, + .max_power = 20, +}; + +static struct ieee80211_rate bitrates_2ghz[] = { + { .bitrate = 10 }, + { .bitrate = 20 }, + { .bitrate = 55 }, + { .bitrate = 110 }, + { .bitrate = 60 }, + { .bitrate = 120 }, + { .bitrate = 240 }, +}; + +static struct ieee80211_supported_band band_2ghz = { + .channels = &channel_2ghz, + .bitrates = bitrates_2ghz, + .band = NL80211_BAND_2GHZ, + .n_channels = 1, + .n_bitrates = ARRAY_SIZE(bitrates_2ghz), + .ht_cap = { + .ht_supported = true, + .cap = IEEE80211_HT_CAP_SUP_WIDTH_20_40 | + IEEE80211_HT_CAP_GRN_FLD | + IEEE80211_HT_CAP_SGI_20 | + IEEE80211_HT_CAP_SGI_40 | + IEEE80211_HT_CAP_DSSSCCK40, + .ampdu_factor = 0x3, + .ampdu_density = 0x6, + .mcs = { + .rx_mask = {0xff, 0xff}, + .tx_params = IEEE80211_HT_MCS_TX_DEFINED, + }, + }, +}; + +static struct ieee80211_channel channel_5ghz = { + .band = NL80211_BAND_5GHZ, + .center_freq = 5240, + .hw_value = 5240, + .max_power = 20, +}; + +static struct ieee80211_rate bitrates_5ghz[] = { + { .bitrate = 60 }, + { .bitrate = 120 }, + { .bitrate = 240 }, +}; + +#define RX_MCS_MAP (IEEE80211_VHT_MCS_SUPPORT_0_9 << 0 | \ + IEEE80211_VHT_MCS_SUPPORT_0_9 << 2 | \ + IEEE80211_VHT_MCS_SUPPORT_0_9 << 4 | \ + IEEE80211_VHT_MCS_SUPPORT_0_9 << 6 | \ + IEEE80211_VHT_MCS_SUPPORT_0_9 << 8 | \ + IEEE80211_VHT_MCS_SUPPORT_0_9 << 10 | \ + IEEE80211_VHT_MCS_SUPPORT_0_9 << 12 | \ + IEEE80211_VHT_MCS_SUPPORT_0_9 << 14) + +#define TX_MCS_MAP (IEEE80211_VHT_MCS_SUPPORT_0_9 << 0 | \ + IEEE80211_VHT_MCS_SUPPORT_0_9 << 2 | \ + IEEE80211_VHT_MCS_SUPPORT_0_9 << 4 | \ + IEEE80211_VHT_MCS_SUPPORT_0_9 << 6 | \ + IEEE80211_VHT_MCS_SUPPORT_0_9 << 8 | \ + IEEE80211_VHT_MCS_SUPPORT_0_9 << 10 | \ + IEEE80211_VHT_MCS_SUPPORT_0_9 << 12 | \ + IEEE80211_VHT_MCS_SUPPORT_0_9 << 14) + +static struct ieee80211_supported_band band_5ghz = { + .channels = &channel_5ghz, + .bitrates = bitrates_5ghz, + .band = NL80211_BAND_5GHZ, + .n_channels = 1, + .n_bitrates = ARRAY_SIZE(bitrates_5ghz), + .ht_cap = { + .ht_supported = true, + .cap = IEEE80211_HT_CAP_SUP_WIDTH_20_40 | + IEEE80211_HT_CAP_GRN_FLD | + IEEE80211_HT_CAP_SGI_20 | + IEEE80211_HT_CAP_SGI_40 | + IEEE80211_HT_CAP_DSSSCCK40, + .ampdu_factor = 0x3, + .ampdu_density = 0x6, + .mcs = { + .rx_mask = {0xff, 0xff}, + .tx_params = IEEE80211_HT_MCS_TX_DEFINED, + }, + }, + .vht_cap = { + .vht_supported = true, + .cap = IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_11454 | + IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ | + IEEE80211_VHT_CAP_RXLDPC | + IEEE80211_VHT_CAP_SHORT_GI_80 | + IEEE80211_VHT_CAP_SHORT_GI_160 | + IEEE80211_VHT_CAP_TXSTBC | + IEEE80211_VHT_CAP_RXSTBC_1 | + IEEE80211_VHT_CAP_RXSTBC_2 | + IEEE80211_VHT_CAP_RXSTBC_3 | + IEEE80211_VHT_CAP_RXSTBC_4 | + IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_MASK, + .vht_mcs = { + .rx_mcs_map = cpu_to_le16(RX_MCS_MAP), + .tx_mcs_map = cpu_to_le16(TX_MCS_MAP), + } + }, +}; + +/* Assigned at module init. Guaranteed locally-administered and unicast. */ +static u8 fake_router_bssid[ETH_ALEN] __ro_after_init = {}; + +/* Called with the rtnl lock held. */ +static int virt_wifi_scan(struct wiphy *wiphy, + struct cfg80211_scan_request *request) +{ + struct virt_wifi_wiphy_priv *priv = wiphy_priv(wiphy); + + wiphy_debug(wiphy, "scan\n"); + + if (priv->scan_request || priv->being_deleted) + return -EBUSY; + + priv->scan_request = request; + schedule_delayed_work(&priv->scan_result, HZ * 2); + + return 0; +} + +/* Acquires and releases the rdev BSS lock. */ +static void virt_wifi_scan_result(struct work_struct *work) +{ + struct { + u8 tag; + u8 len; + u8 ssid[8]; + } __packed ssid = { + .tag = WLAN_EID_SSID, .len = 8, .ssid = "VirtWifi", + }; + struct cfg80211_bss *informed_bss; + struct virt_wifi_wiphy_priv *priv = + container_of(work, struct virt_wifi_wiphy_priv, + scan_result.work); + struct wiphy *wiphy = priv_to_wiphy(priv); + struct cfg80211_scan_info scan_info = { .aborted = false }; + + informed_bss = cfg80211_inform_bss(wiphy, &channel_5ghz, + CFG80211_BSS_FTYPE_PRESP, + fake_router_bssid, + ktime_get_boot_ns(), + WLAN_CAPABILITY_ESS, 0, + (void *)&ssid, sizeof(ssid), + DBM_TO_MBM(-50), GFP_KERNEL); + cfg80211_put_bss(wiphy, informed_bss); + + /* Schedules work which acquires and releases the rtnl lock. */ + cfg80211_scan_done(priv->scan_request, &scan_info); + priv->scan_request = NULL; +} + +/* May acquire and release the rdev BSS lock. */ +static void virt_wifi_cancel_scan(struct wiphy *wiphy) +{ + struct virt_wifi_wiphy_priv *priv = wiphy_priv(wiphy); + + cancel_delayed_work_sync(&priv->scan_result); + /* Clean up dangling callbacks if necessary. */ + if (priv->scan_request) { + struct cfg80211_scan_info scan_info = { .aborted = true }; + /* Schedules work which acquires and releases the rtnl lock. */ + cfg80211_scan_done(priv->scan_request, &scan_info); + priv->scan_request = NULL; + } +} + +struct virt_wifi_netdev_priv { + struct delayed_work connect; + struct net_device *lowerdev; + struct net_device *upperdev; + u32 tx_packets; + u32 tx_failed; + u8 connect_requested_bss[ETH_ALEN]; + bool is_up; + bool is_connected; + bool being_deleted; +}; + +/* Called with the rtnl lock held. */ +static int virt_wifi_connect(struct wiphy *wiphy, struct net_device *netdev, + struct cfg80211_connect_params *sme) +{ + struct virt_wifi_netdev_priv *priv = netdev_priv(netdev); + bool could_schedule; + + if (priv->being_deleted || !priv->is_up) + return -EBUSY; + + could_schedule = schedule_delayed_work(&priv->connect, HZ * 2); + if (!could_schedule) + return -EBUSY; + + if (sme->bssid) + ether_addr_copy(priv->connect_requested_bss, sme->bssid); + else + eth_zero_addr(priv->connect_requested_bss); + + wiphy_debug(wiphy, "connect\n"); + + return 0; +} + +/* Acquires and releases the rdev event lock. */ +static void virt_wifi_connect_complete(struct work_struct *work) +{ + struct virt_wifi_netdev_priv *priv = + container_of(work, struct virt_wifi_netdev_priv, connect.work); + u8 *requested_bss = priv->connect_requested_bss; + bool has_addr = !is_zero_ether_addr(requested_bss); + bool right_addr = ether_addr_equal(requested_bss, fake_router_bssid); + u16 status = WLAN_STATUS_SUCCESS; + + if (!priv->is_up || (has_addr && !right_addr)) + status = WLAN_STATUS_UNSPECIFIED_FAILURE; + else + priv->is_connected = true; + + /* Schedules an event that acquires the rtnl lock. */ + cfg80211_connect_result(priv->upperdev, requested_bss, NULL, 0, NULL, 0, + status, GFP_KERNEL); + netif_carrier_on(priv->upperdev); +} + +/* May acquire and release the rdev event lock. */ +static void virt_wifi_cancel_connect(struct net_device *netdev) +{ + struct virt_wifi_netdev_priv *priv = netdev_priv(netdev); + + /* If there is work pending, clean up dangling callbacks. */ + if (cancel_delayed_work_sync(&priv->connect)) { + /* Schedules an event that acquires the rtnl lock. */ + cfg80211_connect_result(priv->upperdev, + priv->connect_requested_bss, NULL, 0, + NULL, 0, + WLAN_STATUS_UNSPECIFIED_FAILURE, + GFP_KERNEL); + } +} + +/* Called with the rtnl lock held. Acquires the rdev event lock. */ +static int virt_wifi_disconnect(struct wiphy *wiphy, struct net_device *netdev, + u16 reason_code) +{ + struct virt_wifi_netdev_priv *priv = netdev_priv(netdev); + + if (priv->being_deleted) + return -EBUSY; + + wiphy_debug(wiphy, "disconnect\n"); + virt_wifi_cancel_connect(netdev); + + cfg80211_disconnected(netdev, reason_code, NULL, 0, true, GFP_KERNEL); + priv->is_connected = false; + netif_carrier_off(netdev); + + return 0; +} + +/* Called with the rtnl lock held. */ +static int virt_wifi_get_station(struct wiphy *wiphy, struct net_device *dev, + const u8 *mac, struct station_info *sinfo) +{ + struct virt_wifi_netdev_priv *priv = netdev_priv(dev); + + wiphy_debug(wiphy, "get_station\n"); + + if (!priv->is_connected || !ether_addr_equal(mac, fake_router_bssid)) + return -ENOENT; + + sinfo->filled = BIT_ULL(NL80211_STA_INFO_TX_PACKETS) | + BIT_ULL(NL80211_STA_INFO_TX_FAILED) | + BIT_ULL(NL80211_STA_INFO_SIGNAL) | + BIT_ULL(NL80211_STA_INFO_TX_BITRATE); + sinfo->tx_packets = priv->tx_packets; + sinfo->tx_failed = priv->tx_failed; + /* For CFG80211_SIGNAL_TYPE_MBM, value is expressed in _dBm_ */ + sinfo->signal = -50; + sinfo->txrate = (struct rate_info) { + .legacy = 10, /* units are 100kbit/s */ + }; + return 0; +} + +/* Called with the rtnl lock held. */ +static int virt_wifi_dump_station(struct wiphy *wiphy, struct net_device *dev, + int idx, u8 *mac, struct station_info *sinfo) +{ + struct virt_wifi_netdev_priv *priv = netdev_priv(dev); + + wiphy_debug(wiphy, "dump_station\n"); + + if (idx != 0 || !priv->is_connected) + return -ENOENT; + + ether_addr_copy(mac, fake_router_bssid); + return virt_wifi_get_station(wiphy, dev, fake_router_bssid, sinfo); +} + +static const struct cfg80211_ops virt_wifi_cfg80211_ops = { + .scan = virt_wifi_scan, + + .connect = virt_wifi_connect, + .disconnect = virt_wifi_disconnect, + + .get_station = virt_wifi_get_station, + .dump_station = virt_wifi_dump_station, +}; + +/* Acquires and releases the rtnl lock. */ +static struct wiphy *virt_wifi_make_wiphy(void) +{ + struct wiphy *wiphy; + struct virt_wifi_wiphy_priv *priv; + int err; + + wiphy = wiphy_new(&virt_wifi_cfg80211_ops, sizeof(*priv)); + + if (!wiphy) + return NULL; + + wiphy->max_scan_ssids = 4; + wiphy->max_scan_ie_len = 1000; + wiphy->signal_type = CFG80211_SIGNAL_TYPE_MBM; + + wiphy->bands[NL80211_BAND_2GHZ] = &band_2ghz; + wiphy->bands[NL80211_BAND_5GHZ] = &band_5ghz; + wiphy->bands[NL80211_BAND_60GHZ] = NULL; + + wiphy->regulatory_flags = REGULATORY_WIPHY_SELF_MANAGED; + wiphy->interface_modes = BIT(NL80211_IFTYPE_STATION); + + priv = wiphy_priv(wiphy); + priv->being_deleted = false; + priv->scan_request = NULL; + INIT_DELAYED_WORK(&priv->scan_result, virt_wifi_scan_result); + + err = wiphy_register(wiphy); + if (err < 0) { + wiphy_free(wiphy); + return NULL; + } + + return wiphy; +} + +/* Acquires and releases the rtnl lock. */ +static void virt_wifi_destroy_wiphy(struct wiphy *wiphy) +{ + struct virt_wifi_wiphy_priv *priv; + + WARN(!wiphy, "%s called with null wiphy", __func__); + if (!wiphy) + return; + + priv = wiphy_priv(wiphy); + priv->being_deleted = true; + virt_wifi_cancel_scan(wiphy); + + if (wiphy->registered) + wiphy_unregister(wiphy); + wiphy_free(wiphy); +} + +/* Enters and exits a RCU-bh critical section. */ +static netdev_tx_t virt_wifi_start_xmit(struct sk_buff *skb, + struct net_device *dev) +{ + struct virt_wifi_netdev_priv *priv = netdev_priv(dev); + + priv->tx_packets++; + if (!priv->is_connected) { + priv->tx_failed++; + return NET_XMIT_DROP; + } + + skb->dev = priv->lowerdev; + return dev_queue_xmit(skb); +} + +/* Called with rtnl lock held. */ +static int virt_wifi_net_device_open(struct net_device *dev) +{ + struct virt_wifi_netdev_priv *priv = netdev_priv(dev); + + priv->is_up = true; + return 0; +} + +/* Called with rtnl lock held. */ +static int virt_wifi_net_device_stop(struct net_device *dev) +{ + struct virt_wifi_netdev_priv *n_priv = netdev_priv(dev); + struct virt_wifi_wiphy_priv *w_priv; + + n_priv->is_up = false; + + if (!dev->ieee80211_ptr) + return 0; + w_priv = wiphy_priv(dev->ieee80211_ptr->wiphy); + + virt_wifi_cancel_scan(dev->ieee80211_ptr->wiphy); + virt_wifi_cancel_connect(dev); + netif_carrier_off(dev); + + return 0; +} + +static const struct net_device_ops virt_wifi_ops = { + .ndo_start_xmit = virt_wifi_start_xmit, + .ndo_open = virt_wifi_net_device_open, + .ndo_stop = virt_wifi_net_device_stop, +}; + +/* Invoked as part of rtnl lock release. */ +static void virt_wifi_net_device_destructor(struct net_device *dev) +{ + /* Delayed past dellink to allow nl80211 to react to the device being + * deleted. + */ + kfree(dev->ieee80211_ptr); + dev->ieee80211_ptr = NULL; + free_netdev(dev); +} + +/* No lock interaction. */ +static void virt_wifi_setup(struct net_device *dev) +{ + ether_setup(dev); + dev->netdev_ops = &virt_wifi_ops; + dev->priv_destructor = virt_wifi_net_device_destructor; +} + +/* Called in a RCU read critical section from netif_receive_skb */ +static rx_handler_result_t virt_wifi_rx_handler(struct sk_buff **pskb) +{ + struct sk_buff *skb = *pskb; + struct virt_wifi_netdev_priv *priv = + rcu_dereference(skb->dev->rx_handler_data); + + if (!priv->is_connected) + return RX_HANDLER_PASS; + + /* GFP_ATOMIC because this is a packet interrupt handler. */ + skb = skb_share_check(skb, GFP_ATOMIC); + if (!skb) { + dev_err(&priv->upperdev->dev, "can't skb_share_check\n"); + return RX_HANDLER_CONSUMED; + } + + *pskb = skb; + skb->dev = priv->upperdev; + skb->pkt_type = PACKET_HOST; + return RX_HANDLER_ANOTHER; +} + +/* Called with rtnl lock held. */ +static int virt_wifi_newlink(struct net *src_net, struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[], + struct netlink_ext_ack *extack) +{ + struct virt_wifi_netdev_priv *priv = netdev_priv(dev); + int err; + + if (!tb[IFLA_LINK]) + return -EINVAL; + + netif_carrier_off(dev); + + priv->upperdev = dev; + priv->lowerdev = __dev_get_by_index(src_net, + nla_get_u32(tb[IFLA_LINK])); + + if (!priv->lowerdev) + return -ENODEV; + if (!tb[IFLA_MTU]) + dev->mtu = priv->lowerdev->mtu; + else if (dev->mtu > priv->lowerdev->mtu) + return -EINVAL; + + err = netdev_rx_handler_register(priv->lowerdev, virt_wifi_rx_handler, + priv); + if (err) { + dev_err(&priv->lowerdev->dev, + "can't netdev_rx_handler_register: %d\n", err); + return err; + } + + eth_hw_addr_inherit(dev, priv->lowerdev); + netif_stacked_transfer_operstate(priv->lowerdev, dev); + + SET_NETDEV_DEV(dev, &priv->lowerdev->dev); + dev->ieee80211_ptr = kzalloc(sizeof(*dev->ieee80211_ptr), GFP_KERNEL); + + if (!dev->ieee80211_ptr) + goto remove_handler; + + dev->ieee80211_ptr->iftype = NL80211_IFTYPE_STATION; + dev->ieee80211_ptr->wiphy = common_wiphy; + + err = register_netdevice(dev); + if (err) { + dev_err(&priv->lowerdev->dev, "can't register_netdevice: %d\n", + err); + goto free_wireless_dev; + } + + err = netdev_upper_dev_link(priv->lowerdev, dev); + if (err) { + dev_err(&priv->lowerdev->dev, "can't netdev_upper_dev_link: %d\n", + err); + goto unregister_netdev; + } + + priv->being_deleted = false; + priv->is_connected = false; + priv->is_up = false; + INIT_DELAYED_WORK(&priv->connect, virt_wifi_connect_complete); + + return 0; +unregister_netdev: + unregister_netdevice(dev); +free_wireless_dev: + kfree(dev->ieee80211_ptr); + dev->ieee80211_ptr = NULL; +remove_handler: + netdev_rx_handler_unregister(priv->lowerdev); + + return err; +} + +/* Called with rtnl lock held. */ +static void virt_wifi_dellink(struct net_device *dev, + struct list_head *head) +{ + struct virt_wifi_netdev_priv *priv = netdev_priv(dev); + + if (dev->ieee80211_ptr) + virt_wifi_cancel_scan(dev->ieee80211_ptr->wiphy); + + priv->being_deleted = true; + virt_wifi_cancel_connect(dev); + netif_carrier_off(dev); + + netdev_rx_handler_unregister(priv->lowerdev); + netdev_upper_dev_unlink(priv->lowerdev, dev); + + unregister_netdevice_queue(dev, head); + + /* Deleting the wiphy is handled in the module destructor. */ +} + +static struct rtnl_link_ops virt_wifi_link_ops = { + .kind = "virt_wifi", + .setup = virt_wifi_setup, + .newlink = virt_wifi_newlink, + .dellink = virt_wifi_dellink, + .priv_size = sizeof(struct virt_wifi_netdev_priv), +}; + +/* Acquires and releases the rtnl lock. */ +static int __init virt_wifi_init_module(void) +{ + int err; + + /* Guaranteed to be locallly-administered and not multicast. */ + eth_random_addr(fake_router_bssid); + + common_wiphy = virt_wifi_make_wiphy(); + if (!common_wiphy) + return -ENOMEM; + + err = rtnl_link_register(&virt_wifi_link_ops); + if (err) + virt_wifi_destroy_wiphy(common_wiphy); + + return err; +} + +/* Acquires and releases the rtnl lock. */ +static void __exit virt_wifi_cleanup_module(void) +{ + /* Will delete any devices that depend on the wiphy. */ + rtnl_link_unregister(&virt_wifi_link_ops); + virt_wifi_destroy_wiphy(common_wiphy); +} + +module_init(virt_wifi_init_module); +module_exit(virt_wifi_cleanup_module); + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Cody Schuffelen "); +MODULE_DESCRIPTION("Driver for a wireless wrapper of ethernet devices"); +MODULE_ALIAS_RTNL_LINK("virt_wifi"); -- GitLab From 977dd3f743b7ef6b470cd3fa3dac0daf4bd358d2 Mon Sep 17 00:00:00 2001 From: Alistair Strachan Date: Fri, 7 Dec 2018 16:40:23 -0800 Subject: [PATCH 2028/2269] ANDROID: cuttlefish_defconfig: Enable VIRT_WIFI Bug: 120439617 Bug: 120682817 Change-Id: Ia1b66528bd9cb1e6e95bd75ac60f393978caa582 Signed-off-by: Alistair Strachan --- arch/arm64/configs/cuttlefish_defconfig | 1 + arch/x86/configs/x86_64_cuttlefish_defconfig | 1 + 2 files changed, 2 insertions(+) diff --git a/arch/arm64/configs/cuttlefish_defconfig b/arch/arm64/configs/cuttlefish_defconfig index 188317af108ec..f22d74dbf8569 100644 --- a/arch/arm64/configs/cuttlefish_defconfig +++ b/arch/arm64/configs/cuttlefish_defconfig @@ -246,6 +246,7 @@ CONFIG_USB_USBNET=y # CONFIG_WLAN_VENDOR_TI is not set # CONFIG_WLAN_VENDOR_ZYDAS is not set # CONFIG_WLAN_VENDOR_QUANTENNA is not set +CONFIG_VIRT_WIFI=y CONFIG_INPUT_EVDEV=y CONFIG_INPUT_KEYRESET=y # CONFIG_INPUT_KEYBOARD is not set diff --git a/arch/x86/configs/x86_64_cuttlefish_defconfig b/arch/x86/configs/x86_64_cuttlefish_defconfig index 1ffe21526f506..b1482404f08b7 100644 --- a/arch/x86/configs/x86_64_cuttlefish_defconfig +++ b/arch/x86/configs/x86_64_cuttlefish_defconfig @@ -258,6 +258,7 @@ CONFIG_USB_USBNET=y # CONFIG_WLAN_VENDOR_ZYDAS is not set # CONFIG_WLAN_VENDOR_QUANTENNA is not set CONFIG_MAC80211_HWSIM=y +CONFIG_VIRT_WIFI=y CONFIG_INPUT_EVDEV=y CONFIG_INPUT_KEYRESET=y # CONFIG_INPUT_KEYBOARD is not set -- GitLab From ebfda8c786be84a99800ff16198afb98d4d868c2 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Thu, 4 Oct 2018 09:58:19 -0700 Subject: [PATCH 2029/2269] FROMGIT: modpost: validate symbol names also in find_elf_symbol If an ARM mapping symbol shares an address with a valid symbol, find_elf_symbol can currently return the mapping symbol instead, as the symbol is not validated. This can result in confusing warnings: WARNING: vmlinux.o(.text+0x18f4028): Section mismatch in reference from the function set_reset_devices() to the variable .init.text:$x.0 This change adds a call to is_valid_name to find_elf_symbol, similarly to how it's already used in find_elf_symbol2. Bug: 117237524 Change-Id: I7bcab44f7c8f350baa699cd966c04cfa5f81ca0a (cherry picked from commit 5818c683a619c534c113e1f66d24f636defc29bc git://git.kernel.org/pub/scm/linux/kernel/git/masahiroy/linux-kbuild.git for-next) Signed-off-by: Sami Tolvanen Signed-off-by: Masahiro Yamada --- scripts/mod/modpost.c | 50 ++++++++++++++++++++++--------------------- 1 file changed, 26 insertions(+), 24 deletions(-) diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 957f6041dd79e..31eff585c6a84 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -1215,6 +1215,30 @@ static int secref_whitelist(const struct sectioncheck *mismatch, return 1; } +static inline int is_arm_mapping_symbol(const char *str) +{ + return str[0] == '$' && strchr("axtd", str[1]) + && (str[2] == '\0' || str[2] == '.'); +} + +/* + * If there's no name there, ignore it; likewise, ignore it if it's + * one of the magic symbols emitted used by current ARM tools. + * + * Otherwise if find_symbols_between() returns those symbols, they'll + * fail the whitelist tests and cause lots of false alarms ... fixable + * only by merging __exit and __init sections into __text, bloating + * the kernel (which is especially evil on embedded platforms). + */ +static inline int is_valid_name(struct elf_info *elf, Elf_Sym *sym) +{ + const char *name = elf->strtab + sym->st_name; + + if (!name || !strlen(name)) + return 0; + return !is_arm_mapping_symbol(name); +} + /** * Find symbol based on relocation record info. * In some cases the symbol supplied is a valid symbol so @@ -1240,6 +1264,8 @@ static Elf_Sym *find_elf_symbol(struct elf_info *elf, Elf64_Sword addr, continue; if (ELF_ST_TYPE(sym->st_info) == STT_SECTION) continue; + if (!is_valid_name(elf, sym)) + continue; if (sym->st_value == addr) return sym; /* Find a symbol nearby - addr are maybe negative */ @@ -1258,30 +1284,6 @@ static Elf_Sym *find_elf_symbol(struct elf_info *elf, Elf64_Sword addr, return NULL; } -static inline int is_arm_mapping_symbol(const char *str) -{ - return str[0] == '$' && strchr("axtd", str[1]) - && (str[2] == '\0' || str[2] == '.'); -} - -/* - * If there's no name there, ignore it; likewise, ignore it if it's - * one of the magic symbols emitted used by current ARM tools. - * - * Otherwise if find_symbols_between() returns those symbols, they'll - * fail the whitelist tests and cause lots of false alarms ... fixable - * only by merging __exit and __init sections into __text, bloating - * the kernel (which is especially evil on embedded platforms). - */ -static inline int is_valid_name(struct elf_info *elf, Elf_Sym *sym) -{ - const char *name = elf->strtab + sym->st_name; - - if (!name || !strlen(name)) - return 0; - return !is_arm_mapping_symbol(name); -} - /* * Find symbols before or equal addr and after addr - in the section sec. * If we find two symbols with equal offset prefer one with a valid name. -- GitLab From e525d2cfbe652f4e287a7f84ce397d4f751d3aed Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Thu, 4 Oct 2018 08:56:16 -0700 Subject: [PATCH 2030/2269] ANDROID: modpost: add an exception for CFI stubs When CONFIG_CFI_CLANG is enabled, LLVM renames all address taken functions by appending a .cfi postfix to their names, and creates function stubs with the original names. The compiler always injects these stubs to the text section, even if the function itself is placed into init or exit sections, which creates modpost warnings. This commit adds a modpost exception for CFI stubs to prevent the warnings. Bug: 117237524 Change-Id: Ieb8bf20d0c3ad7b7295c535f598370220598cdb0 Signed-off-by: Sami Tolvanen --- scripts/mod/modpost.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 31eff585c6a84..e54411826e7ce 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -953,6 +953,7 @@ static const char *const head_sections[] = { ".head.text*", NULL }; static const char *const linker_symbols[] = { "__init_begin", "_sinittext", "_einittext", NULL }; static const char *const optim_symbols[] = { "*.constprop.*", NULL }; +static const char *const cfi_symbols[] = { "*.cfi", NULL }; enum mismatch { TEXT_TO_ANY_INIT, @@ -1174,6 +1175,16 @@ static const struct sectioncheck *section_mismatch( * fromsec = text section * refsymname = *.constprop.* * + * Pattern 6: + * With CONFIG_CFI_CLANG, clang appends .cfi to all indirectly called + * functions and creates a function stub with the original name. This + * stub is always placed in .text, even if the actual function with the + * .cfi postfix is in .init.text or .exit.text. + * This pattern is identified by + * tosec = init or exit section + * fromsec = text section + * tosym = *.cfi + * **/ static int secref_whitelist(const struct sectioncheck *mismatch, const char *fromsec, const char *fromsym, @@ -1212,6 +1223,12 @@ static int secref_whitelist(const struct sectioncheck *mismatch, match(fromsym, optim_symbols)) return 0; + /* Check for pattern 6 */ + if (match(fromsec, text_sections) && + match(tosec, init_exit_sections) && + match(tosym, cfi_symbols)) + return 0; + return 1; } -- GitLab From 7961b1309f8d501d1f7451549324a862412b3b4b Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 12 Dec 2018 19:39:05 +0100 Subject: [PATCH 2031/2269] ANDROID: revert all remaining hisi_thermal.c changes hisi_thermal.c changes do not belong in the android-common tree, as there is a separate tree just for this board. Revert the remaining changes here as it is causing build and merge issues. Reported-by: Todd Kjos Cc: Daniel Lezcano Signed-off-by: Greg Kroah-Hartman --- drivers/thermal/hisi_thermal.c | 468 ++++++++++----------------------- 1 file changed, 137 insertions(+), 331 deletions(-) diff --git a/drivers/thermal/hisi_thermal.c b/drivers/thermal/hisi_thermal.c index 7266ce75838ba..7c5d4647cb5e9 100644 --- a/drivers/thermal/hisi_thermal.c +++ b/drivers/thermal/hisi_thermal.c @@ -23,103 +23,79 @@ #include #include #include -#include #include "thermal_core.h" -#define HI6220_TEMP0_LAG (0x0) -#define HI6220_TEMP0_TH (0x4) -#define HI6220_TEMP0_RST_TH (0x8) -#define HI6220_TEMP0_CFG (0xC) -#define HI6220_TEMP0_CFG_SS_MSK (0xF000) -#define HI6220_TEMP0_CFG_HDAK_MSK (0x30) -#define HI6220_TEMP0_EN (0x10) -#define HI6220_TEMP0_INT_EN (0x14) -#define HI6220_TEMP0_INT_CLR (0x18) -#define HI6220_TEMP0_RST_MSK (0x1C) -#define HI6220_TEMP0_VALUE (0x28) - -#define HI3660_OFFSET(chan) ((chan) * 0x40) -#define HI3660_TEMP(chan) (HI3660_OFFSET(chan) + 0x1C) -#define HI3660_TH(chan) (HI3660_OFFSET(chan) + 0x20) -#define HI3660_LAG(chan) (HI3660_OFFSET(chan) + 0x28) -#define HI3660_INT_EN(chan) (HI3660_OFFSET(chan) + 0x2C) -#define HI3660_INT_CLR(chan) (HI3660_OFFSET(chan) + 0x30) - -#define HI6220_TEMP_BASE (-60000) -#define HI6220_TEMP_RESET (100000) -#define HI6220_TEMP_STEP (785) -#define HI6220_TEMP_LAG (3500) - -#define HI3660_TEMP_BASE (-63780) -#define HI3660_TEMP_STEP (205) -#define HI3660_TEMP_LAG (4000) - -#define HI6220_DEFAULT_SENSOR 2 -#define HI3660_DEFAULT_SENSOR 1 +#define TEMP0_LAG (0x0) +#define TEMP0_TH (0x4) +#define TEMP0_RST_TH (0x8) +#define TEMP0_CFG (0xC) +#define TEMP0_CFG_SS_MSK (0xF000) +#define TEMP0_CFG_HDAK_MSK (0x30) +#define TEMP0_EN (0x10) +#define TEMP0_INT_EN (0x14) +#define TEMP0_INT_CLR (0x18) +#define TEMP0_RST_MSK (0x1C) +#define TEMP0_VALUE (0x28) + +#define HISI_TEMP_BASE (-60000) +#define HISI_TEMP_RESET (100000) +#define HISI_TEMP_STEP (784) +#define HISI_TEMP_LAG (3500) + +#define HISI_MAX_SENSORS 4 +#define HISI_DEFAULT_SENSOR 2 struct hisi_thermal_sensor { + struct hisi_thermal_data *thermal; struct thermal_zone_device *tzd; + + long sensor_temp; uint32_t id; uint32_t thres_temp; }; struct hisi_thermal_data { - int (*get_temp)(struct hisi_thermal_data *data); - int (*enable_sensor)(struct hisi_thermal_data *data); - int (*disable_sensor)(struct hisi_thermal_data *data); - int (*irq_handler)(struct hisi_thermal_data *data); + struct mutex thermal_lock; /* protects register data */ struct platform_device *pdev; struct clk *clk; - struct hisi_thermal_sensor sensor; - void __iomem *regs; + struct hisi_thermal_sensor sensors; int irq; + void __iomem *regs; }; /* * The temperature computation on the tsensor is as follow: * Unit: millidegree Celsius - * Step: 200/255 (0.7843) + * Step: 255/200 (0.7843) * Temperature base: -60°C * - * The register is programmed in temperature steps, every step is 785 + * The register is programmed in temperature steps, every step is 784 * millidegree and begins at -60 000 m°C * * The temperature from the steps: * - * Temp = TempBase + (steps x 785) + * Temp = TempBase + (steps x 784) * * and the steps from the temperature: * - * steps = (Temp - TempBase) / 785 + * steps = (Temp - TempBase) / 784 * */ -static inline int hi6220_thermal_step_to_temp(int step) +static inline int hisi_thermal_step_to_temp(int step) { - return HI6220_TEMP_BASE + (step * HI6220_TEMP_STEP); + return HISI_TEMP_BASE + (step * HISI_TEMP_STEP); } -static inline int hi6220_thermal_temp_to_step(int temp) +static inline long hisi_thermal_temp_to_step(long temp) { - return DIV_ROUND_UP(temp - HI6220_TEMP_BASE, HI6220_TEMP_STEP); + return (temp - HISI_TEMP_BASE) / HISI_TEMP_STEP; } -/* - * for Hi3660, - * Step: 189/922 (0.205) - * Temperature base: -63.780°C - * - * The register is programmed in temperature steps, every step is 205 - * millidegree and begins at -63 780 m°C - */ -static inline int hi3660_thermal_step_to_temp(int step) +static inline long hisi_thermal_round_temp(int temp) { - return HI3660_TEMP_BASE + step * HI3660_TEMP_STEP; -} - -static inline int hi3660_thermal_temp_to_step(int temp) -{ - return DIV_ROUND_UP(temp - HI3660_TEMP_BASE, HI3660_TEMP_STEP); + return hisi_thermal_step_to_temp( + hisi_thermal_temp_to_step(temp)); } /* @@ -146,92 +122,51 @@ static inline int hi3660_thermal_temp_to_step(int temp) * * [0:4] : lag register * - * The temperature is coded in steps, cf. HI6220_TEMP_STEP. + * The temperature is coded in steps, cf. HISI_TEMP_STEP. * * Min : 0x00 : 0.0 °C * Max : 0x1F : 24.3 °C * * The 'value' parameter is in milliCelsius. */ -static inline void hi6220_thermal_set_lag(void __iomem *addr, int value) +static inline void hisi_thermal_set_lag(void __iomem *addr, int value) { - writel(DIV_ROUND_UP(value, HI6220_TEMP_STEP) & 0x1F, - addr + HI6220_TEMP0_LAG); + writel((value / HISI_TEMP_STEP) & 0x1F, addr + TEMP0_LAG); } -static inline void hi6220_thermal_alarm_clear(void __iomem *addr, int value) +static inline void hisi_thermal_alarm_clear(void __iomem *addr, int value) { - writel(value, addr + HI6220_TEMP0_INT_CLR); + writel(value, addr + TEMP0_INT_CLR); } -static inline void hi6220_thermal_alarm_enable(void __iomem *addr, int value) +static inline void hisi_thermal_alarm_enable(void __iomem *addr, int value) { - writel(value, addr + HI6220_TEMP0_INT_EN); + writel(value, addr + TEMP0_INT_EN); } -static inline void hi6220_thermal_alarm_set(void __iomem *addr, int temp) +static inline void hisi_thermal_alarm_set(void __iomem *addr, int temp) { - writel(hi6220_thermal_temp_to_step(temp) | 0x0FFFFFF00, - addr + HI6220_TEMP0_TH); + writel(hisi_thermal_temp_to_step(temp) | 0x0FFFFFF00, addr + TEMP0_TH); } -static inline void hi6220_thermal_reset_set(void __iomem *addr, int temp) +static inline void hisi_thermal_reset_set(void __iomem *addr, int temp) { - writel(hi6220_thermal_temp_to_step(temp), addr + HI6220_TEMP0_RST_TH); + writel(hisi_thermal_temp_to_step(temp), addr + TEMP0_RST_TH); } -static inline void hi6220_thermal_reset_enable(void __iomem *addr, int value) +static inline void hisi_thermal_reset_enable(void __iomem *addr, int value) { - writel(value, addr + HI6220_TEMP0_RST_MSK); + writel(value, addr + TEMP0_RST_MSK); } -static inline void hi6220_thermal_enable(void __iomem *addr, int value) +static inline void hisi_thermal_enable(void __iomem *addr, int value) { - writel(value, addr + HI6220_TEMP0_EN); + writel(value, addr + TEMP0_EN); } -static inline int hi6220_thermal_get_temperature(void __iomem *addr) +static inline int hisi_thermal_get_temperature(void __iomem *addr) { - return hi6220_thermal_step_to_temp(readl(addr + HI6220_TEMP0_VALUE)); -} - -/* - * [0:6] lag register - * - * The temperature is coded in steps, cf. HI3660_TEMP_STEP. - * - * Min : 0x00 : 0.0 °C - * Max : 0x7F : 26.0 °C - * - */ -static inline void hi3660_thermal_set_lag(void __iomem *addr, - int id, int value) -{ - writel(DIV_ROUND_UP(value, HI3660_TEMP_STEP) & 0x7F, - addr + HI3660_LAG(id)); -} - -static inline void hi3660_thermal_alarm_clear(void __iomem *addr, - int id, int value) -{ - writel(value, addr + HI3660_INT_CLR(id)); -} - -static inline void hi3660_thermal_alarm_enable(void __iomem *addr, - int id, int value) -{ - writel(value, addr + HI3660_INT_EN(id)); -} - -static inline void hi3660_thermal_alarm_set(void __iomem *addr, - int id, int value) -{ - writel(value, addr + HI3660_TH(id)); -} - -static inline int hi3660_thermal_get_temperature(void __iomem *addr, int id) -{ - return hi3660_thermal_step_to_temp(readl(addr + HI3660_TEMP(id))); + return hisi_thermal_step_to_temp(readl(addr + TEMP0_VALUE)); } /* @@ -244,10 +179,10 @@ static inline int hi3660_thermal_get_temperature(void __iomem *addr, int id) * 0x2: remote sensor 2 (ACPU cluster 0) * 0x3: remote sensor 3 (G3D) */ -static inline void hi6220_thermal_sensor_select(void __iomem *addr, int sensor) +static inline void hisi_thermal_sensor_select(void __iomem *addr, int sensor) { - writel((readl(addr + HI6220_TEMP0_CFG) & ~HI6220_TEMP0_CFG_SS_MSK) | - (sensor << 12), addr + HI6220_TEMP0_CFG); + writel((readl(addr + TEMP0_CFG) & ~TEMP0_CFG_SS_MSK) | + (sensor << 12), addr + TEMP0_CFG); } /* @@ -260,182 +195,30 @@ static inline void hi6220_thermal_sensor_select(void __iomem *addr, int sensor) * 0x2 : 49.152 ms * 0x3 : 393.216 ms */ -static inline void hi6220_thermal_hdak_set(void __iomem *addr, int value) -{ - writel((readl(addr + HI6220_TEMP0_CFG) & ~HI6220_TEMP0_CFG_HDAK_MSK) | - (value << 4), addr + HI6220_TEMP0_CFG); -} - -static int hi6220_thermal_irq_handler(struct hisi_thermal_data *data) -{ - hi6220_thermal_alarm_clear(data->regs, 1); - return 0; -} - -static int hi3660_thermal_irq_handler(struct hisi_thermal_data *data) -{ - hi3660_thermal_alarm_clear(data->regs, data->sensor.id, 1); - return 0; -} - -static int hi6220_thermal_get_temp(struct hisi_thermal_data *data) +static inline void hisi_thermal_hdak_set(void __iomem *addr, int value) { - return hi6220_thermal_get_temperature(data->regs); + writel((readl(addr + TEMP0_CFG) & ~TEMP0_CFG_HDAK_MSK) | + (value << 4), addr + TEMP0_CFG); } -static int hi3660_thermal_get_temp(struct hisi_thermal_data *data) +static void hisi_thermal_disable_sensor(struct hisi_thermal_data *data) { - return hi3660_thermal_get_temperature(data->regs, data->sensor.id); -} + mutex_lock(&data->thermal_lock); -static int hi6220_thermal_disable_sensor(struct hisi_thermal_data *data) -{ /* disable sensor module */ - hi6220_thermal_enable(data->regs, 0); - hi6220_thermal_alarm_enable(data->regs, 0); - hi6220_thermal_reset_enable(data->regs, 0); - - clk_disable_unprepare(data->clk); - - return 0; -} - -static int hi3660_thermal_disable_sensor(struct hisi_thermal_data *data) -{ - /* disable sensor module */ - hi3660_thermal_alarm_enable(data->regs, data->sensor.id, 0); - return 0; -} - -static int hi6220_thermal_enable_sensor(struct hisi_thermal_data *data) -{ - struct hisi_thermal_sensor *sensor = &data->sensor; - int ret; - - /* enable clock for tsensor */ - ret = clk_prepare_enable(data->clk); - if (ret) - return ret; - - /* disable module firstly */ - hi6220_thermal_reset_enable(data->regs, 0); - hi6220_thermal_enable(data->regs, 0); - - /* select sensor id */ - hi6220_thermal_sensor_select(data->regs, sensor->id); - - /* setting the hdak time */ - hi6220_thermal_hdak_set(data->regs, 0); - - /* setting lag value between current temp and the threshold */ - hi6220_thermal_set_lag(data->regs, HI6220_TEMP_LAG); - - /* enable for interrupt */ - hi6220_thermal_alarm_set(data->regs, sensor->thres_temp); - - hi6220_thermal_reset_set(data->regs, HI6220_TEMP_RESET); - - /* enable module */ - hi6220_thermal_reset_enable(data->regs, 1); - hi6220_thermal_enable(data->regs, 1); - - hi6220_thermal_alarm_clear(data->regs, 0); - hi6220_thermal_alarm_enable(data->regs, 1); - - return 0; -} - -static int hi3660_thermal_enable_sensor(struct hisi_thermal_data *data) -{ - unsigned int value; - struct hisi_thermal_sensor *sensor = &data->sensor; - - /* disable interrupt */ - hi3660_thermal_alarm_enable(data->regs, sensor->id, 0); - - /* setting lag value between current temp and the threshold */ - hi3660_thermal_set_lag(data->regs, sensor->id, HI3660_TEMP_LAG); - - /* set interrupt threshold */ - value = hi3660_thermal_temp_to_step(sensor->thres_temp); - hi3660_thermal_alarm_set(data->regs, sensor->id, value); - - /* enable interrupt */ - hi3660_thermal_alarm_clear(data->regs, sensor->id, 1); - hi3660_thermal_alarm_enable(data->regs, sensor->id, 1); - - return 0; -} - -static int hi6220_thermal_probe(struct hisi_thermal_data *data) -{ - struct platform_device *pdev = data->pdev; - struct device *dev = &pdev->dev; - struct resource *res; - int ret; - - data->get_temp = hi6220_thermal_get_temp; - data->enable_sensor = hi6220_thermal_enable_sensor; - data->disable_sensor = hi6220_thermal_disable_sensor; - data->irq_handler = hi6220_thermal_irq_handler; - - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - data->regs = devm_ioremap_resource(dev, res); - if (IS_ERR(data->regs)) { - dev_err(dev, "failed to get io address\n"); - return PTR_ERR(data->regs); - } - - data->clk = devm_clk_get(dev, "thermal_clk"); - if (IS_ERR(data->clk)) { - ret = PTR_ERR(data->clk); - if (ret != -EPROBE_DEFER) - dev_err(dev, "failed to get thermal clk: %d\n", ret); - return ret; - } - - data->irq = platform_get_irq(pdev, 0); - if (data->irq < 0) - return data->irq; - - data->sensor.id = HI6220_DEFAULT_SENSOR; - - return 0; -} - -static int hi3660_thermal_probe(struct hisi_thermal_data *data) -{ - struct platform_device *pdev = data->pdev; - struct device *dev = &pdev->dev; - struct resource *res; - - data->get_temp = hi3660_thermal_get_temp; - data->enable_sensor = hi3660_thermal_enable_sensor; - data->disable_sensor = hi3660_thermal_disable_sensor; - data->irq_handler = hi3660_thermal_irq_handler; - - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - data->regs = devm_ioremap_resource(dev, res); - if (IS_ERR(data->regs)) { - dev_err(dev, "failed to get io address\n"); - return PTR_ERR(data->regs); - } - - data->irq = platform_get_irq(pdev, 0); - if (data->irq < 0) - return data->irq; - - data->sensor.id = HI3660_DEFAULT_SENSOR; + hisi_thermal_enable(data->regs, 0); + hisi_thermal_alarm_enable(data->regs, 0); + hisi_thermal_reset_enable(data->regs, 0); - return 0; + mutex_unlock(&data->thermal_lock); } -static int hisi_thermal_get_temp(void *__data, int *temp) +static int hisi_thermal_get_temp(void *_sensor, int *temp) { - struct hisi_thermal_data *data = __data; - struct hisi_thermal_sensor *sensor = &data->sensor; + struct hisi_thermal_sensor *sensor = _sensor; + struct hisi_thermal_data *data = sensor->thermal; - *temp = data->get_temp(data); + *temp = hisi_thermal_get_temperature(data->regs); dev_dbg(&data->pdev->dev, "id=%d, temp=%d, thres=%d\n", sensor->id, *temp, sensor->thres_temp); @@ -450,21 +233,21 @@ static const struct thermal_zone_of_device_ops hisi_of_thermal_ops = { static irqreturn_t hisi_thermal_alarm_irq_thread(int irq, void *dev) { struct hisi_thermal_data *data = dev; - struct hisi_thermal_sensor *sensor = &data->sensor; - int temp = 0; + struct hisi_thermal_sensor *sensor = &data->sensors; + int temp; - data->irq_handler(data); + hisi_thermal_alarm_clear(data->regs, 1); - hisi_thermal_get_temp(data, &temp); + temp = hisi_thermal_get_temperature(data->regs); if (temp >= sensor->thres_temp) { dev_crit(&data->pdev->dev, "THERMAL ALARM: %d > %d\n", temp, sensor->thres_temp); - thermal_zone_device_update(data->sensor.tzd, + thermal_zone_device_update(data->sensors.tzd, THERMAL_EVENT_UNSPECIFIED); - } else { + } else if (temp < sensor->thres_temp) { dev_crit(&data->pdev->dev, "THERMAL ALARM stopped: %d < %d\n", temp, sensor->thres_temp); } @@ -474,14 +257,17 @@ static irqreturn_t hisi_thermal_alarm_irq_thread(int irq, void *dev) static int hisi_thermal_register_sensor(struct platform_device *pdev, struct hisi_thermal_data *data, - struct hisi_thermal_sensor *sensor) + struct hisi_thermal_sensor *sensor, + int index) { int ret, i; const struct thermal_trip *trip; + sensor->id = index; + sensor->thermal = data; + sensor->tzd = devm_thermal_zone_of_sensor_register(&pdev->dev, - sensor->id, data, - &hisi_of_thermal_ops); + sensor->id, sensor, &hisi_of_thermal_ops); if (IS_ERR(sensor->tzd)) { ret = PTR_ERR(sensor->tzd); sensor->tzd = NULL; @@ -494,7 +280,7 @@ static int hisi_thermal_register_sensor(struct platform_device *pdev, for (i = 0; i < of_thermal_get_ntrips(sensor->tzd); i++) { if (trip[i].type == THERMAL_TRIP_PASSIVE) { - sensor->thres_temp = trip[i].temperature; + sensor->thres_temp = hisi_thermal_round_temp(trip[i].temperature); break; } } @@ -503,14 +289,7 @@ static int hisi_thermal_register_sensor(struct platform_device *pdev, } static const struct of_device_id of_hisi_thermal_match[] = { - { - .compatible = "hisilicon,tsensor", - .data = hi6220_thermal_probe - }, - { - .compatible = "hisilicon,hi3660-tsensor", - .data = hi3660_thermal_probe - }, + { .compatible = "hisilicon,tsensor" }, { /* end */ } }; MODULE_DEVICE_TABLE(of, of_hisi_thermal_match); @@ -561,51 +340,69 @@ static int hisi_thermal_setup(struct hisi_thermal_data *data) static int hisi_thermal_probe(struct platform_device *pdev) { struct hisi_thermal_data *data; - int const (*platform_probe)(struct hisi_thermal_data *); - struct device *dev = &pdev->dev; + struct resource *res; int ret; - data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL); + data = devm_kzalloc(&pdev->dev, sizeof(*data), GFP_KERNEL); if (!data) return -ENOMEM; + mutex_init(&data->thermal_lock); data->pdev = pdev; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + data->regs = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(data->regs)) { + dev_err(&pdev->dev, "failed to get io address\n"); + return PTR_ERR(data->regs); + } + + data->irq = platform_get_irq(pdev, 0); + if (data->irq < 0) + return data->irq; + platform_set_drvdata(pdev, data); - platform_probe = of_device_get_match_data(dev); - if (!platform_probe) { - dev_err(dev, "failed to get probe func\n"); - return -EINVAL; + data->clk = devm_clk_get(&pdev->dev, "thermal_clk"); + if (IS_ERR(data->clk)) { + ret = PTR_ERR(data->clk); + if (ret != -EPROBE_DEFER) + dev_err(&pdev->dev, + "failed to get thermal clk: %d\n", ret); + return ret; } - ret = platform_probe(data); - if (ret) + /* enable clock for thermal */ + ret = clk_prepare_enable(data->clk); + if (ret) { + dev_err(&pdev->dev, "failed to enable thermal clk: %d\n", ret); return ret; + } ret = hisi_thermal_register_sensor(pdev, data, - &data->sensor); + &data->sensors, + HISI_DEFAULT_SENSOR); if (ret) { - dev_err(dev, "failed to register thermal sensor: %d\n", ret); + dev_err(&pdev->dev, "failed to register thermal sensor: %d\n", + ret); return ret; } - ret = data->enable_sensor(data); + ret = hisi_thermal_setup(data); if (ret) { - dev_err(dev, "Failed to setup the sensor: %d\n", ret); + dev_err(&pdev->dev, "Failed to setup the sensor: %d\n", ret); return ret; } - if (data->irq) { - ret = devm_request_threaded_irq(dev, data->irq, NULL, - hisi_thermal_alarm_irq_thread, - IRQF_ONESHOT, "hisi_thermal", data); - if (ret < 0) { - dev_err(dev, "failed to request alarm irq: %d\n", ret); - return ret; - } + ret = devm_request_threaded_irq(&pdev->dev, data->irq, NULL, + hisi_thermal_alarm_irq_thread, + IRQF_ONESHOT, "hisi_thermal", data); + if (ret < 0) { + dev_err(&pdev->dev, "failed to request alarm irq: %d\n", ret); + return ret; } - hisi_thermal_toggle_sensor(&data->sensor, true); + hisi_thermal_toggle_sensor(&data->sensors, true); return 0; } @@ -613,11 +410,11 @@ static int hisi_thermal_probe(struct platform_device *pdev) static int hisi_thermal_remove(struct platform_device *pdev) { struct hisi_thermal_data *data = platform_get_drvdata(pdev); - struct hisi_thermal_sensor *sensor = &data->sensor; + struct hisi_thermal_sensor *sensor = &data->sensors; hisi_thermal_toggle_sensor(sensor, false); - - data->disable_sensor(data); + hisi_thermal_disable_sensor(data); + clk_disable_unprepare(data->clk); return 0; } @@ -627,7 +424,9 @@ static int hisi_thermal_suspend(struct device *dev) { struct hisi_thermal_data *data = dev_get_drvdata(dev); - data->disable_sensor(data); + hisi_thermal_disable_sensor(data); + + clk_disable_unprepare(data->clk); return 0; } @@ -635,8 +434,15 @@ static int hisi_thermal_suspend(struct device *dev) static int hisi_thermal_resume(struct device *dev) { struct hisi_thermal_data *data = dev_get_drvdata(dev); + int ret; + + ret = clk_prepare_enable(data->clk); + if (ret) + return ret; - return data->enable_sensor(data); + hisi_thermal_setup(data); + + return 0; } #endif -- GitLab From bcdfccfd8dee5f024b618a3ec4df17fab1296658 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Tue, 9 Oct 2018 07:49:49 -0400 Subject: [PATCH 2032/2269] media: omap3isp: Unregister media device as first [ Upstream commit 30efae3d789cd0714ef795545a46749236e29558 ] While there are issues related to object lifetime management, unregister the media device first when the driver is being unbound. This is slightly safer. Signed-off-by: Sakari Ailus Reviewed-by: Laurent Pinchart Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/platform/omap3isp/isp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/media/platform/omap3isp/isp.c b/drivers/media/platform/omap3isp/isp.c index 6e6e978263b02..c834fea5f9b00 100644 --- a/drivers/media/platform/omap3isp/isp.c +++ b/drivers/media/platform/omap3isp/isp.c @@ -1592,6 +1592,8 @@ static void isp_pm_complete(struct device *dev) static void isp_unregister_entities(struct isp_device *isp) { + media_device_unregister(&isp->media_dev); + omap3isp_csi2_unregister_entities(&isp->isp_csi2a); omap3isp_ccp2_unregister_entities(&isp->isp_ccp2); omap3isp_ccdc_unregister_entities(&isp->isp_ccdc); @@ -1602,7 +1604,6 @@ static void isp_unregister_entities(struct isp_device *isp) omap3isp_stat_unregister_entities(&isp->isp_hist); v4l2_device_unregister(&isp->v4l2_dev); - media_device_unregister(&isp->media_dev); media_device_cleanup(&isp->media_dev); } -- GitLab From df11500b9dbf1bb3dd752f11be62a1cad22ca16d Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 5 Nov 2018 10:18:58 +0800 Subject: [PATCH 2033/2269] iommu/vt-d: Fix NULL pointer dereference in prq_event_thread() [ Upstream commit 19ed3e2dd8549c1a34914e8dad01b64e7837645a ] When handling page request without pasid event, go to "no_pasid" branch instead of "bad_req". Otherwise, a NULL pointer deference will happen there. Cc: Ashok Raj Cc: Jacob Pan Cc: Sohil Mehta Signed-off-by: Lu Baolu Fixes: a222a7f0bb6c9 'iommu/vt-d: Implement page request handling' Signed-off-by: Joerg Roedel Signed-off-by: Sasha Levin --- drivers/iommu/intel-svm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c index d7def26ccf79c..f5573bb9f450e 100644 --- a/drivers/iommu/intel-svm.c +++ b/drivers/iommu/intel-svm.c @@ -589,7 +589,7 @@ static irqreturn_t prq_event_thread(int irq, void *d) pr_err("%s: Page request without PASID: %08llx %08llx\n", iommu->name, ((unsigned long long *)req)[0], ((unsigned long long *)req)[1]); - goto bad_req; + goto no_pasid; } if (!svm || svm->pasid != req->pasid) { -- GitLab From 8fa55f1d1859a6b0d2e6c1606b6d8fa1a9ef63d2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Fri, 26 Oct 2018 12:50:39 +0200 Subject: [PATCH 2034/2269] brcmutil: really fix decoding channel info for 160 MHz bandwidth MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 3401d42c7ea2d064d15c66698ff8eb96553179ce ] Previous commit /adding/ support for 160 MHz chanspecs was incomplete. It didn't set bandwidth info and didn't extract control channel info. As the result it was also using uninitialized "sb" var. This change has been tested for two chanspecs found to be reported by some devices/firmwares: 1) 60/160 (0xee32) Before: chnum:50 control_ch_num:36 After: chnum:50 control_ch_num:60 2) 120/160 (0xed72) Before: chnum:114 control_ch_num:100 After: chnum:114 control_ch_num:120 Fixes: 330994e8e8ec ("brcmfmac: fix for proper support of 160MHz bandwidth") Signed-off-by: Rafał Miłecki Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin --- drivers/net/wireless/broadcom/brcm80211/brcmutil/d11.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmutil/d11.c b/drivers/net/wireless/broadcom/brcm80211/brcmutil/d11.c index e7584b842dce4..eb5db94f57453 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmutil/d11.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmutil/d11.c @@ -193,6 +193,9 @@ static void brcmu_d11ac_decchspec(struct brcmu_chan *ch) } break; case BRCMU_CHSPEC_D11AC_BW_160: + ch->bw = BRCMU_CHAN_BW_160; + ch->sb = brcmu_maskget16(ch->chspec, BRCMU_CHSPEC_D11AC_SB_MASK, + BRCMU_CHSPEC_D11AC_SB_SHIFT); switch (ch->sb) { case BRCMU_CHAN_SB_LLL: ch->control_ch_num -= CH_70MHZ_APART; -- GitLab From b65fa7b581f8a50ef147055006a7aa28ddb1086d Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 7 Nov 2018 14:18:50 +0100 Subject: [PATCH 2035/2269] iommu/ipmmu-vmsa: Fix crash on early domain free [ Upstream commit e5b78f2e349eef5d4fca5dc1cf5a3b4b2cc27abd ] If iommu_ops.add_device() fails, iommu_ops.domain_free() is still called, leading to a crash, as the domain was only partially initialized: ipmmu-vmsa e67b0000.mmu: Cannot accommodate DMA translation for IOMMU page tables sata_rcar ee300000.sata: Unable to initialize IPMMU context iommu: Failed to add device ee300000.sata to group 0: -22 Unable to handle kernel NULL pointer dereference at virtual address 0000000000000038 ... Call trace: ipmmu_domain_free+0x1c/0xa0 iommu_group_release+0x48/0x68 kobject_put+0x74/0xe8 kobject_del.part.0+0x3c/0x50 kobject_put+0x60/0xe8 iommu_group_get_for_dev+0xa8/0x1f0 ipmmu_add_device+0x1c/0x40 of_iommu_configure+0x118/0x190 Fix this by checking if the domain's context already exists, before trying to destroy it. Signed-off-by: Geert Uytterhoeven Reviewed-by: Robin Murphy Fixes: d25a2a16f0889 ('iommu: Add driver for Renesas VMSA-compatible IPMMU') Signed-off-by: Joerg Roedel Signed-off-by: Sasha Levin --- drivers/iommu/ipmmu-vmsa.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index 5d0ba5f644c48..777aff1f549fb 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -424,6 +424,9 @@ static int ipmmu_domain_init_context(struct ipmmu_vmsa_domain *domain) static void ipmmu_domain_destroy_context(struct ipmmu_vmsa_domain *domain) { + if (!domain->mmu) + return; + /* * Disable the context. Flush the TLB as required when modifying the * context registers. -- GitLab From c6b578b1b4a7c56b469e16e6afd3ec478e1872c3 Mon Sep 17 00:00:00 2001 From: Fabrizio Castro Date: Mon, 10 Sep 2018 11:43:13 +0100 Subject: [PATCH 2036/2269] can: rcar_can: Fix erroneous registration [ Upstream commit 68c8d209cd4337da4fa04c672f0b62bb735969bc ] Assigning 2 to "renesas,can-clock-select" tricks the driver into registering the CAN interface, even though we don't want that. This patch improves one of the checks to prevent that from happening. Fixes: 862e2b6af9413b43 ("can: rcar_can: support all input clocks") Signed-off-by: Fabrizio Castro Signed-off-by: Chris Paterson Reviewed-by: Simon Horman Signed-off-by: Marc Kleine-Budde Signed-off-by: Sasha Levin --- drivers/net/can/rcar/rcar_can.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/can/rcar/rcar_can.c b/drivers/net/can/rcar/rcar_can.c index 11662f479e760..771a460837397 100644 --- a/drivers/net/can/rcar/rcar_can.c +++ b/drivers/net/can/rcar/rcar_can.c @@ -24,6 +24,9 @@ #define RCAR_CAN_DRV_NAME "rcar_can" +#define RCAR_SUPPORTED_CLOCKS (BIT(CLKR_CLKP1) | BIT(CLKR_CLKP2) | \ + BIT(CLKR_CLKEXT)) + /* Mailbox configuration: * mailbox 60 - 63 - Rx FIFO mailboxes * mailbox 56 - 59 - Tx FIFO mailboxes @@ -789,7 +792,7 @@ static int rcar_can_probe(struct platform_device *pdev) goto fail_clk; } - if (clock_select >= ARRAY_SIZE(clock_names)) { + if (!(BIT(clock_select) & RCAR_SUPPORTED_CLOCKS)) { err = -EINVAL; dev_err(&pdev->dev, "invalid CAN clock selected\n"); goto fail_clk; -- GitLab From 58e0bc43507120ce1185268f588b191c2f5f4df4 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 19 Oct 2018 13:58:01 +0100 Subject: [PATCH 2037/2269] test_firmware: fix error return getting clobbered [ Upstream commit 8bb0a88600f0267cfcc245d34f8c4abe8c282713 ] In the case where eq->fw->size > PAGE_SIZE the error return rc is being set to EINVAL however this is being overwritten to rc = req->fw->size because the error exit path via label 'out' is not being taken. Fix this by adding the jump to the error exit path 'out'. Detected by CoverityScan, CID#1453465 ("Unused value") Fixes: c92316bf8e94 ("test_firmware: add batched firmware tests") Signed-off-by: Colin Ian King Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- lib/test_firmware.c | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/test_firmware.c b/lib/test_firmware.c index e7008688769bb..71d371f971385 100644 --- a/lib/test_firmware.c +++ b/lib/test_firmware.c @@ -838,6 +838,7 @@ static ssize_t read_firmware_show(struct device *dev, if (req->fw->size > PAGE_SIZE) { pr_err("Testing interface must use PAGE_SIZE firmware for now\n"); rc = -EINVAL; + goto out; } memcpy(buf, req->fw->data, req->fw->size); -- GitLab From a1270af9b1a0d3ba66ac3425771767e677d2fd0d Mon Sep 17 00:00:00 2001 From: Benson Leung Date: Thu, 8 Nov 2018 15:59:21 -0800 Subject: [PATCH 2038/2269] HID: input: Ignore battery reported by Symbol DS4308 [ Upstream commit 0fd791841a6d67af1155a9c3de54dea51220721e ] The Motorola/Zebra Symbol DS4308-HD is a handheld USB barcode scanner which does not have a battery, but reports one anyway that always has capacity 2. Let's apply the IGNORE quirk to prevent it from being treated like a power supply so that userspaces don't get confused that this accessory is almost out of power and warn the user that they need to charge their wired barcode scanner. Reported here: https://bugs.chromium.org/p/chromium/issues/detail?id=804720 Signed-off-by: Benson Leung Reviewed-by: Benjamin Tissoires Signed-off-by: Benjamin Tissoires Signed-off-by: Sasha Levin --- drivers/hid/hid-ids.h | 1 + drivers/hid/hid-input.c | 3 +++ 2 files changed, 4 insertions(+) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 3fc8c0d675928..87904d2adadb5 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -1001,6 +1001,7 @@ #define USB_VENDOR_ID_SYMBOL 0x05e0 #define USB_DEVICE_ID_SYMBOL_SCANNER_1 0x0800 #define USB_DEVICE_ID_SYMBOL_SCANNER_2 0x1300 +#define USB_DEVICE_ID_SYMBOL_SCANNER_3 0x1200 #define USB_VENDOR_ID_SYNAPTICS 0x06cb #define USB_DEVICE_ID_SYNAPTICS_TP 0x0001 diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c index bb984cc9753be..d146a9b545eed 100644 --- a/drivers/hid/hid-input.c +++ b/drivers/hid/hid-input.c @@ -325,6 +325,9 @@ static const struct hid_device_id hid_battery_quirks[] = { { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_BM084), HID_BATTERY_QUIRK_IGNORE }, + { HID_USB_DEVICE(USB_VENDOR_ID_SYMBOL, + USB_DEVICE_ID_SYMBOL_SCANNER_3), + HID_BATTERY_QUIRK_IGNORE }, {} }; -- GitLab From 94f748fd4ed3ac4c90990413681e703444c26225 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Tue, 30 Oct 2018 12:17:10 +0100 Subject: [PATCH 2039/2269] batman-adv: Use explicit tvlv padding for ELP packets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit f4156f9656feac21f4de712fac94fae964c5d402 ] The announcement messages of batman-adv COMPAT_VERSION 15 have the possibility to announce additional information via a dynamic TVLV part. This part is optional for the ELP packets and currently not parsed by the Linux implementation. Still out-of-tree versions are using it to transport things like neighbor hashes to optimize the rebroadcast behavior. Since the ELP broadcast packets are smaller than the minimal ethernet packet, it often has to be padded. This is often done (as specified in RFC894) with octets of zero and thus work perfectly fine with the TVLV part (making it a zero length and thus empty). But not all ethernet compatible hardware seems to follow this advice. To avoid ambiguous situations when parsing the TVLV header, just force the 4 bytes (TVLV length + padding) after the required ELP header to zero. Fixes: d6f94d91f766 ("batman-adv: ELP - adding basic infrastructure") Reported-by: Linus Lüssing Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich Signed-off-by: Sasha Levin --- net/batman-adv/bat_v_elp.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c index e92dfedccc165..fbc132f4670e0 100644 --- a/net/batman-adv/bat_v_elp.c +++ b/net/batman-adv/bat_v_elp.c @@ -338,19 +338,21 @@ out: */ int batadv_v_elp_iface_enable(struct batadv_hard_iface *hard_iface) { + static const size_t tvlv_padding = sizeof(__be32); struct batadv_elp_packet *elp_packet; unsigned char *elp_buff; u32 random_seqno; size_t size; int res = -ENOMEM; - size = ETH_HLEN + NET_IP_ALIGN + BATADV_ELP_HLEN; + size = ETH_HLEN + NET_IP_ALIGN + BATADV_ELP_HLEN + tvlv_padding; hard_iface->bat_v.elp_skb = dev_alloc_skb(size); if (!hard_iface->bat_v.elp_skb) goto out; skb_reserve(hard_iface->bat_v.elp_skb, ETH_HLEN + NET_IP_ALIGN); - elp_buff = skb_put_zero(hard_iface->bat_v.elp_skb, BATADV_ELP_HLEN); + elp_buff = skb_put_zero(hard_iface->bat_v.elp_skb, + BATADV_ELP_HLEN + tvlv_padding); elp_packet = (struct batadv_elp_packet *)elp_buff; elp_packet->packet_type = BATADV_ELP; -- GitLab From 2761d3237145412c1405b204f152b796bf9df4a2 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Wed, 7 Nov 2018 23:09:12 +0100 Subject: [PATCH 2040/2269] batman-adv: Expand merged fragment buffer for full packet MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit d7d8bbb40a5b1f682ee6589e212934f4c6b8ad60 ] The complete size ("total_size") of the fragmented packet is stored in the fragment header and in the size of the fragment chain. When the fragments are ready for merge, the skbuff's tail of the first fragment is expanded to have enough room after the data pointer for at least total_size. This means that it gets expanded by total_size - first_skb->len. But this is ignoring the fact that after expanding the buffer, the fragment header is pulled by from this buffer. Assuming that the tailroom of the buffer was already 0, the buffer after the data pointer of the skbuff is now only total_size - len(fragment_header) large. When the merge function is then processing the remaining fragments, the code to copy the data over to the merged skbuff will cause an skb_over_panic when it tries to actually put enough data to fill the total_size bytes of the packet. The size of the skb_pull must therefore also be taken into account when the buffer's tailroom is expanded. Fixes: 610bfc6bc99b ("batman-adv: Receive fragmented packets and merge") Reported-by: Martin Weinelt Co-authored-by: Linus Lüssing Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich Signed-off-by: Sasha Levin --- net/batman-adv/fragmentation.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c index b6abd19ab23ec..c6d37d22bd125 100644 --- a/net/batman-adv/fragmentation.c +++ b/net/batman-adv/fragmentation.c @@ -274,7 +274,7 @@ batadv_frag_merge_packets(struct hlist_head *chain) kfree(entry); packet = (struct batadv_frag_packet *)skb_out->data; - size = ntohs(packet->total_size); + size = ntohs(packet->total_size) + hdr_size; /* Make room for the rest of the fragments. */ if (pskb_expand_head(skb_out, 0, size - skb_out->len, GFP_ATOMIC) < 0) { -- GitLab From 41cb057690134a335b689be7e8db6d46db54a8ed Mon Sep 17 00:00:00 2001 From: Filippo Sironi Date: Mon, 12 Nov 2018 12:26:30 +0000 Subject: [PATCH 2041/2269] amd/iommu: Fix Guest Virtual APIC Log Tail Address Register [ Upstream commit ab99be4683d9db33b100497d463274ebd23bd67e ] This register should have been programmed with the physical address of the memory location containing the shadow tail pointer for the guest virtual APIC log instead of the base address. Fixes: 8bda0cfbdc1a ('iommu/amd: Detect and initialize guest vAPIC log') Signed-off-by: Filippo Sironi Signed-off-by: Wei Wang Signed-off-by: Suravee Suthikulpanit Signed-off-by: Joerg Roedel Signed-off-by: Sasha Levin --- drivers/iommu/amd_iommu_init.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index 6fe2d03460730..b97984a5ddad2 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -796,7 +796,8 @@ static int iommu_init_ga_log(struct amd_iommu *iommu) entry = iommu_virt_to_phys(iommu->ga_log) | GA_LOG_SIZE_512; memcpy_toio(iommu->mmio_base + MMIO_GA_LOG_BASE_OFFSET, &entry, sizeof(entry)); - entry = (iommu_virt_to_phys(iommu->ga_log) & 0xFFFFFFFFFFFFFULL) & ~7ULL; + entry = (iommu_virt_to_phys(iommu->ga_log_tail) & + (BIT_ULL(52)-1)) & ~7ULL; memcpy_toio(iommu->mmio_base + MMIO_GA_LOG_TAIL_OFFSET, &entry, sizeof(entry)); writel(0x00, iommu->mmio_base + MMIO_GA_HEAD_OFFSET); -- GitLab From fd358f42a60d85798b6d4715cd936b438fa2cc3e Mon Sep 17 00:00:00 2001 From: Sudarsana Reddy Kalluru Date: Sun, 11 Nov 2018 18:27:34 -0800 Subject: [PATCH 2042/2269] bnx2x: Assign unique DMAE channel number for FW DMAE transactions. [ Upstream commit 77e461d14ed141253573eeeb4d34eccc51e38328 ] Driver assigns DMAE channel 0 for FW as part of START_RAMROD command. FW uses this channel for DMAE operations (e.g., TIME_SYNC implementation). Driver also uses the same channel 0 for DMAE operations for some of the PFs (e.g., PF0 on Port0). This could lead to concurrent access to the DMAE channel by FW and driver which is not legal. Hence need to assign unique DMAE id for FW. Currently following DMAE channels are used by the clients, MFW - OCBB/OCSD functionality uses DMAE channel 14/15 Driver 0-3 and 8-11 (for PF dmae operations) 4 and 12 (for stats requests) Assigning unique dmae_id '13' to the FW. Changes from previous version: ------------------------------ v2: Incorporated the review comments. Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Michal Kalderon Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/broadcom/bnx2x/bnx2x.h | 7 +++++++ drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c | 1 + 2 files changed, 8 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h index 828e2e56b75e7..1b7f4342dab97 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h @@ -2187,6 +2187,13 @@ void bnx2x_igu_clear_sb_gen(struct bnx2x *bp, u8 func, u8 idu_sb_id, #define PMF_DMAE_C(bp) (BP_PORT(bp) * MAX_DMAE_C_PER_PORT + \ E1HVN_MAX) +/* Following is the DMAE channel number allocation for the clients. + * MFW: OCBB/OCSD implementations use DMAE channels 14/15 respectively. + * Driver: 0-3 and 8-11 (for PF dmae operations) + * 4 and 12 (for stats requests) + */ +#define BNX2X_FW_DMAE_C 13 /* Channel for FW DMAE operations */ + /* PCIE link and speed */ #define PCICFG_LINK_WIDTH 0x1f00000 #define PCICFG_LINK_WIDTH_SHIFT 20 diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c index 8baf9d3eb4b1c..453bfd83a070b 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c @@ -6149,6 +6149,7 @@ static inline int bnx2x_func_send_start(struct bnx2x *bp, rdata->sd_vlan_tag = cpu_to_le16(start_params->sd_vlan_tag); rdata->path_id = BP_PATH(bp); rdata->network_cos_mode = start_params->network_cos_mode; + rdata->dmae_cmd_id = BNX2X_FW_DMAE_C; rdata->vxlan_dst_port = cpu_to_le16(start_params->vxlan_dst_port); rdata->geneve_dst_port = cpu_to_le16(start_params->geneve_dst_port); -- GitLab From 0f1c847db352a87f9974aa1869ca3e6f26b621f7 Mon Sep 17 00:00:00 2001 From: Denis Bolotin Date: Mon, 12 Nov 2018 12:50:20 +0200 Subject: [PATCH 2043/2269] qed: Fix PTT leak in qed_drain() [ Upstream commit 9aaa4e8ba12972d674caeefbc5f88d83235dd697 ] Release PTT before entering error flow. Signed-off-by: Denis Bolotin Signed-off-by: Michal Kalderon Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/qlogic/qed/qed_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c index 954f7ce4cf285..ecc2d42965260 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_main.c +++ b/drivers/net/ethernet/qlogic/qed/qed_main.c @@ -1561,9 +1561,9 @@ static int qed_drain(struct qed_dev *cdev) return -EBUSY; } rc = qed_mcp_drain(hwfn, ptt); + qed_ptt_release(hwfn, ptt); if (rc) return rc; - qed_ptt_release(hwfn, ptt); } return 0; -- GitLab From 93663e617b8fdcaceb6f407af6e494df2d5c7067 Mon Sep 17 00:00:00 2001 From: Denis Bolotin Date: Mon, 12 Nov 2018 12:50:23 +0200 Subject: [PATCH 2044/2269] qed: Fix reading wrong value in loop condition [ Upstream commit ed4eac20dcffdad47709422e0cb925981b056668 ] The value of "sb_index" is written by the hardware. Reading its value and writing it to "index" must finish before checking the loop condition. Signed-off-by: Denis Bolotin Signed-off-by: Michal Kalderon Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/qlogic/qed/qed_int.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/qlogic/qed/qed_int.c b/drivers/net/ethernet/qlogic/qed/qed_int.c index 719cdbfe16952..7746417130bd7 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_int.c +++ b/drivers/net/ethernet/qlogic/qed/qed_int.c @@ -992,6 +992,8 @@ static int qed_int_attentions(struct qed_hwfn *p_hwfn) */ do { index = p_sb_attn->sb_index; + /* finish reading index before the loop condition */ + dma_rmb(); attn_bits = le32_to_cpu(p_sb_attn->atten_bits); attn_acks = le32_to_cpu(p_sb_attn->atten_ack); } while (index != p_sb_attn->sb_index); -- GitLab From 65d9e9821b86155dc810ff08c12cc29c7fabbca9 Mon Sep 17 00:00:00 2001 From: Shen Jing Date: Thu, 1 Nov 2018 15:35:17 +0530 Subject: [PATCH 2045/2269] Revert "usb: gadget: ffs: Fix BUG when userland exits with submitted AIO transfers" [ Upstream commit a9c859033f6ec772f8e3228c343bb1321584ae0e ] This reverts commit b4194da3f9087dd38d91b40f9bec42d59ce589a8 since it causes list corruption followed by kernel panic: Workqueue: adb ffs_aio_cancel_worker RIP: 0010:__list_add_valid+0x4d/0x70 Call Trace: insert_work+0x47/0xb0 __queue_work+0xf6/0x400 queue_work_on+0x65/0x70 dwc3_gadget_giveback+0x44/0x50 [dwc3] dwc3_gadget_ep_dequeue+0x83/0x2d0 [dwc3] ? finish_wait+0x80/0x80 usb_ep_dequeue+0x1e/0x90 process_one_work+0x18c/0x3b0 worker_thread+0x3c/0x390 ? process_one_work+0x3b0/0x3b0 kthread+0x11e/0x140 ? kthread_create_worker_on_cpu+0x70/0x70 ret_from_fork+0x3a/0x50 This issue is seen with warm reboot stability testing. Signed-off-by: Shen Jing Signed-off-by: Saranya Gopal Signed-off-by: Felipe Balbi Signed-off-by: Sasha Levin --- drivers/usb/gadget/function/f_fs.c | 26 ++++++++------------------ 1 file changed, 8 insertions(+), 18 deletions(-) diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index 17467545391b5..52e6897fa35a4 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -219,7 +219,6 @@ struct ffs_io_data { struct mm_struct *mm; struct work_struct work; - struct work_struct cancellation_work; struct usb_ep *ep; struct usb_request *req; @@ -1074,31 +1073,22 @@ ffs_epfile_open(struct inode *inode, struct file *file) return 0; } -static void ffs_aio_cancel_worker(struct work_struct *work) -{ - struct ffs_io_data *io_data = container_of(work, struct ffs_io_data, - cancellation_work); - - ENTER(); - - usb_ep_dequeue(io_data->ep, io_data->req); -} - static int ffs_aio_cancel(struct kiocb *kiocb) { struct ffs_io_data *io_data = kiocb->private; - struct ffs_data *ffs = io_data->ffs; + struct ffs_epfile *epfile = kiocb->ki_filp->private_data; int value; ENTER(); - if (likely(io_data && io_data->ep && io_data->req)) { - INIT_WORK(&io_data->cancellation_work, ffs_aio_cancel_worker); - queue_work(ffs->io_completion_wq, &io_data->cancellation_work); - value = -EINPROGRESS; - } else { + spin_lock_irq(&epfile->ffs->eps_lock); + + if (likely(io_data && io_data->ep && io_data->req)) + value = usb_ep_dequeue(io_data->ep, io_data->req); + else value = -EINVAL; - } + + spin_unlock_irq(&epfile->ffs->eps_lock); return value; } -- GitLab From 471b2b9c525424e5213b4d6084ed1d12758ce8c8 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Thu, 15 Nov 2018 18:05:13 +0200 Subject: [PATCH 2046/2269] net/mlx4_core: Zero out lkey field in SW2HW_MPT fw command [ Upstream commit bd85fbc2038a1bbe84990b23ff69b6fc81a32b2c ] When re-registering a user mr, the mpt information for the existing mr when running SRIOV is obtained via the QUERY_MPT fw command. The returned information includes the mpt's lkey. This retrieved mpt information is used to move the mpt back to hardware ownership in the rereg flow (via the SW2HW_MPT fw command when running SRIOV). The fw API spec states that for SW2HW_MPT, the lkey field must be zero. Any ConnectX-3 PF driver which checks for strict spec adherence will return failure for SW2HW_MPT if the lkey field is not zero (although the fw in practice ignores this field for SW2HW_MPT). Thus, in order to conform to the fw API spec, set the lkey field to zero before invoking SW2HW_MPT when running SRIOV. Fixes: e630664c8383 ("mlx4_core: Add helper functions to support MR re-registration") Signed-off-by: Jack Morgenstein Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx4/mr.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx4/mr.c b/drivers/net/ethernet/mellanox/mlx4/mr.c index c7c0764991c98..20043f82c1d82 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mr.c +++ b/drivers/net/ethernet/mellanox/mlx4/mr.c @@ -363,6 +363,7 @@ int mlx4_mr_hw_write_mpt(struct mlx4_dev *dev, struct mlx4_mr *mmr, container_of((void *)mpt_entry, struct mlx4_cmd_mailbox, buf); + (*mpt_entry)->lkey = 0; err = mlx4_SW2HW_MPT(dev, mailbox, key); } -- GitLab From e6e5d3dda0ea79e4c9a52ed2031ed832713f3e99 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Thu, 15 Nov 2018 18:05:14 +0200 Subject: [PATCH 2047/2269] net/mlx4_core: Fix uninitialized variable compilation warning [ Upstream commit 3ea7e7ea53c9f6ee41cb69a29c375fe9dd9a56a7 ] Initialize the uid variable to zero to avoid the compilation warning. Fixes: 7a89399ffad7 ("net/mlx4: Add mlx4_bitmap zone allocator") Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx4/alloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/alloc.c b/drivers/net/ethernet/mellanox/mlx4/alloc.c index 6dabd983e7e0f..94f4dc4a77e96 100644 --- a/drivers/net/ethernet/mellanox/mlx4/alloc.c +++ b/drivers/net/ethernet/mellanox/mlx4/alloc.c @@ -337,7 +337,7 @@ void mlx4_zone_allocator_destroy(struct mlx4_zone_allocator *zone_alloc) static u32 __mlx4_alloc_from_zone(struct mlx4_zone_entry *zone, int count, int align, u32 skip_mask, u32 *puid) { - u32 uid; + u32 uid = 0; u32 res; struct mlx4_zone_allocator *zone_alloc = zone->allocator; struct mlx4_zone_entry *curr_node; -- GitLab From 934f4965b46b99885e31b89fbf3240cb034c0959 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Thu, 15 Nov 2018 18:05:15 +0200 Subject: [PATCH 2048/2269] net/mlx4: Fix UBSAN warning of signed integer overflow [ Upstream commit a463146e67c848cbab5ce706d6528281b7cded08 ] UBSAN: Undefined behavior in drivers/net/ethernet/mellanox/mlx4/resource_tracker.c:626:29 signed integer overflow: 1802201963 + 1802201963 cannot be represented in type 'int' The union of res_reserved and res_port_rsvd[MLX4_MAX_PORTS] monitors granting of reserved resources. The grant operation is calculated and protected, thus both members of the union cannot be negative. Changed type of res_reserved and of res_port_rsvd[MLX4_MAX_PORTS] from signed int to unsigned int, allowing large value. Fixes: 5a0d0a6161ae ("mlx4: Structures and init/teardown for VF resource quotas") Signed-off-by: Aya Levin Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx4/mlx4.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index c68da1986e51d..aaeb446bba622 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -541,8 +541,8 @@ struct slave_list { struct resource_allocator { spinlock_t alloc_lock; /* protect quotas */ union { - int res_reserved; - int res_port_rsvd[MLX4_MAX_PORTS]; + unsigned int res_reserved; + unsigned int res_port_rsvd[MLX4_MAX_PORTS]; }; union { int res_free; -- GitLab From 6c65ce92019b50861cd0b1763da8ea8217d6a420 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Thu, 8 Nov 2018 17:52:53 +0100 Subject: [PATCH 2049/2269] gpio: mockup: fix indicated direction [ Upstream commit bff466bac59994cfcceabe4d0be5fdc1c20cd5b8 ] Commit 3edfb7bd76bd ("gpiolib: Show correct direction from the beginning") fixed an existing issue but broke libgpiod tests by changing the default direction of dummy lines to output. We don't break user-space so make gpio-mockup behave as before. Signed-off-by: Bartosz Golaszewski Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- drivers/gpio/gpio-mockup.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpio/gpio-mockup.c b/drivers/gpio/gpio-mockup.c index 9532d86a82f7c..d99c8d8da9a05 100644 --- a/drivers/gpio/gpio-mockup.c +++ b/drivers/gpio/gpio-mockup.c @@ -35,8 +35,8 @@ #define GPIO_MOCKUP_MAX_RANGES (GPIO_MOCKUP_MAX_GC * 2) enum { - GPIO_MOCKUP_DIR_OUT = 0, - GPIO_MOCKUP_DIR_IN = 1, + GPIO_MOCKUP_DIR_IN = 0, + GPIO_MOCKUP_DIR_OUT = 1, }; /* @@ -112,7 +112,7 @@ static int gpio_mockup_get_direction(struct gpio_chip *gc, unsigned int offset) { struct gpio_mockup_chip *chip = gpiochip_get_data(gc); - return chip->lines[offset].dir; + return !chip->lines[offset].dir; } static int gpio_mockup_name_lines(struct device *dev, -- GitLab From 60adf764a94f393d9ec56e05d976987d4d3cfdd3 Mon Sep 17 00:00:00 2001 From: Olof Johansson Date: Fri, 16 Nov 2018 19:43:27 -0800 Subject: [PATCH 2050/2269] mtd: rawnand: qcom: Namespace prefix some commands [ Upstream commit 33bf5519ae5dd356b182a94e3622f42860274a38 ] PAGE_READ is used by RISC-V arch code included through mm headers, and it makes sense to bring in a prefix on these in the driver. drivers/mtd/nand/raw/qcom_nandc.c:153: warning: "PAGE_READ" redefined #define PAGE_READ 0x2 In file included from include/linux/memremap.h:7, from include/linux/mm.h:27, from include/linux/scatterlist.h:8, from include/linux/dma-mapping.h:11, from drivers/mtd/nand/raw/qcom_nandc.c:17: arch/riscv/include/asm/pgtable.h:48: note: this is the location of the previous definition Caught by riscv allmodconfig. Signed-off-by: Olof Johansson Reviewed-by: Miquel Raynal Signed-off-by: Boris Brezillon Signed-off-by: Sasha Levin --- drivers/mtd/nand/qcom_nandc.c | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/drivers/mtd/nand/qcom_nandc.c b/drivers/mtd/nand/qcom_nandc.c index b49ca02b399da..09d5f7df60237 100644 --- a/drivers/mtd/nand/qcom_nandc.c +++ b/drivers/mtd/nand/qcom_nandc.c @@ -149,15 +149,15 @@ #define NAND_VERSION_MINOR_SHIFT 16 /* NAND OP_CMDs */ -#define PAGE_READ 0x2 -#define PAGE_READ_WITH_ECC 0x3 -#define PAGE_READ_WITH_ECC_SPARE 0x4 -#define PROGRAM_PAGE 0x6 -#define PAGE_PROGRAM_WITH_ECC 0x7 -#define PROGRAM_PAGE_SPARE 0x9 -#define BLOCK_ERASE 0xa -#define FETCH_ID 0xb -#define RESET_DEVICE 0xd +#define OP_PAGE_READ 0x2 +#define OP_PAGE_READ_WITH_ECC 0x3 +#define OP_PAGE_READ_WITH_ECC_SPARE 0x4 +#define OP_PROGRAM_PAGE 0x6 +#define OP_PAGE_PROGRAM_WITH_ECC 0x7 +#define OP_PROGRAM_PAGE_SPARE 0x9 +#define OP_BLOCK_ERASE 0xa +#define OP_FETCH_ID 0xb +#define OP_RESET_DEVICE 0xd /* Default Value for NAND_DEV_CMD_VLD */ #define NAND_DEV_CMD_VLD_VAL (READ_START_VLD | WRITE_START_VLD | \ @@ -629,11 +629,11 @@ static void update_rw_regs(struct qcom_nand_host *host, int num_cw, bool read) if (read) { if (host->use_ecc) - cmd = PAGE_READ_WITH_ECC | PAGE_ACC | LAST_PAGE; + cmd = OP_PAGE_READ_WITH_ECC | PAGE_ACC | LAST_PAGE; else - cmd = PAGE_READ | PAGE_ACC | LAST_PAGE; + cmd = OP_PAGE_READ | PAGE_ACC | LAST_PAGE; } else { - cmd = PROGRAM_PAGE | PAGE_ACC | LAST_PAGE; + cmd = OP_PROGRAM_PAGE | PAGE_ACC | LAST_PAGE; } if (host->use_ecc) { @@ -1030,7 +1030,7 @@ static int nandc_param(struct qcom_nand_host *host) * in use. we configure the controller to perform a raw read of 512 * bytes to read onfi params */ - nandc_set_reg(nandc, NAND_FLASH_CMD, PAGE_READ | PAGE_ACC | LAST_PAGE); + nandc_set_reg(nandc, NAND_FLASH_CMD, OP_PAGE_READ | PAGE_ACC | LAST_PAGE); nandc_set_reg(nandc, NAND_ADDR0, 0); nandc_set_reg(nandc, NAND_ADDR1, 0); nandc_set_reg(nandc, NAND_DEV0_CFG0, 0 << CW_PER_PAGE @@ -1084,7 +1084,7 @@ static int erase_block(struct qcom_nand_host *host, int page_addr) struct qcom_nand_controller *nandc = get_qcom_nand_controller(chip); nandc_set_reg(nandc, NAND_FLASH_CMD, - BLOCK_ERASE | PAGE_ACC | LAST_PAGE); + OP_BLOCK_ERASE | PAGE_ACC | LAST_PAGE); nandc_set_reg(nandc, NAND_ADDR0, page_addr); nandc_set_reg(nandc, NAND_ADDR1, 0); nandc_set_reg(nandc, NAND_DEV0_CFG0, @@ -1115,7 +1115,7 @@ static int read_id(struct qcom_nand_host *host, int column) if (column == -1) return 0; - nandc_set_reg(nandc, NAND_FLASH_CMD, FETCH_ID); + nandc_set_reg(nandc, NAND_FLASH_CMD, OP_FETCH_ID); nandc_set_reg(nandc, NAND_ADDR0, column); nandc_set_reg(nandc, NAND_ADDR1, 0); nandc_set_reg(nandc, NAND_FLASH_CHIP_SELECT, @@ -1136,7 +1136,7 @@ static int reset(struct qcom_nand_host *host) struct nand_chip *chip = &host->chip; struct qcom_nand_controller *nandc = get_qcom_nand_controller(chip); - nandc_set_reg(nandc, NAND_FLASH_CMD, RESET_DEVICE); + nandc_set_reg(nandc, NAND_FLASH_CMD, OP_RESET_DEVICE); nandc_set_reg(nandc, NAND_EXEC_CMD, 1); write_reg_dma(nandc, NAND_FLASH_CMD, 1, NAND_BAM_NEXT_SGL); -- GitLab From 16f2433ede2374060ccb79767fcd3a69cde2a22f Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Wed, 14 Nov 2018 05:35:20 +0000 Subject: [PATCH 2051/2269] HID: multitouch: Add pointstick support for Cirque Touchpad [ Upstream commit 12d43aacf9a74d0eb66fd0ea54ebeb79ca28940f ] Cirque Touchpad/Pointstick combo is similar to Alps devices, it requires MT_CLS_WIN_8_DUAL to expose its pointstick as a mouse. Signed-off-by: Kai-Heng Feng Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin --- drivers/hid/hid-ids.h | 3 +++ drivers/hid/hid-multitouch.c | 6 ++++++ 2 files changed, 9 insertions(+) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 87904d2adadb5..fcc688df694ce 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -266,6 +266,9 @@ #define USB_VENDOR_ID_CIDC 0x1677 +#define I2C_VENDOR_ID_CIRQUE 0x0488 +#define I2C_PRODUCT_ID_CIRQUE_121F 0x121F + #define USB_VENDOR_ID_CJTOUCH 0x24b8 #define USB_DEVICE_ID_CJTOUCH_MULTI_TOUCH_0020 0x0020 #define USB_DEVICE_ID_CJTOUCH_MULTI_TOUCH_0040 0x0040 diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c index c3b9bd5dba75c..07d92d4a9f7c8 100644 --- a/drivers/hid/hid-multitouch.c +++ b/drivers/hid/hid-multitouch.c @@ -1474,6 +1474,12 @@ static const struct hid_device_id mt_devices[] = { MT_USB_DEVICE(USB_VENDOR_ID_CHUNGHWAT, USB_DEVICE_ID_CHUNGHWAT_MULTITOUCH) }, + /* Cirque devices */ + { .driver_data = MT_CLS_WIN_8_DUAL, + HID_DEVICE(BUS_I2C, HID_GROUP_MULTITOUCH_WIN_8, + I2C_VENDOR_ID_CIRQUE, + I2C_PRODUCT_ID_CIRQUE_121F) }, + /* CJTouch panels */ { .driver_data = MT_CLS_NSMU, MT_USB_DEVICE(USB_VENDOR_ID_CJTOUCH, -- GitLab From 4b4c6714a19792f616fc03696594dafbb5a9b9a1 Mon Sep 17 00:00:00 2001 From: Thor Thayer Date: Fri, 16 Nov 2018 08:25:49 -0600 Subject: [PATCH 2052/2269] mtd: spi-nor: Fix Cadence QSPI page fault kernel panic [ Upstream commit a6a66f80c85e8e20573ca03fabf32445954a88d5 ] The current Cadence QSPI driver caused a kernel panic sporadically when writing to QSPI. The problem was caused by writing more bytes than needed because the QSPI operated on 4 bytes at a time. [ 11.202044] Unable to handle kernel paging request at virtual address bffd3000 [ 11.209254] pgd = e463054d [ 11.211948] [bffd3000] *pgd=2fffb811, *pte=00000000, *ppte=00000000 [ 11.218202] Internal error: Oops: 7 [#1] SMP ARM [ 11.222797] Modules linked in: [ 11.225844] CPU: 1 PID: 1317 Comm: systemd-hwdb Not tainted 4.17.7-d0c45cd44a8f [ 11.235796] Hardware name: Altera SOCFPGA Arria10 [ 11.240487] PC is at __raw_writesl+0x70/0xd4 [ 11.244741] LR is at cqspi_write+0x1a0/0x2cc On a page boundary limit the number of bytes copied from the tx buffer to remain within the page. This patch uses a temporary buffer to hold the 4 bytes to write and then copies only the bytes required from the tx buffer. Reported-by: Adrian Amborzewicz Signed-off-by: Thor Thayer Signed-off-by: Boris Brezillon Signed-off-by: Sasha Levin --- drivers/mtd/spi-nor/cadence-quadspi.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/drivers/mtd/spi-nor/cadence-quadspi.c b/drivers/mtd/spi-nor/cadence-quadspi.c index 8d89204b90d25..f22dd34f4f834 100644 --- a/drivers/mtd/spi-nor/cadence-quadspi.c +++ b/drivers/mtd/spi-nor/cadence-quadspi.c @@ -625,9 +625,23 @@ static int cqspi_indirect_write_execute(struct spi_nor *nor, reg_base + CQSPI_REG_INDIRECTWR); while (remaining > 0) { + size_t write_words, mod_bytes; + write_bytes = remaining > page_size ? page_size : remaining; - iowrite32_rep(cqspi->ahb_base, txbuf, - DIV_ROUND_UP(write_bytes, 4)); + write_words = write_bytes / 4; + mod_bytes = write_bytes % 4; + /* Write 4 bytes at a time then single bytes. */ + if (write_words) { + iowrite32_rep(cqspi->ahb_base, txbuf, write_words); + txbuf += (write_words * 4); + } + if (mod_bytes) { + unsigned int temp = 0xFFFFFFFF; + + memcpy(&temp, txbuf, mod_bytes); + iowrite32(temp, cqspi->ahb_base); + txbuf += mod_bytes; + } ret = wait_for_completion_timeout(&cqspi->transfer_complete, msecs_to_jiffies @@ -638,7 +652,6 @@ static int cqspi_indirect_write_execute(struct spi_nor *nor, goto failwr; } - txbuf += write_bytes; remaining -= write_bytes; if (remaining > 0) -- GitLab From 9dded0ffdeb7fa10698d534978892b406b60c91e Mon Sep 17 00:00:00 2001 From: Denis Bolotin Date: Mon, 19 Nov 2018 16:28:30 +0200 Subject: [PATCH 2053/2269] qed: Fix bitmap_weight() check [ Upstream commit 276d43f0ae963312c0cd0e2b9a85fd11ac65dfcc ] Fix the condition which verifies that only one flag is set. The API bitmap_weight() should receive size in bits instead of bytes. Fixes: b5a9ee7cf3be ("qed: Revise QM cofiguration") Signed-off-by: Denis Bolotin Signed-off-by: Michal Kalderon Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/qlogic/qed/qed_dev.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c index ef23746997267..a51cd1028ecbb 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dev.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c @@ -440,8 +440,11 @@ static u16 *qed_init_qm_get_idx_from_flags(struct qed_hwfn *p_hwfn, struct qed_qm_info *qm_info = &p_hwfn->qm_info; /* Can't have multiple flags set here */ - if (bitmap_weight((unsigned long *)&pq_flags, sizeof(pq_flags)) > 1) + if (bitmap_weight((unsigned long *)&pq_flags, + sizeof(pq_flags) * BITS_PER_BYTE) > 1) { + DP_ERR(p_hwfn, "requested multiple pq flags 0x%x\n", pq_flags); goto err; + } switch (pq_flags) { case PQ_FLAGS_RLS: -- GitLab From f7a6fd087a300dabc4dbde84b1f2d4d0ac5173e0 Mon Sep 17 00:00:00 2001 From: Denis Bolotin Date: Mon, 19 Nov 2018 16:28:31 +0200 Subject: [PATCH 2054/2269] qed: Fix QM getters to always return a valid pq [ Upstream commit eb62cca9bee842e5b23bd0ddfb1f271ca95e8759 ] The getter callers doesn't know the valid Physical Queues (PQ) values. This patch makes sure that a valid PQ will always be returned. The patch consists of 3 fixes: - When qed_init_qm_get_idx_from_flags() receives a disabled flag, it returned PQ 0, which can potentially be another function's pq. Verify that flag is enabled, otherwise return default start_pq. - When qed_init_qm_get_idx_from_flags() receives an unknown flag, it returned NULL and could lead to a segmentation fault. Return default start_pq instead. - A modulo operation was added to MCOS/VFS PQ getters to make sure the PQ returned is in range of the required flag. Fixes: b5a9ee7cf3be ("qed: Revise QM cofiguration") Signed-off-by: Denis Bolotin Signed-off-by: Michal Kalderon Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/qlogic/qed/qed_dev.c | 24 +++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c index a51cd1028ecbb..16953c4ebd71b 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dev.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c @@ -446,6 +446,11 @@ static u16 *qed_init_qm_get_idx_from_flags(struct qed_hwfn *p_hwfn, goto err; } + if (!(qed_get_pq_flags(p_hwfn) & pq_flags)) { + DP_ERR(p_hwfn, "pq flag 0x%x is not set\n", pq_flags); + goto err; + } + switch (pq_flags) { case PQ_FLAGS_RLS: return &qm_info->first_rl_pq; @@ -468,8 +473,7 @@ static u16 *qed_init_qm_get_idx_from_flags(struct qed_hwfn *p_hwfn, } err: - DP_ERR(p_hwfn, "BAD pq flags %d\n", pq_flags); - return NULL; + return &qm_info->start_pq; } /* save pq index in qm info */ @@ -493,20 +497,32 @@ u16 qed_get_cm_pq_idx_mcos(struct qed_hwfn *p_hwfn, u8 tc) { u8 max_tc = qed_init_qm_get_num_tcs(p_hwfn); + if (max_tc == 0) { + DP_ERR(p_hwfn, "pq with flag 0x%lx do not exist\n", + PQ_FLAGS_MCOS); + return p_hwfn->qm_info.start_pq; + } + if (tc > max_tc) DP_ERR(p_hwfn, "tc %d must be smaller than %d\n", tc, max_tc); - return qed_get_cm_pq_idx(p_hwfn, PQ_FLAGS_MCOS) + tc; + return qed_get_cm_pq_idx(p_hwfn, PQ_FLAGS_MCOS) + (tc % max_tc); } u16 qed_get_cm_pq_idx_vf(struct qed_hwfn *p_hwfn, u16 vf) { u16 max_vf = qed_init_qm_get_num_vfs(p_hwfn); + if (max_vf == 0) { + DP_ERR(p_hwfn, "pq with flag 0x%lx do not exist\n", + PQ_FLAGS_VFS); + return p_hwfn->qm_info.start_pq; + } + if (vf > max_vf) DP_ERR(p_hwfn, "vf %d must be smaller than %d\n", vf, max_vf); - return qed_get_cm_pq_idx(p_hwfn, PQ_FLAGS_VFS) + vf; + return qed_get_cm_pq_idx(p_hwfn, PQ_FLAGS_VFS) + (vf % max_vf); } u16 qed_get_cm_pq_idx_rl(struct qed_hwfn *p_hwfn, u8 rl) -- GitLab From d0decb8d7cbb225336e043a63e9a8c72498834fd Mon Sep 17 00:00:00 2001 From: Vincent Chen Date: Wed, 21 Nov 2018 09:38:11 +0800 Subject: [PATCH 2055/2269] net: faraday: ftmac100: remove netif_running(netdev) check before disabling interrupts [ Upstream commit 426a593e641ebf0d9288f0a2fcab644a86820220 ] In the original ftmac100_interrupt(), the interrupts are only disabled when the condition "netif_running(netdev)" is true. However, this condition causes kerenl hang in the following case. When the user requests to disable the network device, kernel will clear the bit __LINK_STATE_START from the dev->state and then call the driver's ndo_stop function. Network device interrupts are not blocked during this process. If an interrupt occurs between clearing __LINK_STATE_START and stopping network device, kernel cannot disable the interrupts due to the condition "netif_running(netdev)" in the ISR. Hence, kernel will hang due to the continuous interruption of the network device. In order to solve the above problem, the interrupts of the network device should always be disabled in the ISR without being restricted by the condition "netif_running(netdev)". [V2] Remove unnecessary curly braces. Signed-off-by: Vincent Chen Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/faraday/ftmac100.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/faraday/ftmac100.c b/drivers/net/ethernet/faraday/ftmac100.c index 66928a922824f..415fd93e9930f 100644 --- a/drivers/net/ethernet/faraday/ftmac100.c +++ b/drivers/net/ethernet/faraday/ftmac100.c @@ -870,11 +870,10 @@ static irqreturn_t ftmac100_interrupt(int irq, void *dev_id) struct net_device *netdev = dev_id; struct ftmac100 *priv = netdev_priv(netdev); - if (likely(netif_running(netdev))) { - /* Disable interrupts for polling */ - ftmac100_disable_all_int(priv); + /* Disable interrupts for polling */ + ftmac100_disable_all_int(priv); + if (likely(netif_running(netdev))) napi_schedule(&priv->napi); - } return IRQ_HANDLED; } -- GitLab From 84f49bb07f9d8f48891976229a6eaed9e1a4d6ca Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Wed, 21 Nov 2018 17:53:47 +0800 Subject: [PATCH 2056/2269] iommu/vt-d: Use memunmap to free memremap [ Upstream commit 829383e183728dec7ed9150b949cd6de64127809 ] memunmap() should be used to free the return of memremap(), not iounmap(). Fixes: dfddb969edf0 ('iommu/vt-d: Switch from ioremap_cache to memremap') Signed-off-by: Pan Bian Signed-off-by: Joerg Roedel Signed-off-by: Sasha Levin --- drivers/iommu/intel-iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index aaf3fed974771..e86c1c8ec7f6d 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -3086,7 +3086,7 @@ static int copy_context_table(struct intel_iommu *iommu, } if (old_ce) - iounmap(old_ce); + memunmap(old_ce); ret = 0; if (devfn < 0x80) -- GitLab From dab45f4d03f01e8d6612803518c225bf859f796c Mon Sep 17 00:00:00 2001 From: Tigran Mkrtchyan Date: Wed, 21 Nov 2018 12:25:41 +0100 Subject: [PATCH 2057/2269] flexfiles: use per-mirror specified stateid for IO [ Upstream commit bb21ce0ad227b69ec0f83279297ee44232105d96 ] rfc8435 says: For tight coupling, ffds_stateid provides the stateid to be used by the client to access the file. However current implementation replaces per-mirror provided stateid with by open or lock stateid. Ensure that per-mirror stateid is used by ff_layout_write_prepare_v4 and nfs4_ff_layout_prepare_ds. Signed-off-by: Tigran Mkrtchyan Signed-off-by: Rick Macklem Signed-off-by: Trond Myklebust Signed-off-by: Sasha Levin --- fs/nfs/flexfilelayout/flexfilelayout.c | 21 +++++++++------------ fs/nfs/flexfilelayout/flexfilelayout.h | 4 ++++ fs/nfs/flexfilelayout/flexfilelayoutdev.c | 19 +++++++++++++++++++ 3 files changed, 32 insertions(+), 12 deletions(-) diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index b0fa83a607541..13612a848378f 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -1365,12 +1365,7 @@ static void ff_layout_read_prepare_v4(struct rpc_task *task, void *data) task)) return; - if (ff_layout_read_prepare_common(task, hdr)) - return; - - if (nfs4_set_rw_stateid(&hdr->args.stateid, hdr->args.context, - hdr->args.lock_context, FMODE_READ) == -EIO) - rpc_exit(task, -EIO); /* lost lock, terminate I/O */ + ff_layout_read_prepare_common(task, hdr); } static void ff_layout_read_call_done(struct rpc_task *task, void *data) @@ -1539,12 +1534,7 @@ static void ff_layout_write_prepare_v4(struct rpc_task *task, void *data) task)) return; - if (ff_layout_write_prepare_common(task, hdr)) - return; - - if (nfs4_set_rw_stateid(&hdr->args.stateid, hdr->args.context, - hdr->args.lock_context, FMODE_WRITE) == -EIO) - rpc_exit(task, -EIO); /* lost lock, terminate I/O */ + ff_layout_write_prepare_common(task, hdr); } static void ff_layout_write_call_done(struct rpc_task *task, void *data) @@ -1734,6 +1724,10 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr) fh = nfs4_ff_layout_select_ds_fh(lseg, idx); if (fh) hdr->args.fh = fh; + + if (!nfs4_ff_layout_select_ds_stateid(lseg, idx, &hdr->args.stateid)) + goto out_failed; + /* * Note that if we ever decide to split across DSes, * then we may need to handle dense-like offsets. @@ -1796,6 +1790,9 @@ ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync) if (fh) hdr->args.fh = fh; + if (!nfs4_ff_layout_select_ds_stateid(lseg, idx, &hdr->args.stateid)) + goto out_failed; + /* * Note that if we ever decide to split across DSes, * then we may need to handle dense-like offsets. diff --git a/fs/nfs/flexfilelayout/flexfilelayout.h b/fs/nfs/flexfilelayout/flexfilelayout.h index 679cb087ef3f2..d6515f1584f3c 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.h +++ b/fs/nfs/flexfilelayout/flexfilelayout.h @@ -214,6 +214,10 @@ unsigned int ff_layout_fetch_ds_ioerr(struct pnfs_layout_hdr *lo, unsigned int maxnum); struct nfs_fh * nfs4_ff_layout_select_ds_fh(struct pnfs_layout_segment *lseg, u32 mirror_idx); +int +nfs4_ff_layout_select_ds_stateid(struct pnfs_layout_segment *lseg, + u32 mirror_idx, + nfs4_stateid *stateid); struct nfs4_pnfs_ds * nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx, diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c index d62279d3fc5d3..9f69e83810caf 100644 --- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c +++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c @@ -369,6 +369,25 @@ out: return fh; } +int +nfs4_ff_layout_select_ds_stateid(struct pnfs_layout_segment *lseg, + u32 mirror_idx, + nfs4_stateid *stateid) +{ + struct nfs4_ff_layout_mirror *mirror = FF_LAYOUT_COMP(lseg, mirror_idx); + + if (!ff_layout_mirror_valid(lseg, mirror, false)) { + pr_err_ratelimited("NFS: %s: No data server for mirror offset index %d\n", + __func__, mirror_idx); + goto out; + } + + nfs4_stateid_copy(stateid, &mirror->stateid); + return 1; +out: + return 0; +} + /** * nfs4_ff_layout_prepare_ds - prepare a DS connection for an RPC call * @lseg: the layout segment we're operating on -- GitLab From 1c1fbd4d2e9200f4b8a69d2b517e5708b0965a68 Mon Sep 17 00:00:00 2001 From: Thomas Falcon Date: Wed, 21 Nov 2018 11:17:58 -0600 Subject: [PATCH 2058/2269] ibmvnic: Fix RX queue buffer cleanup [ Upstream commit b7cdec3d699db2e5985ad39de0f25d3b6111928e ] The wrong index is used when cleaning up RX buffer objects during release of RX queues. Update to use the correct index counter. Signed-off-by: Thomas Falcon Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/ibm/ibmvnic.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 5c7134ccc1fdb..14c53ed5cca60 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -457,8 +457,8 @@ static void release_rx_pools(struct ibmvnic_adapter *adapter) for (j = 0; j < rx_pool->size; j++) { if (rx_pool->rx_buff[j].skb) { - dev_kfree_skb_any(rx_pool->rx_buff[i].skb); - rx_pool->rx_buff[i].skb = NULL; + dev_kfree_skb_any(rx_pool->rx_buff[j].skb); + rx_pool->rx_buff[j].skb = NULL; } } -- GitLab From 63e307e81cffa11dbc8b513ee2b951ffa59fc61c Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Thu, 22 Nov 2018 16:15:28 +0800 Subject: [PATCH 2059/2269] team: no need to do team_notify_peers or team_mcast_rejoin when disabling port [ Upstream commit 5ed9dc99107144f83b6c1bb52a69b58875baf540 ] team_notify_peers() will send ARP and NA to notify peers. team_mcast_rejoin() will send multicast join group message to notify peers. We should do this when enabling/changed to a new port. But it doesn't make sense to do it when a port is disabled. On the other hand, when we set mcast_rejoin_count to 2, and do a failover, team_port_disable() will increase mcast_rejoin.count_pending to 2 and then team_port_enable() will increase mcast_rejoin.count_pending to 4. We will send 4 mcast rejoin messages at latest, which will make user confused. The same with notify_peers.count. Fix it by deleting team_notify_peers() and team_mcast_rejoin() in team_port_disable(). Reported-by: Liang Li Fixes: fc423ff00df3a ("team: add peer notification") Fixes: 492b200efdd20 ("team: add support for sending multicast rejoins") Signed-off-by: Hangbin Liu Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/team/team.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index 817451a1efd6d..bd455a6cc82cf 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -989,8 +989,6 @@ static void team_port_disable(struct team *team, team->en_port_count--; team_queue_override_port_del(team, port); team_adjust_ops(team); - team_notify_peers(team); - team_mcast_rejoin(team); team_lower_state_changed(port); } -- GitLab From ccbffb5d7dc88b7e7eecf504d8fa2a271f10ee16 Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Thu, 22 Nov 2018 07:34:41 -0500 Subject: [PATCH 2060/2269] net: amd: add missing of_node_put() [ Upstream commit c44c749d3b6fdfca39002e7e48e03fe9f9fe37a3 ] of_find_node_by_path() acquires a reference to the node returned by it and that reference needs to be dropped by its caller. This place doesn't do that, so fix it. Signed-off-by: Yangtao Li Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/amd/sunlance.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/amd/sunlance.c b/drivers/net/ethernet/amd/sunlance.c index 291ca5187f123..9845e07d40cd3 100644 --- a/drivers/net/ethernet/amd/sunlance.c +++ b/drivers/net/ethernet/amd/sunlance.c @@ -1418,7 +1418,7 @@ static int sparc_lance_probe_one(struct platform_device *op, prop = of_get_property(nd, "tpe-link-test?", NULL); if (!prop) - goto no_link_test; + goto node_put; if (strcmp(prop, "true")) { printk(KERN_NOTICE "SunLance: warning: overriding option " @@ -1427,6 +1427,8 @@ static int sparc_lance_probe_one(struct platform_device *op, "to ecd@skynet.be\n"); auxio_set_lte(AUXIO_LTE_ON); } +node_put: + of_node_put(nd); no_link_test: lp->auto_select = 1; lp->tpe = 0; -- GitLab From 4515bbc4e284066d4e2f4a66eccf13813f6e7206 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Wed, 15 Nov 2017 17:38:37 -0800 Subject: [PATCH 2061/2269] mm: don't warn about allocations which stall for too long [ Upstream commit 400e22499dd92613821374c8c6c88c7225359980 ] Commit 63f53dea0c98 ("mm: warn about allocations which stall for too long") was a great step for reducing possibility of silent hang up problem caused by memory allocation stalls. But this commit reverts it, for it is possible to trigger OOM lockup and/or soft lockups when many threads concurrently called warn_alloc() (in order to warn about memory allocation stalls) due to current implementation of printk(), and it is difficult to obtain useful information due to limitation of synchronous warning approach. Current printk() implementation flushes all pending logs using the context of a thread which called console_unlock(). printk() should be able to flush all pending logs eventually unless somebody continues appending to printk() buffer. Since warn_alloc() started appending to printk() buffer while waiting for oom_kill_process() to make forward progress when oom_kill_process() is processing pending logs, it became possible for warn_alloc() to force oom_kill_process() loop inside printk(). As a result, warn_alloc() significantly increased possibility of preventing oom_kill_process() from making forward progress. ---------- Pseudo code start ---------- Before warn_alloc() was introduced: retry: if (mutex_trylock(&oom_lock)) { while (atomic_read(&printk_pending_logs) > 0) { atomic_dec(&printk_pending_logs); print_one_log(); } // Send SIGKILL here. mutex_unlock(&oom_lock) } goto retry; After warn_alloc() was introduced: retry: if (mutex_trylock(&oom_lock)) { while (atomic_read(&printk_pending_logs) > 0) { atomic_dec(&printk_pending_logs); print_one_log(); } // Send SIGKILL here. mutex_unlock(&oom_lock) } else if (waited_for_10seconds()) { atomic_inc(&printk_pending_logs); } goto retry; ---------- Pseudo code end ---------- Although waited_for_10seconds() becomes true once per 10 seconds, unbounded number of threads can call waited_for_10seconds() at the same time. Also, since threads doing waited_for_10seconds() keep doing almost busy loop, the thread doing print_one_log() can use little CPU resource. Therefore, this situation can be simplified like ---------- Pseudo code start ---------- retry: if (mutex_trylock(&oom_lock)) { while (atomic_read(&printk_pending_logs) > 0) { atomic_dec(&printk_pending_logs); print_one_log(); } // Send SIGKILL here. mutex_unlock(&oom_lock) } else { atomic_inc(&printk_pending_logs); } goto retry; ---------- Pseudo code end ---------- when printk() is called faster than print_one_log() can process a log. One of possible mitigation would be to introduce a new lock in order to make sure that no other series of printk() (either oom_kill_process() or warn_alloc()) can append to printk() buffer when one series of printk() (either oom_kill_process() or warn_alloc()) is already in progress. Such serialization will also help obtaining kernel messages in readable form. ---------- Pseudo code start ---------- retry: if (mutex_trylock(&oom_lock)) { mutex_lock(&oom_printk_lock); while (atomic_read(&printk_pending_logs) > 0) { atomic_dec(&printk_pending_logs); print_one_log(); } // Send SIGKILL here. mutex_unlock(&oom_printk_lock); mutex_unlock(&oom_lock) } else { if (mutex_trylock(&oom_printk_lock)) { atomic_inc(&printk_pending_logs); mutex_unlock(&oom_printk_lock); } } goto retry; ---------- Pseudo code end ---------- But this commit does not go that direction, for we don't want to introduce a new lock dependency, and we unlikely be able to obtain useful information even if we serialized oom_kill_process() and warn_alloc(). Synchronous approach is prone to unexpected results (e.g. too late [1], too frequent [2], overlooked [3]). As far as I know, warn_alloc() never helped with providing information other than "something is going wrong". I want to consider asynchronous approach which can obtain information during stalls with possibly relevant threads (e.g. the owner of oom_lock and kswapd-like threads) and serve as a trigger for actions (e.g. turn on/off tracepoints, ask libvirt daemon to take a memory dump of stalling KVM guest for diagnostic purpose). This commit temporarily loses ability to report e.g. OOM lockup due to unable to invoke the OOM killer due to !__GFP_FS allocation request. But asynchronous approach will be able to detect such situation and emit warning. Thus, let's remove warn_alloc(). [1] https://bugzilla.kernel.org/show_bug.cgi?id=192981 [2] http://lkml.kernel.org/r/CAM_iQpWuPVGc2ky8M-9yukECtS+zKjiDasNymX7rMcBjBFyM_A@mail.gmail.com [3] commit db73ee0d46379922 ("mm, vmscan: do not loop on too_many_isolated for ever")) Link: http://lkml.kernel.org/r/1509017339-4802-1-git-send-email-penguin-kernel@I-love.SAKURA.ne.jp Signed-off-by: Tetsuo Handa Reported-by: Cong Wang Reported-by: yuwang.yuwang Reported-by: Johannes Weiner Acked-by: Michal Hocko Acked-by: Johannes Weiner Cc: Vlastimil Babka Cc: Mel Gorman Cc: Dave Hansen Cc: Sergey Senozhatsky Cc: Petr Mladek Cc: Steven Rostedt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- mm/page_alloc.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 2074f424dabfa..6be91a1a00d9f 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3862,8 +3862,6 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, enum compact_result compact_result; int compaction_retries; int no_progress_loops; - unsigned long alloc_start = jiffies; - unsigned int stall_timeout = 10 * HZ; unsigned int cpuset_mems_cookie; int reserve_flags; @@ -3983,14 +3981,6 @@ retry: if (!can_direct_reclaim) goto nopage; - /* Make sure we know about allocations which stall for too long */ - if (time_after(jiffies, alloc_start + stall_timeout)) { - warn_alloc(gfp_mask & ~__GFP_NOWARN, ac->nodemask, - "page allocation stalls for %ums, order:%u", - jiffies_to_msecs(jiffies-alloc_start), order); - stall_timeout += 10 * HZ; - } - /* Avoid recursion of direct reclaim */ if (current->flags & PF_MEMALLOC) goto nopage; -- GitLab From d2afd22aff69bc448b1defc06fcd311479f336f5 Mon Sep 17 00:00:00 2001 From: Harry Pan Date: Thu, 29 Nov 2018 00:40:41 +0800 Subject: [PATCH 2062/2269] usb: quirk: add no-LPM quirk on SanDisk Ultra Flair device commit 2f2dde6ba89b1ef1fe23c1138131b315d9aa4019 upstream. Some lower volume SanDisk Ultra Flair in 16GB, which the VID:PID is in 0781:5591, will aggressively request LPM of U1/U2 during runtime, when using this thumb drive as the OS installation key we found the device will generate failure during U1 exit path making it dropped from the USB bus, this causes a corrupted installation in system at the end. i.e., [ 166.918296] hub 2-0:1.0: state 7 ports 7 chg 0000 evt 0004 [ 166.918327] usb usb2-port2: link state change [ 166.918337] usb usb2-port2: do warm reset [ 166.970039] usb usb2-port2: not warm reset yet, waiting 50ms [ 167.022040] usb usb2-port2: not warm reset yet, waiting 200ms [ 167.276043] usb usb2-port2: status 02c0, change 0041, 5.0 Gb/s [ 167.276050] usb 2-2: USB disconnect, device number 2 [ 167.276058] usb 2-2: unregistering device [ 167.276060] usb 2-2: unregistering interface 2-2:1.0 [ 167.276170] xhci_hcd 0000:00:15.0: shutdown urb ffffa3c7cc695cc0 ep1in-bulk [ 167.284055] sd 0:0:0:0: [sda] tag#0 FAILED Result: hostbyte=DID_NO_CONNECT driverbyte=DRIVER_OK [ 167.284064] sd 0:0:0:0: [sda] tag#0 CDB: Read(10) 28 00 00 33 04 90 00 01 00 00 ... Analyzed the USB trace in the link layer we realized it is because of the 6-ms timer of tRecoveryConfigurationTimeout which documented on the USB 3.2 Revision 1.0, the section 7.5.10.4.2 of "Exit from Recovery.Configuration"; device initiates U1 exit -> Recovery.Active -> Recovery.Configuration, then the host timer timeout makes the link transits to eSS.Inactive -> Rx.Detect follows by a Warm Reset. Interestingly, the other higher volume of SanDisk Ultra Flair sharing the same VID:PID, such as 64GB, would not request LPM during runtime, it sticks at U0 always, thus disabling LPM does not affect those thumb drives at all. The same odd occures in SanDisk Ultra Fit 16GB, VID:PID in 0781:5583. Signed-off-by: Harry Pan Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/quirks.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index 808437c5ec49d..cf378b1ed3739 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -188,6 +188,10 @@ static const struct usb_device_id usb_quirk_list[] = { /* Midiman M-Audio Keystation 88es */ { USB_DEVICE(0x0763, 0x0192), .driver_info = USB_QUIRK_RESET_RESUME }, + /* SanDisk Ultra Fit and Ultra Flair */ + { USB_DEVICE(0x0781, 0x5583), .driver_info = USB_QUIRK_NO_LPM }, + { USB_DEVICE(0x0781, 0x5591), .driver_info = USB_QUIRK_NO_LPM }, + /* M-Systems Flash Disk Pioneers */ { USB_DEVICE(0x08ec, 0x1000), .driver_info = USB_QUIRK_RESET_RESUME }, -- GitLab From 810487aa9d483262044c41fc1973c7abd95e1d86 Mon Sep 17 00:00:00 2001 From: Alexander Theissen Date: Tue, 4 Dec 2018 23:43:35 +0100 Subject: [PATCH 2063/2269] usb: appledisplay: Add 27" Apple Cinema Display commit d7859905301880ad3e16272399d26900af3ac496 upstream. Add another Apple Cinema Display to the list of supported displays. Signed-off-by: Alexander Theissen Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/appledisplay.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/misc/appledisplay.c b/drivers/usb/misc/appledisplay.c index 288fe3e69d52d..03be7c75c5be7 100644 --- a/drivers/usb/misc/appledisplay.c +++ b/drivers/usb/misc/appledisplay.c @@ -64,6 +64,7 @@ static const struct usb_device_id appledisplay_table[] = { { APPLEDISPLAY_DEVICE(0x921c) }, { APPLEDISPLAY_DEVICE(0x921d) }, { APPLEDISPLAY_DEVICE(0x9222) }, + { APPLEDISPLAY_DEVICE(0x9226) }, { APPLEDISPLAY_DEVICE(0x9236) }, /* Terminating entry */ -- GitLab From 7b6e85da8d94948201abb8d576d485892a6a878f Mon Sep 17 00:00:00 2001 From: Mathias Payer Date: Wed, 5 Dec 2018 21:19:59 +0100 Subject: [PATCH 2064/2269] USB: check usb_get_extra_descriptor for proper size commit 704620afc70cf47abb9d6a1a57f3825d2bca49cf upstream. When reading an extra descriptor, we need to properly check the minimum and maximum size allowed, to prevent from invalid data being sent by a device. Reported-by: Hui Peng Reported-by: Mathias Payer Co-developed-by: Linus Torvalds Signed-off-by: Hui Peng Signed-off-by: Mathias Payer Signed-off-by: Linus Torvalds Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hub.c | 2 +- drivers/usb/core/usb.c | 6 +++--- drivers/usb/host/hwa-hc.c | 2 +- include/linux/usb.h | 4 ++-- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 638dc6f66d70d..a073cb5be0135 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -2231,7 +2231,7 @@ static int usb_enumerate_device_otg(struct usb_device *udev) /* descriptor may appear anywhere in config */ err = __usb_get_extra_descriptor(udev->rawdescriptors[0], le16_to_cpu(udev->config[0].desc.wTotalLength), - USB_DT_OTG, (void **) &desc); + USB_DT_OTG, (void **) &desc, sizeof(*desc)); if (err || !(desc->bmAttributes & USB_OTG_HNP)) return 0; diff --git a/drivers/usb/core/usb.c b/drivers/usb/core/usb.c index f8b50eaf6d1ee..7a4e3da549fef 100644 --- a/drivers/usb/core/usb.c +++ b/drivers/usb/core/usb.c @@ -833,14 +833,14 @@ EXPORT_SYMBOL_GPL(usb_get_current_frame_number); */ int __usb_get_extra_descriptor(char *buffer, unsigned size, - unsigned char type, void **ptr) + unsigned char type, void **ptr, size_t minsize) { struct usb_descriptor_header *header; while (size >= sizeof(struct usb_descriptor_header)) { header = (struct usb_descriptor_header *)buffer; - if (header->bLength < 2) { + if (header->bLength < 2 || header->bLength > size) { printk(KERN_ERR "%s: bogus descriptor, type %d length %d\n", usbcore_name, @@ -849,7 +849,7 @@ int __usb_get_extra_descriptor(char *buffer, unsigned size, return -1; } - if (header->bDescriptorType == type) { + if (header->bDescriptorType == type && header->bLength >= minsize) { *ptr = header; return 0; } diff --git a/drivers/usb/host/hwa-hc.c b/drivers/usb/host/hwa-hc.c index da3b18038d231..216069c396a0c 100644 --- a/drivers/usb/host/hwa-hc.c +++ b/drivers/usb/host/hwa-hc.c @@ -654,7 +654,7 @@ static int hwahc_security_create(struct hwahc *hwahc) top = itr + itr_size; result = __usb_get_extra_descriptor(usb_dev->rawdescriptors[index], le16_to_cpu(usb_dev->actconfig->desc.wTotalLength), - USB_DT_SECURITY, (void **) &secd); + USB_DT_SECURITY, (void **) &secd, sizeof(*secd)); if (result == -1) { dev_warn(dev, "BUG? WUSB host has no security descriptors\n"); return 0; diff --git a/include/linux/usb.h b/include/linux/usb.h index 4192a1755ccba..8c7ba40cf021e 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -407,11 +407,11 @@ struct usb_host_bos { }; int __usb_get_extra_descriptor(char *buffer, unsigned size, - unsigned char type, void **ptr); + unsigned char type, void **ptr, size_t min); #define usb_get_extra_descriptor(ifpoint, type, ptr) \ __usb_get_extra_descriptor((ifpoint)->extra, \ (ifpoint)->extralen, \ - type, (void **)ptr) + type, (void **)ptr, sizeof(**(ptr))) /* ----------------------------------------------------------------------- */ -- GitLab From 19f74e45746253cafb8cb1e773041e7cadbac622 Mon Sep 17 00:00:00 2001 From: Hui Peng Date: Mon, 3 Dec 2018 16:09:34 +0100 Subject: [PATCH 2065/2269] ALSA: usb-audio: Fix UAF decrement if card has no live interfaces in card.c commit 5f8cf712582617d523120df67d392059eaf2fc4b upstream. If a USB sound card reports 0 interfaces, an error condition is triggered and the function usb_audio_probe errors out. In the error path, there was a use-after-free vulnerability where the memory object of the card was first freed, followed by a decrement of the number of active chips. Moving the decrement above the atomic_dec fixes the UAF. [ The original problem was introduced in 3.1 kernel, while it was developed in a different form. The Fixes tag below indicates the original commit but it doesn't mean that the patch is applicable cleanly. -- tiwai ] Fixes: 362e4e49abe5 ("ALSA: usb-audio - clear chip->probing on error exit") Reported-by: Hui Peng Reported-by: Mathias Payer Signed-off-by: Hui Peng Signed-off-by: Mathias Payer Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/card.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sound/usb/card.c b/sound/usb/card.c index 23d1d23aefec3..4169c71f8a32e 100644 --- a/sound/usb/card.c +++ b/sound/usb/card.c @@ -644,9 +644,12 @@ static int usb_audio_probe(struct usb_interface *intf, __error: if (chip) { + /* chip->active is inside the chip->card object, + * decrement before memory is possibly returned. + */ + atomic_dec(&chip->active); if (!chip->num_interfaces) snd_card_free(chip->card); - atomic_dec(&chip->active); } mutex_unlock(®ister_mutex); return err; -- GitLab From 0618c71c73caa194f4d9ac2d42551ad6df047103 Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Thu, 29 Nov 2018 08:57:37 +0000 Subject: [PATCH 2066/2269] ALSA: hda: Add support for AMD Stoney Ridge commit 3deef52ce10514ccdebba8e8ab85f9cebd0eb3f7 upstream. It's similar to other AMD audio devices, it also supports D3, which can save some power drain. Signed-off-by: Kai-Heng Feng Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/hda_intel.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 4e38905bc47db..d8e80b6f5a6b2 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -2513,6 +2513,10 @@ static const struct pci_device_id azx_ids[] = { /* AMD Hudson */ { PCI_DEVICE(0x1022, 0x780d), .driver_data = AZX_DRIVER_GENERIC | AZX_DCAPS_PRESET_ATI_SB }, + /* AMD Stoney */ + { PCI_DEVICE(0x1022, 0x157a), + .driver_data = AZX_DRIVER_GENERIC | AZX_DCAPS_PRESET_ATI_SB | + AZX_DCAPS_PM_RUNTIME }, /* AMD Raven */ { PCI_DEVICE(0x1022, 0x15e3), .driver_data = AZX_DRIVER_GENERIC | AZX_DCAPS_PRESET_ATI_SB | -- GitLab From 108ab9eafbbeffa17431cb1acd7a930d23bfd197 Mon Sep 17 00:00:00 2001 From: Chanho Min Date: Mon, 26 Nov 2018 14:36:37 +0900 Subject: [PATCH 2067/2269] ALSA: pcm: Fix starvation on down_write_nonblock() commit b888a5f713e4d17faaaff24316585a4eb07f35b7 upstream. Commit 67ec1072b053 ("ALSA: pcm: Fix rwsem deadlock for non-atomic PCM stream") fixes deadlock for non-atomic PCM stream. But, This patch causes antother stuck. If writer is RT thread and reader is a normal thread, the reader thread will be difficult to get scheduled. It may not give chance to release readlocks and writer gets stuck for a long time if they are pinned to single cpu. The deadlock described in the previous commit is because the linux rwsem queues like a FIFO. So, we might need non-FIFO writelock, not non-block one. My suggestion is that the writer gives reader a chance to be scheduled by using the minimum msleep() instaed of spinning without blocking by writer. Also, The *_nonblock may be changed to *_nonfifo appropriately to this concept. In terms of performance, when trylock is failed, this minimum periodic msleep will have the same performance as the tick-based schedule()/wake_up_q(). [ Although this has a fairly high performance penalty, the relevant code path became already rare due to the previous commit ("ALSA: pcm: Call snd_pcm_unlink() conditionally at closing"). That is, now this unconditional msleep appears only when using linked streams, and this must be a rare case. So we accept this as a quick workaround until finding a more suitable one -- tiwai ] Fixes: 67ec1072b053 ("ALSA: pcm: Fix rwsem deadlock for non-atomic PCM stream") Suggested-by: Wonmin Jung Signed-off-by: Chanho Min Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/core/pcm_native.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c index ab3bf36786b60..30784bd1c5af7 100644 --- a/sound/core/pcm_native.c +++ b/sound/core/pcm_native.c @@ -36,6 +36,7 @@ #include #include #include +#include #include "pcm_local.h" @@ -91,12 +92,12 @@ static DECLARE_RWSEM(snd_pcm_link_rwsem); * and this may lead to a deadlock when the code path takes read sem * twice (e.g. one in snd_pcm_action_nonatomic() and another in * snd_pcm_stream_lock()). As a (suboptimal) workaround, let writer to - * spin until it gets the lock. + * sleep until all the readers are completed without blocking by writer. */ -static inline void down_write_nonblock(struct rw_semaphore *lock) +static inline void down_write_nonfifo(struct rw_semaphore *lock) { while (!down_write_trylock(lock)) - cond_resched(); + msleep(1); } /** @@ -1935,7 +1936,7 @@ static int snd_pcm_link(struct snd_pcm_substream *substream, int fd) res = -ENOMEM; goto _nolock; } - down_write_nonblock(&snd_pcm_link_rwsem); + down_write_nonfifo(&snd_pcm_link_rwsem); write_lock_irq(&snd_pcm_link_rwlock); if (substream->runtime->status->state == SNDRV_PCM_STATE_OPEN || substream->runtime->status->state != substream1->runtime->status->state || @@ -1982,7 +1983,7 @@ static int snd_pcm_unlink(struct snd_pcm_substream *substream) struct snd_pcm_substream *s; int res = 0; - down_write_nonblock(&snd_pcm_link_rwsem); + down_write_nonfifo(&snd_pcm_link_rwsem); write_lock_irq(&snd_pcm_link_rwlock); if (!snd_pcm_stream_linked(substream)) { res = -EALREADY; -- GitLab From cabb9539dc1741b2a5bf6077a4aac8156f07a1c8 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 29 Nov 2018 08:02:49 +0100 Subject: [PATCH 2068/2269] ALSA: pcm: Call snd_pcm_unlink() conditionally at closing commit b51abed8355e5556886623b2772fa6b7598d2282 upstream. Currently the PCM core calls snd_pcm_unlink() always unconditionally at closing a stream. However, since snd_pcm_unlink() invokes the global rwsem down, the lock can be easily contended. More badly, when a thread runs in a high priority RT-FIFO, it may stall at spinning. Basically the call of snd_pcm_unlink() is required only for the linked streams that are already rare occasion. For normal use cases, this code path is fairly superfluous. As an optimization (and also as a workaround for the RT problem above in normal situations without linked streams), this patch adds a check before calling snd_pcm_unlink() and calls it only when needed. Reported-by: Chanho Min Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/core/pcm_native.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c index 30784bd1c5af7..966ac384c3f4c 100644 --- a/sound/core/pcm_native.c +++ b/sound/core/pcm_native.c @@ -2338,7 +2338,8 @@ int snd_pcm_hw_constraints_complete(struct snd_pcm_substream *substream) static void pcm_release_private(struct snd_pcm_substream *substream) { - snd_pcm_unlink(substream); + if (snd_pcm_stream_linked(substream)) + snd_pcm_unlink(substream); } void snd_pcm_release_substream(struct snd_pcm_substream *substream) -- GitLab From 7852f54f08a0a7222235561fc57ccd83f1c358d4 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 29 Nov 2018 12:05:19 +0100 Subject: [PATCH 2069/2269] ALSA: pcm: Fix interval evaluation with openmin/max commit 5363857b916c1f48027e9b96ee8be8376bf20811 upstream. As addressed in alsa-lib (commit b420056604f0), we need to fix the case where the evaluation of PCM interval "(x x+1]" leading to -EINVAL. After applying rules, such an interval may be translated as "(x x+1)". Fixes: ff2d6acdf6f1 ("ALSA: pcm: Fix snd_interval_refine first/last with open min/max") Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- include/sound/pcm_params.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/sound/pcm_params.h b/include/sound/pcm_params.h index c704357775fc4..2af7bb3ee57d0 100644 --- a/include/sound/pcm_params.h +++ b/include/sound/pcm_params.h @@ -247,11 +247,13 @@ static inline int snd_interval_empty(const struct snd_interval *i) static inline int snd_interval_single(const struct snd_interval *i) { return (i->min == i->max || - (i->min + 1 == i->max && i->openmax)); + (i->min + 1 == i->max && (i->openmin || i->openmax))); } static inline int snd_interval_value(const struct snd_interval *i) { + if (i->openmin && !i->openmax) + return i->max; return i->min; } -- GitLab From 01a02e66228b69b497136168716cb1ba2b5ebd73 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 3 Dec 2018 10:44:15 +0100 Subject: [PATCH 2070/2269] ALSA: hda/realtek - Fix speaker output regression on Thinkpad T570 commit 54947cd64c1b8290f64bb2958e343c07270e3a58 upstream. We've got a regression report for some Thinkpad models (at least T570s) which shows the too low speaker output volume. The bisection leaded to the commit 61fcf8ece9b6 ("ALSA: hda/realtek - Enable Thinkpad Dock device for ALC298 platform"), and it's basically adding the two pin configurations for the dock, and looks harmless. The real culprit seems, though, that the DAC assignment for the speaker pin is implicitly assumed on these devices, i.e. pin NID 0x14 to be coupled with DAC NID 0x03. When more pins are configured by the commit above, the auto-parser changes the DAC assignment, and this resulted in the regression. As a workaround, just provide the fixed pin / DAC mapping table for this Thinkpad fixup function. It's no generic solution, but the problem itself is pretty much device-specific, so must be good enough. Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1554304 Fixes: 61fcf8ece9b6 ("ALSA: hda/realtek - Enable Thinkpad Dock device for ALC298 platform") Cc: Reported-and-tested-by: Jeremy Cline Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 66b0a124beaea..f6136f041a810 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -4863,9 +4863,18 @@ static void alc_fixup_tpt470_dock(struct hda_codec *codec, { 0x19, 0x21a11010 }, /* dock mic */ { } }; + /* Assure the speaker pin to be coupled with DAC NID 0x03; otherwise + * the speaker output becomes too low by some reason on Thinkpads with + * ALC298 codec + */ + static hda_nid_t preferred_pairs[] = { + 0x14, 0x03, 0x17, 0x02, 0x21, 0x02, + 0 + }; struct alc_spec *spec = codec->spec; if (action == HDA_FIXUP_ACT_PRE_PROBE) { + spec->gen.preferred_dacs = preferred_pairs; spec->parse_flags = HDA_PINCFG_NO_HP_FIXUP; snd_hda_apply_pincfgs(codec, pincfgs); } else if (action == HDA_FIXUP_ACT_INIT) { -- GitLab From 73f6d525be2cded3bd3dd3bcfcfffec548a79e94 Mon Sep 17 00:00:00 2001 From: Halil Pasic Date: Wed, 26 Sep 2018 18:48:29 +0200 Subject: [PATCH 2071/2269] virtio/s390: avoid race on vcdev->config commit 2448a299ec416a80f699940a86f4a6d9a4f643b1 upstream. Currently we have a race on vcdev->config in virtio_ccw_get_config() and in virtio_ccw_set_config(). This normally does not cause problems, as these are usually infrequent operations. However, for some devices writing to/reading from the config space can be triggered through sysfs attributes. For these, userspace can force the race by increasing the frequency. Signed-off-by: Halil Pasic Cc: stable@vger.kernel.org Message-Id: <20180925121309.58524-2-pasic@linux.ibm.com> Signed-off-by: Cornelia Huck Signed-off-by: Michael S. Tsirkin Signed-off-by: Greg Kroah-Hartman --- drivers/s390/virtio/virtio_ccw.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/s390/virtio/virtio_ccw.c b/drivers/s390/virtio/virtio_ccw.c index b18fe2014cf21..3a1b553e76538 100644 --- a/drivers/s390/virtio/virtio_ccw.c +++ b/drivers/s390/virtio/virtio_ccw.c @@ -831,6 +831,7 @@ static void virtio_ccw_get_config(struct virtio_device *vdev, int ret; struct ccw1 *ccw; void *config_area; + unsigned long flags; ccw = kzalloc(sizeof(*ccw), GFP_DMA | GFP_KERNEL); if (!ccw) @@ -849,11 +850,13 @@ static void virtio_ccw_get_config(struct virtio_device *vdev, if (ret) goto out_free; + spin_lock_irqsave(&vcdev->lock, flags); memcpy(vcdev->config, config_area, offset + len); - if (buf) - memcpy(buf, &vcdev->config[offset], len); if (vcdev->config_ready < offset + len) vcdev->config_ready = offset + len; + spin_unlock_irqrestore(&vcdev->lock, flags); + if (buf) + memcpy(buf, config_area + offset, len); out_free: kfree(config_area); @@ -867,6 +870,7 @@ static void virtio_ccw_set_config(struct virtio_device *vdev, struct virtio_ccw_device *vcdev = to_vc_device(vdev); struct ccw1 *ccw; void *config_area; + unsigned long flags; ccw = kzalloc(sizeof(*ccw), GFP_DMA | GFP_KERNEL); if (!ccw) @@ -879,9 +883,11 @@ static void virtio_ccw_set_config(struct virtio_device *vdev, /* Make sure we don't overwrite fields. */ if (vcdev->config_ready < offset) virtio_ccw_get_config(vdev, 0, NULL, offset); + spin_lock_irqsave(&vcdev->lock, flags); memcpy(&vcdev->config[offset], buf, len); /* Write the config area to the host. */ memcpy(config_area, vcdev->config, sizeof(vcdev->config)); + spin_unlock_irqrestore(&vcdev->lock, flags); ccw->cmd_code = CCW_CMD_WRITE_CONF; ccw->flags = 0; ccw->count = offset + len; -- GitLab From 52b993b4c4c2a88b62601b0cc0662a564e1d65dc Mon Sep 17 00:00:00 2001 From: Halil Pasic Date: Wed, 26 Sep 2018 18:48:30 +0200 Subject: [PATCH 2072/2269] virtio/s390: fix race in ccw_io_helper() commit 78b1a52e05c9db11d293342e8d6d8a230a04b4e7 upstream. While ccw_io_helper() seems like intended to be exclusive in a sense that it is supposed to facilitate I/O for at most one thread at any given time, there is actually nothing ensuring that threads won't pile up at vcdev->wait_q. If they do, all threads get woken up and see the status that belongs to some other request than their own. This can lead to bugs. For an example see: https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1788432 This race normally does not cause any problems. The operations provided by struct virtio_config_ops are usually invoked in a well defined sequence, normally don't fail, and are normally used quite infrequent too. Yet, if some of the these operations are directly triggered via sysfs attributes, like in the case described by the referenced bug, userspace is given an opportunity to force races by increasing the frequency of the given operations. Let us fix the problem by ensuring, that for each device, we finish processing the previous request before starting with a new one. Signed-off-by: Halil Pasic Reported-by: Colin Ian King Cc: stable@vger.kernel.org Message-Id: <20180925121309.58524-3-pasic@linux.ibm.com> Signed-off-by: Cornelia Huck Signed-off-by: Michael S. Tsirkin Signed-off-by: Greg Kroah-Hartman --- drivers/s390/virtio/virtio_ccw.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/s390/virtio/virtio_ccw.c b/drivers/s390/virtio/virtio_ccw.c index 3a1b553e76538..0847d05e138b3 100644 --- a/drivers/s390/virtio/virtio_ccw.c +++ b/drivers/s390/virtio/virtio_ccw.c @@ -59,6 +59,7 @@ struct virtio_ccw_device { unsigned int revision; /* Transport revision */ wait_queue_head_t wait_q; spinlock_t lock; + struct mutex io_lock; /* Serializes I/O requests */ struct list_head virtqueues; unsigned long indicators; unsigned long indicators2; @@ -299,6 +300,7 @@ static int ccw_io_helper(struct virtio_ccw_device *vcdev, unsigned long flags; int flag = intparm & VIRTIO_CCW_INTPARM_MASK; + mutex_lock(&vcdev->io_lock); do { spin_lock_irqsave(get_ccwdev_lock(vcdev->cdev), flags); ret = ccw_device_start(vcdev->cdev, ccw, intparm, 0, 0); @@ -311,7 +313,9 @@ static int ccw_io_helper(struct virtio_ccw_device *vcdev, cpu_relax(); } while (ret == -EBUSY); wait_event(vcdev->wait_q, doing_io(vcdev, flag) == 0); - return ret ? ret : vcdev->err; + ret = ret ? ret : vcdev->err; + mutex_unlock(&vcdev->io_lock); + return ret; } static void virtio_ccw_drop_indicator(struct virtio_ccw_device *vcdev, @@ -1256,6 +1260,7 @@ static int virtio_ccw_online(struct ccw_device *cdev) init_waitqueue_head(&vcdev->wait_q); INIT_LIST_HEAD(&vcdev->virtqueues); spin_lock_init(&vcdev->lock); + mutex_init(&vcdev->io_lock); spin_lock_irqsave(get_ccwdev_lock(cdev), flags); dev_set_drvdata(&cdev->dev, vcdev); -- GitLab From f15c072d6576c5e2b693c22e39ccc9103c952078 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Mon, 5 Nov 2018 10:35:47 +0000 Subject: [PATCH 2073/2269] vhost/vsock: fix use-after-free in network stack callers commit 834e772c8db0c6a275d75315d90aba4ebbb1e249 upstream. If the network stack calls .send_pkt()/.cancel_pkt() during .release(), a struct vhost_vsock use-after-free is possible. This occurs because .release() does not wait for other CPUs to stop using struct vhost_vsock. Switch to an RCU-enabled hashtable (indexed by guest CID) so that .release() can wait for other CPUs by calling synchronize_rcu(). This also eliminates vhost_vsock_lock acquisition in the data path so it could have a positive effect on performance. This is CVE-2018-14625 "kernel: use-after-free Read in vhost_transport_send_pkt". Cc: stable@vger.kernel.org Reported-and-tested-by: syzbot+bd391451452fb0b93039@syzkaller.appspotmail.com Reported-by: syzbot+e3e074963495f92a89ed@syzkaller.appspotmail.com Reported-by: syzbot+d5a0a170c5069658b141@syzkaller.appspotmail.com Signed-off-by: Stefan Hajnoczi Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang Signed-off-by: Greg Kroah-Hartman --- drivers/vhost/vsock.c | 57 +++++++++++++++++++++++++------------------ 1 file changed, 33 insertions(+), 24 deletions(-) diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c index c9de9c41aa976..b044a08008050 100644 --- a/drivers/vhost/vsock.c +++ b/drivers/vhost/vsock.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include "vhost.h" @@ -27,14 +28,14 @@ enum { /* Used to track all the vhost_vsock instances on the system. */ static DEFINE_SPINLOCK(vhost_vsock_lock); -static LIST_HEAD(vhost_vsock_list); +static DEFINE_READ_MOSTLY_HASHTABLE(vhost_vsock_hash, 8); struct vhost_vsock { struct vhost_dev dev; struct vhost_virtqueue vqs[2]; - /* Link to global vhost_vsock_list, protected by vhost_vsock_lock */ - struct list_head list; + /* Link to global vhost_vsock_hash, writes use vhost_vsock_lock */ + struct hlist_node hash; struct vhost_work send_pkt_work; spinlock_t send_pkt_list_lock; @@ -50,11 +51,14 @@ static u32 vhost_transport_get_local_cid(void) return VHOST_VSOCK_DEFAULT_HOST_CID; } -static struct vhost_vsock *__vhost_vsock_get(u32 guest_cid) +/* Callers that dereference the return value must hold vhost_vsock_lock or the + * RCU read lock. + */ +static struct vhost_vsock *vhost_vsock_get(u32 guest_cid) { struct vhost_vsock *vsock; - list_for_each_entry(vsock, &vhost_vsock_list, list) { + hash_for_each_possible_rcu(vhost_vsock_hash, vsock, hash, guest_cid) { u32 other_cid = vsock->guest_cid; /* Skip instances that have no CID yet */ @@ -69,17 +73,6 @@ static struct vhost_vsock *__vhost_vsock_get(u32 guest_cid) return NULL; } -static struct vhost_vsock *vhost_vsock_get(u32 guest_cid) -{ - struct vhost_vsock *vsock; - - spin_lock_bh(&vhost_vsock_lock); - vsock = __vhost_vsock_get(guest_cid); - spin_unlock_bh(&vhost_vsock_lock); - - return vsock; -} - static void vhost_transport_do_send_pkt(struct vhost_vsock *vsock, struct vhost_virtqueue *vq) @@ -210,9 +203,12 @@ vhost_transport_send_pkt(struct virtio_vsock_pkt *pkt) struct vhost_vsock *vsock; int len = pkt->len; + rcu_read_lock(); + /* Find the vhost_vsock according to guest context id */ vsock = vhost_vsock_get(le64_to_cpu(pkt->hdr.dst_cid)); if (!vsock) { + rcu_read_unlock(); virtio_transport_free_pkt(pkt); return -ENODEV; } @@ -225,6 +221,8 @@ vhost_transport_send_pkt(struct virtio_vsock_pkt *pkt) spin_unlock_bh(&vsock->send_pkt_list_lock); vhost_work_queue(&vsock->dev, &vsock->send_pkt_work); + + rcu_read_unlock(); return len; } @@ -234,12 +232,15 @@ vhost_transport_cancel_pkt(struct vsock_sock *vsk) struct vhost_vsock *vsock; struct virtio_vsock_pkt *pkt, *n; int cnt = 0; + int ret = -ENODEV; LIST_HEAD(freeme); + rcu_read_lock(); + /* Find the vhost_vsock according to guest context id */ vsock = vhost_vsock_get(vsk->remote_addr.svm_cid); if (!vsock) - return -ENODEV; + goto out; spin_lock_bh(&vsock->send_pkt_list_lock); list_for_each_entry_safe(pkt, n, &vsock->send_pkt_list, list) { @@ -265,7 +266,10 @@ vhost_transport_cancel_pkt(struct vsock_sock *vsk) vhost_poll_queue(&tx_vq->poll); } - return 0; + ret = 0; +out: + rcu_read_unlock(); + return ret; } static struct virtio_vsock_pkt * @@ -531,10 +535,6 @@ static int vhost_vsock_dev_open(struct inode *inode, struct file *file) spin_lock_init(&vsock->send_pkt_list_lock); INIT_LIST_HEAD(&vsock->send_pkt_list); vhost_work_init(&vsock->send_pkt_work, vhost_transport_send_pkt_work); - - spin_lock_bh(&vhost_vsock_lock); - list_add_tail(&vsock->list, &vhost_vsock_list); - spin_unlock_bh(&vhost_vsock_lock); return 0; out: @@ -575,9 +575,13 @@ static int vhost_vsock_dev_release(struct inode *inode, struct file *file) struct vhost_vsock *vsock = file->private_data; spin_lock_bh(&vhost_vsock_lock); - list_del(&vsock->list); + if (vsock->guest_cid) + hash_del_rcu(&vsock->hash); spin_unlock_bh(&vhost_vsock_lock); + /* Wait for other CPUs to finish using vsock */ + synchronize_rcu(); + /* Iterating over all connections for all CIDs to find orphans is * inefficient. Room for improvement here. */ vsock_for_each_connected_socket(vhost_vsock_reset_orphans); @@ -618,12 +622,17 @@ static int vhost_vsock_set_cid(struct vhost_vsock *vsock, u64 guest_cid) /* Refuse if CID is already in use */ spin_lock_bh(&vhost_vsock_lock); - other = __vhost_vsock_get(guest_cid); + other = vhost_vsock_get(guest_cid); if (other && other != vsock) { spin_unlock_bh(&vhost_vsock_lock); return -EADDRINUSE; } + + if (vsock->guest_cid) + hash_del_rcu(&vsock->hash); + vsock->guest_cid = guest_cid; + hash_add_rcu(vhost_vsock_hash, &vsock->hash, guest_cid); spin_unlock_bh(&vhost_vsock_lock); return 0; -- GitLab From e3dccb527cfc97c4027824b6104635714ae14f47 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 30 Nov 2018 15:39:57 -0500 Subject: [PATCH 2074/2269] SUNRPC: Fix leak of krb5p encode pages commit 8dae5398ab1ac107b1517e8195ed043d5f422bd0 upstream. call_encode can be invoked more than once per RPC call. Ensure that each call to gss_wrap_req_priv does not overwrite pointers to previously allocated memory. Signed-off-by: Chuck Lever Cc: stable@kernel.org Signed-off-by: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- net/sunrpc/auth_gss/auth_gss.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 9463af4b32e8d..1281b967dbf96 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -1736,6 +1736,7 @@ priv_release_snd_buf(struct rpc_rqst *rqstp) for (i=0; i < rqstp->rq_enc_pages_num; i++) __free_page(rqstp->rq_enc_pages[i]); kfree(rqstp->rq_enc_pages); + rqstp->rq_release_snd_buf = NULL; } static int @@ -1744,6 +1745,9 @@ alloc_enc_pages(struct rpc_rqst *rqstp) struct xdr_buf *snd_buf = &rqstp->rq_snd_buf; int first, last, i; + if (rqstp->rq_release_snd_buf) + rqstp->rq_release_snd_buf(rqstp); + if (snd_buf->page_len == 0) { rqstp->rq_enc_pages_num = 0; return 0; -- GitLab From bc3d506a116f1a00dba4a380ff511007c71e4db9 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 5 Dec 2018 18:33:59 +0200 Subject: [PATCH 2075/2269] dmaengine: dw: Fix FIFO size for Intel Merrifield commit ffe843b18211301ad25893eba09f402c19d12304 upstream. Intel Merrifield has a reduced size of FIFO used in iDMA 32-bit controller, i.e. 512 bytes instead of 1024. Fix this by partitioning it as 64 bytes per channel. Note, in the future we might switch to 'fifo-size' property instead of hard coded value. Fixes: 199244d69458 ("dmaengine: dw: add support of iDMA 32-bit hardware") Signed-off-by: Andy Shevchenko Cc: stable@vger.kernel.org Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/dw/core.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/dma/dw/core.c b/drivers/dma/dw/core.c index f43e6dafe446d..0f389e008ce64 100644 --- a/drivers/dma/dw/core.c +++ b/drivers/dma/dw/core.c @@ -1064,12 +1064,12 @@ static void dwc_issue_pending(struct dma_chan *chan) /* * Program FIFO size of channels. * - * By default full FIFO (1024 bytes) is assigned to channel 0. Here we + * By default full FIFO (512 bytes) is assigned to channel 0. Here we * slice FIFO on equal parts between channels. */ static void idma32_fifo_partition(struct dw_dma *dw) { - u64 value = IDMA32C_FP_PSIZE_CH0(128) | IDMA32C_FP_PSIZE_CH1(128) | + u64 value = IDMA32C_FP_PSIZE_CH0(64) | IDMA32C_FP_PSIZE_CH1(64) | IDMA32C_FP_UPDATE; u64 fifo_partition = 0; @@ -1082,7 +1082,7 @@ static void idma32_fifo_partition(struct dw_dma *dw) /* Fill FIFO_PARTITION high bits (Channels 2..3, 6..7) */ fifo_partition |= value << 32; - /* Program FIFO Partition registers - 128 bytes for each channel */ + /* Program FIFO Partition registers - 64 bytes per channel */ idma32_writeq(dw, FIFO_PARTITION1, fifo_partition); idma32_writeq(dw, FIFO_PARTITION0, fifo_partition); } -- GitLab From e47a6e68c3bbfebb6d6f336d92884df37b722e4b Mon Sep 17 00:00:00 2001 From: Bin Liu Date: Mon, 12 Nov 2018 09:43:22 -0600 Subject: [PATCH 2076/2269] dmaengine: cppi41: delete channel from pending list when stop channel commit 59861547ec9a9736e7882f6fb0c096a720ff811a upstream. The driver defines three states for a cppi channel. - idle: .chan_busy == 0 && not in .pending list - pending: .chan_busy == 0 && in .pending list - busy: .chan_busy == 1 && not in .pending list There are cases in which the cppi channel could be in the pending state when cppi41_dma_issue_pending() is called after cppi41_runtime_suspend() is called. cppi41_stop_chan() has a bug for these cases to set channels to idle state. It only checks the .chan_busy flag, but not the .pending list, then later when cppi41_runtime_resume() is called the channels in .pending list will be transitioned to busy state. Removing channels from the .pending list solves the problem. Fixes: 975faaeb9985 ("dma: cppi41: start tear down only if channel is busy") Cc: stable@vger.kernel.org # v3.15+ Signed-off-by: Bin Liu Reviewed-by: Peter Ujfalusi Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/cppi41.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/dma/cppi41.c b/drivers/dma/cppi41.c index f7e965f632747..ddd4a3932127e 100644 --- a/drivers/dma/cppi41.c +++ b/drivers/dma/cppi41.c @@ -723,8 +723,22 @@ static int cppi41_stop_chan(struct dma_chan *chan) desc_phys = lower_32_bits(c->desc_phys); desc_num = (desc_phys - cdd->descs_phys) / sizeof(struct cppi41_desc); - if (!cdd->chan_busy[desc_num]) + if (!cdd->chan_busy[desc_num]) { + struct cppi41_channel *cc, *_ct; + + /* + * channels might still be in the pendling list if + * cppi41_dma_issue_pending() is called after + * cppi41_runtime_suspend() is called + */ + list_for_each_entry_safe(cc, _ct, &cdd->pending, node) { + if (cc != c) + continue; + list_del(&cc->node); + break; + } return 0; + } ret = cppi41_tear_down_chan(c); if (ret) -- GitLab From f9e0bc710347fadad55910846ee36f0681d0dca8 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 30 Oct 2018 22:12:56 +0100 Subject: [PATCH 2077/2269] ARM: 8806/1: kprobes: Fix false positive with FORTIFY_SOURCE commit e46daee53bb50bde38805f1823a182979724c229 upstream. The arm compiler internally interprets an inline assembly label as an unsigned long value, not a pointer. As a result, under CONFIG_FORTIFY_SOURCE, the address of a label has a size of 4 bytes, which was tripping the runtime checks. Instead, we can just cast the label (as done with the size calculations earlier). Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1639397 Reported-by: William Cohen Fixes: 6974f0c4555e ("include/linux/string.h: add the option of fortified string.h functions") Cc: stable@vger.kernel.org Acked-by: Laura Abbott Acked-by: Masami Hiramatsu Tested-by: William Cohen Signed-off-by: Kees Cook Signed-off-by: Russell King Signed-off-by: Greg Kroah-Hartman --- arch/arm/probes/kprobes/opt-arm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/probes/kprobes/opt-arm.c b/arch/arm/probes/kprobes/opt-arm.c index b2aa9b32bff2b..2c118a6ab3587 100644 --- a/arch/arm/probes/kprobes/opt-arm.c +++ b/arch/arm/probes/kprobes/opt-arm.c @@ -247,7 +247,7 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *or } /* Copy arch-dep-instance from template. */ - memcpy(code, &optprobe_template_entry, + memcpy(code, (unsigned char *)optprobe_template_entry, TMPL_END_IDX * sizeof(kprobe_opcode_t)); /* Adjust buffer according to instruction. */ -- GitLab From 3ff0131284e94ddac80520fa549d55a4294776cb Mon Sep 17 00:00:00 2001 From: Sandeep Singh Date: Wed, 5 Dec 2018 14:22:38 +0200 Subject: [PATCH 2078/2269] xhci: workaround CSS timeout on AMD SNPS 3.0 xHC commit a7d57abcc8a5bdeb53bbf8e87558e8e0a2c2a29d upstream. Occasionally AMD SNPS 3.0 xHC does not respond to CSS when set, also it does not flag anything on SRE and HCE to point the internal xHC errors on USBSTS register. This stalls the entire system wide suspend and there is no point in stalling just because of xHC CSS is not responding. To work around this problem, if the xHC does not flag anything on SRE and HCE, we can skip the CSS timeout and allow the system to continue the suspend. Once the system resume happens we can internally reset the controller using XHCI_RESET_ON_RESUME quirk Signed-off-by: Shyam Sundar S K Signed-off-by: Sandeep Singh cc: Nehal Shah Cc: Tested-by: Kai-Heng Feng Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-pci.c | 4 ++++ drivers/usb/host/xhci.c | 26 ++++++++++++++++++++++---- drivers/usb/host/xhci.h | 3 +++ 3 files changed, 29 insertions(+), 4 deletions(-) diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index 4b07b6859b4c7..0fbc549cc55c3 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -144,6 +144,10 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) pdev->device == 0x43bb)) xhci->quirks |= XHCI_SUSPEND_DELAY; + if (pdev->vendor == PCI_VENDOR_ID_AMD && + (pdev->device == 0x15e0 || pdev->device == 0x15e1)) + xhci->quirks |= XHCI_SNPS_BROKEN_SUSPEND; + if (pdev->vendor == PCI_VENDOR_ID_AMD) xhci->quirks |= XHCI_TRUST_TX_LENGTH; diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index faf0486821942..155b36265f7ac 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -918,6 +918,7 @@ int xhci_suspend(struct xhci_hcd *xhci, bool do_wakeup) unsigned int delay = XHCI_MAX_HALT_USEC; struct usb_hcd *hcd = xhci_to_hcd(xhci); u32 command; + u32 res; if (!hcd->state) return 0; @@ -969,11 +970,28 @@ int xhci_suspend(struct xhci_hcd *xhci, bool do_wakeup) command = readl(&xhci->op_regs->command); command |= CMD_CSS; writel(command, &xhci->op_regs->command); + xhci->broken_suspend = 0; if (xhci_handshake(&xhci->op_regs->status, STS_SAVE, 0, 10 * 1000)) { - xhci_warn(xhci, "WARN: xHC save state timeout\n"); - spin_unlock_irq(&xhci->lock); - return -ETIMEDOUT; + /* + * AMD SNPS xHC 3.0 occasionally does not clear the + * SSS bit of USBSTS and when driver tries to poll + * to see if the xHC clears BIT(8) which never happens + * and driver assumes that controller is not responding + * and times out. To workaround this, its good to check + * if SRE and HCE bits are not set (as per xhci + * Section 5.4.2) and bypass the timeout. + */ + res = readl(&xhci->op_regs->status); + if ((xhci->quirks & XHCI_SNPS_BROKEN_SUSPEND) && + (((res & STS_SRE) == 0) && + ((res & STS_HCE) == 0))) { + xhci->broken_suspend = 1; + } else { + xhci_warn(xhci, "WARN: xHC save state timeout\n"); + spin_unlock_irq(&xhci->lock); + return -ETIMEDOUT; + } } spin_unlock_irq(&xhci->lock); @@ -1026,7 +1044,7 @@ int xhci_resume(struct xhci_hcd *xhci, bool hibernated) set_bit(HCD_FLAG_HW_ACCESSIBLE, &xhci->shared_hcd->flags); spin_lock_irq(&xhci->lock); - if (xhci->quirks & XHCI_RESET_ON_RESUME) + if ((xhci->quirks & XHCI_RESET_ON_RESUME) || xhci->broken_suspend) hibernated = true; if (!hibernated) { diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index 74ba20556020c..1ccff2d9dee99 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -1839,6 +1839,7 @@ struct xhci_hcd { #define XHCI_SUSPEND_DELAY BIT_ULL(30) #define XHCI_INTEL_USB_ROLE_SW BIT_ULL(31) #define XHCI_RESET_PLL_ON_DISCONNECT BIT_ULL(34) +#define XHCI_SNPS_BROKEN_SUSPEND BIT_ULL(35) unsigned int num_active_eps; unsigned int limit_active_eps; @@ -1870,6 +1871,8 @@ struct xhci_hcd { /* platform-specific data -- must come last */ unsigned long priv[0] __aligned(sizeof(s64)); + /* Broken Suspend flag for SNPS Suspend resume issue */ + u8 broken_suspend; }; /* Platform specific overrides to generic XHCI hc_driver ops */ -- GitLab From cacfa255bee4fd7137939411b05a0527475a19ec Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Wed, 5 Dec 2018 14:22:39 +0200 Subject: [PATCH 2079/2269] xhci: Prevent U1/U2 link pm states if exit latency is too long commit 0472bf06c6fd33c1a18aaead4c8f91e5a03d8d7b upstream. Don't allow USB3 U1 or U2 if the latency to wake up from the U-state reaches the service interval for a periodic endpoint. This is according to xhci 1.1 specification section 4.23.5.2 extra note: "Software shall ensure that a device is prevented from entering a U-state where its worst case exit latency approaches the ESIT." Allowing too long exit latencies for periodic endpoint confuses xHC internal scheduling, and new devices may fail to enumerate with a "Not enough bandwidth for new device state" error from the host. Cc: Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 155b36265f7ac..930eecd864299 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -4381,6 +4381,14 @@ static u16 xhci_calculate_u1_timeout(struct xhci_hcd *xhci, { unsigned long long timeout_ns; + /* Prevent U1 if service interval is shorter than U1 exit latency */ + if (usb_endpoint_xfer_int(desc) || usb_endpoint_xfer_isoc(desc)) { + if (xhci_service_interval_to_ns(desc) <= udev->u1_params.mel) { + dev_dbg(&udev->dev, "Disable U1, ESIT shorter than exit latency\n"); + return USB3_LPM_DISABLED; + } + } + if (xhci->quirks & XHCI_INTEL_HOST) timeout_ns = xhci_calculate_intel_u1_timeout(udev, desc); else @@ -4437,6 +4445,14 @@ static u16 xhci_calculate_u2_timeout(struct xhci_hcd *xhci, { unsigned long long timeout_ns; + /* Prevent U2 if service interval is shorter than U2 exit latency */ + if (usb_endpoint_xfer_int(desc) || usb_endpoint_xfer_isoc(desc)) { + if (xhci_service_interval_to_ns(desc) <= udev->u2_params.mel) { + dev_dbg(&udev->dev, "Disable U2, ESIT shorter than exit latency\n"); + return USB3_LPM_DISABLED; + } + } + if (xhci->quirks & XHCI_INTEL_HOST) timeout_ns = xhci_calculate_intel_u2_timeout(udev, desc); else -- GitLab From 38fce19d4d7bc8acfa183ee2918758d279a69c9a Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 10 Dec 2018 17:00:50 +0000 Subject: [PATCH 2080/2269] f2fs: fix to do sanity check with block address in main area v2 commit 91291e9998d208370eb8156c760691b873bd7522 upstream. This patch adds f2fs_is_valid_blkaddr() in below functions to do sanity check with block address to avoid pentential panic: - f2fs_grab_read_bio() - __written_first_block() https://bugzilla.kernel.org/show_bug.cgi?id=200465 - Reproduce - POC (poc.c) #define _GNU_SOURCE #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static void activity(char *mpoint) { char *xattr; int err; err = asprintf(&xattr, "%s/foo/bar/xattr", mpoint); char buf2[113]; memset(buf2, 0, sizeof(buf2)); listxattr(xattr, buf2, sizeof(buf2)); } int main(int argc, char *argv[]) { activity(argv[1]); return 0; } - kernel message [ 844.718738] F2FS-fs (loop0): Mounted with checkpoint version = 2 [ 846.430929] F2FS-fs (loop0): access invalid blkaddr:1024 [ 846.431058] WARNING: CPU: 1 PID: 1249 at fs/f2fs/checkpoint.c:154 f2fs_is_valid_blkaddr+0x10f/0x160 [ 846.431059] Modules linked in: snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hda_core snd_hwdep snd_pcm snd_timer snd input_leds joydev soundcore serio_raw i2c_piix4 mac_hid ib_iser rdma_cm iw_cm ib_cm ib_core configfs iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi autofs4 raid10 raid456 libcrc32c async_raid6_recov async_memcpy async_pq async_xor xor async_tx raid6_pq raid1 raid0 multipath linear qxl ttm crct10dif_pclmul crc32_pclmul drm_kms_helper ghash_clmulni_intel syscopyarea sysfillrect sysimgblt fb_sys_fops pcbc drm 8139too aesni_intel 8139cp floppy psmouse mii aes_x86_64 crypto_simd pata_acpi cryptd glue_helper [ 846.431310] CPU: 1 PID: 1249 Comm: a.out Not tainted 4.18.0-rc3+ #1 [ 846.431312] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 [ 846.431315] RIP: 0010:f2fs_is_valid_blkaddr+0x10f/0x160 [ 846.431316] Code: 00 eb ed 31 c0 83 fa 05 75 ae 48 83 ec 08 48 8b 3f 89 f1 48 c7 c2 fc 0b 0f 8b 48 c7 c6 8b d7 09 8b 88 44 24 07 e8 61 8b ff ff <0f> 0b 0f b6 44 24 07 48 83 c4 08 eb 81 4c 8b 47 10 8b 8f 38 04 00 [ 846.431347] RSP: 0018:ffff961c414a7bc0 EFLAGS: 00010282 [ 846.431349] RAX: 0000000000000000 RBX: ffffc5f787b8ea80 RCX: 0000000000000000 [ 846.431350] RDX: 0000000000000000 RSI: ffff89dfffd165d8 RDI: ffff89dfffd165d8 [ 846.431351] RBP: ffff961c414a7c20 R08: 0000000000000001 R09: 0000000000000248 [ 846.431353] R10: 0000000000000000 R11: 0000000000000248 R12: 0000000000000007 [ 846.431369] R13: ffff89dff5492800 R14: ffff89dfae3aa000 R15: ffff89dff4ff88d0 [ 846.431372] FS: 00007f882e2fb700(0000) GS:ffff89dfffd00000(0000) knlGS:0000000000000000 [ 846.431373] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 846.431374] CR2: 0000000001a88008 CR3: 00000001eb572000 CR4: 00000000000006e0 [ 846.431384] Call Trace: [ 846.431426] f2fs_iget+0x6f4/0xe70 [ 846.431430] ? f2fs_find_entry+0x71/0x90 [ 846.431432] f2fs_lookup+0x1aa/0x390 [ 846.431452] __lookup_slow+0x97/0x150 [ 846.431459] lookup_slow+0x35/0x50 [ 846.431462] walk_component+0x1c6/0x470 [ 846.431479] ? memcg_kmem_charge_memcg+0x70/0x90 [ 846.431488] ? page_add_file_rmap+0x13/0x200 [ 846.431491] path_lookupat+0x76/0x230 [ 846.431501] ? __alloc_pages_nodemask+0xfc/0x280 [ 846.431504] filename_lookup+0xb8/0x1a0 [ 846.431534] ? _cond_resched+0x16/0x40 [ 846.431541] ? kmem_cache_alloc+0x160/0x1d0 [ 846.431549] ? path_listxattr+0x41/0xa0 [ 846.431551] path_listxattr+0x41/0xa0 [ 846.431570] do_syscall_64+0x55/0x100 [ 846.431583] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 846.431607] RIP: 0033:0x7f882de1c0d7 [ 846.431607] Code: f0 ff ff 73 01 c3 48 8b 0d be dd 2b 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 b8 c2 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 91 dd 2b 00 f7 d8 64 89 01 48 [ 846.431639] RSP: 002b:00007ffe8e66c238 EFLAGS: 00000202 ORIG_RAX: 00000000000000c2 [ 846.431641] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f882de1c0d7 [ 846.431642] RDX: 0000000000000071 RSI: 00007ffe8e66c280 RDI: 0000000001a880c0 [ 846.431643] RBP: 00007ffe8e66c300 R08: 0000000001a88010 R09: 0000000000000000 [ 846.431645] R10: 00000000000001ab R11: 0000000000000202 R12: 0000000000400550 [ 846.431646] R13: 00007ffe8e66c400 R14: 0000000000000000 R15: 0000000000000000 [ 846.431648] ---[ end trace abca54df39d14f5c ]--- [ 846.431651] F2FS-fs (loop0): invalid blkaddr: 1024, type: 5, run fsck to fix. [ 846.431762] WARNING: CPU: 1 PID: 1249 at fs/f2fs/f2fs.h:2697 f2fs_iget+0xd17/0xe70 [ 846.431763] Modules linked in: snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hda_core snd_hwdep snd_pcm snd_timer snd input_leds joydev soundcore serio_raw i2c_piix4 mac_hid ib_iser rdma_cm iw_cm ib_cm ib_core configfs iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi autofs4 raid10 raid456 libcrc32c async_raid6_recov async_memcpy async_pq async_xor xor async_tx raid6_pq raid1 raid0 multipath linear qxl ttm crct10dif_pclmul crc32_pclmul drm_kms_helper ghash_clmulni_intel syscopyarea sysfillrect sysimgblt fb_sys_fops pcbc drm 8139too aesni_intel 8139cp floppy psmouse mii aes_x86_64 crypto_simd pata_acpi cryptd glue_helper [ 846.431797] CPU: 1 PID: 1249 Comm: a.out Tainted: G W 4.18.0-rc3+ #1 [ 846.431798] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 [ 846.431800] RIP: 0010:f2fs_iget+0xd17/0xe70 [ 846.431801] Code: ff ff 48 63 d8 e9 e1 f6 ff ff 48 8b 45 c8 41 b8 05 00 00 00 48 c7 c2 d8 e8 0e 8b 48 c7 c6 1d b0 0a 8b 48 8b 38 e8 f9 b4 00 00 <0f> 0b 48 8b 45 c8 f0 80 48 48 04 e9 d8 f9 ff ff 0f 0b 48 8b 43 18 [ 846.431832] RSP: 0018:ffff961c414a7bd0 EFLAGS: 00010282 [ 846.431834] RAX: 0000000000000000 RBX: ffffc5f787b8ea80 RCX: 0000000000000006 [ 846.431835] RDX: 0000000000000000 RSI: 0000000000000096 RDI: ffff89dfffd165d0 [ 846.431836] RBP: ffff961c414a7c20 R08: 0000000000000000 R09: 0000000000000273 [ 846.431837] R10: 0000000000000000 R11: ffff89dfad50ca60 R12: 0000000000000007 [ 846.431838] R13: ffff89dff5492800 R14: ffff89dfae3aa000 R15: ffff89dff4ff88d0 [ 846.431840] FS: 00007f882e2fb700(0000) GS:ffff89dfffd00000(0000) knlGS:0000000000000000 [ 846.431841] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 846.431842] CR2: 0000000001a88008 CR3: 00000001eb572000 CR4: 00000000000006e0 [ 846.431846] Call Trace: [ 846.431850] ? f2fs_find_entry+0x71/0x90 [ 846.431853] f2fs_lookup+0x1aa/0x390 [ 846.431856] __lookup_slow+0x97/0x150 [ 846.431858] lookup_slow+0x35/0x50 [ 846.431874] walk_component+0x1c6/0x470 [ 846.431878] ? memcg_kmem_charge_memcg+0x70/0x90 [ 846.431880] ? page_add_file_rmap+0x13/0x200 [ 846.431882] path_lookupat+0x76/0x230 [ 846.431884] ? __alloc_pages_nodemask+0xfc/0x280 [ 846.431886] filename_lookup+0xb8/0x1a0 [ 846.431890] ? _cond_resched+0x16/0x40 [ 846.431891] ? kmem_cache_alloc+0x160/0x1d0 [ 846.431894] ? path_listxattr+0x41/0xa0 [ 846.431896] path_listxattr+0x41/0xa0 [ 846.431898] do_syscall_64+0x55/0x100 [ 846.431901] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 846.431902] RIP: 0033:0x7f882de1c0d7 [ 846.431903] Code: f0 ff ff 73 01 c3 48 8b 0d be dd 2b 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 b8 c2 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 91 dd 2b 00 f7 d8 64 89 01 48 [ 846.431934] RSP: 002b:00007ffe8e66c238 EFLAGS: 00000202 ORIG_RAX: 00000000000000c2 [ 846.431936] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f882de1c0d7 [ 846.431937] RDX: 0000000000000071 RSI: 00007ffe8e66c280 RDI: 0000000001a880c0 [ 846.431939] RBP: 00007ffe8e66c300 R08: 0000000001a88010 R09: 0000000000000000 [ 846.431940] R10: 00000000000001ab R11: 0000000000000202 R12: 0000000000400550 [ 846.431941] R13: 00007ffe8e66c400 R14: 0000000000000000 R15: 0000000000000000 [ 846.431943] ---[ end trace abca54df39d14f5d ]--- [ 846.432033] F2FS-fs (loop0): access invalid blkaddr:1024 [ 846.432051] WARNING: CPU: 1 PID: 1249 at fs/f2fs/checkpoint.c:154 f2fs_is_valid_blkaddr+0x10f/0x160 [ 846.432051] Modules linked in: snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hda_core snd_hwdep snd_pcm snd_timer snd input_leds joydev soundcore serio_raw i2c_piix4 mac_hid ib_iser rdma_cm iw_cm ib_cm ib_core configfs iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi autofs4 raid10 raid456 libcrc32c async_raid6_recov async_memcpy async_pq async_xor xor async_tx raid6_pq raid1 raid0 multipath linear qxl ttm crct10dif_pclmul crc32_pclmul drm_kms_helper ghash_clmulni_intel syscopyarea sysfillrect sysimgblt fb_sys_fops pcbc drm 8139too aesni_intel 8139cp floppy psmouse mii aes_x86_64 crypto_simd pata_acpi cryptd glue_helper [ 846.432085] CPU: 1 PID: 1249 Comm: a.out Tainted: G W 4.18.0-rc3+ #1 [ 846.432086] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 [ 846.432089] RIP: 0010:f2fs_is_valid_blkaddr+0x10f/0x160 [ 846.432089] Code: 00 eb ed 31 c0 83 fa 05 75 ae 48 83 ec 08 48 8b 3f 89 f1 48 c7 c2 fc 0b 0f 8b 48 c7 c6 8b d7 09 8b 88 44 24 07 e8 61 8b ff ff <0f> 0b 0f b6 44 24 07 48 83 c4 08 eb 81 4c 8b 47 10 8b 8f 38 04 00 [ 846.432120] RSP: 0018:ffff961c414a7900 EFLAGS: 00010286 [ 846.432122] RAX: 0000000000000000 RBX: 0000000000000400 RCX: 0000000000000006 [ 846.432123] RDX: 0000000000000000 RSI: 0000000000000096 RDI: ffff89dfffd165d0 [ 846.432124] RBP: ffff89dff5492800 R08: 0000000000000001 R09: 000000000000029d [ 846.432125] R10: ffff961c414a7820 R11: 000000000000029d R12: 0000000000000400 [ 846.432126] R13: 0000000000000000 R14: ffff89dff4ff88d0 R15: 0000000000000000 [ 846.432128] FS: 00007f882e2fb700(0000) GS:ffff89dfffd00000(0000) knlGS:0000000000000000 [ 846.432130] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 846.432131] CR2: 0000000001a88008 CR3: 00000001eb572000 CR4: 00000000000006e0 [ 846.432135] Call Trace: [ 846.432151] f2fs_wait_on_block_writeback+0x20/0x110 [ 846.432158] f2fs_grab_read_bio+0xbc/0xe0 [ 846.432161] f2fs_submit_page_read+0x21/0x280 [ 846.432163] f2fs_get_read_data_page+0xb7/0x3c0 [ 846.432165] f2fs_get_lock_data_page+0x29/0x1e0 [ 846.432167] f2fs_get_new_data_page+0x148/0x550 [ 846.432170] f2fs_add_regular_entry+0x1d2/0x550 [ 846.432178] ? __switch_to+0x12f/0x460 [ 846.432181] f2fs_add_dentry+0x6a/0xd0 [ 846.432184] f2fs_do_add_link+0xe9/0x140 [ 846.432186] __recover_dot_dentries+0x260/0x280 [ 846.432189] f2fs_lookup+0x343/0x390 [ 846.432193] __lookup_slow+0x97/0x150 [ 846.432195] lookup_slow+0x35/0x50 [ 846.432208] walk_component+0x1c6/0x470 [ 846.432212] ? memcg_kmem_charge_memcg+0x70/0x90 [ 846.432215] ? page_add_file_rmap+0x13/0x200 [ 846.432217] path_lookupat+0x76/0x230 [ 846.432219] ? __alloc_pages_nodemask+0xfc/0x280 [ 846.432221] filename_lookup+0xb8/0x1a0 [ 846.432224] ? _cond_resched+0x16/0x40 [ 846.432226] ? kmem_cache_alloc+0x160/0x1d0 [ 846.432228] ? path_listxattr+0x41/0xa0 [ 846.432230] path_listxattr+0x41/0xa0 [ 846.432233] do_syscall_64+0x55/0x100 [ 846.432235] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 846.432237] RIP: 0033:0x7f882de1c0d7 [ 846.432237] Code: f0 ff ff 73 01 c3 48 8b 0d be dd 2b 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 b8 c2 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 91 dd 2b 00 f7 d8 64 89 01 48 [ 846.432269] RSP: 002b:00007ffe8e66c238 EFLAGS: 00000202 ORIG_RAX: 00000000000000c2 [ 846.432271] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f882de1c0d7 [ 846.432272] RDX: 0000000000000071 RSI: 00007ffe8e66c280 RDI: 0000000001a880c0 [ 846.432273] RBP: 00007ffe8e66c300 R08: 0000000001a88010 R09: 0000000000000000 [ 846.432274] R10: 00000000000001ab R11: 0000000000000202 R12: 0000000000400550 [ 846.432275] R13: 00007ffe8e66c400 R14: 0000000000000000 R15: 0000000000000000 [ 846.432277] ---[ end trace abca54df39d14f5e ]--- [ 846.432279] F2FS-fs (loop0): invalid blkaddr: 1024, type: 5, run fsck to fix. [ 846.432376] WARNING: CPU: 1 PID: 1249 at fs/f2fs/f2fs.h:2697 f2fs_wait_on_block_writeback+0xb1/0x110 [ 846.432376] Modules linked in: snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hda_core snd_hwdep snd_pcm snd_timer snd input_leds joydev soundcore serio_raw i2c_piix4 mac_hid ib_iser rdma_cm iw_cm ib_cm ib_core configfs iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi autofs4 raid10 raid456 libcrc32c async_raid6_recov async_memcpy async_pq async_xor xor async_tx raid6_pq raid1 raid0 multipath linear qxl ttm crct10dif_pclmul crc32_pclmul drm_kms_helper ghash_clmulni_intel syscopyarea sysfillrect sysimgblt fb_sys_fops pcbc drm 8139too aesni_intel 8139cp floppy psmouse mii aes_x86_64 crypto_simd pata_acpi cryptd glue_helper [ 846.432410] CPU: 1 PID: 1249 Comm: a.out Tainted: G W 4.18.0-rc3+ #1 [ 846.432411] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 [ 846.432413] RIP: 0010:f2fs_wait_on_block_writeback+0xb1/0x110 [ 846.432414] Code: 66 90 f0 ff 4b 34 74 59 5b 5d c3 48 8b 7d 00 41 b8 05 00 00 00 89 d9 48 c7 c2 d8 e8 0e 8b 48 c7 c6 1d b0 0a 8b e8 df bc fd ff <0f> 0b f0 80 4d 48 04 e9 67 ff ff ff 48 8b 03 48 c1 e8 37 83 e0 07 [ 846.432445] RSP: 0018:ffff961c414a7910 EFLAGS: 00010286 [ 846.432447] RAX: 0000000000000000 RBX: 0000000000000400 RCX: 0000000000000006 [ 846.432448] RDX: 0000000000000000 RSI: 0000000000000092 RDI: ffff89dfffd165d0 [ 846.432449] RBP: ffff89dff5492800 R08: 0000000000000000 R09: 00000000000002d1 [ 846.432450] R10: ffff961c414a7820 R11: ffff89dfad50cf80 R12: 0000000000000400 [ 846.432451] R13: 0000000000000000 R14: ffff89dff4ff88d0 R15: 0000000000000000 [ 846.432453] FS: 00007f882e2fb700(0000) GS:ffff89dfffd00000(0000) knlGS:0000000000000000 [ 846.432454] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 846.432455] CR2: 0000000001a88008 CR3: 00000001eb572000 CR4: 00000000000006e0 [ 846.432459] Call Trace: [ 846.432463] f2fs_grab_read_bio+0xbc/0xe0 [ 846.432464] f2fs_submit_page_read+0x21/0x280 [ 846.432466] f2fs_get_read_data_page+0xb7/0x3c0 [ 846.432468] f2fs_get_lock_data_page+0x29/0x1e0 [ 846.432470] f2fs_get_new_data_page+0x148/0x550 [ 846.432473] f2fs_add_regular_entry+0x1d2/0x550 [ 846.432475] ? __switch_to+0x12f/0x460 [ 846.432477] f2fs_add_dentry+0x6a/0xd0 [ 846.432480] f2fs_do_add_link+0xe9/0x140 [ 846.432483] __recover_dot_dentries+0x260/0x280 [ 846.432485] f2fs_lookup+0x343/0x390 [ 846.432488] __lookup_slow+0x97/0x150 [ 846.432490] lookup_slow+0x35/0x50 [ 846.432505] walk_component+0x1c6/0x470 [ 846.432509] ? memcg_kmem_charge_memcg+0x70/0x90 [ 846.432511] ? page_add_file_rmap+0x13/0x200 [ 846.432513] path_lookupat+0x76/0x230 [ 846.432515] ? __alloc_pages_nodemask+0xfc/0x280 [ 846.432517] filename_lookup+0xb8/0x1a0 [ 846.432520] ? _cond_resched+0x16/0x40 [ 846.432522] ? kmem_cache_alloc+0x160/0x1d0 [ 846.432525] ? path_listxattr+0x41/0xa0 [ 846.432526] path_listxattr+0x41/0xa0 [ 846.432529] do_syscall_64+0x55/0x100 [ 846.432531] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 846.432533] RIP: 0033:0x7f882de1c0d7 [ 846.432533] Code: f0 ff ff 73 01 c3 48 8b 0d be dd 2b 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 b8 c2 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 91 dd 2b 00 f7 d8 64 89 01 48 [ 846.432565] RSP: 002b:00007ffe8e66c238 EFLAGS: 00000202 ORIG_RAX: 00000000000000c2 [ 846.432567] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f882de1c0d7 [ 846.432568] RDX: 0000000000000071 RSI: 00007ffe8e66c280 RDI: 0000000001a880c0 [ 846.432569] RBP: 00007ffe8e66c300 R08: 0000000001a88010 R09: 0000000000000000 [ 846.432570] R10: 00000000000001ab R11: 0000000000000202 R12: 0000000000400550 [ 846.432571] R13: 00007ffe8e66c400 R14: 0000000000000000 R15: 0000000000000000 [ 846.432573] ---[ end trace abca54df39d14f5f ]--- [ 846.434280] BUG: unable to handle kernel NULL pointer dereference at 0000000000000008 [ 846.434424] PGD 80000001ebd3a067 P4D 80000001ebd3a067 PUD 1eb1ae067 PMD 0 [ 846.434551] Oops: 0000 [#1] SMP PTI [ 846.434697] CPU: 0 PID: 44 Comm: kworker/u5:0 Tainted: G W 4.18.0-rc3+ #1 [ 846.434805] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 [ 846.435000] Workqueue: fscrypt_read_queue decrypt_work [ 846.435174] RIP: 0010:fscrypt_do_page_crypto+0x6e/0x2d0 [ 846.435351] Code: 00 65 48 8b 04 25 28 00 00 00 48 89 84 24 88 00 00 00 31 c0 e8 43 c2 e0 ff 49 8b 86 48 02 00 00 85 ed c7 44 24 70 00 00 00 00 <48> 8b 58 08 0f 84 14 02 00 00 48 8b 78 10 48 8b 0c 24 48 c7 84 24 [ 846.435696] RSP: 0018:ffff961c40f9bd60 EFLAGS: 00010206 [ 846.435870] RAX: 0000000000000000 RBX: ffffc5f787719b80 RCX: ffffc5f787719b80 [ 846.436051] RDX: ffffffff8b9f4b88 RSI: ffffffff8b0ae622 RDI: ffff961c40f9bdb8 [ 846.436261] RBP: 0000000000001000 R08: ffffc5f787719b80 R09: 0000000000001000 [ 846.436433] R10: 0000000000000018 R11: fefefefefefefeff R12: ffffc5f787719b80 [ 846.436562] R13: ffffc5f787719b80 R14: ffff89dff4ff88d0 R15: 0ffff89dfaddee60 [ 846.436658] FS: 0000000000000000(0000) GS:ffff89dfffc00000(0000) knlGS:0000000000000000 [ 846.436758] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 846.436898] CR2: 0000000000000008 CR3: 00000001eddd0000 CR4: 00000000000006f0 [ 846.437001] Call Trace: [ 846.437181] ? check_preempt_wakeup+0xf2/0x230 [ 846.437276] ? check_preempt_curr+0x7c/0x90 [ 846.437370] fscrypt_decrypt_page+0x48/0x4d [ 846.437466] __fscrypt_decrypt_bio+0x5b/0x90 [ 846.437542] decrypt_work+0x12/0x20 [ 846.437651] process_one_work+0x15e/0x3d0 [ 846.437740] worker_thread+0x4c/0x440 [ 846.437848] kthread+0xf8/0x130 [ 846.437938] ? rescuer_thread+0x350/0x350 [ 846.438022] ? kthread_associate_blkcg+0x90/0x90 [ 846.438117] ret_from_fork+0x35/0x40 [ 846.438201] Modules linked in: snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hda_core snd_hwdep snd_pcm snd_timer snd input_leds joydev soundcore serio_raw i2c_piix4 mac_hid ib_iser rdma_cm iw_cm ib_cm ib_core configfs iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi autofs4 raid10 raid456 libcrc32c async_raid6_recov async_memcpy async_pq async_xor xor async_tx raid6_pq raid1 raid0 multipath linear qxl ttm crct10dif_pclmul crc32_pclmul drm_kms_helper ghash_clmulni_intel syscopyarea sysfillrect sysimgblt fb_sys_fops pcbc drm 8139too aesni_intel 8139cp floppy psmouse mii aes_x86_64 crypto_simd pata_acpi cryptd glue_helper [ 846.438653] CR2: 0000000000000008 [ 846.438713] ---[ end trace abca54df39d14f60 ]--- [ 846.438796] RIP: 0010:fscrypt_do_page_crypto+0x6e/0x2d0 [ 846.438844] Code: 00 65 48 8b 04 25 28 00 00 00 48 89 84 24 88 00 00 00 31 c0 e8 43 c2 e0 ff 49 8b 86 48 02 00 00 85 ed c7 44 24 70 00 00 00 00 <48> 8b 58 08 0f 84 14 02 00 00 48 8b 78 10 48 8b 0c 24 48 c7 84 24 [ 846.439084] RSP: 0018:ffff961c40f9bd60 EFLAGS: 00010206 [ 846.439176] RAX: 0000000000000000 RBX: ffffc5f787719b80 RCX: ffffc5f787719b80 [ 846.440927] RDX: ffffffff8b9f4b88 RSI: ffffffff8b0ae622 RDI: ffff961c40f9bdb8 [ 846.442083] RBP: 0000000000001000 R08: ffffc5f787719b80 R09: 0000000000001000 [ 846.443284] R10: 0000000000000018 R11: fefefefefefefeff R12: ffffc5f787719b80 [ 846.444448] R13: ffffc5f787719b80 R14: ffff89dff4ff88d0 R15: 0ffff89dfaddee60 [ 846.445558] FS: 0000000000000000(0000) GS:ffff89dfffc00000(0000) knlGS:0000000000000000 [ 846.446687] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 846.447796] CR2: 0000000000000008 CR3: 00000001eddd0000 CR4: 00000000000006f0 - Location https://elixir.bootlin.com/linux/v4.18-rc4/source/fs/crypto/crypto.c#L149 struct crypto_skcipher *tfm = ci->ci_ctfm; Here ci can be NULL Note that this issue maybe require CONFIG_F2FS_FS_ENCRYPTION=y to reproduce. Reported-by Wen Xu Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim [bwh: Backported to 4.14: adjust context] Signed-off-by: Ben Hutchings Signed-off-by: Sasha Levin --- fs/f2fs/data.c | 3 +++ fs/f2fs/inode.c | 18 +++++++++++++----- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 8f6e7c3a10f83..c68b319b07aa0 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -468,6 +468,9 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr, struct fscrypt_ctx *ctx = NULL; struct bio *bio; + if (!f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC)) + return ERR_PTR(-EFAULT); + if (f2fs_encrypted_file(inode)) { ctx = fscrypt_get_ctx(inode, GFP_NOFS); if (IS_ERR(ctx)) diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 9a40724dbaa62..50818b519df8b 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -62,14 +62,16 @@ static void __get_inode_rdev(struct inode *inode, struct f2fs_inode *ri) } } -static bool __written_first_block(struct f2fs_sb_info *sbi, +static int __written_first_block(struct f2fs_sb_info *sbi, struct f2fs_inode *ri) { block_t addr = le32_to_cpu(ri->i_addr[offset_in_addr(ri)]); - if (is_valid_data_blkaddr(sbi, addr)) - return true; - return false; + if (!__is_valid_data_blkaddr(addr)) + return 1; + if (!f2fs_is_valid_blkaddr(sbi, addr, DATA_GENERIC)) + return -EFAULT; + return 0; } static void __set_inode_rdev(struct inode *inode, struct f2fs_inode *ri) @@ -253,6 +255,7 @@ static int do_read_inode(struct inode *inode) struct page *node_page; struct f2fs_inode *ri; projid_t i_projid; + int err; /* Check if ino is within scope */ if (check_nid_range(sbi, inode->i_ino)) @@ -307,7 +310,12 @@ static int do_read_inode(struct inode *inode) /* get rdev by using inline_info */ __get_inode_rdev(inode, ri); - if (__written_first_block(sbi, ri)) + err = __written_first_block(sbi, ri); + if (err < 0) { + f2fs_put_page(node_page, 1); + return err; + } + if (!err) set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN); if (!need_inode_block_update(sbi, inode->i_ino)) -- GitLab From f2a4f7622d052eb987e8693633468c239c13575a Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 10 Dec 2018 18:14:58 +0000 Subject: [PATCH 2081/2269] swiotlb: clean up reporting commit 7d63fb3af87aa67aa7d24466e792f9d7c57d8e79 upstream. This removes needless use of '%p', and refactors the printk calls to use pr_*() helpers instead. Signed-off-by: Kees Cook Reviewed-by: Konrad Rzeszutek Wilk Signed-off-by: Christoph Hellwig [bwh: Backported to 4.14: - Adjust filename - Remove "swiotlb: " prefix from an additional log message] Signed-off-by: Ben Hutchings Signed-off-by: Sasha Levin --- lib/swiotlb.c | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/lib/swiotlb.c b/lib/swiotlb.c index 20df2fd9b1501..b4c768de33449 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -17,6 +17,8 @@ * 08/12/11 beckyb Add highmem support */ +#define pr_fmt(fmt) "software IO TLB: " fmt + #include #include #include @@ -177,20 +179,16 @@ static bool no_iotlb_memory; void swiotlb_print_info(void) { unsigned long bytes = io_tlb_nslabs << IO_TLB_SHIFT; - unsigned char *vstart, *vend; if (no_iotlb_memory) { - pr_warn("software IO TLB: No low mem\n"); + pr_warn("No low mem\n"); return; } - vstart = phys_to_virt(io_tlb_start); - vend = phys_to_virt(io_tlb_end); - - printk(KERN_INFO "software IO TLB [mem %#010llx-%#010llx] (%luMB) mapped at [%p-%p]\n", + pr_info("mapped [mem %#010llx-%#010llx] (%luMB)\n", (unsigned long long)io_tlb_start, (unsigned long long)io_tlb_end, - bytes >> 20, vstart, vend - 1); + bytes >> 20); } /* @@ -290,7 +288,7 @@ swiotlb_init(int verbose) if (io_tlb_start) memblock_free_early(io_tlb_start, PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT)); - pr_warn("Cannot allocate SWIOTLB buffer"); + pr_warn("Cannot allocate buffer"); no_iotlb_memory = true; } @@ -332,8 +330,8 @@ swiotlb_late_init_with_default_size(size_t default_size) return -ENOMEM; } if (order != get_order(bytes)) { - printk(KERN_WARNING "Warning: only able to allocate %ld MB " - "for software IO TLB\n", (PAGE_SIZE << order) >> 20); + pr_warn("only able to allocate %ld MB\n", + (PAGE_SIZE << order) >> 20); io_tlb_nslabs = SLABS_PER_PAGE << order; } rc = swiotlb_late_init_with_tbl(vstart, io_tlb_nslabs); @@ -770,7 +768,7 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size, err_warn: if (warn && printk_ratelimit()) { - pr_warn("swiotlb: coherent allocation failed for device %s size=%zu\n", + pr_warn("coherent allocation failed for device %s size=%zu\n", dev_name(hwdev), size); dump_stack(); } -- GitLab From b4e4b85708bb31df79d6fc77661bb171e07e0868 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 11 Dec 2018 13:50:37 +0100 Subject: [PATCH 2082/2269] Staging: lustre: remove two build warnings [for older kernels only, lustre has been removed from upstream] When someone writes: strncpy(dest, source, sizeof(source)); they really are just doing the same thing as: strcpy(dest, source); but somehow they feel better because they are now using the "safe" version of the string functions. Cargo-cult programming at its finest... gcc-8 rightfully warns you about doing foolish things like this. Now that the stable kernels are all starting to be built using gcc-8, let's get rid of this warning so that we do not have to gaze at this horror. To dropt the warning, just convert the code to using strcpy() so that if someone really wants to audit this code and find all of the obvious problems, it will be easier to do so. Signed-off-by: Greg Kroah-Hartman --- drivers/staging/lustre/lnet/lnet/config.c | 3 +-- drivers/staging/lustre/lustre/lmv/lmv_obd.c | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/staging/lustre/lnet/lnet/config.c b/drivers/staging/lustre/lnet/lnet/config.c index 26841a7b6213c..99b400c4190f1 100644 --- a/drivers/staging/lustre/lnet/lnet/config.c +++ b/drivers/staging/lustre/lnet/lnet/config.c @@ -354,8 +354,7 @@ lnet_parse_networks(struct list_head *nilist, char *networks) CERROR("Can't allocate net interface name\n"); goto failed; } - strncpy(ni->ni_interfaces[niface], iface, - strlen(iface)); + strcpy(ni->ni_interfaces[niface], iface); niface++; iface = comma; } while (iface); diff --git a/drivers/staging/lustre/lustre/lmv/lmv_obd.c b/drivers/staging/lustre/lustre/lmv/lmv_obd.c index c2aadb2d1fead..fa46fe9e1bd98 100644 --- a/drivers/staging/lustre/lustre/lmv/lmv_obd.c +++ b/drivers/staging/lustre/lustre/lmv/lmv_obd.c @@ -645,7 +645,7 @@ repeat_fid2path: memmove(ptr + strlen(gf->gf_path) + 1, ptr, strlen(ori_gf->gf_path)); - strncpy(ptr, gf->gf_path, strlen(gf->gf_path)); + strcpy(ptr, gf->gf_path); ptr += strlen(gf->gf_path); *ptr = '/'; } -- GitLab From a604686c468dc0ab9492dbeaa1722cc3878b0582 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 11 Dec 2018 13:50:55 +0100 Subject: [PATCH 2083/2269] staging: atomisp: remove "fun" strncpy warning [for older kernels only, atomisp has been removed from upstream] gcc-8 rightfully warns that this instance of strncpy is just copying from the source, to the same source, for a few bytes. Meaning this call does nothing. As the author of the code obviously meant it to do something, but this code must be working properly, just replace the call to the kernel internal strscpy() which gcc doesn't know about, so the warning goes away. As this driver was deleted from newer kernel versions, none of this really matters but now at least we do not have to worry about a build warning in the stable trees. Signed-off-by: Greg Kroah-Hartman --- .../pci/atomisp2/css2400/runtime/debug/src/ia_css_debug.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/debug/src/ia_css_debug.c b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/debug/src/ia_css_debug.c index 0fa7cb2423d86..0320c089f6881 100644 --- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/debug/src/ia_css_debug.c +++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/debug/src/ia_css_debug.c @@ -2860,9 +2860,7 @@ ia_css_debug_pipe_graph_dump_stage( if (l <= ENABLE_LINE_MAX_LENGTH) { /* It fits on one line, copy string and init */ /* other helper strings with empty string */ - strcpy_s(enable_info, - sizeof(enable_info), - ei); + strscpy(enable_info, ei, sizeof(enable_info)); } else { /* Too big for one line, find last comma */ p = ENABLE_LINE_MAX_LENGTH; -- GitLab From b505b9b09e531cfe9bfcb602ed9407d8bd902ad4 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Thu, 15 Nov 2018 15:20:52 +0100 Subject: [PATCH 2084/2269] cifs: Fix separator when building path from dentry commit c988de29ca161823db6a7125e803d597ef75b49c upstream. Make sure to use the CIFS_DIR_SEP(cifs_sb) as path separator for prefixpath too. Fixes a bug with smb1 UNIX extensions. Fixes: a6b5058fafdf ("fs/cifs: make share unaccessible at root level mountable") Signed-off-by: Paulo Alcantara Reviewed-by: Aurelien Aptel Signed-off-by: Steve French CC: Stable Signed-off-by: Greg Kroah-Hartman --- fs/cifs/dir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 925844343038a..ca98afda3cdb7 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -174,7 +174,7 @@ cifs_bp_rename_retry: cifs_dbg(FYI, "using cifs_sb prepath <%s>\n", cifs_sb->prepath); memcpy(full_path+dfsplen+1, cifs_sb->prepath, pplen-1); - full_path[dfsplen] = '\\'; + full_path[dfsplen] = dirsep; for (i = 0; i < pplen-1; i++) if (full_path[dfsplen+1+i] == '/') full_path[dfsplen+1+i] = CIFS_DIR_SEP(cifs_sb); -- GitLab From 948ef68099ebe26a52749ad031febb15b4c5dac7 Mon Sep 17 00:00:00 2001 From: Young Xiao Date: Wed, 28 Nov 2018 08:06:53 +0000 Subject: [PATCH 2085/2269] staging: rtl8712: Fix possible buffer overrun commit 300cd664865bed5d50ae0a42fb4e3a6f415e8a10 upstream. In commit 8b7a13c3f404 ("staging: r8712u: Fix possible buffer overrun") we fix a potential off by one by making the limit smaller. The better fix is to make the buffer larger. This makes it match up with the similar code in other drivers. Fixes: 8b7a13c3f404 ("staging: r8712u: Fix possible buffer overrun") Signed-off-by: Young Xiao Cc: stable Reviewed-by: Dan Carpenter Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8712/mlme_linux.c | 2 +- drivers/staging/rtl8712/rtl871x_mlme.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/staging/rtl8712/mlme_linux.c b/drivers/staging/rtl8712/mlme_linux.c index a077069d62273..7e367452c3393 100644 --- a/drivers/staging/rtl8712/mlme_linux.c +++ b/drivers/staging/rtl8712/mlme_linux.c @@ -158,7 +158,7 @@ void r8712_report_sec_ie(struct _adapter *adapter, u8 authmode, u8 *sec_ie) p = buff; p += sprintf(p, "ASSOCINFO(ReqIEs="); len = sec_ie[1] + 2; - len = (len < IW_CUSTOM_MAX) ? len : IW_CUSTOM_MAX - 1; + len = (len < IW_CUSTOM_MAX) ? len : IW_CUSTOM_MAX; for (i = 0; i < len; i++) p += sprintf(p, "%02x", sec_ie[i]); p += sprintf(p, ")"); diff --git a/drivers/staging/rtl8712/rtl871x_mlme.c b/drivers/staging/rtl8712/rtl871x_mlme.c index bf1ac22bae1c4..98c7e8b229d10 100644 --- a/drivers/staging/rtl8712/rtl871x_mlme.c +++ b/drivers/staging/rtl8712/rtl871x_mlme.c @@ -1361,7 +1361,7 @@ sint r8712_restruct_sec_ie(struct _adapter *adapter, u8 *in_ie, u8 *out_ie, uint in_len) { u8 authmode = 0, match; - u8 sec_ie[255], uncst_oui[4], bkup_ie[255]; + u8 sec_ie[IW_CUSTOM_MAX], uncst_oui[4], bkup_ie[255]; u8 wpa_oui[4] = {0x0, 0x50, 0xf2, 0x01}; uint ielength, cnt, remove_cnt; int iEntry; -- GitLab From c33e7a5a3714250ca452ef8353c91cf7ad411ac6 Mon Sep 17 00:00:00 2001 From: Young Xiao Date: Tue, 27 Nov 2018 09:12:20 +0000 Subject: [PATCH 2086/2269] Revert commit ef9209b642f "staging: rtl8723bs: Fix indenting errors and an off-by-one mistake in core/rtw_mlme_ext.c" commit 87e4a5405f087427fbf8b437d2796283dce2b38f upstream. pstapriv->max_num_sta is always <= NUM_STA, since max_num_sta is either set in _rtw_init_sta_priv() or rtw_set_beacon(). Fixes: ef9209b642f1 ("staging: rtl8723bs: Fix indenting errors and an off-by-one mistake in core/rtw_mlme_ext.c") Signed-off-by: Young Xiao Reviewed-by: Dan Carpenter Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8723bs/core/rtw_mlme_ext.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/rtl8723bs/core/rtw_mlme_ext.c b/drivers/staging/rtl8723bs/core/rtw_mlme_ext.c index b6d137f505e1e..111752f0bc271 100644 --- a/drivers/staging/rtl8723bs/core/rtw_mlme_ext.c +++ b/drivers/staging/rtl8723bs/core/rtw_mlme_ext.c @@ -1574,7 +1574,7 @@ unsigned int OnAssocReq(struct adapter *padapter, union recv_frame *precv_frame) if (pstat->aid > 0) { DBG_871X(" old AID %d\n", pstat->aid); } else { - for (pstat->aid = 1; pstat->aid < NUM_STA; pstat->aid++) + for (pstat->aid = 1; pstat->aid <= NUM_STA; pstat->aid++) if (pstapriv->sta_aid[pstat->aid - 1] == NULL) break; -- GitLab From 99e6900dfa125ef5062e7b0595f6ce5eb75a6d2c Mon Sep 17 00:00:00 2001 From: Junwei Zhang Date: Thu, 22 Nov 2018 17:53:00 +0800 Subject: [PATCH 2087/2269] drm/amdgpu: update mc firmware image for polaris12 variants commit d7fd67653f847327e545bdb198b901ee124afd7c upstream. Some new variants require updated firmware. Signed-off-by: Junwei Zhang Reviewed-by: Evan Quan Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index 3b3326daf32b9..b3de9334ed487 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -52,6 +52,7 @@ MODULE_FIRMWARE("amdgpu/tonga_mc.bin"); MODULE_FIRMWARE("amdgpu/polaris11_mc.bin"); MODULE_FIRMWARE("amdgpu/polaris10_mc.bin"); MODULE_FIRMWARE("amdgpu/polaris12_mc.bin"); +MODULE_FIRMWARE("amdgpu/polaris12_k_mc.bin"); static const u32 golden_settings_tonga_a11[] = { @@ -226,6 +227,15 @@ static int gmc_v8_0_init_microcode(struct amdgpu_device *adev) break; case CHIP_POLARIS12: chip_name = "polaris12"; + if (((adev->pdev->device == 0x6987) && + ((adev->pdev->revision == 0xc0) || + (adev->pdev->revision == 0xc3))) || + ((adev->pdev->device == 0x6981) && + ((adev->pdev->revision == 0x00) || + (adev->pdev->revision == 0x01) || + (adev->pdev->revision == 0x10)))) { + chip_name = "polaris12_k"; + } break; case CHIP_FIJI: case CHIP_CARRIZO: -- GitLab From eff5e74f7b680d51c9cd9a087f18ce99788b2aba Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 28 Nov 2018 23:25:41 -0500 Subject: [PATCH 2088/2269] drm/amdgpu/gmc8: update MC firmware for polaris MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit a81a7c9c9ea3042ab02d66ac35def74abf091c15 upstream. Some variants require different MC firmware images. Acked-by: Christian König Reviewed-by: Junwei Zhang Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 29 ++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index b3de9334ed487..0f5dc97ae920a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -52,6 +52,8 @@ MODULE_FIRMWARE("amdgpu/tonga_mc.bin"); MODULE_FIRMWARE("amdgpu/polaris11_mc.bin"); MODULE_FIRMWARE("amdgpu/polaris10_mc.bin"); MODULE_FIRMWARE("amdgpu/polaris12_mc.bin"); +MODULE_FIRMWARE("amdgpu/polaris11_k_mc.bin"); +MODULE_FIRMWARE("amdgpu/polaris10_k_mc.bin"); MODULE_FIRMWARE("amdgpu/polaris12_k_mc.bin"); static const u32 golden_settings_tonga_a11[] = @@ -220,22 +222,39 @@ static int gmc_v8_0_init_microcode(struct amdgpu_device *adev) chip_name = "tonga"; break; case CHIP_POLARIS11: - chip_name = "polaris11"; + if (((adev->pdev->device == 0x67ef) && + ((adev->pdev->revision == 0xe0) || + (adev->pdev->revision == 0xe5))) || + ((adev->pdev->device == 0x67ff) && + ((adev->pdev->revision == 0xcf) || + (adev->pdev->revision == 0xef) || + (adev->pdev->revision == 0xff)))) + chip_name = "polaris11_k"; + else if ((adev->pdev->device == 0x67ef) && + (adev->pdev->revision == 0xe2)) + chip_name = "polaris11_k"; + else + chip_name = "polaris11"; break; case CHIP_POLARIS10: - chip_name = "polaris10"; + if ((adev->pdev->device == 0x67df) && + ((adev->pdev->revision == 0xe1) || + (adev->pdev->revision == 0xf7))) + chip_name = "polaris10_k"; + else + chip_name = "polaris10"; break; case CHIP_POLARIS12: - chip_name = "polaris12"; if (((adev->pdev->device == 0x6987) && ((adev->pdev->revision == 0xc0) || (adev->pdev->revision == 0xc3))) || ((adev->pdev->device == 0x6981) && ((adev->pdev->revision == 0x00) || (adev->pdev->revision == 0x01) || - (adev->pdev->revision == 0x10)))) { + (adev->pdev->revision == 0x10)))) chip_name = "polaris12_k"; - } + else + chip_name = "polaris12"; break; case CHIP_FIJI: case CHIP_CARRIZO: -- GitLab From c1063941e7360e1a5c998c58de789d5de267175b Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Mon, 3 Dec 2018 00:54:35 +0000 Subject: [PATCH 2089/2269] Drivers: hv: vmbus: Offload the handling of channels to two workqueues commit 37c2578c0c40e286bc0d30bdc05290b2058cf66e upstream. vmbus_process_offer() mustn't call channel->sc_creation_callback() directly for sub-channels, because sc_creation_callback() -> vmbus_open() may never get the host's response to the OPEN_CHANNEL message (the host may rescind a channel at any time, e.g. in the case of hot removing a NIC), and vmbus_onoffer_rescind() may not wake up the vmbus_open() as it's blocked due to a non-zero vmbus_connection.offer_in_progress, and finally we have a deadlock. The above is also true for primary channels, if the related device drivers use sync probing mode by default. And, usually the handling of primary channels and sub-channels can depend on each other, so we should offload them to different workqueues to avoid possible deadlock, e.g. in sync-probing mode, NIC1's netvsc_subchan_work() can race with NIC2's netvsc_probe() -> rtnl_lock(), and causes deadlock: the former gets the rtnl_lock and waits for all the sub-channels to appear, but the latter can't get the rtnl_lock and this blocks the handling of sub-channels. The patch can fix the multiple-NIC deadlock described above for v3.x kernels (e.g. RHEL 7.x) which don't support async-probing of devices, and v4.4, v4.9, v4.14 and v4.18 which support async-probing but don't enable async-probing for Hyper-V drivers (yet). The patch can also fix the hang issue in sub-channel's handling described above for all versions of kernels, including v4.19 and v4.20-rc4. So actually the patch should be applied to all the existing kernels, not only the kernels that have 8195b1396ec8. Fixes: 8195b1396ec8 ("hv_netvsc: fix deadlock on hotplug") Cc: stable@vger.kernel.org Cc: Stephen Hemminger Cc: K. Y. Srinivasan Cc: Haiyang Zhang Signed-off-by: Dexuan Cui Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- drivers/hv/channel_mgmt.c | 185 +++++++++++++++++++++++++------------- drivers/hv/connection.c | 24 ++++- drivers/hv/hyperv_vmbus.h | 7 ++ include/linux/hyperv.h | 7 ++ 4 files changed, 160 insertions(+), 63 deletions(-) diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c index 752c52f7353d8..43eaf54736f4e 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -444,61 +444,16 @@ void vmbus_free_channels(void) } } -/* - * vmbus_process_offer - Process the offer by creating a channel/device - * associated with this offer - */ -static void vmbus_process_offer(struct vmbus_channel *newchannel) +/* Note: the function can run concurrently for primary/sub channels. */ +static void vmbus_add_channel_work(struct work_struct *work) { - struct vmbus_channel *channel; - bool fnew = true; + struct vmbus_channel *newchannel = + container_of(work, struct vmbus_channel, add_channel_work); + struct vmbus_channel *primary_channel = newchannel->primary_channel; unsigned long flags; u16 dev_type; int ret; - /* Make sure this is a new offer */ - mutex_lock(&vmbus_connection.channel_mutex); - - /* - * Now that we have acquired the channel_mutex, - * we can release the potentially racing rescind thread. - */ - atomic_dec(&vmbus_connection.offer_in_progress); - - list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) { - if (!uuid_le_cmp(channel->offermsg.offer.if_type, - newchannel->offermsg.offer.if_type) && - !uuid_le_cmp(channel->offermsg.offer.if_instance, - newchannel->offermsg.offer.if_instance)) { - fnew = false; - break; - } - } - - if (fnew) - list_add_tail(&newchannel->listentry, - &vmbus_connection.chn_list); - - mutex_unlock(&vmbus_connection.channel_mutex); - - if (!fnew) { - /* - * Check to see if this is a sub-channel. - */ - if (newchannel->offermsg.offer.sub_channel_index != 0) { - /* - * Process the sub-channel. - */ - newchannel->primary_channel = channel; - spin_lock_irqsave(&channel->lock, flags); - list_add_tail(&newchannel->sc_list, &channel->sc_list); - channel->num_sc++; - spin_unlock_irqrestore(&channel->lock, flags); - } else { - goto err_free_chan; - } - } - dev_type = hv_get_dev_type(newchannel); init_vp_index(newchannel, dev_type); @@ -516,21 +471,22 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel) /* * This state is used to indicate a successful open * so that when we do close the channel normally, we - * can cleanup properly + * can cleanup properly. */ newchannel->state = CHANNEL_OPEN_STATE; - if (!fnew) { - if (channel->sc_creation_callback != NULL) - channel->sc_creation_callback(newchannel); + if (primary_channel != NULL) { + /* newchannel is a sub-channel. */ + + if (primary_channel->sc_creation_callback != NULL) + primary_channel->sc_creation_callback(newchannel); + newchannel->probe_done = true; return; } /* - * Start the process of binding this offer to the driver - * We need to set the DeviceObject field before calling - * vmbus_child_dev_add() + * Start the process of binding the primary channel to the driver */ newchannel->device_obj = vmbus_device_create( &newchannel->offermsg.offer.if_type, @@ -559,13 +515,28 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel) err_deq_chan: mutex_lock(&vmbus_connection.channel_mutex); - list_del(&newchannel->listentry); + + /* + * We need to set the flag, otherwise + * vmbus_onoffer_rescind() can be blocked. + */ + newchannel->probe_done = true; + + if (primary_channel == NULL) { + list_del(&newchannel->listentry); + } else { + spin_lock_irqsave(&primary_channel->lock, flags); + list_del(&newchannel->sc_list); + spin_unlock_irqrestore(&primary_channel->lock, flags); + } + mutex_unlock(&vmbus_connection.channel_mutex); if (newchannel->target_cpu != get_cpu()) { put_cpu(); smp_call_function_single(newchannel->target_cpu, - percpu_channel_deq, newchannel, true); + percpu_channel_deq, + newchannel, true); } else { percpu_channel_deq(newchannel); put_cpu(); @@ -573,14 +544,104 @@ err_deq_chan: vmbus_release_relid(newchannel->offermsg.child_relid); -err_free_chan: free_channel(newchannel); } +/* + * vmbus_process_offer - Process the offer by creating a channel/device + * associated with this offer + */ +static void vmbus_process_offer(struct vmbus_channel *newchannel) +{ + struct vmbus_channel *channel; + struct workqueue_struct *wq; + unsigned long flags; + bool fnew = true; + + mutex_lock(&vmbus_connection.channel_mutex); + + /* + * Now that we have acquired the channel_mutex, + * we can release the potentially racing rescind thread. + */ + atomic_dec(&vmbus_connection.offer_in_progress); + + list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) { + if (!uuid_le_cmp(channel->offermsg.offer.if_type, + newchannel->offermsg.offer.if_type) && + !uuid_le_cmp(channel->offermsg.offer.if_instance, + newchannel->offermsg.offer.if_instance)) { + fnew = false; + break; + } + } + + if (fnew) + list_add_tail(&newchannel->listentry, + &vmbus_connection.chn_list); + else { + /* + * Check to see if this is a valid sub-channel. + */ + if (newchannel->offermsg.offer.sub_channel_index == 0) { + mutex_unlock(&vmbus_connection.channel_mutex); + /* + * Don't call free_channel(), because newchannel->kobj + * is not initialized yet. + */ + kfree(newchannel); + WARN_ON_ONCE(1); + return; + } + /* + * Process the sub-channel. + */ + newchannel->primary_channel = channel; + spin_lock_irqsave(&channel->lock, flags); + list_add_tail(&newchannel->sc_list, &channel->sc_list); + spin_unlock_irqrestore(&channel->lock, flags); + } + + mutex_unlock(&vmbus_connection.channel_mutex); + + /* + * vmbus_process_offer() mustn't call channel->sc_creation_callback() + * directly for sub-channels, because sc_creation_callback() -> + * vmbus_open() may never get the host's response to the + * OPEN_CHANNEL message (the host may rescind a channel at any time, + * e.g. in the case of hot removing a NIC), and vmbus_onoffer_rescind() + * may not wake up the vmbus_open() as it's blocked due to a non-zero + * vmbus_connection.offer_in_progress, and finally we have a deadlock. + * + * The above is also true for primary channels, if the related device + * drivers use sync probing mode by default. + * + * And, usually the handling of primary channels and sub-channels can + * depend on each other, so we should offload them to different + * workqueues to avoid possible deadlock, e.g. in sync-probing mode, + * NIC1's netvsc_subchan_work() can race with NIC2's netvsc_probe() -> + * rtnl_lock(), and causes deadlock: the former gets the rtnl_lock + * and waits for all the sub-channels to appear, but the latter + * can't get the rtnl_lock and this blocks the handling of + * sub-channels. + */ + INIT_WORK(&newchannel->add_channel_work, vmbus_add_channel_work); + wq = fnew ? vmbus_connection.handle_primary_chan_wq : + vmbus_connection.handle_sub_chan_wq; + queue_work(wq, &newchannel->add_channel_work); +} + /* * We use this state to statically distribute the channel interrupt load. */ static int next_numa_node_id; +/* + * init_vp_index() accesses global variables like next_numa_node_id, and + * it can run concurrently for primary channels and sub-channels: see + * vmbus_process_offer(), so we need the lock to protect the global + * variables. + */ +static DEFINE_SPINLOCK(bind_channel_to_cpu_lock); /* * Starting with Win8, we can statically distribute the incoming @@ -618,6 +679,8 @@ static void init_vp_index(struct vmbus_channel *channel, u16 dev_type) return; } + spin_lock(&bind_channel_to_cpu_lock); + /* * Based on the channel affinity policy, we will assign the NUMA * nodes. @@ -700,6 +763,8 @@ static void init_vp_index(struct vmbus_channel *channel, u16 dev_type) channel->target_cpu = cur_cpu; channel->target_vp = hv_cpu_number_to_vp_number(cur_cpu); + spin_unlock(&bind_channel_to_cpu_lock); + free_cpumask_var(available_mask); } diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c index 5449fc59b7f5e..4b1b70751be31 100644 --- a/drivers/hv/connection.c +++ b/drivers/hv/connection.c @@ -161,6 +161,20 @@ int vmbus_connect(void) goto cleanup; } + vmbus_connection.handle_primary_chan_wq = + create_workqueue("hv_pri_chan"); + if (!vmbus_connection.handle_primary_chan_wq) { + ret = -ENOMEM; + goto cleanup; + } + + vmbus_connection.handle_sub_chan_wq = + create_workqueue("hv_sub_chan"); + if (!vmbus_connection.handle_sub_chan_wq) { + ret = -ENOMEM; + goto cleanup; + } + INIT_LIST_HEAD(&vmbus_connection.chn_msg_list); spin_lock_init(&vmbus_connection.channelmsg_lock); @@ -251,10 +265,14 @@ void vmbus_disconnect(void) */ vmbus_initiate_unload(false); - if (vmbus_connection.work_queue) { - drain_workqueue(vmbus_connection.work_queue); + if (vmbus_connection.handle_sub_chan_wq) + destroy_workqueue(vmbus_connection.handle_sub_chan_wq); + + if (vmbus_connection.handle_primary_chan_wq) + destroy_workqueue(vmbus_connection.handle_primary_chan_wq); + + if (vmbus_connection.work_queue) destroy_workqueue(vmbus_connection.work_queue); - } if (vmbus_connection.int_page) { free_pages((unsigned long)vmbus_connection.int_page, 0); diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h index 49569f8fe038a..a166de6efd99c 100644 --- a/drivers/hv/hyperv_vmbus.h +++ b/drivers/hv/hyperv_vmbus.h @@ -327,7 +327,14 @@ struct vmbus_connection { struct list_head chn_list; struct mutex channel_mutex; + /* + * An offer message is handled first on the work_queue, and then + * is further handled on handle_primary_chan_wq or + * handle_sub_chan_wq. + */ struct workqueue_struct *work_queue; + struct workqueue_struct *handle_primary_chan_wq; + struct workqueue_struct *handle_sub_chan_wq; }; diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 0c51f753652d8..d1324d3c72b0f 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -869,6 +869,13 @@ struct vmbus_channel { bool probe_done; + /* + * We must offload the handling of the primary/sub channels + * from the single-threaded vmbus_connection.work_queue to + * two different workqueue, otherwise we can block + * vmbus_connection.work_queue and hang: see vmbus_process_offer(). + */ + struct work_struct add_channel_work; }; static inline bool is_hvsock_channel(const struct vmbus_channel *c) -- GitLab From 4f50d3a08df40a3ad261b8527345df92c9f1a731 Mon Sep 17 00:00:00 2001 From: Peter Shih Date: Tue, 27 Nov 2018 12:49:50 +0800 Subject: [PATCH 2090/2269] tty: serial: 8250_mtk: always resume the device in probe. commit 100bc3e2bebf95506da57cbdf5f26b25f6da4c81 upstream. serial8250_register_8250_port calls uart_config_port, which calls config_port on the port before it tries to power on the port. So we need the port to be on before calling serial8250_register_8250_port. Change the code to always do a runtime resume in probe before registering port, and always do a runtime suspend in remove. This basically reverts the change in commit 68e5fc4a255a ("tty: serial: 8250_mtk: use pm_runtime callbacks for enabling"), but still use pm_runtime callbacks. Fixes: 68e5fc4a255a ("tty: serial: 8250_mtk: use pm_runtime callbacks for enabling") Signed-off-by: Peter Shih Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_mtk.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/drivers/tty/serial/8250/8250_mtk.c b/drivers/tty/serial/8250/8250_mtk.c index fb45770d47aa2..fa909fa3c4cdb 100644 --- a/drivers/tty/serial/8250/8250_mtk.c +++ b/drivers/tty/serial/8250/8250_mtk.c @@ -222,17 +222,17 @@ static int mtk8250_probe(struct platform_device *pdev) platform_set_drvdata(pdev, data); - pm_runtime_enable(&pdev->dev); - if (!pm_runtime_enabled(&pdev->dev)) { - err = mtk8250_runtime_resume(&pdev->dev); - if (err) - return err; - } + err = mtk8250_runtime_resume(&pdev->dev); + if (err) + return err; data->line = serial8250_register_8250_port(&uart); if (data->line < 0) return data->line; + pm_runtime_set_active(&pdev->dev); + pm_runtime_enable(&pdev->dev); + return 0; } @@ -243,13 +243,11 @@ static int mtk8250_remove(struct platform_device *pdev) pm_runtime_get_sync(&pdev->dev); serial8250_unregister_port(data->line); + mtk8250_runtime_suspend(&pdev->dev); pm_runtime_disable(&pdev->dev); pm_runtime_put_noidle(&pdev->dev); - if (!pm_runtime_status_suspended(&pdev->dev)) - mtk8250_runtime_suspend(&pdev->dev); - return 0; } -- GitLab From e4c3cc6b08087550e7f000bd7510ccb9a887c543 Mon Sep 17 00:00:00 2001 From: Chanho Park Date: Thu, 22 Nov 2018 18:23:47 +0900 Subject: [PATCH 2091/2269] tty: do not set TTY_IO_ERROR flag if console port commit 2a48602615e0a2f563549c7d5c8d507f904cf96e upstream. Since Commit 761ed4a94582 ('tty: serial_core: convert uart_close to use tty_port_close') and Commit 4dda864d7307 ('tty: serial_core: Fix serial console crash on port shutdown), a serial port which is used as console can be stuck when logging out if there is a remained process. After logged out, agetty will try to grab the serial port but it will be failed because the previous process did not release the port correctly. To fix this, TTY_IO_ERROR bit should not be enabled of tty_port_close if the port is console port. Reproduce step: - Run background processes from serial console $ while true; do sleep 10; done & - Log out $ logout -> Stuck - Read journal log by journalctl | tail Jan 28 16:07:01 ubuntu systemd[1]: Stopped Serial Getty on ttyAMA0. Jan 28 16:07:01 ubuntu systemd[1]: Started Serial Getty on ttyAMA0. Jan 28 16:07:02 ubuntu agetty[1643]: /dev/ttyAMA0: not a tty Fixes: 761ed4a94582 ("tty: serial_core: convert uart_close to use tty_port_close") Cc: Geert Uytterhoeven Cc: Rob Herring Cc: Jiri Slaby Signed-off-by: Chanho Park Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/tty/tty_port.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/tty/tty_port.c b/drivers/tty/tty_port.c index 6b137194069fe..c93a33701d32a 100644 --- a/drivers/tty/tty_port.c +++ b/drivers/tty/tty_port.c @@ -639,7 +639,8 @@ void tty_port_close(struct tty_port *port, struct tty_struct *tty, if (tty_port_close_start(port, tty, filp) == 0) return; tty_port_shutdown(port, tty); - set_bit(TTY_IO_ERROR, &tty->flags); + if (!port->console) + set_bit(TTY_IO_ERROR, &tty->flags); tty_port_close_end(port, tty); tty_port_tty_set(port, NULL); } -- GitLab From b919b4842b42a148ad12754019c58cfc939ae955 Mon Sep 17 00:00:00 2001 From: Macpaul Lin Date: Wed, 17 Oct 2018 23:08:38 +0800 Subject: [PATCH 2092/2269] kgdboc: fix KASAN global-out-of-bounds bug in param_set_kgdboc_var() commit dada6a43b0402eba438a17ac86fdc64ac56a4607 upstream. This patch is trying to fix KE issue due to "BUG: KASAN: global-out-of-bounds in param_set_kgdboc_var+0x194/0x198" reported by Syzkaller scan." [26364:syz-executor0][name:report8t]BUG: KASAN: global-out-of-bounds in param_set_kgdboc_var+0x194/0x198 [26364:syz-executor0][name:report&]Read of size 1 at addr ffffff900e44f95f by task syz-executor0/26364 [26364:syz-executor0][name:report&] [26364:syz-executor0]CPU: 7 PID: 26364 Comm: syz-executor0 Tainted: G W 0 [26364:syz-executor0]Call trace: [26364:syz-executor0][] dump_bacIctrace+Ox0/0x470 [26364:syz-executor0][] show_stack+0x20/0x30 [26364:syz-executor0][] dump_stack+Oxd8/0x128 [26364:syz-executor0][] print_address_description +0x80/0x4a8 [26364:syz-executor0][] kasan_report+Ox178/0x390 [26364:syz-executor0][] _asan_report_loadi_noabort+Ox18/0x20 [26364:syz-executor0][] param_set_kgdboc_var+Ox194/0x198 [26364:syz-executor0][] param_attr_store+Ox14c/0x270 [26364:syz-executor0][] module_attr_store+0x60/0x90 [26364:syz-executor0][] sysfs_kl_write+Ox100/0x158 [26364:syz-executor0][] kernfs_fop_write+0x27c/0x3a8 [26364:syz-executor0][] do_loop_readv_writev+0x114/0x1b0 [26364:syz-executor0][] do_readv_writev+0x4f8/0x5e0 [26364:syz-executor0][] vfs_writev+0x7c/Oxb8 [26364:syz-executor0][] SyS_writev+Oxcc/0x208 [26364:syz-executor0][] elO_svc_naked +0x24/0x28 [26364:syz-executor0][name:report&] [26364:syz-executor0][name:report&]The buggy address belongs to the variable: [26364:syz-executor0][name:report&] kgdb_tty_line+Ox3f/0x40 [26364:syz-executor0][name:report&] [26364:syz-executor0][name:report&]Memory state around the buggy address: [26364:syz-executor0] ffffff900e44f800: 00 00 00 00 00 04 fa fa fa fa fa fa 00 fa fa fa [26364:syz-executor0] ffffff900e44f880: fa fa fa fa 00 fa fa fa fa fa fa fa 00 fa fa fa [26364:syz-executor0]> ffffff900e44f900: fa fa fa fa 04 fa fa fa fa fa fa fa 00 00 00 00 [26364:syz-executor0][name:report&] ^ [26364:syz-executor0] ffffff900e44f980: 00 fa fa fa fa fa fa fa 04 fa fa fa fa fa fa fa [26364:syz-executor0] ffffff900e44fa00: 04 fa fa fa fa fa fa fa 00 fa fa fa fa fa fa fa [26364:syz-executor0][name:report&] [26364:syz-executor0][name:panic&]Disabling lock debugging due to kernel taint [26364:syz-executor0]------------[cut here]------------ After checking the source code, we've found there might be an out-of-bounds access to "config[len - 1]" array when the variable "len" is zero. Signed-off-by: Macpaul Lin Acked-by: Daniel Thompson Cc: stable Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/kgdboc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/kgdboc.c b/drivers/tty/serial/kgdboc.c index c448225ef5ca2..f2b0d8cee8efc 100644 --- a/drivers/tty/serial/kgdboc.c +++ b/drivers/tty/serial/kgdboc.c @@ -232,7 +232,7 @@ static void kgdboc_put_char(u8 chr) static int param_set_kgdboc_var(const char *kmessage, struct kernel_param *kp) { - int len = strlen(kmessage); + size_t len = strlen(kmessage); if (len >= MAX_CONFIG_LEN) { printk(KERN_ERR "kgdboc: config string too long\n"); @@ -254,7 +254,7 @@ static int param_set_kgdboc_var(const char *kmessage, struct kernel_param *kp) strcpy(config, kmessage); /* Chop out \n char as a result of echo */ - if (config[len - 1] == '\n') + if (len && config[len - 1] == '\n') config[len - 1] = '\0'; if (configured == 1) -- GitLab From 4794d94b44c4903514a878b9097e99417a5e6dbe Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sat, 24 Nov 2018 10:47:04 -0800 Subject: [PATCH 2093/2269] libnvdimm, pfn: Pad pfn namespaces relative to other regions commit ae86cbfef3818300f1972e52f67a93211acb0e24 upstream. Commit cfe30b872058 "libnvdimm, pmem: adjust for section collisions with 'System RAM'" enabled Linux to workaround occasions where platform firmware arranges for "System RAM" and "Persistent Memory" to collide within a single section boundary. Unfortunately, as reported in this issue [1], platform firmware can inflict the same collision between persistent memory regions. The approach of interrogating iomem_resource does not work in this case because platform firmware may merge multiple regions into a single iomem_resource range. Instead provide a method to interrogate regions that share the same parent bus. This is a stop-gap until the core-MM can grow support for hotplug on sub-section boundaries. [1]: https://github.com/pmem/ndctl/issues/76 Fixes: cfe30b872058 ("libnvdimm, pmem: adjust for section collisions with...") Cc: Reported-by: Patrick Geary Tested-by: Patrick Geary Reviewed-by: Vishal Verma Signed-off-by: Dan Williams Signed-off-by: Greg Kroah-Hartman --- drivers/nvdimm/nd-core.h | 2 ++ drivers/nvdimm/pfn_devs.c | 64 +++++++++++++++++++++--------------- drivers/nvdimm/region_devs.c | 41 +++++++++++++++++++++++ 3 files changed, 80 insertions(+), 27 deletions(-) diff --git a/drivers/nvdimm/nd-core.h b/drivers/nvdimm/nd-core.h index 86bc19ae30da3..f7b0c39ac3391 100644 --- a/drivers/nvdimm/nd-core.h +++ b/drivers/nvdimm/nd-core.h @@ -105,6 +105,8 @@ resource_size_t nd_pmem_available_dpa(struct nd_region *nd_region, struct nd_mapping *nd_mapping, resource_size_t *overlap); resource_size_t nd_blk_available_dpa(struct nd_region *nd_region); resource_size_t nd_region_available_dpa(struct nd_region *nd_region); +int nd_region_conflict(struct nd_region *nd_region, resource_size_t start, + resource_size_t size); resource_size_t nvdimm_allocated_dpa(struct nvdimm_drvdata *ndd, struct nd_label_id *label_id); int alias_dpa_busy(struct device *dev, void *data); diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c index 2adada1a58551..6d38191ff0daa 100644 --- a/drivers/nvdimm/pfn_devs.c +++ b/drivers/nvdimm/pfn_devs.c @@ -589,14 +589,47 @@ static u64 phys_pmem_align_down(struct nd_pfn *nd_pfn, u64 phys) ALIGN_DOWN(phys, nd_pfn->align)); } +/* + * Check if pmem collides with 'System RAM', or other regions when + * section aligned. Trim it accordingly. + */ +static void trim_pfn_device(struct nd_pfn *nd_pfn, u32 *start_pad, u32 *end_trunc) +{ + struct nd_namespace_common *ndns = nd_pfn->ndns; + struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); + struct nd_region *nd_region = to_nd_region(nd_pfn->dev.parent); + const resource_size_t start = nsio->res.start; + const resource_size_t end = start + resource_size(&nsio->res); + resource_size_t adjust, size; + + *start_pad = 0; + *end_trunc = 0; + + adjust = start - PHYS_SECTION_ALIGN_DOWN(start); + size = resource_size(&nsio->res) + adjust; + if (region_intersects(start - adjust, size, IORESOURCE_SYSTEM_RAM, + IORES_DESC_NONE) == REGION_MIXED + || nd_region_conflict(nd_region, start - adjust, size)) + *start_pad = PHYS_SECTION_ALIGN_UP(start) - start; + + /* Now check that end of the range does not collide. */ + adjust = PHYS_SECTION_ALIGN_UP(end) - end; + size = resource_size(&nsio->res) + adjust; + if (region_intersects(start, size, IORESOURCE_SYSTEM_RAM, + IORES_DESC_NONE) == REGION_MIXED + || !IS_ALIGNED(end, nd_pfn->align) + || nd_region_conflict(nd_region, start, size + adjust)) + *end_trunc = end - phys_pmem_align_down(nd_pfn, end); +} + static int nd_pfn_init(struct nd_pfn *nd_pfn) { u32 dax_label_reserve = is_nd_dax(&nd_pfn->dev) ? SZ_128K : 0; struct nd_namespace_common *ndns = nd_pfn->ndns; - u32 start_pad = 0, end_trunc = 0; + struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); resource_size_t start, size; - struct nd_namespace_io *nsio; struct nd_region *nd_region; + u32 start_pad, end_trunc; struct nd_pfn_sb *pfn_sb; unsigned long npfns; phys_addr_t offset; @@ -628,30 +661,7 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn) memset(pfn_sb, 0, sizeof(*pfn_sb)); - /* - * Check if pmem collides with 'System RAM' when section aligned and - * trim it accordingly - */ - nsio = to_nd_namespace_io(&ndns->dev); - start = PHYS_SECTION_ALIGN_DOWN(nsio->res.start); - size = resource_size(&nsio->res); - if (region_intersects(start, size, IORESOURCE_SYSTEM_RAM, - IORES_DESC_NONE) == REGION_MIXED) { - start = nsio->res.start; - start_pad = PHYS_SECTION_ALIGN_UP(start) - start; - } - - start = nsio->res.start; - size = PHYS_SECTION_ALIGN_UP(start + size) - start; - if (region_intersects(start, size, IORESOURCE_SYSTEM_RAM, - IORES_DESC_NONE) == REGION_MIXED - || !IS_ALIGNED(start + resource_size(&nsio->res), - nd_pfn->align)) { - size = resource_size(&nsio->res); - end_trunc = start + size - phys_pmem_align_down(nd_pfn, - start + size); - } - + trim_pfn_device(nd_pfn, &start_pad, &end_trunc); if (start_pad + end_trunc) dev_info(&nd_pfn->dev, "%s alignment collision, truncate %d bytes\n", dev_name(&ndns->dev), start_pad + end_trunc); @@ -662,7 +672,7 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn) * implementation will limit the pfns advertised through * ->direct_access() to those that are included in the memmap. */ - start += start_pad; + start = nsio->res.start + start_pad; size = resource_size(&nsio->res); npfns = PFN_SECTION_ALIGN_UP((size - start_pad - end_trunc - SZ_8K) / PAGE_SIZE); diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c index 050deb56ee62a..708043d20d0df 100644 --- a/drivers/nvdimm/region_devs.c +++ b/drivers/nvdimm/region_devs.c @@ -1112,6 +1112,47 @@ int nvdimm_has_cache(struct nd_region *nd_region) } EXPORT_SYMBOL_GPL(nvdimm_has_cache); +struct conflict_context { + struct nd_region *nd_region; + resource_size_t start, size; +}; + +static int region_conflict(struct device *dev, void *data) +{ + struct nd_region *nd_region; + struct conflict_context *ctx = data; + resource_size_t res_end, region_end, region_start; + + if (!is_memory(dev)) + return 0; + + nd_region = to_nd_region(dev); + if (nd_region == ctx->nd_region) + return 0; + + res_end = ctx->start + ctx->size; + region_start = nd_region->ndr_start; + region_end = region_start + nd_region->ndr_size; + if (ctx->start >= region_start && ctx->start < region_end) + return -EBUSY; + if (res_end > region_start && res_end <= region_end) + return -EBUSY; + return 0; +} + +int nd_region_conflict(struct nd_region *nd_region, resource_size_t start, + resource_size_t size) +{ + struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(&nd_region->dev); + struct conflict_context ctx = { + .nd_region = nd_region, + .start = start, + .size = size, + }; + + return device_for_each_child(&nvdimm_bus->dev, &ctx, region_conflict); +} + void __exit nd_region_devs_exit(void) { ida_destroy(®ion_ida); -- GitLab From 199a81acf2bb66b5a640e6484aacd87bdd2a4854 Mon Sep 17 00:00:00 2001 From: Vasyl Vavrychuk Date: Thu, 18 Oct 2018 01:02:12 +0300 Subject: [PATCH 2094/2269] mac80211_hwsim: Timer should be initialized before device registered commit a1881c9b8a1edef0a5ae1d5c1b61406fe3402114 upstream. Otherwise if network manager starts configuring Wi-Fi interface immidiatelly after getting notification of its creation, we will get NULL pointer dereference: BUG: unable to handle kernel NULL pointer dereference at (null) IP: [] hrtimer_active+0x28/0x50 ... Call Trace: [] ? hrtimer_try_to_cancel+0x27/0x110 [] ? hrtimer_cancel+0x15/0x20 [] ? mac80211_hwsim_config+0x140/0x1c0 [mac80211_hwsim] Cc: stable@vger.kernel.org Signed-off-by: Vasyl Vavrychuk Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/mac80211_hwsim.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 477f9f2f6626c..670224be3c8b4 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -2698,6 +2698,10 @@ static int mac80211_hwsim_new_radio(struct genl_info *info, wiphy_ext_feature_set(hw->wiphy, NL80211_EXT_FEATURE_CQM_RSSI_LIST); + tasklet_hrtimer_init(&data->beacon_timer, + mac80211_hwsim_beacon, + CLOCK_MONOTONIC, HRTIMER_MODE_ABS); + err = ieee80211_register_hw(hw); if (err < 0) { printk(KERN_DEBUG "mac80211_hwsim: ieee80211_register_hw failed (%d)\n", @@ -2722,10 +2726,6 @@ static int mac80211_hwsim_new_radio(struct genl_info *info, data->debugfs, data, &hwsim_simulate_radar); - tasklet_hrtimer_init(&data->beacon_timer, - mac80211_hwsim_beacon, - CLOCK_MONOTONIC, HRTIMER_MODE_ABS); - spin_lock_bh(&hwsim_radio_lock); list_add_tail(&data->list, &hwsim_radios); spin_unlock_bh(&hwsim_radio_lock); -- GitLab From 9e82abdd0c0341b880f510fc8b19464410912069 Mon Sep 17 00:00:00 2001 From: Ben Greear Date: Tue, 23 Oct 2018 13:36:52 -0700 Subject: [PATCH 2095/2269] mac80211: Clear beacon_int in ieee80211_do_stop commit 5c21e8100dfd57c806e833ae905e26efbb87840f upstream. This fixes stale beacon-int values that would keep a netdev from going up. To reproduce: Create two VAP on one radio. vap1 has beacon-int 100, start it. vap2 has beacon-int 240, start it (and it will fail because beacon-int mismatch). reconfigure vap2 to have beacon-int 100 and start it. It will fail because the stale beacon-int 240 will be used in the ifup path and hostapd never gets a chance to set the new beacon interval. Cc: stable@vger.kernel.org Signed-off-by: Ben Greear Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/mac80211/iface.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index e4912858b72ca..222c063244f56 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -1032,6 +1032,8 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, if (local->open_count == 0) ieee80211_clear_tx_pending(local); + sdata->vif.bss_conf.beacon_int = 0; + /* * If the interface goes down while suspended, presumably because * the device was unplugged and that happens before our resume, -- GitLab From a2c934fbbd43b5c349157c05374b9598f53a65f1 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Tue, 13 Nov 2018 20:32:13 +0100 Subject: [PATCH 2096/2269] mac80211: ignore tx status for PS stations in ieee80211_tx_status_ext commit a317e65face482371de30246b6494feb093ff7f9 upstream. Make it behave like regular ieee80211_tx_status calls, except for the lack of filtered frame processing. This fixes spurious low-ack triggered disconnections with powersave clients connected to an AP. Fixes: f027c2aca0cf4 ("mac80211: add ieee80211_tx_status_noskb") Cc: stable@vger.kernel.org Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/mac80211/status.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/mac80211/status.c b/net/mac80211/status.c index bdf131ed5ce87..35912270087c9 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -953,6 +953,8 @@ void ieee80211_tx_status_ext(struct ieee80211_hw *hw, /* Track when last TDLS packet was ACKed */ if (test_sta_flag(sta, WLAN_STA_TDLS_PEER_AUTH)) sta->status_stats.last_tdls_pkt_time = jiffies; + } else if (test_sta_flag(sta, WLAN_STA_PS_STA)) { + return; } else { ieee80211_lost_packet(sta, info); } -- GitLab From bbc4242a9c2e3c97a5a8e7822b9c27915599f0c4 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Wed, 28 Nov 2018 22:39:16 +0100 Subject: [PATCH 2097/2269] mac80211: fix reordering of buffered broadcast packets commit 9ec1190d065998650fd9260dea8cf3e1f56c0e8c upstream. If the buffered broadcast queue contains packets, letting new packets bypass that queue can lead to heavy reordering, since the driver is probably throttling transmission of buffered multicast packets after beacons. Keep buffering packets until the buffer has been cleared (and no client is in powersave mode). Cc: stable@vger.kernel.org Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/mac80211/tx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index a17a56032a215..6b9bf9c027a2e 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -435,8 +435,8 @@ ieee80211_tx_h_multicast_ps_buf(struct ieee80211_tx_data *tx) if (ieee80211_hw_check(&tx->local->hw, QUEUE_CONTROL)) info->hw_queue = tx->sdata->vif.cab_queue; - /* no stations in PS mode */ - if (!atomic_read(&ps->num_sta_ps)) + /* no stations in PS mode and no buffered packets */ + if (!atomic_read(&ps->num_sta_ps) && skb_queue_empty(&ps->bc_buf)) return TX_CONTINUE; info->flags |= IEEE80211_TX_CTL_SEND_AFTER_DTIM; -- GitLab From 7b6d932725ec18c1d84892b73c9da4d624939955 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Mon, 3 Dec 2018 21:16:07 +0200 Subject: [PATCH 2098/2269] mac80211: ignore NullFunc frames in the duplicate detection commit 990d71846a0b7281bd933c34d734e6afc7408e7e upstream. NullFunc packets should never be duplicate just like QoS-NullFunc packets. We saw a client that enters / exits power save with NullFunc frames (and not with QoS-NullFunc) despite the fact that the association supports HT. This specific client also re-uses a non-zero sequence number for different NullFunc frames. At some point, the client had to send a retransmission of the NullFunc frame and we dropped it, leading to a misalignment in the power save state. Fix this by never consider a NullFunc frame as duplicate, just like we do for QoS NullFunc frames. This fixes https://bugzilla.kernel.org/show_bug.cgi?id=201449 CC: Signed-off-by: Emmanuel Grumbach Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/mac80211/rx.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index dddd498e13381..9e19ddbcb06ec 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1254,6 +1254,7 @@ ieee80211_rx_h_check_dup(struct ieee80211_rx_data *rx) return RX_CONTINUE; if (ieee80211_is_ctl(hdr->frame_control) || + ieee80211_is_nullfunc(hdr->frame_control) || ieee80211_is_qos_nullfunc(hdr->frame_control) || is_multicast_ether_addr(hdr->addr1)) return RX_CONTINUE; -- GitLab From 1bb538a39cf959009d4e424ea4e590a1f58b2ed6 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 13 Dec 2018 09:18:54 +0100 Subject: [PATCH 2099/2269] Linux 4.14.88 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 322484348f3e6..3fdee40861a16 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 87 +SUBLEVEL = 88 EXTRAVERSION = NAME = Petit Gorille -- GitLab From da4b692991fc0f4ac74cb1546f52e67a8fe54801 Mon Sep 17 00:00:00 2001 From: Jiri Wiesner Date: Wed, 5 Dec 2018 16:55:29 +0100 Subject: [PATCH 2100/2269] ipv4: ipv6: netfilter: Adjust the frag mem limit when truesize changes [ Upstream commit ebaf39e6032faf77218220707fc3fa22487784e0 ] The *_frag_reasm() functions are susceptible to miscalculating the byte count of packet fragments in case the truesize of a head buffer changes. The truesize member may be changed by the call to skb_unclone(), leaving the fragment memory limit counter unbalanced even if all fragments are processed. This miscalculation goes unnoticed as long as the network namespace which holds the counter is not destroyed. Should an attempt be made to destroy a network namespace that holds an unbalanced fragment memory limit counter the cleanup of the namespace never finishes. The thread handling the cleanup gets stuck in inet_frags_exit_net() waiting for the percpu counter to reach zero. The thread is usually in running state with a stacktrace similar to: PID: 1073 TASK: ffff880626711440 CPU: 1 COMMAND: "kworker/u48:4" #5 [ffff880621563d48] _raw_spin_lock at ffffffff815f5480 #6 [ffff880621563d48] inet_evict_bucket at ffffffff8158020b #7 [ffff880621563d80] inet_frags_exit_net at ffffffff8158051c #8 [ffff880621563db0] ops_exit_list at ffffffff814f5856 #9 [ffff880621563dd8] cleanup_net at ffffffff814f67c0 #10 [ffff880621563e38] process_one_work at ffffffff81096f14 It is not possible to create new network namespaces, and processes that call unshare() end up being stuck in uninterruptible sleep state waiting to acquire the net_mutex. The bug was observed in the IPv6 netfilter code by Per Sundstrom. I thank him for his analysis of the problem. The parts of this patch that apply to IPv4 and IPv6 fragment reassembly are preemptive measures. Signed-off-by: Jiri Wiesner Reported-by: Per Sundstrom Acked-by: Peter Oskolkov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ip_fragment.c | 7 +++++++ net/ipv6/netfilter/nf_conntrack_reasm.c | 8 +++++++- net/ipv6/reassembly.c | 8 +++++++- 3 files changed, 21 insertions(+), 2 deletions(-) diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index cb8fa5d7afe17..f686d7761acb3 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -513,6 +513,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb, struct rb_node *rbn; int len; int ihlen; + int delta; int err; u8 ecn; @@ -554,10 +555,16 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb, if (len > 65535) goto out_oversize; + delta = - head->truesize; + /* Head of list must not be cloned. */ if (skb_unclone(head, GFP_ATOMIC)) goto out_nomem; + delta += head->truesize; + if (delta) + add_frag_mem_limit(qp->q.net, delta); + /* If the first fragment is fragmented itself, we split * it to two chunks: the first with data and paged part * and the second, holding only fragments. */ diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 611d406c4656d..237fb04c67165 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -349,7 +349,7 @@ static bool nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *prev, struct net_device *dev) { struct sk_buff *fp, *head = fq->q.fragments; - int payload_len; + int payload_len, delta; u8 ecn; inet_frag_kill(&fq->q); @@ -371,10 +371,16 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *prev, struct net_devic return false; } + delta = - head->truesize; + /* Head of list must not be cloned. */ if (skb_unclone(head, GFP_ATOMIC)) return false; + delta += head->truesize; + if (delta) + add_frag_mem_limit(fq->q.net, delta); + /* If the first fragment is fragmented itself, we split * it to two chunks: the first with data and paged part * and the second, holding only fragments. */ diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index ede0061b6f5df..2a8c680b67cd0 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -348,7 +348,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, { struct net *net = container_of(fq->q.net, struct net, ipv6.frags); struct sk_buff *fp, *head = fq->q.fragments; - int payload_len; + int payload_len, delta; unsigned int nhoff; int sum_truesize; u8 ecn; @@ -389,10 +389,16 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, if (payload_len > IPV6_MAXPLEN) goto out_oversize; + delta = - head->truesize; + /* Head of list must not be cloned. */ if (skb_unclone(head, GFP_ATOMIC)) goto out_oom; + delta += head->truesize; + if (delta) + add_frag_mem_limit(fq->q.net, delta); + /* If the first fragment is fragmented itself, we split * it to two chunks: the first with data and paged part * and the second, holding only fragments. */ -- GitLab From ece6a4b7efa8ec7764bda17ad37f46be5e141d16 Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Thu, 6 Dec 2018 19:30:36 +0100 Subject: [PATCH 2101/2269] ipv6: Check available headroom in ip6_xmit() even without options [ Upstream commit 66033f47ca60294a95fc85ec3a3cc909dab7b765 ] Even if we send an IPv6 packet without options, MAX_HEADER might not be enough to account for the additional headroom required by alignment of hardware headers. On a configuration without HYPERV_NET, WLAN, AX25, and with IPV6_TUNNEL, sending short SCTP packets over IPv4 over L2TP over IPv6, we start with 100 bytes of allocated headroom in sctp_packet_transmit(), end up with 54 bytes after l2tp_xmit_skb(), and 14 bytes in ip6_finish_output2(). Those would be enough to append our 14 bytes header, but we're going to align that to 16 bytes, and write 2 bytes out of the allocated slab in neigh_hh_output(). KASan says: [ 264.967848] ================================================================== [ 264.967861] BUG: KASAN: slab-out-of-bounds in ip6_finish_output2+0x1aec/0x1c70 [ 264.967866] Write of size 16 at addr 000000006af1c7fe by task netperf/6201 [ 264.967870] [ 264.967876] CPU: 0 PID: 6201 Comm: netperf Not tainted 4.20.0-rc4+ #1 [ 264.967881] Hardware name: IBM 2827 H43 400 (z/VM 6.4.0) [ 264.967887] Call Trace: [ 264.967896] ([<00000000001347d6>] show_stack+0x56/0xa0) [ 264.967903] [<00000000017e379c>] dump_stack+0x23c/0x290 [ 264.967912] [<00000000007bc594>] print_address_description+0xf4/0x290 [ 264.967919] [<00000000007bc8fc>] kasan_report+0x13c/0x240 [ 264.967927] [<000000000162f5e4>] ip6_finish_output2+0x1aec/0x1c70 [ 264.967935] [<000000000163f890>] ip6_finish_output+0x430/0x7f0 [ 264.967943] [<000000000163fe44>] ip6_output+0x1f4/0x580 [ 264.967953] [<000000000163882a>] ip6_xmit+0xfea/0x1ce8 [ 264.967963] [<00000000017396e2>] inet6_csk_xmit+0x282/0x3f8 [ 264.968033] [<000003ff805fb0ba>] l2tp_xmit_skb+0xe02/0x13e0 [l2tp_core] [ 264.968037] [<000003ff80631192>] l2tp_eth_dev_xmit+0xda/0x150 [l2tp_eth] [ 264.968041] [<0000000001220020>] dev_hard_start_xmit+0x268/0x928 [ 264.968069] [<0000000001330e8e>] sch_direct_xmit+0x7ae/0x1350 [ 264.968071] [<000000000122359c>] __dev_queue_xmit+0x2b7c/0x3478 [ 264.968075] [<00000000013d2862>] ip_finish_output2+0xce2/0x11a0 [ 264.968078] [<00000000013d9b14>] ip_finish_output+0x56c/0x8c8 [ 264.968081] [<00000000013ddd1e>] ip_output+0x226/0x4c0 [ 264.968083] [<00000000013dbd6c>] __ip_queue_xmit+0x894/0x1938 [ 264.968100] [<000003ff80bc3a5c>] sctp_packet_transmit+0x29d4/0x3648 [sctp] [ 264.968116] [<000003ff80b7bf68>] sctp_outq_flush_ctrl.constprop.5+0x8d0/0xe50 [sctp] [ 264.968131] [<000003ff80b7c716>] sctp_outq_flush+0x22e/0x7d8 [sctp] [ 264.968146] [<000003ff80b35c68>] sctp_cmd_interpreter.isra.16+0x530/0x6800 [sctp] [ 264.968161] [<000003ff80b3410a>] sctp_do_sm+0x222/0x648 [sctp] [ 264.968177] [<000003ff80bbddac>] sctp_primitive_ASSOCIATE+0xbc/0xf8 [sctp] [ 264.968192] [<000003ff80b93328>] __sctp_connect+0x830/0xc20 [sctp] [ 264.968208] [<000003ff80bb11ce>] sctp_inet_connect+0x2e6/0x378 [sctp] [ 264.968212] [<0000000001197942>] __sys_connect+0x21a/0x450 [ 264.968215] [<000000000119aff8>] sys_socketcall+0x3d0/0xb08 [ 264.968218] [<000000000184ea7a>] system_call+0x2a2/0x2c0 [...] Just like ip_finish_output2() does for IPv4, check that we have enough headroom in ip6_xmit(), and reallocate it if we don't. This issue is older than git history. Reported-by: Jianlin Shi Signed-off-by: Stefano Brivio Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ip6_output.c | 42 +++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 9ab1e0fcbc13c..7ca8264cbdf94 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -195,37 +195,37 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, const struct ipv6_pinfo *np = inet6_sk(sk); struct in6_addr *first_hop = &fl6->daddr; struct dst_entry *dst = skb_dst(skb); + unsigned int head_room; struct ipv6hdr *hdr; u8 proto = fl6->flowi6_proto; int seg_len = skb->len; int hlimit = -1; u32 mtu; - if (opt) { - unsigned int head_room; + head_room = sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev); + if (opt) + head_room += opt->opt_nflen + opt->opt_flen; - /* First: exthdrs may take lots of space (~8K for now) - MAX_HEADER is not enough. - */ - head_room = opt->opt_nflen + opt->opt_flen; - seg_len += head_room; - head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev); - - if (skb_headroom(skb) < head_room) { - struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room); - if (!skb2) { - IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), - IPSTATS_MIB_OUTDISCARDS); - kfree_skb(skb); - return -ENOBUFS; - } - if (skb->sk) - skb_set_owner_w(skb2, skb->sk); - consume_skb(skb); - skb = skb2; + if (unlikely(skb_headroom(skb) < head_room)) { + struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room); + if (!skb2) { + IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), + IPSTATS_MIB_OUTDISCARDS); + kfree_skb(skb); + return -ENOBUFS; } + if (skb->sk) + skb_set_owner_w(skb2, skb->sk); + consume_skb(skb); + skb = skb2; + } + + if (opt) { + seg_len += opt->opt_nflen + opt->opt_flen; + if (opt->opt_flen) ipv6_push_frag_opts(skb, opt, &proto); + if (opt->opt_nflen) ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop, &fl6->saddr); -- GitLab From 9380dc7c5101c0ac539c282babd5d3c91394b94a Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Thu, 6 Dec 2018 19:30:37 +0100 Subject: [PATCH 2102/2269] neighbour: Avoid writing before skb->head in neigh_hh_output() [ Upstream commit e6ac64d4c4d095085d7dd71cbd05704ac99829b2 ] While skb_push() makes the kernel panic if the skb headroom is less than the unaligned hardware header size, it will proceed normally in case we copy more than that because of alignment, and we'll silently corrupt adjacent slabs. In the case fixed by the previous patch, "ipv6: Check available headroom in ip6_xmit() even without options", we end up in neigh_hh_output() with 14 bytes headroom, 14 bytes hardware header and write 16 bytes, starting 2 bytes before the allocated buffer. Always check we're not writing before skb->head and, if the headroom is not enough, warn and drop the packet. v2: - instead of panicking with BUG_ON(), WARN_ON_ONCE() and drop the packet (Eric Dumazet) - if we avoid the panic, though, we need to explicitly check the headroom before the memcpy(), otherwise we'll have corrupted slabs on a running kernel, after we warn - use __skb_push() instead of skb_push(), as the headroom check is already implemented here explicitly (Eric Dumazet) Signed-off-by: Stefano Brivio Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/neighbour.h | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/include/net/neighbour.h b/include/net/neighbour.h index a964366a7ef52..393099b1901ad 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -452,6 +452,7 @@ static inline int neigh_hh_bridge(struct hh_cache *hh, struct sk_buff *skb) static inline int neigh_hh_output(const struct hh_cache *hh, struct sk_buff *skb) { + unsigned int hh_alen = 0; unsigned int seq; unsigned int hh_len; @@ -459,16 +460,33 @@ static inline int neigh_hh_output(const struct hh_cache *hh, struct sk_buff *skb seq = read_seqbegin(&hh->hh_lock); hh_len = hh->hh_len; if (likely(hh_len <= HH_DATA_MOD)) { - /* this is inlined by gcc */ - memcpy(skb->data - HH_DATA_MOD, hh->hh_data, HH_DATA_MOD); + hh_alen = HH_DATA_MOD; + + /* skb_push() would proceed silently if we have room for + * the unaligned size but not for the aligned size: + * check headroom explicitly. + */ + if (likely(skb_headroom(skb) >= HH_DATA_MOD)) { + /* this is inlined by gcc */ + memcpy(skb->data - HH_DATA_MOD, hh->hh_data, + HH_DATA_MOD); + } } else { - unsigned int hh_alen = HH_DATA_ALIGN(hh_len); + hh_alen = HH_DATA_ALIGN(hh_len); - memcpy(skb->data - hh_alen, hh->hh_data, hh_alen); + if (likely(skb_headroom(skb) >= hh_alen)) { + memcpy(skb->data - hh_alen, hh->hh_data, + hh_alen); + } } } while (read_seqretry(&hh->hh_lock, seq)); - skb_push(skb, hh_len); + if (WARN_ON_ONCE(skb_headroom(skb) < hh_alen)) { + kfree_skb(skb); + return NET_XMIT_DROP; + } + + __skb_push(skb, hh_len); return dev_queue_xmit(skb); } -- GitLab From 25f111f2432d69a5014503dc27cf729d6adea285 Mon Sep 17 00:00:00 2001 From: Shmulik Ladkani Date: Fri, 7 Dec 2018 09:50:17 +0200 Subject: [PATCH 2103/2269] ipv6: sr: properly initialize flowi6 prior passing to ip6_route_output [ Upstream commit 1b4e5ad5d6b9f15cd0b5121f86d4719165958417 ] In 'seg6_output', stack variable 'struct flowi6 fl6' was missing initialization. Fixes: 6c8702c60b88 ("ipv6: sr: add support for SRH encapsulation and injection with lwtunnels") Signed-off-by: Shmulik Ladkani Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/seg6_iptunnel.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c index bf4763fd68c22..cf9342bfe95a3 100644 --- a/net/ipv6/seg6_iptunnel.c +++ b/net/ipv6/seg6_iptunnel.c @@ -327,6 +327,7 @@ static int seg6_output(struct net *net, struct sock *sk, struct sk_buff *skb) struct ipv6hdr *hdr = ipv6_hdr(skb); struct flowi6 fl6; + memset(&fl6, 0, sizeof(fl6)); fl6.daddr = hdr->daddr; fl6.saddr = hdr->saddr; fl6.flowlabel = ip6_flowinfo(hdr); -- GitLab From 4c65c73cf124dedd9bb64374851f7466d5704750 Mon Sep 17 00:00:00 2001 From: Su Yanjun Date: Mon, 3 Dec 2018 15:33:07 +0800 Subject: [PATCH 2104/2269] net: 8139cp: fix a BUG triggered by changing mtu with network traffic [ Upstream commit a5d4a89245ead1f37ed135213653c5beebea4237 ] When changing mtu many times with traffic, a bug is triggered: [ 1035.684037] kernel BUG at lib/dynamic_queue_limits.c:26! [ 1035.684042] invalid opcode: 0000 [#1] SMP [ 1035.684049] Modules linked in: loop binfmt_misc 8139cp(OE) macsec tcp_diag udp_diag inet_diag unix_diag af_packet_diag netlink_diag tcp_lp fuse uinput xt_CHECKSUM iptable_mangle ipt_MASQUERADE nf_nat_masquerade_ipv4 iptable_nat nf_nat_ipv4 nf_nat nf_conntrack_ipv4 nf_defrag_ipv4 xt_conntrack nf_conntrack ipt_REJECT nf_reject_ipv4 tun bridge stp llc ebtable_filter ebtables ip6table_filter devlink ip6_tables iptable_filter sunrpc snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hda_core snd_hwdep ppdev snd_seq iosf_mbi crc32_pclmul parport_pc snd_seq_device ghash_clmulni_intel parport snd_pcm aesni_intel joydev lrw snd_timer virtio_balloon sg gf128mul glue_helper ablk_helper cryptd snd soundcore i2c_piix4 pcspkr ip_tables xfs libcrc32c sr_mod sd_mod cdrom crc_t10dif crct10dif_generic ata_generic [ 1035.684102] pata_acpi virtio_console qxl drm_kms_helper syscopyarea sysfillrect sysimgblt floppy fb_sys_fops crct10dif_pclmul crct10dif_common ttm crc32c_intel serio_raw ata_piix drm libata 8139too virtio_pci drm_panel_orientation_quirks virtio_ring virtio mii dm_mirror dm_region_hash dm_log dm_mod [last unloaded: 8139cp] [ 1035.684132] CPU: 9 PID: 25140 Comm: if-mtu-change Kdump: loaded Tainted: G OE ------------ T 3.10.0-957.el7.x86_64 #1 [ 1035.684134] Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011 [ 1035.684136] task: ffff8f59b1f5a080 ti: ffff8f5a2e32c000 task.ti: ffff8f5a2e32c000 [ 1035.684149] RIP: 0010:[] [] dql_completed+0x180/0x190 [ 1035.684162] RSP: 0000:ffff8f5a75483e50 EFLAGS: 00010093 [ 1035.684162] RAX: 00000000000000c2 RBX: ffff8f5a6f91c000 RCX: 0000000000000000 [ 1035.684162] RDX: 0000000000000000 RSI: 0000000000000184 RDI: ffff8f599fea3ec0 [ 1035.684162] RBP: ffff8f5a75483ea8 R08: 00000000000000c2 R09: 0000000000000000 [ 1035.684162] R10: 00000000000616ef R11: ffff8f5a75483b56 R12: ffff8f599fea3e00 [ 1035.684162] R13: 0000000000000001 R14: 0000000000000000 R15: 0000000000000184 [ 1035.684162] FS: 00007fa8434de740(0000) GS:ffff8f5a75480000(0000) knlGS:0000000000000000 [ 1035.684162] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 1035.684162] CR2: 00000000004305d0 CR3: 000000024eb66000 CR4: 00000000001406e0 [ 1035.684162] Call Trace: [ 1035.684162] [ 1035.684162] [] ? cp_interrupt+0x478/0x580 [8139cp] [ 1035.684162] [] __handle_irq_event_percpu+0x44/0x1c0 [ 1035.684162] [] handle_irq_event_percpu+0x32/0x80 [ 1035.684162] [] handle_irq_event+0x3c/0x60 [ 1035.684162] [] handle_fasteoi_irq+0x59/0x110 [ 1035.684162] [] handle_irq+0xe4/0x1a0 [ 1035.684162] [] do_IRQ+0x4d/0xf0 [ 1035.684162] [] common_interrupt+0x162/0x162 [ 1035.684162] [ 1035.684162] [] ? __wake_up_bit+0x24/0x70 [ 1035.684162] [] ? do_set_pte+0xd5/0x120 [ 1035.684162] [] unlock_page+0x2b/0x30 [ 1035.684162] [] do_read_fault.isra.61+0x139/0x1b0 [ 1035.684162] [] handle_pte_fault+0x2f4/0xd10 [ 1035.684162] [] handle_mm_fault+0x39d/0x9b0 [ 1035.684162] [] __do_page_fault+0x203/0x500 [ 1035.684162] [] trace_do_page_fault+0x56/0x150 [ 1035.684162] [] do_async_page_fault+0x22/0xf0 [ 1035.684162] [] async_page_fault+0x28/0x30 [ 1035.684162] Code: 54 c7 47 54 ff ff ff ff 44 0f 49 ce 48 8b 35 48 2f 9c 00 48 89 77 58 e9 fe fe ff ff 0f 1f 80 00 00 00 00 41 89 d1 e9 ef fe ff ff <0f> 0b 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 55 8d 42 ff 48 [ 1035.684162] RIP [] dql_completed+0x180/0x190 [ 1035.684162] RSP It's not the same as in 7fe0ee09 patch described. As 8139cp uses shared irq mode, other device irq will trigger cp_interrupt to execute. cp_change_mtu -> cp_close -> cp_open In cp_close routine just before free_irq(), some interrupt may occur. In my environment, cp_interrupt exectutes and IntrStatus is 0x4, exactly TxOk. That will cause cp_tx to wake device queue. As device queue is started, cp_start_xmit and cp_open will run at same time which will cause kernel BUG. For example: [#] for tx descriptor At start: [#][#][#] num_queued=3 After cp_init_hw->cp_start_hw->netdev_reset_queue: [#][#][#] num_queued=0 When 8139cp starts to work then cp_tx will check num_queued mismatchs the complete_bytes. The patch will check IntrMask before check IntrStatus in cp_interrupt. When 8139cp interrupt is disabled, just return. Signed-off-by: Su Yanjun Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/realtek/8139cp.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/realtek/8139cp.c b/drivers/net/ethernet/realtek/8139cp.c index e7ab23e87de2f..d1e88712a2758 100644 --- a/drivers/net/ethernet/realtek/8139cp.c +++ b/drivers/net/ethernet/realtek/8139cp.c @@ -571,6 +571,7 @@ static irqreturn_t cp_interrupt (int irq, void *dev_instance) struct cp_private *cp; int handled = 0; u16 status; + u16 mask; if (unlikely(dev == NULL)) return IRQ_NONE; @@ -578,6 +579,10 @@ static irqreturn_t cp_interrupt (int irq, void *dev_instance) spin_lock(&cp->lock); + mask = cpr16(IntrMask); + if (!mask) + goto out_unlock; + status = cpr16(IntrStatus); if (!status || (status == 0xFFFF)) goto out_unlock; -- GitLab From 76f67ede4df921f5e4f0c02129a6407c977cab57 Mon Sep 17 00:00:00 2001 From: Tarick Bedeir Date: Fri, 7 Dec 2018 00:30:26 -0800 Subject: [PATCH 2105/2269] net/mlx4_core: Correctly set PFC param if global pause is turned off. [ Upstream commit bd5122cd1e0644d8bd8dd84517c932773e999766 ] rx_ppp and tx_ppp can be set between 0 and 255, so don't clamp to 1. Fixes: 6e8814ceb7e8 ("net/mlx4_en: Fix mixed PFC and Global pause user control requests") Signed-off-by: Tarick Bedeir Reviewed-by: Eran Ben Elisha Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx4/en_ethtool.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c index 5fe56dc4cfae0..5363cee88a0a4 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c @@ -1070,8 +1070,8 @@ static int mlx4_en_set_pauseparam(struct net_device *dev, tx_pause = !!(pause->tx_pause); rx_pause = !!(pause->rx_pause); - rx_ppp = priv->prof->rx_ppp && !(tx_pause || rx_pause); - tx_ppp = priv->prof->tx_ppp && !(tx_pause || rx_pause); + rx_ppp = (tx_pause || rx_pause) ? 0 : priv->prof->rx_ppp; + tx_ppp = (tx_pause || rx_pause) ? 0 : priv->prof->tx_ppp; err = mlx4_SET_PORT_general(mdev->dev, priv->port, priv->rx_skb_size + ETH_FCS_LEN, -- GitLab From 583e170ce81654554243790ac2e340ebe2180d14 Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Sun, 2 Dec 2018 14:34:36 +0200 Subject: [PATCH 2106/2269] net/mlx4_en: Change min MTU size to ETH_MIN_MTU [ Upstream commit 24be19e47779d604d1492c114459dca9a92acf78 ] NIC driver minimal MTU size shall be set to ETH_MIN_MTU, as defined in the RFC791 and in the network stack. Remove old mlx4_en only define for it, which was set to wrong value. Fixes: b80f71f5816f ("ethernet/mellanox: use core min/max MTU checking") Signed-off-by: Eran Ben Elisha Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 4 ++-- drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index faa4bd21f148a..0fb85d71c11b5 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -3505,8 +3505,8 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, dev->gso_partial_features = NETIF_F_GSO_UDP_TUNNEL_CSUM; } - /* MTU range: 46 - hw-specific max */ - dev->min_mtu = MLX4_EN_MIN_MTU; + /* MTU range: 68 - hw-specific max */ + dev->min_mtu = ETH_MIN_MTU; dev->max_mtu = priv->max_mtu; mdev->pndev[port] = dev; diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index 09f4764a3f392..bdd87438a354f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -157,7 +157,6 @@ #define HEADER_COPY_SIZE (128 - NET_IP_ALIGN) #define MLX4_LOOPBACK_TEST_PAYLOAD (HEADER_COPY_SIZE - ETH_HLEN) -#define MLX4_EN_MIN_MTU 46 /* VLAN_HLEN is added twice,to support skb vlan tagged with multiple * headers. (For example: ETH_P_8021Q and ETH_P_8021AD). */ -- GitLab From e44c8cbd09da1c3a9f2c752492b8efa3a53487f9 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Mon, 3 Dec 2018 08:19:33 +0100 Subject: [PATCH 2107/2269] net: phy: don't allow __set_phy_supported to add unsupported modes [ Upstream commit d2a36971ef595069b7a600d1144c2e0881a930a1 ] Currently __set_phy_supported allows to add modes w/o checking whether the PHY supports them. This is wrong, it should never add modes but only remove modes we don't want to support. The commit marked as fixed didn't do anything wrong, it just copied existing functionality to the helper which is being fixed now. Fixes: f3a6bd393c2c ("phylib: Add phy_set_max_speed helper") Signed-off-by: Heiner Kallweit Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/phy/phy_device.c | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index fe76e2c4022af..5b56a86e88ffc 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -1703,20 +1703,17 @@ EXPORT_SYMBOL(genphy_loopback); static int __set_phy_supported(struct phy_device *phydev, u32 max_speed) { - phydev->supported &= ~(PHY_1000BT_FEATURES | PHY_100BT_FEATURES | - PHY_10BT_FEATURES); - switch (max_speed) { - default: - return -ENOTSUPP; - case SPEED_1000: - phydev->supported |= PHY_1000BT_FEATURES; + case SPEED_10: + phydev->supported &= ~PHY_100BT_FEATURES; /* fall through */ case SPEED_100: - phydev->supported |= PHY_100BT_FEATURES; - /* fall through */ - case SPEED_10: - phydev->supported |= PHY_10BT_FEATURES; + phydev->supported &= ~PHY_1000BT_FEATURES; + break; + case SPEED_1000: + break; + default: + return -ENOTSUPP; } return 0; -- GitLab From 56ed9f33e03f0a4070a8f3aba9d1ccc443d180a8 Mon Sep 17 00:00:00 2001 From: Christoph Paasch Date: Thu, 29 Nov 2018 16:01:04 -0800 Subject: [PATCH 2108/2269] net: Prevent invalid access to skb->prev in __qdisc_drop_all [ Upstream commit 9410d386d0a829ace9558336263086c2fbbe8aed ] __qdisc_drop_all() accesses skb->prev to get to the tail of the segment-list. With commit 68d2f84a1368 ("net: gro: properly remove skb from list") the skb-list handling has been changed to set skb->next to NULL and set the list-poison on skb->prev. With that change, __qdisc_drop_all() will panic when it tries to dereference skb->prev. Since commit 992cba7e276d ("net: Add and use skb_list_del_init().") __list_del_entry is used, leaving skb->prev unchanged (thus, pointing to the list-head if it's the first skb of the list). This will make __qdisc_drop_all modify the next-pointer of the list-head and result in a panic later on: [ 34.501053] general protection fault: 0000 [#1] SMP KASAN PTI [ 34.501968] CPU: 2 PID: 0 Comm: swapper/2 Not tainted 4.20.0-rc2.mptcp #108 [ 34.502887] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 0.5.1 01/01/2011 [ 34.504074] RIP: 0010:dev_gro_receive+0x343/0x1f90 [ 34.504751] Code: e0 48 c1 e8 03 42 80 3c 30 00 0f 85 4a 1c 00 00 4d 8b 24 24 4c 39 65 d0 0f 84 0a 04 00 00 49 8d 7c 24 38 48 89 f8 48 c1 e8 03 <42> 0f b6 04 30 84 c0 74 08 3c 04 [ 34.507060] RSP: 0018:ffff8883af507930 EFLAGS: 00010202 [ 34.507761] RAX: 0000000000000007 RBX: ffff8883970b2c80 RCX: 1ffff11072e165a6 [ 34.508640] RDX: 1ffff11075867008 RSI: ffff8883ac338040 RDI: 0000000000000038 [ 34.509493] RBP: ffff8883af5079d0 R08: ffff8883970b2d40 R09: 0000000000000062 [ 34.510346] R10: 0000000000000034 R11: 0000000000000000 R12: 0000000000000000 [ 34.511215] R13: 0000000000000000 R14: dffffc0000000000 R15: ffff8883ac338008 [ 34.512082] FS: 0000000000000000(0000) GS:ffff8883af500000(0000) knlGS:0000000000000000 [ 34.513036] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 34.513741] CR2: 000055ccc3e9d020 CR3: 00000003abf32000 CR4: 00000000000006e0 [ 34.514593] Call Trace: [ 34.514893] [ 34.515157] napi_gro_receive+0x93/0x150 [ 34.515632] receive_buf+0x893/0x3700 [ 34.516094] ? __netif_receive_skb+0x1f/0x1a0 [ 34.516629] ? virtnet_probe+0x1b40/0x1b40 [ 34.517153] ? __stable_node_chain+0x4d0/0x850 [ 34.517684] ? kfree+0x9a/0x180 [ 34.518067] ? __kasan_slab_free+0x171/0x190 [ 34.518582] ? detach_buf+0x1df/0x650 [ 34.519061] ? lapic_next_event+0x5a/0x90 [ 34.519539] ? virtqueue_get_buf_ctx+0x280/0x7f0 [ 34.520093] virtnet_poll+0x2df/0xd60 [ 34.520533] ? receive_buf+0x3700/0x3700 [ 34.521027] ? qdisc_watchdog_schedule_ns+0xd5/0x140 [ 34.521631] ? htb_dequeue+0x1817/0x25f0 [ 34.522107] ? sch_direct_xmit+0x142/0xf30 [ 34.522595] ? virtqueue_napi_schedule+0x26/0x30 [ 34.523155] net_rx_action+0x2f6/0xc50 [ 34.523601] ? napi_complete_done+0x2f0/0x2f0 [ 34.524126] ? kasan_check_read+0x11/0x20 [ 34.524608] ? _raw_spin_lock+0x7d/0xd0 [ 34.525070] ? _raw_spin_lock_bh+0xd0/0xd0 [ 34.525563] ? kvm_guest_apic_eoi_write+0x6b/0x80 [ 34.526130] ? apic_ack_irq+0x9e/0xe0 [ 34.526567] __do_softirq+0x188/0x4b5 [ 34.527015] irq_exit+0x151/0x180 [ 34.527417] do_IRQ+0xdb/0x150 [ 34.527783] common_interrupt+0xf/0xf [ 34.528223] This patch makes sure that skb->prev is set to NULL when entering netem_enqueue. Cc: Prashant Bhole Cc: Tyler Hicks Cc: Eric Dumazet Fixes: 68d2f84a1368 ("net: gro: properly remove skb from list") Suggested-by: Eric Dumazet Signed-off-by: Christoph Paasch Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/sch_netem.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 3d325b8408022..3f4f0b946798a 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -436,6 +436,9 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, int count = 1; int rc = NET_XMIT_SUCCESS; + /* Do not fool qdisc_drop_all() */ + skb->prev = NULL; + /* Random duplication */ if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor)) ++count; -- GitLab From 22fa15e6f5b7b0cdf5ccd6beb5e2bf48aa6ac306 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 4 Dec 2018 09:40:35 -0800 Subject: [PATCH 2109/2269] rtnetlink: ndo_dflt_fdb_dump() only work for ARPHRD_ETHER devices [ Upstream commit 688838934c231bb08f46db687e57f6d8bf82709c ] kmsan was able to trigger a kernel-infoleak using a gre device [1] nlmsg_populate_fdb_fill() has a hard coded assumption that dev->addr_len is ETH_ALEN, as normally guaranteed for ARPHRD_ETHER devices. A similar issue was fixed recently in commit da71577545a5 ("rtnetlink: Disallow FDB configuration for non-Ethernet device") [1] BUG: KMSAN: kernel-infoleak in copyout lib/iov_iter.c:143 [inline] BUG: KMSAN: kernel-infoleak in _copy_to_iter+0x4c0/0x2700 lib/iov_iter.c:576 CPU: 0 PID: 6697 Comm: syz-executor310 Not tainted 4.20.0-rc3+ #95 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x32d/0x480 lib/dump_stack.c:113 kmsan_report+0x12c/0x290 mm/kmsan/kmsan.c:683 kmsan_internal_check_memory+0x32a/0xa50 mm/kmsan/kmsan.c:743 kmsan_copy_to_user+0x78/0xd0 mm/kmsan/kmsan_hooks.c:634 copyout lib/iov_iter.c:143 [inline] _copy_to_iter+0x4c0/0x2700 lib/iov_iter.c:576 copy_to_iter include/linux/uio.h:143 [inline] skb_copy_datagram_iter+0x4e2/0x1070 net/core/datagram.c:431 skb_copy_datagram_msg include/linux/skbuff.h:3316 [inline] netlink_recvmsg+0x6f9/0x19d0 net/netlink/af_netlink.c:1975 sock_recvmsg_nosec net/socket.c:794 [inline] sock_recvmsg+0x1d1/0x230 net/socket.c:801 ___sys_recvmsg+0x444/0xae0 net/socket.c:2278 __sys_recvmsg net/socket.c:2327 [inline] __do_sys_recvmsg net/socket.c:2337 [inline] __se_sys_recvmsg+0x2fa/0x450 net/socket.c:2334 __x64_sys_recvmsg+0x4a/0x70 net/socket.c:2334 do_syscall_64+0xcf/0x110 arch/x86/entry/common.c:291 entry_SYSCALL_64_after_hwframe+0x63/0xe7 RIP: 0033:0x441119 Code: 18 89 d0 c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 db 0a fc ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007fffc7f008a8 EFLAGS: 00000207 ORIG_RAX: 000000000000002f RAX: ffffffffffffffda RBX: 00000000004002c8 RCX: 0000000000441119 RDX: 0000000000000040 RSI: 00000000200005c0 RDI: 0000000000000003 RBP: 00000000006cc018 R08: 0000000000000100 R09: 0000000000000100 R10: 0000000000000100 R11: 0000000000000207 R12: 0000000000402080 R13: 0000000000402110 R14: 0000000000000000 R15: 0000000000000000 Uninit was stored to memory at: kmsan_save_stack_with_flags mm/kmsan/kmsan.c:246 [inline] kmsan_save_stack mm/kmsan/kmsan.c:261 [inline] kmsan_internal_chain_origin+0x13d/0x240 mm/kmsan/kmsan.c:469 kmsan_memcpy_memmove_metadata+0x1a9/0xf70 mm/kmsan/kmsan.c:344 kmsan_memcpy_metadata+0xb/0x10 mm/kmsan/kmsan.c:362 __msan_memcpy+0x61/0x70 mm/kmsan/kmsan_instr.c:162 __nla_put lib/nlattr.c:744 [inline] nla_put+0x20a/0x2d0 lib/nlattr.c:802 nlmsg_populate_fdb_fill+0x444/0x810 net/core/rtnetlink.c:3466 nlmsg_populate_fdb net/core/rtnetlink.c:3775 [inline] ndo_dflt_fdb_dump+0x73a/0x960 net/core/rtnetlink.c:3807 rtnl_fdb_dump+0x1318/0x1cb0 net/core/rtnetlink.c:3979 netlink_dump+0xc79/0x1c90 net/netlink/af_netlink.c:2244 __netlink_dump_start+0x10c4/0x11d0 net/netlink/af_netlink.c:2352 netlink_dump_start include/linux/netlink.h:216 [inline] rtnetlink_rcv_msg+0x141b/0x1540 net/core/rtnetlink.c:4910 netlink_rcv_skb+0x394/0x640 net/netlink/af_netlink.c:2477 rtnetlink_rcv+0x50/0x60 net/core/rtnetlink.c:4965 netlink_unicast_kernel net/netlink/af_netlink.c:1310 [inline] netlink_unicast+0x1699/0x1740 net/netlink/af_netlink.c:1336 netlink_sendmsg+0x13c7/0x1440 net/netlink/af_netlink.c:1917 sock_sendmsg_nosec net/socket.c:621 [inline] sock_sendmsg net/socket.c:631 [inline] ___sys_sendmsg+0xe3b/0x1240 net/socket.c:2116 __sys_sendmsg net/socket.c:2154 [inline] __do_sys_sendmsg net/socket.c:2163 [inline] __se_sys_sendmsg+0x305/0x460 net/socket.c:2161 __x64_sys_sendmsg+0x4a/0x70 net/socket.c:2161 do_syscall_64+0xcf/0x110 arch/x86/entry/common.c:291 entry_SYSCALL_64_after_hwframe+0x63/0xe7 Uninit was created at: kmsan_save_stack_with_flags mm/kmsan/kmsan.c:246 [inline] kmsan_internal_poison_shadow+0x6d/0x130 mm/kmsan/kmsan.c:170 kmsan_kmalloc+0xa1/0x100 mm/kmsan/kmsan_hooks.c:186 __kmalloc+0x14c/0x4d0 mm/slub.c:3825 kmalloc include/linux/slab.h:551 [inline] __hw_addr_create_ex net/core/dev_addr_lists.c:34 [inline] __hw_addr_add_ex net/core/dev_addr_lists.c:80 [inline] __dev_mc_add+0x357/0x8a0 net/core/dev_addr_lists.c:670 dev_mc_add+0x6d/0x80 net/core/dev_addr_lists.c:687 ip_mc_filter_add net/ipv4/igmp.c:1128 [inline] igmp_group_added+0x4d4/0xb80 net/ipv4/igmp.c:1311 __ip_mc_inc_group+0xea9/0xf70 net/ipv4/igmp.c:1444 ip_mc_inc_group net/ipv4/igmp.c:1453 [inline] ip_mc_up+0x1c3/0x400 net/ipv4/igmp.c:1775 inetdev_event+0x1d03/0x1d80 net/ipv4/devinet.c:1522 notifier_call_chain kernel/notifier.c:93 [inline] __raw_notifier_call_chain kernel/notifier.c:394 [inline] raw_notifier_call_chain+0x13d/0x240 kernel/notifier.c:401 __dev_notify_flags+0x3da/0x860 net/core/dev.c:1733 dev_change_flags+0x1ac/0x230 net/core/dev.c:7569 do_setlink+0x165f/0x5ea0 net/core/rtnetlink.c:2492 rtnl_newlink+0x2ad7/0x35a0 net/core/rtnetlink.c:3111 rtnetlink_rcv_msg+0x1148/0x1540 net/core/rtnetlink.c:4947 netlink_rcv_skb+0x394/0x640 net/netlink/af_netlink.c:2477 rtnetlink_rcv+0x50/0x60 net/core/rtnetlink.c:4965 netlink_unicast_kernel net/netlink/af_netlink.c:1310 [inline] netlink_unicast+0x1699/0x1740 net/netlink/af_netlink.c:1336 netlink_sendmsg+0x13c7/0x1440 net/netlink/af_netlink.c:1917 sock_sendmsg_nosec net/socket.c:621 [inline] sock_sendmsg net/socket.c:631 [inline] ___sys_sendmsg+0xe3b/0x1240 net/socket.c:2116 __sys_sendmsg net/socket.c:2154 [inline] __do_sys_sendmsg net/socket.c:2163 [inline] __se_sys_sendmsg+0x305/0x460 net/socket.c:2161 __x64_sys_sendmsg+0x4a/0x70 net/socket.c:2161 do_syscall_64+0xcf/0x110 arch/x86/entry/common.c:291 entry_SYSCALL_64_after_hwframe+0x63/0xe7 Bytes 36-37 of 105 are uninitialized Memory access of size 105 starts at ffff88819686c000 Data copied to user address 0000000020000380 Fixes: d83b06036048 ("net: add fdb generic dump routine") Signed-off-by: Eric Dumazet Cc: John Fastabend Cc: Ido Schimmel Cc: David Ahern Reviewed-by: Ido Schimmel Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/rtnetlink.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index c392a77ff7881..925af6b430179 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -3280,6 +3280,9 @@ int ndo_dflt_fdb_dump(struct sk_buff *skb, { int err; + if (dev->type != ARPHRD_ETHER) + return -EINVAL; + netif_addr_lock_bh(dev); err = nlmsg_populate_fdb(skb, cb, dev, idx, &dev->uc); if (err) -- GitLab From 862b5ab951551cd14ee13c8ab0bde21b63b93bd6 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 1 Dec 2018 01:36:59 +0800 Subject: [PATCH 2110/2269] sctp: kfree_rcu asoc [ Upstream commit fb6df5a6234c38a9c551559506a49a677ac6f07a ] In sctp_hash_transport/sctp_epaddr_lookup_transport, it dereferences a transport's asoc under rcu_read_lock while asoc is freed not after a grace period, which leads to a use-after-free panic. This patch fixes it by calling kfree_rcu to make asoc be freed after a grace period. Note that only the asoc's memory is delayed to free in the patch, it won't cause sk to linger longer. Thanks Neil and Marcelo to make this clear. Fixes: 7fda702f9315 ("sctp: use new rhlist interface on sctp transport rhashtable") Fixes: cd2b70875058 ("sctp: check duplicate node before inserting a new transport") Reported-by: syzbot+0b05d8aa7cb185107483@syzkaller.appspotmail.com Reported-by: syzbot+aad231d51b1923158444@syzkaller.appspotmail.com Suggested-by: Neil Horman Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Acked-by: Neil Horman Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/sctp/structs.h | 2 ++ net/sctp/associola.c | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 8e1e1dc490fd9..94c775773f58c 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1902,6 +1902,8 @@ struct sctp_association { __u64 abandoned_unsent[SCTP_PR_INDEX(MAX) + 1]; __u64 abandoned_sent[SCTP_PR_INDEX(MAX) + 1]; + + struct rcu_head rcu; }; diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 4982b31fec8ef..23fec3817e0cc 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -432,7 +432,7 @@ static void sctp_association_destroy(struct sctp_association *asoc) WARN_ON(atomic_read(&asoc->rmem_alloc)); - kfree(asoc); + kfree_rcu(asoc, rcu); SCTP_DBG_OBJCNT_DEC(assoc); } -- GitLab From 5567e5fefaa904d051ce03a330183125f92d2d37 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 5 Dec 2018 14:24:31 -0800 Subject: [PATCH 2111/2269] tcp: Do not underestimate rwnd_limited [ Upstream commit 41727549de3e7281feb174d568c6e46823db8684 ] If available rwnd is too small, tcp_tso_should_defer() can decide it is worth waiting before splitting a TSO packet. This really means we are rwnd limited. Fixes: 5615f88614a4 ("tcp: instrument how long TCP is limited by receive window") Signed-off-by: Eric Dumazet Acked-by: Soheil Hassas Yeganeh Reviewed-by: Yuchung Cheng Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_output.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index b2ead31afcbab..12cd64382768d 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2328,8 +2328,11 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, } else { if (!push_one && tcp_tso_should_defer(sk, skb, &is_cwnd_limited, - max_segs)) + max_segs)) { + if (!is_cwnd_limited) + is_rwnd_limited = true; break; + } } limit = mss_now; -- GitLab From 6293016fbd9ba1b5ecc7d1afe67fb359d4c05512 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Wed, 5 Dec 2018 14:38:38 -0800 Subject: [PATCH 2112/2269] tcp: fix NULL ref in tail loss probe [ Upstream commit b2b7af861122a0c0f6260155c29a1b2e594cd5b5 ] TCP loss probe timer may fire when the retranmission queue is empty but has a non-zero tp->packets_out counter. tcp_send_loss_probe will call tcp_rearm_rto which triggers NULL pointer reference by fetching the retranmission queue head in its sub-routines. Add a more detailed warning to help catch the root cause of the inflight accounting inconsistency. Reported-by: Rafael Tinoco Signed-off-by: Yuchung Cheng Signed-off-by: Eric Dumazet Signed-off-by: Neal Cardwell Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_output.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 12cd64382768d..1b31b0a1c7faf 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2476,14 +2476,18 @@ void tcp_send_loss_probe(struct sock *sk) skb = tcp_write_queue_tail(sk); } + if (unlikely(!skb)) { + WARN_ONCE(tp->packets_out, + "invalid inflight: %u state %u cwnd %u mss %d\n", + tp->packets_out, sk->sk_state, tp->snd_cwnd, mss); + inet_csk(sk)->icsk_pending = 0; + return; + } + /* At most one outstanding TLP retransmission. */ if (tp->tlp_high_seq) goto rearm_timer; - /* Retransmit last segment. */ - if (WARN_ON(!skb)) - goto rearm_timer; - if (skb_still_in_host_queue(sk, skb)) goto rearm_timer; -- GitLab From 1d2cda4e8ffef837ff0f780633d536eb233a14d5 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Thu, 29 Nov 2018 14:45:39 +0100 Subject: [PATCH 2113/2269] tun: forbid iface creation with rtnl ops [ Upstream commit 35b827b6d06199841a83839e8bb69c0cd13a28be ] It's not supported right now (the goal of the initial patch was to support 'ip link del' only). Before the patch: $ ip link add foo type tun [ 239.632660] BUG: unable to handle kernel NULL pointer dereference at 0000000000000000 [snip] [ 239.636410] RIP: 0010:register_netdevice+0x8e/0x3a0 This panic occurs because dev->netdev_ops is not set by tun_setup(). But to have something usable, it will require more than just setting netdev_ops. Fixes: f019a7a594d9 ("tun: Implement ip link del tunXXX") CC: Eric W. Biederman Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/tun.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 0a008d136aae5..2956bb6cda72a 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1818,9 +1818,9 @@ static void tun_setup(struct net_device *dev) static int tun_validate(struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { - if (!data) - return 0; - return -EINVAL; + NL_SET_ERR_MSG(extack, + "tun/tap creation via rtnetlink is not supported."); + return -EOPNOTSUPP; } static struct rtnl_link_ops tun_link_ops __read_mostly = { -- GitLab From 520e8609ecf23a3a3ac4ba88f084cb11d00f082f Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Thu, 29 Nov 2018 13:53:16 +0800 Subject: [PATCH 2114/2269] virtio-net: keep vnet header zeroed after processing XDP [ Upstream commit 436c9453a1ac0944b82870ef2e0d9be956b396d9 ] We copy vnet header unconditionally in page_to_skb() this is wrong since XDP may modify the packet data. So let's keep a zeroed vnet header for not confusing the conversion between vnet header and skb metadata. In the future, we should able to detect whether or not the packet was modified and keep using the vnet header when packet was not touched. Fixes: f600b6905015 ("virtio_net: Add XDP support") Reported-by: Pavel Popa Signed-off-by: Jason Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/virtio_net.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 0e8e3be503327..215696f21d676 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -309,7 +309,8 @@ static unsigned int mergeable_ctx_to_truesize(void *mrg_ctx) static struct sk_buff *page_to_skb(struct virtnet_info *vi, struct receive_queue *rq, struct page *page, unsigned int offset, - unsigned int len, unsigned int truesize) + unsigned int len, unsigned int truesize, + bool hdr_valid) { struct sk_buff *skb; struct virtio_net_hdr_mrg_rxbuf *hdr; @@ -331,7 +332,8 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi, else hdr_padded_len = sizeof(struct padded_vnet_hdr); - memcpy(hdr, p, hdr_len); + if (hdr_valid) + memcpy(hdr, p, hdr_len); len -= hdr_len; offset += hdr_padded_len; @@ -594,7 +596,8 @@ static struct sk_buff *receive_big(struct net_device *dev, unsigned int len) { struct page *page = buf; - struct sk_buff *skb = page_to_skb(vi, rq, page, 0, len, PAGE_SIZE); + struct sk_buff *skb = page_to_skb(vi, rq, page, 0, len, + PAGE_SIZE, true); if (unlikely(!skb)) goto err; @@ -678,7 +681,8 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, rcu_read_unlock(); put_page(page); head_skb = page_to_skb(vi, rq, xdp_page, - offset, len, PAGE_SIZE); + offset, len, + PAGE_SIZE, false); ewma_pkt_len_add(&rq->mrg_avg_pkt_len, len); return head_skb; } @@ -712,7 +716,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, goto err_skb; } - head_skb = page_to_skb(vi, rq, page, offset, len, truesize); + head_skb = page_to_skb(vi, rq, page, offset, len, truesize, !xdp_prog); curr_skb = head_skb; if (unlikely(!curr_skb)) -- GitLab From 635e16fc7402e663c03f45e92df5aa6acdf079fc Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Wed, 17 Oct 2018 17:54:00 -0700 Subject: [PATCH 2115/2269] ARM: OMAP2+: prm44xx: Fix section annotation on omap44xx_prm_enable_io_wakeup [ Upstream commit eef3dc34a1e0b01d53328b88c25237bcc7323777 ] When building the kernel with Clang, the following section mismatch warning appears: WARNING: vmlinux.o(.text+0x38b3c): Section mismatch in reference from the function omap44xx_prm_late_init() to the function .init.text:omap44xx_prm_enable_io_wakeup() The function omap44xx_prm_late_init() references the function __init omap44xx_prm_enable_io_wakeup(). This is often because omap44xx_prm_late_init lacks a __init annotation or the annotation of omap44xx_prm_enable_io_wakeup is wrong. Remove the __init annotation from omap44xx_prm_enable_io_wakeup so there is no more mismatch. Signed-off-by: Nathan Chancellor Signed-off-by: Tony Lindgren Signed-off-by: Sasha Levin --- arch/arm/mach-omap2/prm44xx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-omap2/prm44xx.c b/arch/arm/mach-omap2/prm44xx.c index 1c0c1663f078a..5affa9f5300bc 100644 --- a/arch/arm/mach-omap2/prm44xx.c +++ b/arch/arm/mach-omap2/prm44xx.c @@ -344,7 +344,7 @@ static void omap44xx_prm_reconfigure_io_chain(void) * to occur, WAKEUPENABLE bits must be set in the pad mux registers, and * omap44xx_prm_reconfigure_io_chain() must be called. No return value. */ -static void __init omap44xx_prm_enable_io_wakeup(void) +static void omap44xx_prm_enable_io_wakeup(void) { s32 inst = omap4_prmst_get_prm_dev_inst(); -- GitLab From b34888a95334876d971199f94a0f44e818b4a396 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Wed, 31 Oct 2018 00:48:12 +0000 Subject: [PATCH 2116/2269] ASoC: rsnd: fixup clock start checker [ Upstream commit 3ee9a76a8c5a10e1bfb04b81db767c6d562ddaf3 ] commit 4d230d12710646 ("ASoC: rsnd: fixup not to call clk_get/set under non-atomic") fixuped clock start timing. But it exchanged clock start checker from ssi->usrcnt to ssi->rate. Current rsnd_ssi_master_clk_start() is called from .prepare, but some player (for example GStreamer) might calls it many times. In such case, the checker might returns error even though it was not error. It should check ssi->usrcnt instead of ssi->rate. This patch fixup it. Without this patch, GStreamer can't switch 48kHz / 44.1kHz. Reported-by: Yusuke Goda Signed-off-by: Kuninori Morimoto Tested-by: Yusuke Goda Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/sh/rcar/ssi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/sh/rcar/ssi.c b/sound/soc/sh/rcar/ssi.c index 34223c8c28a87..0db2791f70350 100644 --- a/sound/soc/sh/rcar/ssi.c +++ b/sound/soc/sh/rcar/ssi.c @@ -280,7 +280,7 @@ static int rsnd_ssi_master_clk_start(struct rsnd_mod *mod, if (rsnd_ssi_is_multi_slave(mod, io)) return 0; - if (ssi->rate) { + if (ssi->usrcnt > 1) { if (ssi->rate != rate) { dev_err(dev, "SSI parent/child should use same rate\n"); return -EINVAL; -- GitLab From f55ad8d2a215da47b4f1a265a4b1b850ba7167a1 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Wed, 17 Oct 2018 10:15:34 +0200 Subject: [PATCH 2117/2269] staging: rtl8723bs: Fix the return value in case of error in 'rtw_wx_read32()' [ Upstream commit c3e43d8b958bd6849817393483e805d8638a8ab7 ] We return 0 unconditionally in 'rtw_wx_read32()'. However, 'ret' is set to some error codes in several error handling paths. Return 'ret' instead to propagate the error code. Fixes: 554c0a3abf216 ("staging: Add rtl8723bs sdio wifi driver") Signed-off-by: Christophe JAILLET Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/staging/rtl8723bs/os_dep/ioctl_linux.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c b/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c index d5e5f830f2a16..1b61da61690b6 100644 --- a/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c +++ b/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c @@ -2383,7 +2383,7 @@ static int rtw_wx_read32(struct net_device *dev, exit: kfree(ptmp); - return 0; + return ret; } static int rtw_wx_write32(struct net_device *dev, -- GitLab From 5b2afd6261aeaf17da4953e07b731ca71bcabd17 Mon Sep 17 00:00:00 2001 From: Adam Ford Date: Sun, 28 Oct 2018 15:29:27 -0500 Subject: [PATCH 2118/2269] ARM: dts: logicpd-somlv: Fix interrupt on mmc3_dat1 [ Upstream commit 3d8b804bc528d3720ec0c39c212af92dafaf6e84 ] The interrupt on mmc3_dat1 is wrong which prevents this from appearing in /proc/interrupts. Fixes: ab8dd3aed011 ("ARM: DTS: Add minimal Support for Logic PD DM3730 SOM-LV") #Kernel 4.9+ Signed-off-by: Adam Ford Signed-off-by: Tony Lindgren Signed-off-by: Sasha Levin --- arch/arm/boot/dts/logicpd-som-lv.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/logicpd-som-lv.dtsi b/arch/arm/boot/dts/logicpd-som-lv.dtsi index c335b923753a3..a7883676f675c 100644 --- a/arch/arm/boot/dts/logicpd-som-lv.dtsi +++ b/arch/arm/boot/dts/logicpd-som-lv.dtsi @@ -123,7 +123,7 @@ }; &mmc3 { - interrupts-extended = <&intc 94 &omap3_pmx_core2 0x46>; + interrupts-extended = <&intc 94 &omap3_pmx_core 0x136>; pinctrl-0 = <&mmc3_pins &wl127x_gpio>; pinctrl-names = "default"; vmmc-supply = <&wl12xx_vmmc>; -- GitLab From 21ffeda14e83665ccd350005e3d51fdb0793d248 Mon Sep 17 00:00:00 2001 From: Janusz Krzysztofik Date: Wed, 7 Nov 2018 22:30:31 +0100 Subject: [PATCH 2119/2269] ARM: OMAP1: ams-delta: Fix possible use of uninitialized field [ Upstream commit cec83ff1241ec98113a19385ea9e9cfa9aa4125b ] While playing with initialization order of modem device, it has been discovered that under some circumstances (early console init, I believe) its .pm() callback may be called before the uart_port->private_data pointer is initialized from plat_serial8250_port->private_data, resulting in NULL pointer dereference. Fix it by checking for uninitialized pointer before using it in modem_pm(). Fixes: aabf31737a6a ("ARM: OMAP1: ams-delta: update the modem to use regulator API") Signed-off-by: Janusz Krzysztofik Signed-off-by: Tony Lindgren Signed-off-by: Sasha Levin --- arch/arm/mach-omap1/board-ams-delta.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm/mach-omap1/board-ams-delta.c b/arch/arm/mach-omap1/board-ams-delta.c index 6cbc69c92913d..4174fa86bfb1c 100644 --- a/arch/arm/mach-omap1/board-ams-delta.c +++ b/arch/arm/mach-omap1/board-ams-delta.c @@ -512,6 +512,9 @@ static void modem_pm(struct uart_port *port, unsigned int state, unsigned old) struct modem_private_data *priv = port->private_data; int ret; + if (!priv) + return; + if (IS_ERR(priv->regulator)) return; -- GitLab From 2ac368e8d504d66c315483b16cb9a3750b4cfb15 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Sat, 10 Nov 2018 04:13:24 +0000 Subject: [PATCH 2120/2269] sysv: return 'err' instead of 0 in __sysv_write_inode [ Upstream commit c4b7d1ba7d263b74bb72e9325262a67139605cde ] Fixes gcc '-Wunused-but-set-variable' warning: fs/sysv/inode.c: In function '__sysv_write_inode': fs/sysv/inode.c:239:6: warning: variable 'err' set but not used [-Wunused-but-set-variable] __sysv_write_inode should return 'err' instead of 0 Fixes: 05459ca81ac3 ("repair sysv_write_inode(), switch sysv to simple_fsync()") Signed-off-by: YueHaibing Signed-off-by: Al Viro Signed-off-by: Sasha Levin --- fs/sysv/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c index 3c47b7d5d4cf8..9e0874d1524ce 100644 --- a/fs/sysv/inode.c +++ b/fs/sysv/inode.c @@ -275,7 +275,7 @@ static int __sysv_write_inode(struct inode *inode, int wait) } } brelse(bh); - return 0; + return err; } int sysv_write_inode(struct inode *inode, struct writeback_control *wbc) -- GitLab From cae5446b33f65162fe21fdb633a63619633e7ea1 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 31 Oct 2018 18:26:21 +0100 Subject: [PATCH 2121/2269] selftests: add script to stress-test nft packet path vs. control plane [ Upstream commit 25d8bcedbf4329895dbaf9dd67baa6f18dad918c ] Start flood ping for each cpu while loading/flushing rulesets to make sure we do not access already-free'd rules from nf_tables evaluation loop. Also add this to TARGETS so 'make run_tests' in selftest dir runs it automatically. This would have caught the bug fixed in previous change ("netfilter: nf_tables: do not skip inactive chains during generation update") sooner. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- tools/testing/selftests/Makefile | 1 + tools/testing/selftests/netfilter/Makefile | 6 ++ tools/testing/selftests/netfilter/config | 2 + .../selftests/netfilter/nft_trans_stress.sh | 78 +++++++++++++++++++ 4 files changed, 87 insertions(+) create mode 100644 tools/testing/selftests/netfilter/Makefile create mode 100644 tools/testing/selftests/netfilter/config create mode 100755 tools/testing/selftests/netfilter/nft_trans_stress.sh diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index ea300e7818a70..10b89f5b9af7a 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -20,6 +20,7 @@ TARGETS += memory-hotplug TARGETS += mount TARGETS += mqueue TARGETS += net +TARGETS += netfilter TARGETS += nsfs TARGETS += powerpc TARGETS += pstore diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile new file mode 100644 index 0000000000000..47ed6cef93fb8 --- /dev/null +++ b/tools/testing/selftests/netfilter/Makefile @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: GPL-2.0 +# Makefile for netfilter selftests + +TEST_PROGS := nft_trans_stress.sh + +include ../lib.mk diff --git a/tools/testing/selftests/netfilter/config b/tools/testing/selftests/netfilter/config new file mode 100644 index 0000000000000..1017313e41a85 --- /dev/null +++ b/tools/testing/selftests/netfilter/config @@ -0,0 +1,2 @@ +CONFIG_NET_NS=y +NF_TABLES_INET=y diff --git a/tools/testing/selftests/netfilter/nft_trans_stress.sh b/tools/testing/selftests/netfilter/nft_trans_stress.sh new file mode 100755 index 0000000000000..f1affd12c4b17 --- /dev/null +++ b/tools/testing/selftests/netfilter/nft_trans_stress.sh @@ -0,0 +1,78 @@ +#!/bin/bash +# +# This test is for stress-testing the nf_tables config plane path vs. +# packet path processing: Make sure we never release rules that are +# still visible to other cpus. +# +# set -e + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +testns=testns1 +tables="foo bar baz quux" + +nft --version > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without nft tool" + exit $ksft_skip +fi + +ip -Version > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + +tmp=$(mktemp) + +for table in $tables; do + echo add table inet "$table" >> "$tmp" + echo flush table inet "$table" >> "$tmp" + + echo "add chain inet $table INPUT { type filter hook input priority 0; }" >> "$tmp" + echo "add chain inet $table OUTPUT { type filter hook output priority 0; }" >> "$tmp" + for c in $(seq 1 400); do + chain=$(printf "chain%03u" "$c") + echo "add chain inet $table $chain" >> "$tmp" + done + + for c in $(seq 1 400); do + chain=$(printf "chain%03u" "$c") + for BASE in INPUT OUTPUT; do + echo "add rule inet $table $BASE counter jump $chain" >> "$tmp" + done + echo "add rule inet $table $chain counter return" >> "$tmp" + done +done + +ip netns add "$testns" +ip -netns "$testns" link set lo up + +lscpu | grep ^CPU\(s\): | ( read cpu cpunum ; +cpunum=$((cpunum-1)) +for i in $(seq 0 $cpunum);do + mask=$(printf 0x%x $((1<<$i))) + ip netns exec "$testns" taskset $mask ping -4 127.0.0.1 -fq > /dev/null & + ip netns exec "$testns" taskset $mask ping -6 ::1 -fq > /dev/null & +done) + +sleep 1 + +for i in $(seq 1 10) ; do ip netns exec "$testns" nft -f "$tmp" & done + +for table in $tables;do + randsleep=$((RANDOM%10)) + sleep $randsleep + ip netns exec "$testns" nft delete table inet $table 2>/dev/null +done + +randsleep=$((RANDOM%10)) +sleep $randsleep + +pkill -9 ping + +wait + +rm -f "$tmp" +ip netns del "$testns" -- GitLab From 38e22947994de227bc1ac57ec82314c24a605605 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 12 Nov 2018 22:43:45 +0100 Subject: [PATCH 2122/2269] netfilter: nf_tables: fix use-after-free when deleting compat expressions [ Upstream commit 29e3880109e357fdc607b4393f8308cef6af9413 ] nft_compat ops do not have static storage duration, unlike all other expressions. When nf_tables_expr_destroy() returns, expr->ops might have been free'd already, so we need to store next address before calling expression destructor. For same reason, we can't deref match pointer after nft_xt_put(). This can be easily reproduced by adding msleep() before nft_match_destroy() returns. Fixes: 0ca743a55991 ("netfilter: nf_tables: add compatibility layer for x_tables") Reported-by: Pablo Neira Ayuso Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nf_tables_api.c | 5 +++-- net/netfilter/nft_compat.c | 3 ++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 3ae365f92bff5..ea1e57daf50ed 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2252,7 +2252,7 @@ err: static void nf_tables_rule_destroy(const struct nft_ctx *ctx, struct nft_rule *rule) { - struct nft_expr *expr; + struct nft_expr *expr, *next; /* * Careful: some expressions might not be initialized in case this @@ -2260,8 +2260,9 @@ static void nf_tables_rule_destroy(const struct nft_ctx *ctx, */ expr = nft_expr_first(rule); while (expr != nft_expr_last(rule) && expr->ops) { + next = nft_expr_next(expr); nf_tables_expr_destroy(ctx, expr); - expr = nft_expr_next(expr); + expr = next; } kfree(rule); } diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index 6da1cec1494a9..7533c2fd6b769 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -497,6 +497,7 @@ __nft_match_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr, void *info) { struct xt_match *match = expr->ops->data; + struct module *me = match->me; struct xt_mtdtor_param par; par.net = ctx->net; @@ -507,7 +508,7 @@ __nft_match_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr, par.match->destroy(&par); if (nft_xt_put(container_of(expr->ops, struct nft_xt, ops))) - module_put(match->me); + module_put(me); } static void -- GitLab From de2324a00979257c7fff8cf9f2bdc3023957fef8 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Fri, 9 Nov 2018 16:42:14 -0800 Subject: [PATCH 2123/2269] hwmon (ina2xx) Fix NULL id pointer in probe() [ Upstream commit 70df9ebbd82c794ddfbb49d45b337f18d5588dc2 ] When using DT configurations, the id pointer might turn out to be NULL. Then the driver encounters NULL pointer access: Unable to handle kernel read from unreadable memory at vaddr 00000018 [...] PC is at ina2xx_probe+0x114/0x200 LR is at ina2xx_probe+0x10c/0x200 [...] Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b The reason is that i2c core returns the id pointer by matching id_table with client->name, while the client->name is actually using the name from the first string in the DT compatible list, not the best one. So i2c core would fail to match the id_table if the best matched compatible string isn't the first one, and then would return a NULL id pointer. This probably should be fixed in i2c core. But it doesn't hurt to make the driver robust. So this patch fixes it by using the "chip" that's added to unify both DT and non-DT configurations. Additionally, since id pointer could be null, so as id->name: ina2xx 10-0047: power monitor (null) (Rshunt = 1000 uOhm) ina2xx 10-0048: power monitor (null) (Rshunt = 10000 uOhm) So this patch also fixes NULL name pointer, using client->name to play safe and to align with hwmon->name. Fixes: bd0ddd4d0883 ("hwmon: (ina2xx) Add OF device ID table") Signed-off-by: Nicolin Chen Signed-off-by: Guenter Roeck Signed-off-by: Sasha Levin --- drivers/hwmon/ina2xx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/hwmon/ina2xx.c b/drivers/hwmon/ina2xx.c index 71d3445ba869c..c2252cf452f50 100644 --- a/drivers/hwmon/ina2xx.c +++ b/drivers/hwmon/ina2xx.c @@ -491,7 +491,7 @@ static int ina2xx_probe(struct i2c_client *client, } data->groups[group++] = &ina2xx_group; - if (id->driver_data == ina226) + if (chip == ina226) data->groups[group++] = &ina226_group; hwmon_dev = devm_hwmon_device_register_with_groups(dev, client->name, @@ -500,7 +500,7 @@ static int ina2xx_probe(struct i2c_client *client, return PTR_ERR(hwmon_dev); dev_info(dev, "power monitor %s (Rshunt = %li uOhm)\n", - id->name, data->rshunt); + client->name, data->rshunt); return 0; } -- GitLab From 5391e32b9d0692abb1996ba3c3fdf2e0c97b72e4 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Mon, 12 Nov 2018 13:36:38 +0000 Subject: [PATCH 2124/2269] ASoC: wm_adsp: Fix dma-unsafe read of scratch registers [ Upstream commit 20e00db2f59bdddf8a8e241473ef8be94631d3ae ] Stack memory isn't DMA-safe so it isn't safe to use either regmap_raw_read or regmap_bulk_read to read into stack memory. The two functions to read the scratch registers were using stack memory and regmap_raw_read. It's not worth allocating memory just for this trivial read, and it isn't time-critical. A simple regmap_read for each register is sufficient. Signed-off-by: Richard Fitzgerald Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/wm_adsp.c | 37 ++++++++++++++++++++----------------- 1 file changed, 20 insertions(+), 17 deletions(-) diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c index 989d093abda7e..67330b6ab204c 100644 --- a/sound/soc/codecs/wm_adsp.c +++ b/sound/soc/codecs/wm_adsp.c @@ -787,38 +787,41 @@ static unsigned int wm_adsp_region_to_reg(struct wm_adsp_region const *mem, static void wm_adsp2_show_fw_status(struct wm_adsp *dsp) { - u16 scratch[4]; + unsigned int scratch[4]; + unsigned int addr = dsp->base + ADSP2_SCRATCH0; + unsigned int i; int ret; - ret = regmap_raw_read(dsp->regmap, dsp->base + ADSP2_SCRATCH0, - scratch, sizeof(scratch)); - if (ret) { - adsp_err(dsp, "Failed to read SCRATCH regs: %d\n", ret); - return; + for (i = 0; i < ARRAY_SIZE(scratch); ++i) { + ret = regmap_read(dsp->regmap, addr + i, &scratch[i]); + if (ret) { + adsp_err(dsp, "Failed to read SCRATCH%u: %d\n", i, ret); + return; + } } adsp_dbg(dsp, "FW SCRATCH 0:0x%x 1:0x%x 2:0x%x 3:0x%x\n", - be16_to_cpu(scratch[0]), - be16_to_cpu(scratch[1]), - be16_to_cpu(scratch[2]), - be16_to_cpu(scratch[3])); + scratch[0], scratch[1], scratch[2], scratch[3]); } static void wm_adsp2v2_show_fw_status(struct wm_adsp *dsp) { - u32 scratch[2]; + unsigned int scratch[2]; int ret; - ret = regmap_raw_read(dsp->regmap, dsp->base + ADSP2V2_SCRATCH0_1, - scratch, sizeof(scratch)); - + ret = regmap_read(dsp->regmap, dsp->base + ADSP2V2_SCRATCH0_1, + &scratch[0]); if (ret) { - adsp_err(dsp, "Failed to read SCRATCH regs: %d\n", ret); + adsp_err(dsp, "Failed to read SCRATCH0_1: %d\n", ret); return; } - scratch[0] = be32_to_cpu(scratch[0]); - scratch[1] = be32_to_cpu(scratch[1]); + ret = regmap_read(dsp->regmap, dsp->base + ADSP2V2_SCRATCH2_3, + &scratch[1]); + if (ret) { + adsp_err(dsp, "Failed to read SCRATCH2_3: %d\n", ret); + return; + } adsp_dbg(dsp, "FW SCRATCH 0:0x%x 1:0x%x 2:0x%x 3:0x%x\n", scratch[0] & 0xFFFF, -- GitLab From 250dea0dfb12e2803d6de0105fae7177e4bd1b80 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Tue, 13 Nov 2018 15:38:22 +0000 Subject: [PATCH 2125/2269] s390/cpum_cf: Reject request for sampling in event initialization [ Upstream commit 613a41b0d16e617f46776a93b975a1eeea96417c ] On s390 command perf top fails [root@s35lp76 perf] # ./perf top -F100000 --stdio Error: cycles: PMU Hardware doesn't support sampling/overflow-interrupts. Try 'perf stat' [root@s35lp76 perf] # Using event -e rb0000 works as designed. Event rb0000 is the event number of the sampling facility for basic sampling. During system start up the following PMUs are installed in the kernel's PMU list (from head to tail): cpum_cf --> s390 PMU counter facility device driver cpum_sf --> s390 PMU sampling facility device driver uprobe kprobe tracepoint task_clock cpu_clock Perf top executes following functions and calls perf_event_open(2) system call with different parameters many times: cmd_top --> __cmd_top --> perf_evlist__add_default --> __perf_evlist__add_default --> perf_evlist__new_cycles (creates event type:0 (HW) config 0 (CPU_CYCLES) --> perf_event_attr__set_max_precise_ip Uses perf_event_open(2) to detect correct precise_ip level. Fails 3 times on s390 which is ok. Then functions cmd_top --> __cmd_top --> perf_top__start_counters -->perf_evlist__config --> perf_can_comm_exec --> perf_probe_api This functions test support for the following events: "cycles:u", "instructions:u", "cpu-clock:u" using --> perf_do_probe_api --> perf_event_open_cloexec Test the close on exec flag support with perf_event_open(2). perf_do_probe_api returns true if the event is supported. The function returns true because event cpu-clock is supported by the PMU cpu_clock. This is achieved by many calls to perf_event_open(2). Function perf_top__start_counters now calls perf_evsel__open() for every event, which is the default event cpu_cycles (config:0) and type HARDWARE (type:0) which a predfined frequence of 4000. Given the above order of the PMU list, the PMU cpum_cf gets called first and returns 0, which indicates support for this sampling. The event is fully allocated in the function perf_event_open (file kernel/event/core.c near line 10521 and the following check fails: event = perf_event_alloc(&attr, cpu, task, group_leader, NULL, NULL, NULL, cgroup_fd); if (IS_ERR(event)) { err = PTR_ERR(event); goto err_cred; } if (is_sampling_event(event)) { if (event->pmu->capabilities & PERF_PMU_CAP_NO_INTERRUPT) { err = -EOPNOTSUPP; goto err_alloc; } } The check for the interrupt capabilities fails and the system call perf_event_open() returns -EOPNOTSUPP (-95). Add a check to return -ENODEV when sampling is requested in PMU cpum_cf. This allows common kernel code in the perf_event_open() system call to test the next PMU in above list. Fixes: 97b1198fece0 (" "s390, perf: Use common PMU interrupt disabled code") Signed-off-by: Thomas Richter Reviewed-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky Signed-off-by: Sasha Levin --- arch/s390/kernel/perf_cpum_cf.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index 61e91fee8467c..edf6a61f0a64e 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -349,6 +349,8 @@ static int __hw_perf_event_init(struct perf_event *event) break; case PERF_TYPE_HARDWARE: + if (is_sampling_event(event)) /* No sampling support */ + return -ENOENT; ev = attr->config; /* Count user space (problem-state) only */ if (!attr->exclude_user && attr->exclude_kernel) { -- GitLab From fd2c1ba142277bfd6f3488e6105c9eb643246ddc Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Tue, 13 Nov 2018 19:48:54 -0800 Subject: [PATCH 2126/2269] hwmon: (ina2xx) Fix current value calculation [ Upstream commit 38cd989ee38c16388cde89db5b734f9d55b905f9 ] The current register (04h) has a sign bit at MSB. The comments for this calculation also mention that it's a signed register. However, the regval is unsigned type so result of calculation turns out to be an incorrect value when current is negative. This patch simply fixes this by adding a casting to s16. Fixes: 5d389b125186c ("hwmon: (ina2xx) Make calibration register value fixed") Signed-off-by: Nicolin Chen Signed-off-by: Guenter Roeck Signed-off-by: Sasha Levin --- drivers/hwmon/ina2xx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwmon/ina2xx.c b/drivers/hwmon/ina2xx.c index c2252cf452f50..07ee19573b3f0 100644 --- a/drivers/hwmon/ina2xx.c +++ b/drivers/hwmon/ina2xx.c @@ -274,7 +274,7 @@ static int ina2xx_get_value(struct ina2xx_data *data, u8 reg, break; case INA2XX_CURRENT: /* signed register, result in mA */ - val = regval * data->current_lsb_uA; + val = (s16)regval * data->current_lsb_uA; val = DIV_ROUND_CLOSEST(val, 1000); break; case INA2XX_CALIBRATION: -- GitLab From b65b4228bdaa8e9d67d10a9206f84df9a20c292d Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Wed, 14 Nov 2018 14:58:20 +0200 Subject: [PATCH 2127/2269] ASoC: omap-abe-twl6040: Fix missing audio card caused by deferred probing [ Upstream commit 76836fd354922ebe4798a64fda01f8dc6a8b0984 ] The machine driver fails to probe in next-20181113 with: [ 2.539093] omap-abe-twl6040 sound: ASoC: CODEC DAI twl6040-legacy not registered [ 2.546630] omap-abe-twl6040 sound: devm_snd_soc_register_card() failed: -517 ... [ 3.693206] omap-abe-twl6040 sound: ASoC: Both platform name/of_node are set for TWL6040 [ 3.701446] omap-abe-twl6040 sound: ASoC: failed to init link TWL6040 [ 3.708007] omap-abe-twl6040 sound: devm_snd_soc_register_card() failed: -22 [ 3.715148] omap-abe-twl6040: probe of sound failed with error -22 Bisect pointed to a merge commit: first bad commit: [0f688ab20a540aafa984c5dbd68a71debebf4d7f] Merge remote-tracking branch 'net-next/master' and a diff between a working kernel does not reveal anything which would explain the change in behavior. Further investigation showed that on the second try of loading fails because the dai_link->platform is no longer NULL and it might be pointing to uninitialized memory. The fix is to move the snd_soc_dai_link and snd_soc_card inside of the abe_twl6040 struct, which is dynamically allocated every time the driver probes. Signed-off-by: Peter Ujfalusi Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/omap/omap-abe-twl6040.c | 67 +++++++++++++------------------ 1 file changed, 29 insertions(+), 38 deletions(-) diff --git a/sound/soc/omap/omap-abe-twl6040.c b/sound/soc/omap/omap-abe-twl6040.c index 614b18d2f631b..6fd143799534d 100644 --- a/sound/soc/omap/omap-abe-twl6040.c +++ b/sound/soc/omap/omap-abe-twl6040.c @@ -36,6 +36,8 @@ #include "../codecs/twl6040.h" struct abe_twl6040 { + struct snd_soc_card card; + struct snd_soc_dai_link dai_links[2]; int jack_detection; /* board can detect jack events */ int mclk_freq; /* MCLK frequency speed for twl6040 */ }; @@ -208,40 +210,10 @@ static int omap_abe_dmic_init(struct snd_soc_pcm_runtime *rtd) ARRAY_SIZE(dmic_audio_map)); } -/* Digital audio interface glue - connects codec <--> CPU */ -static struct snd_soc_dai_link abe_twl6040_dai_links[] = { - { - .name = "TWL6040", - .stream_name = "TWL6040", - .codec_dai_name = "twl6040-legacy", - .codec_name = "twl6040-codec", - .init = omap_abe_twl6040_init, - .ops = &omap_abe_ops, - }, - { - .name = "DMIC", - .stream_name = "DMIC Capture", - .codec_dai_name = "dmic-hifi", - .codec_name = "dmic-codec", - .init = omap_abe_dmic_init, - .ops = &omap_abe_dmic_ops, - }, -}; - -/* Audio machine driver */ -static struct snd_soc_card omap_abe_card = { - .owner = THIS_MODULE, - - .dapm_widgets = twl6040_dapm_widgets, - .num_dapm_widgets = ARRAY_SIZE(twl6040_dapm_widgets), - .dapm_routes = audio_map, - .num_dapm_routes = ARRAY_SIZE(audio_map), -}; - static int omap_abe_probe(struct platform_device *pdev) { struct device_node *node = pdev->dev.of_node; - struct snd_soc_card *card = &omap_abe_card; + struct snd_soc_card *card; struct device_node *dai_node; struct abe_twl6040 *priv; int num_links = 0; @@ -252,12 +224,18 @@ static int omap_abe_probe(struct platform_device *pdev) return -ENODEV; } - card->dev = &pdev->dev; - priv = devm_kzalloc(&pdev->dev, sizeof(struct abe_twl6040), GFP_KERNEL); if (priv == NULL) return -ENOMEM; + card = &priv->card; + card->dev = &pdev->dev; + card->owner = THIS_MODULE; + card->dapm_widgets = twl6040_dapm_widgets; + card->num_dapm_widgets = ARRAY_SIZE(twl6040_dapm_widgets); + card->dapm_routes = audio_map; + card->num_dapm_routes = ARRAY_SIZE(audio_map); + if (snd_soc_of_parse_card_name(card, "ti,model")) { dev_err(&pdev->dev, "Card name is not provided\n"); return -ENODEV; @@ -274,14 +252,27 @@ static int omap_abe_probe(struct platform_device *pdev) dev_err(&pdev->dev, "McPDM node is not provided\n"); return -EINVAL; } - abe_twl6040_dai_links[0].cpu_of_node = dai_node; - abe_twl6040_dai_links[0].platform_of_node = dai_node; + + priv->dai_links[0].name = "DMIC"; + priv->dai_links[0].stream_name = "TWL6040"; + priv->dai_links[0].cpu_of_node = dai_node; + priv->dai_links[0].platform_of_node = dai_node; + priv->dai_links[0].codec_dai_name = "twl6040-legacy"; + priv->dai_links[0].codec_name = "twl6040-codec"; + priv->dai_links[0].init = omap_abe_twl6040_init; + priv->dai_links[0].ops = &omap_abe_ops; dai_node = of_parse_phandle(node, "ti,dmic", 0); if (dai_node) { num_links = 2; - abe_twl6040_dai_links[1].cpu_of_node = dai_node; - abe_twl6040_dai_links[1].platform_of_node = dai_node; + priv->dai_links[1].name = "TWL6040"; + priv->dai_links[1].stream_name = "DMIC Capture"; + priv->dai_links[1].cpu_of_node = dai_node; + priv->dai_links[1].platform_of_node = dai_node; + priv->dai_links[1].codec_dai_name = "dmic-hifi"; + priv->dai_links[1].codec_name = "dmic-codec"; + priv->dai_links[1].init = omap_abe_dmic_init; + priv->dai_links[1].ops = &omap_abe_dmic_ops; } else { num_links = 1; } @@ -300,7 +291,7 @@ static int omap_abe_probe(struct platform_device *pdev) return -ENODEV; } - card->dai_link = abe_twl6040_dai_links; + card->dai_link = priv->dai_links; card->num_links = num_links; snd_soc_card_set_drvdata(card, priv); -- GitLab From 55b648d1d624b9beced6384c8e2895b35dc69878 Mon Sep 17 00:00:00 2001 From: Tzung-Bi Shih Date: Wed, 14 Nov 2018 17:06:13 +0800 Subject: [PATCH 2128/2269] ASoC: dapm: Recalculate audio map forcely when card instantiated [ Upstream commit 882eab6c28d23a970ae73b7eb831b169a672d456 ] Audio map are possible in wrong state before card->instantiated has been set to true. Imaging the following examples: time 1: at the beginning in:-1 in:-1 in:-1 in:-1 out:-1 out:-1 out:-1 out:-1 SIGGEN A B Spk time 2: after someone called snd_soc_dapm_new_widgets() (e.g. create_fill_widget_route_map() in sound/soc/codecs/hdac_hdmi.c) in:1 in:0 in:0 in:0 out:0 out:0 out:0 out:1 SIGGEN A B Spk time 3: routes added in:1 in:0 in:0 in:0 out:0 out:0 out:0 out:1 SIGGEN -----> A -----> B ---> Spk In the end, the path should be powered on but it did not. At time 3, "in" of SIGGEN and "out" of Spk did not propagate to their neighbors because snd_soc_dapm_add_path() will not invalidate the paths if the card has not instantiated (i.e. card->instantiated is false). To correct the state of audio map, recalculate the whole map forcely. Signed-off-by: Tzung-Bi Shih Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/soc-core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index fee4b0ef5566c..42c2a3065b779 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -2307,6 +2307,7 @@ static int snd_soc_instantiate_card(struct snd_soc_card *card) } card->instantiated = 1; + dapm_mark_endpoints_dirty(card); snd_soc_dapm_sync(&card->dapm); mutex_unlock(&card->mutex); mutex_unlock(&client_mutex); -- GitLab From aa17c9c00ce006de2dd7b0eabed9e45b48d3d044 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 31 Oct 2018 15:20:05 +0100 Subject: [PATCH 2129/2269] iio/hid-sensors: Fix IIO_CHAN_INFO_RAW returning wrong values for signed numbers [ Upstream commit 0145b50566e7de5637e80ecba96c7f0e6fff1aad ] Before this commit sensor_hub_input_attr_get_raw_value() failed to take the signedness of 16 and 8 bit values into account, returning e.g. 65436 instead of -100 for the z-axis reading of an accelerometer. This commit adds a new is_signed parameter to the function and makes all callers pass the appropriate value for this. While at it, this commit also fixes up some neighboring lines where statements were needlessly split over 2 lines to improve readability. Signed-off-by: Hans de Goede Acked-by: Srinivas Pandruvada Acked-by: Benjamin Tissoires Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Sasha Levin --- drivers/hid/hid-sensor-custom.c | 2 +- drivers/hid/hid-sensor-hub.c | 13 ++++++++++--- drivers/iio/accel/hid-sensor-accel-3d.c | 5 ++++- drivers/iio/gyro/hid-sensor-gyro-3d.c | 5 ++++- drivers/iio/humidity/hid-sensor-humidity.c | 3 ++- drivers/iio/light/hid-sensor-als.c | 8 +++++--- drivers/iio/light/hid-sensor-prox.c | 8 +++++--- drivers/iio/magnetometer/hid-sensor-magn-3d.c | 8 +++++--- drivers/iio/orientation/hid-sensor-incl-3d.c | 8 +++++--- drivers/iio/pressure/hid-sensor-press.c | 8 +++++--- drivers/iio/temperature/hid-sensor-temperature.c | 3 ++- drivers/rtc/rtc-hid-sensor-time.c | 2 +- include/linux/hid-sensor-hub.h | 4 +++- 13 files changed, 52 insertions(+), 25 deletions(-) diff --git a/drivers/hid/hid-sensor-custom.c b/drivers/hid/hid-sensor-custom.c index 0bcf041368c71..574126b649e9f 100644 --- a/drivers/hid/hid-sensor-custom.c +++ b/drivers/hid/hid-sensor-custom.c @@ -358,7 +358,7 @@ static ssize_t show_value(struct device *dev, struct device_attribute *attr, sensor_inst->hsdev, sensor_inst->hsdev->usage, usage, report_id, - SENSOR_HUB_SYNC); + SENSOR_HUB_SYNC, false); } else if (!strncmp(name, "units", strlen("units"))) value = sensor_inst->fields[field_index].attribute.units; else if (!strncmp(name, "unit-expo", strlen("unit-expo"))) diff --git a/drivers/hid/hid-sensor-hub.c b/drivers/hid/hid-sensor-hub.c index faba542d1b074..b5bd5cb7d5324 100644 --- a/drivers/hid/hid-sensor-hub.c +++ b/drivers/hid/hid-sensor-hub.c @@ -299,7 +299,8 @@ EXPORT_SYMBOL_GPL(sensor_hub_get_feature); int sensor_hub_input_attr_get_raw_value(struct hid_sensor_hub_device *hsdev, u32 usage_id, u32 attr_usage_id, u32 report_id, - enum sensor_hub_read_flags flag) + enum sensor_hub_read_flags flag, + bool is_signed) { struct sensor_hub_data *data = hid_get_drvdata(hsdev->hdev); unsigned long flags; @@ -331,10 +332,16 @@ int sensor_hub_input_attr_get_raw_value(struct hid_sensor_hub_device *hsdev, &hsdev->pending.ready, HZ*5); switch (hsdev->pending.raw_size) { case 1: - ret_val = *(u8 *)hsdev->pending.raw_data; + if (is_signed) + ret_val = *(s8 *)hsdev->pending.raw_data; + else + ret_val = *(u8 *)hsdev->pending.raw_data; break; case 2: - ret_val = *(u16 *)hsdev->pending.raw_data; + if (is_signed) + ret_val = *(s16 *)hsdev->pending.raw_data; + else + ret_val = *(u16 *)hsdev->pending.raw_data; break; case 4: ret_val = *(u32 *)hsdev->pending.raw_data; diff --git a/drivers/iio/accel/hid-sensor-accel-3d.c b/drivers/iio/accel/hid-sensor-accel-3d.c index 2238a26aba637..f573d9c61fc38 100644 --- a/drivers/iio/accel/hid-sensor-accel-3d.c +++ b/drivers/iio/accel/hid-sensor-accel-3d.c @@ -149,6 +149,7 @@ static int accel_3d_read_raw(struct iio_dev *indio_dev, int report_id = -1; u32 address; int ret_type; + s32 min; struct hid_sensor_hub_device *hsdev = accel_state->common_attributes.hsdev; @@ -158,12 +159,14 @@ static int accel_3d_read_raw(struct iio_dev *indio_dev, case 0: hid_sensor_power_state(&accel_state->common_attributes, true); report_id = accel_state->accel[chan->scan_index].report_id; + min = accel_state->accel[chan->scan_index].logical_minimum; address = accel_3d_addresses[chan->scan_index]; if (report_id >= 0) *val = sensor_hub_input_attr_get_raw_value( accel_state->common_attributes.hsdev, hsdev->usage, address, report_id, - SENSOR_HUB_SYNC); + SENSOR_HUB_SYNC, + min < 0); else { *val = 0; hid_sensor_power_state(&accel_state->common_attributes, diff --git a/drivers/iio/gyro/hid-sensor-gyro-3d.c b/drivers/iio/gyro/hid-sensor-gyro-3d.c index c67ce2ac47153..d9192eb41131f 100644 --- a/drivers/iio/gyro/hid-sensor-gyro-3d.c +++ b/drivers/iio/gyro/hid-sensor-gyro-3d.c @@ -111,6 +111,7 @@ static int gyro_3d_read_raw(struct iio_dev *indio_dev, int report_id = -1; u32 address; int ret_type; + s32 min; *val = 0; *val2 = 0; @@ -118,13 +119,15 @@ static int gyro_3d_read_raw(struct iio_dev *indio_dev, case 0: hid_sensor_power_state(&gyro_state->common_attributes, true); report_id = gyro_state->gyro[chan->scan_index].report_id; + min = gyro_state->gyro[chan->scan_index].logical_minimum; address = gyro_3d_addresses[chan->scan_index]; if (report_id >= 0) *val = sensor_hub_input_attr_get_raw_value( gyro_state->common_attributes.hsdev, HID_USAGE_SENSOR_GYRO_3D, address, report_id, - SENSOR_HUB_SYNC); + SENSOR_HUB_SYNC, + min < 0); else { *val = 0; hid_sensor_power_state(&gyro_state->common_attributes, diff --git a/drivers/iio/humidity/hid-sensor-humidity.c b/drivers/iio/humidity/hid-sensor-humidity.c index 6e09c1acfe516..e53914d51ec36 100644 --- a/drivers/iio/humidity/hid-sensor-humidity.c +++ b/drivers/iio/humidity/hid-sensor-humidity.c @@ -75,7 +75,8 @@ static int humidity_read_raw(struct iio_dev *indio_dev, HID_USAGE_SENSOR_HUMIDITY, HID_USAGE_SENSOR_ATMOSPHERIC_HUMIDITY, humid_st->humidity_attr.report_id, - SENSOR_HUB_SYNC); + SENSOR_HUB_SYNC, + humid_st->humidity_attr.logical_minimum < 0); hid_sensor_power_state(&humid_st->common_attributes, false); return IIO_VAL_INT; diff --git a/drivers/iio/light/hid-sensor-als.c b/drivers/iio/light/hid-sensor-als.c index 059d964772c73..95ca86f504348 100644 --- a/drivers/iio/light/hid-sensor-als.c +++ b/drivers/iio/light/hid-sensor-als.c @@ -93,6 +93,7 @@ static int als_read_raw(struct iio_dev *indio_dev, int report_id = -1; u32 address; int ret_type; + s32 min; *val = 0; *val2 = 0; @@ -102,8 +103,8 @@ static int als_read_raw(struct iio_dev *indio_dev, case CHANNEL_SCAN_INDEX_INTENSITY: case CHANNEL_SCAN_INDEX_ILLUM: report_id = als_state->als_illum.report_id; - address = - HID_USAGE_SENSOR_LIGHT_ILLUM; + min = als_state->als_illum.logical_minimum; + address = HID_USAGE_SENSOR_LIGHT_ILLUM; break; default: report_id = -1; @@ -116,7 +117,8 @@ static int als_read_raw(struct iio_dev *indio_dev, als_state->common_attributes.hsdev, HID_USAGE_SENSOR_ALS, address, report_id, - SENSOR_HUB_SYNC); + SENSOR_HUB_SYNC, + min < 0); hid_sensor_power_state(&als_state->common_attributes, false); } else { diff --git a/drivers/iio/light/hid-sensor-prox.c b/drivers/iio/light/hid-sensor-prox.c index 73fced8a63b71..8c017abc4ee25 100644 --- a/drivers/iio/light/hid-sensor-prox.c +++ b/drivers/iio/light/hid-sensor-prox.c @@ -73,6 +73,7 @@ static int prox_read_raw(struct iio_dev *indio_dev, int report_id = -1; u32 address; int ret_type; + s32 min; *val = 0; *val2 = 0; @@ -81,8 +82,8 @@ static int prox_read_raw(struct iio_dev *indio_dev, switch (chan->scan_index) { case CHANNEL_SCAN_INDEX_PRESENCE: report_id = prox_state->prox_attr.report_id; - address = - HID_USAGE_SENSOR_HUMAN_PRESENCE; + min = prox_state->prox_attr.logical_minimum; + address = HID_USAGE_SENSOR_HUMAN_PRESENCE; break; default: report_id = -1; @@ -95,7 +96,8 @@ static int prox_read_raw(struct iio_dev *indio_dev, prox_state->common_attributes.hsdev, HID_USAGE_SENSOR_PROX, address, report_id, - SENSOR_HUB_SYNC); + SENSOR_HUB_SYNC, + min < 0); hid_sensor_power_state(&prox_state->common_attributes, false); } else { diff --git a/drivers/iio/magnetometer/hid-sensor-magn-3d.c b/drivers/iio/magnetometer/hid-sensor-magn-3d.c index 0e791b02ed4a7..b495107bd1730 100644 --- a/drivers/iio/magnetometer/hid-sensor-magn-3d.c +++ b/drivers/iio/magnetometer/hid-sensor-magn-3d.c @@ -163,21 +163,23 @@ static int magn_3d_read_raw(struct iio_dev *indio_dev, int report_id = -1; u32 address; int ret_type; + s32 min; *val = 0; *val2 = 0; switch (mask) { case 0: hid_sensor_power_state(&magn_state->magn_flux_attributes, true); - report_id = - magn_state->magn[chan->address].report_id; + report_id = magn_state->magn[chan->address].report_id; + min = magn_state->magn[chan->address].logical_minimum; address = magn_3d_addresses[chan->address]; if (report_id >= 0) *val = sensor_hub_input_attr_get_raw_value( magn_state->magn_flux_attributes.hsdev, HID_USAGE_SENSOR_COMPASS_3D, address, report_id, - SENSOR_HUB_SYNC); + SENSOR_HUB_SYNC, + min < 0); else { *val = 0; hid_sensor_power_state( diff --git a/drivers/iio/orientation/hid-sensor-incl-3d.c b/drivers/iio/orientation/hid-sensor-incl-3d.c index fd1b3696ee42f..16c744bef0212 100644 --- a/drivers/iio/orientation/hid-sensor-incl-3d.c +++ b/drivers/iio/orientation/hid-sensor-incl-3d.c @@ -111,21 +111,23 @@ static int incl_3d_read_raw(struct iio_dev *indio_dev, int report_id = -1; u32 address; int ret_type; + s32 min; *val = 0; *val2 = 0; switch (mask) { case IIO_CHAN_INFO_RAW: hid_sensor_power_state(&incl_state->common_attributes, true); - report_id = - incl_state->incl[chan->scan_index].report_id; + report_id = incl_state->incl[chan->scan_index].report_id; + min = incl_state->incl[chan->scan_index].logical_minimum; address = incl_3d_addresses[chan->scan_index]; if (report_id >= 0) *val = sensor_hub_input_attr_get_raw_value( incl_state->common_attributes.hsdev, HID_USAGE_SENSOR_INCLINOMETER_3D, address, report_id, - SENSOR_HUB_SYNC); + SENSOR_HUB_SYNC, + min < 0); else { hid_sensor_power_state(&incl_state->common_attributes, false); diff --git a/drivers/iio/pressure/hid-sensor-press.c b/drivers/iio/pressure/hid-sensor-press.c index 6848d8c80effc..1c49ef78f8883 100644 --- a/drivers/iio/pressure/hid-sensor-press.c +++ b/drivers/iio/pressure/hid-sensor-press.c @@ -77,6 +77,7 @@ static int press_read_raw(struct iio_dev *indio_dev, int report_id = -1; u32 address; int ret_type; + s32 min; *val = 0; *val2 = 0; @@ -85,8 +86,8 @@ static int press_read_raw(struct iio_dev *indio_dev, switch (chan->scan_index) { case CHANNEL_SCAN_INDEX_PRESSURE: report_id = press_state->press_attr.report_id; - address = - HID_USAGE_SENSOR_ATMOSPHERIC_PRESSURE; + min = press_state->press_attr.logical_minimum; + address = HID_USAGE_SENSOR_ATMOSPHERIC_PRESSURE; break; default: report_id = -1; @@ -99,7 +100,8 @@ static int press_read_raw(struct iio_dev *indio_dev, press_state->common_attributes.hsdev, HID_USAGE_SENSOR_PRESSURE, address, report_id, - SENSOR_HUB_SYNC); + SENSOR_HUB_SYNC, + min < 0); hid_sensor_power_state(&press_state->common_attributes, false); } else { diff --git a/drivers/iio/temperature/hid-sensor-temperature.c b/drivers/iio/temperature/hid-sensor-temperature.c index c01efeca4002e..6ed5cd5742f14 100644 --- a/drivers/iio/temperature/hid-sensor-temperature.c +++ b/drivers/iio/temperature/hid-sensor-temperature.c @@ -76,7 +76,8 @@ static int temperature_read_raw(struct iio_dev *indio_dev, HID_USAGE_SENSOR_TEMPERATURE, HID_USAGE_SENSOR_DATA_ENVIRONMENTAL_TEMPERATURE, temp_st->temperature_attr.report_id, - SENSOR_HUB_SYNC); + SENSOR_HUB_SYNC, + temp_st->temperature_attr.logical_minimum < 0); hid_sensor_power_state( &temp_st->common_attributes, false); diff --git a/drivers/rtc/rtc-hid-sensor-time.c b/drivers/rtc/rtc-hid-sensor-time.c index 2751dba850c61..3e1abb4554721 100644 --- a/drivers/rtc/rtc-hid-sensor-time.c +++ b/drivers/rtc/rtc-hid-sensor-time.c @@ -213,7 +213,7 @@ static int hid_rtc_read_time(struct device *dev, struct rtc_time *tm) /* get a report with all values through requesting one value */ sensor_hub_input_attr_get_raw_value(time_state->common_attributes.hsdev, HID_USAGE_SENSOR_TIME, hid_time_addresses[0], - time_state->info[0].report_id, SENSOR_HUB_SYNC); + time_state->info[0].report_id, SENSOR_HUB_SYNC, false); /* wait for all values (event) */ ret = wait_for_completion_killable_timeout( &time_state->comp_last_time, HZ*6); diff --git a/include/linux/hid-sensor-hub.h b/include/linux/hid-sensor-hub.h index fc7aae64dcde8..000de6da3b1bb 100644 --- a/include/linux/hid-sensor-hub.h +++ b/include/linux/hid-sensor-hub.h @@ -177,6 +177,7 @@ int sensor_hub_input_get_attribute_info(struct hid_sensor_hub_device *hsdev, * @attr_usage_id: Attribute usage id as per spec * @report_id: Report id to look for * @flag: Synchronous or asynchronous read +* @is_signed: If true then fields < 32 bits will be sign-extended * * Issues a synchronous or asynchronous read request for an input attribute. * Returns data upto 32 bits. @@ -190,7 +191,8 @@ enum sensor_hub_read_flags { int sensor_hub_input_attr_get_raw_value(struct hid_sensor_hub_device *hsdev, u32 usage_id, u32 attr_usage_id, u32 report_id, - enum sensor_hub_read_flags flag + enum sensor_hub_read_flags flag, + bool is_signed ); /** -- GitLab From f500a4ef7e5d7feddee380edfb1a1d91fc8fa627 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Fri, 16 Nov 2018 21:32:35 +0900 Subject: [PATCH 2130/2269] netfilter: xt_hashlimit: fix a possible memory leak in htable_create() [ Upstream commit b4e955e9f372035361fbc6f07b21fe2cc6a5be4a ] In the htable_create(), hinfo is allocated by vmalloc() So that if error occurred, hinfo should be freed. Fixes: 11d5f15723c9 ("netfilter: xt_hashlimit: Create revision 2 to support higher pps rates") Signed-off-by: Taehee Yoo Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/xt_hashlimit.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index 0c034597b9b87..fe8e8a1622b59 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -295,9 +295,10 @@ static int htable_create(struct net *net, struct hashlimit_cfg3 *cfg, /* copy match config into hashtable config */ ret = cfg_copy(&hinfo->cfg, (void *)cfg, 3); - - if (ret) + if (ret) { + vfree(hinfo); return ret; + } hinfo->cfg.size = size; if (hinfo->cfg.max == 0) @@ -814,7 +815,6 @@ hashlimit_mt_v1(const struct sk_buff *skb, struct xt_action_param *par) int ret; ret = cfg_copy(&cfg, (void *)&info->cfg, 1); - if (ret) return ret; @@ -830,7 +830,6 @@ hashlimit_mt_v2(const struct sk_buff *skb, struct xt_action_param *par) int ret; ret = cfg_copy(&cfg, (void *)&info->cfg, 2); - if (ret) return ret; @@ -920,7 +919,6 @@ static int hashlimit_mt_check_v1(const struct xt_mtchk_param *par) return ret; ret = cfg_copy(&cfg, (void *)&info->cfg, 1); - if (ret) return ret; @@ -939,7 +937,6 @@ static int hashlimit_mt_check_v2(const struct xt_mtchk_param *par) return ret; ret = cfg_copy(&cfg, (void *)&info->cfg, 2); - if (ret) return ret; -- GitLab From ca0bcef7c60ba2cb12ca99e706adc3f024a4f8a7 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 15 Nov 2018 10:44:57 +0800 Subject: [PATCH 2131/2269] hwmon: (w83795) temp4_type has writable permission [ Upstream commit 09aaf6813cfca4c18034fda7a43e68763f34abb1 ] Both datasheet and comments of store_temp_mode() tell us that temp1~4_type is writable, so fix it. Signed-off-by: Yao Wang Signed-off-by: Huacai Chen Fixes: 39deb6993e7c (" hwmon: (w83795) Simplify temperature sensor type handling") Signed-off-by: Guenter Roeck Signed-off-by: Sasha Levin --- drivers/hwmon/w83795.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwmon/w83795.c b/drivers/hwmon/w83795.c index 49276bbdac3dd..1bb80f992aa86 100644 --- a/drivers/hwmon/w83795.c +++ b/drivers/hwmon/w83795.c @@ -1691,7 +1691,7 @@ store_sf_setup(struct device *dev, struct device_attribute *attr, * somewhere else in the code */ #define SENSOR_ATTR_TEMP(index) { \ - SENSOR_ATTR_2(temp##index##_type, S_IRUGO | (index < 4 ? S_IWUSR : 0), \ + SENSOR_ATTR_2(temp##index##_type, S_IRUGO | (index < 5 ? S_IWUSR : 0), \ show_temp_mode, store_temp_mode, NOT_USED, index - 1), \ SENSOR_ATTR_2(temp##index##_input, S_IRUGO, show_temp, \ NULL, TEMP_READ, index - 1), \ -- GitLab From d36cc6073623904cfc937c649370b56bc0e7dd88 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 1 Nov 2018 18:00:01 +0100 Subject: [PATCH 2132/2269] perf tools: Restore proper cwd on return from mnt namespace [ Upstream commit b01c1f69c8660eaeab7d365cd570103c5c073a02 ] When reporting on 'record' server we try to retrieve/use the mnt namespace of the profiled tasks. We use following API with cookie to hold the return namespace, roughly: nsinfo__mountns_enter(struct nsinfo *nsi, struct nscookie *nc) setns(newns, 0); ... new ns related open.. ... nsinfo__mountns_exit(struct nscookie *nc) setns(nc->oldns) Once finished we setns to old namespace, which also sets the current working directory (cwd) to "/", trashing the cwd we had. This is mostly fine, because we use absolute paths almost everywhere, but it screws up 'perf diff': # perf diff failed to open perf.data: No such file or directory (try 'perf record' first) ... Adding the current working directory to be part of the cookie and restoring it in the nsinfo__mountns_exit call. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Krister Johansen Cc: Namhyung Kim Cc: Peter Zijlstra Fixes: 843ff37bb59e ("perf symbols: Find symbols in different mount namespace") Link: http://lkml.kernel.org/r/20181101170001.30019-1-jolsa@kernel.org [ No need to check for NULL args for free(), use zfree() for struct members ] Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/util/namespaces.c | 17 +++++++++++++++-- tools/perf/util/namespaces.h | 1 + 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/namespaces.c b/tools/perf/util/namespaces.c index 1ef0049860a8b..eadc7ddacbf68 100644 --- a/tools/perf/util/namespaces.c +++ b/tools/perf/util/namespaces.c @@ -17,6 +17,7 @@ #include #include #include +#include struct namespaces *namespaces__new(struct namespaces_event *event) { @@ -185,6 +186,7 @@ void nsinfo__mountns_enter(struct nsinfo *nsi, char curpath[PATH_MAX]; int oldns = -1; int newns = -1; + char *oldcwd = NULL; if (nc == NULL) return; @@ -198,9 +200,13 @@ void nsinfo__mountns_enter(struct nsinfo *nsi, if (snprintf(curpath, PATH_MAX, "/proc/self/ns/mnt") >= PATH_MAX) return; + oldcwd = get_current_dir_name(); + if (!oldcwd) + return; + oldns = open(curpath, O_RDONLY); if (oldns < 0) - return; + goto errout; newns = open(nsi->mntns_path, O_RDONLY); if (newns < 0) @@ -209,11 +215,13 @@ void nsinfo__mountns_enter(struct nsinfo *nsi, if (setns(newns, CLONE_NEWNS) < 0) goto errout; + nc->oldcwd = oldcwd; nc->oldns = oldns; nc->newns = newns; return; errout: + free(oldcwd); if (oldns > -1) close(oldns); if (newns > -1) @@ -222,11 +230,16 @@ errout: void nsinfo__mountns_exit(struct nscookie *nc) { - if (nc == NULL || nc->oldns == -1 || nc->newns == -1) + if (nc == NULL || nc->oldns == -1 || nc->newns == -1 || !nc->oldcwd) return; setns(nc->oldns, CLONE_NEWNS); + if (nc->oldcwd) { + WARN_ON_ONCE(chdir(nc->oldcwd)); + zfree(&nc->oldcwd); + } + if (nc->oldns > -1) { close(nc->oldns); nc->oldns = -1; diff --git a/tools/perf/util/namespaces.h b/tools/perf/util/namespaces.h index 05d82601c9a62..23584a6dd0489 100644 --- a/tools/perf/util/namespaces.h +++ b/tools/perf/util/namespaces.h @@ -36,6 +36,7 @@ struct nsinfo { struct nscookie { int oldns; int newns; + char *oldcwd; }; int nsinfo__init(struct nsinfo *nsi); -- GitLab From 7d14117bd218a81f9f174af94b7c4f26aca16ad3 Mon Sep 17 00:00:00 2001 From: Trent Piepho Date: Mon, 5 Nov 2018 18:11:36 +0000 Subject: [PATCH 2133/2269] PCI: imx6: Fix link training status detection in link up check [ Upstream commit 68bc10bf992180f269816ff3d22eb30383138577 ] This bug was introduced in the interaction for two commits on either branch of the merge commit 562df5c8521e ("Merge branch 'pci/host-designware' into next"). Commit 4d107d3b5a68 ("PCI: imx6: Move link up check into imx6_pcie_wait_for_link()"), changed imx6_pcie_wait_for_link() to poll the link status register directly, checking for link up and not training, and made imx6_pcie_link_up() only check the link up bit (once, not a polling loop). While commit 886bc5ceb5cc ("PCI: designware: Add generic dw_pcie_wait_for_link()"), replaced the loop in imx6_pcie_wait_for_link() with a call to a new dwc core function, which polled imx6_pcie_link_up(), which still checked both link up and not training in a loop. When these two commits were merged, the version of imx6_pcie_wait_for_link() from 886bc5ceb5cc was kept, which eliminated the link training check placed there by 4d107d3b5a68. However, the version of imx6_pcie_link_up() from 4d107d3b5a68 was kept, which eliminated the link training check that had been there and was moved to imx6_pcie_wait_for_link(). The result was the link training check got lost for the imx6 driver. Eliminate imx6_pcie_link_up() so that the default handler, dw_pcie_link_up(), is used instead. The default handler has the correct code, which checks for link up and also that it still is not training, fixing the regression. Fixes: 562df5c8521e ("Merge branch 'pci/host-designware' into next") Signed-off-by: Trent Piepho [lorenzo.pieralisi@arm.com: rewrote the commit log] Signed-off-by: Lorenzo Pieralisi Reviewed-by: Lucas Stach Cc: Bjorn Helgaas Cc: Joao Pinto Cc: Lorenzo Pieralisi Cc: Richard Zhu Signed-off-by: Sasha Levin --- drivers/pci/dwc/pci-imx6.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/drivers/pci/dwc/pci-imx6.c b/drivers/pci/dwc/pci-imx6.c index b73483534a5ba..1f1069b70e45a 100644 --- a/drivers/pci/dwc/pci-imx6.c +++ b/drivers/pci/dwc/pci-imx6.c @@ -83,8 +83,6 @@ struct imx6_pcie { #define PCIE_PL_PFLR_FORCE_LINK (1 << 15) #define PCIE_PHY_DEBUG_R0 (PL_OFFSET + 0x28) #define PCIE_PHY_DEBUG_R1 (PL_OFFSET + 0x2c) -#define PCIE_PHY_DEBUG_R1_XMLH_LINK_IN_TRAINING (1 << 29) -#define PCIE_PHY_DEBUG_R1_XMLH_LINK_UP (1 << 4) #define PCIE_PHY_CTRL (PL_OFFSET + 0x114) #define PCIE_PHY_CTRL_DATA_LOC 0 @@ -653,12 +651,6 @@ static int imx6_pcie_host_init(struct pcie_port *pp) return 0; } -static int imx6_pcie_link_up(struct dw_pcie *pci) -{ - return dw_pcie_readl_dbi(pci, PCIE_PHY_DEBUG_R1) & - PCIE_PHY_DEBUG_R1_XMLH_LINK_UP; -} - static const struct dw_pcie_host_ops imx6_pcie_host_ops = { .host_init = imx6_pcie_host_init, }; @@ -701,7 +693,7 @@ static int imx6_add_pcie_port(struct imx6_pcie *imx6_pcie, } static const struct dw_pcie_ops dw_pcie_ops = { - .link_up = imx6_pcie_link_up, + /* No special ops needed, but pcie-designware still expects this struct */ }; static int imx6_pcie_probe(struct platform_device *pdev) -- GitLab From de21975e71fe3fbb96b42fe6155d8a74fc74e4ca Mon Sep 17 00:00:00 2001 From: Artem Savkov Date: Tue, 20 Nov 2018 11:52:15 -0600 Subject: [PATCH 2134/2269] objtool: Fix double-free in .cold detection error path [ Upstream commit 0b9301fb632f7111a3293a30cc5b20f1b82ed08d ] If read_symbols() fails during second list traversal (the one dealing with ".cold" subfunctions) it frees the symbol, but never deletes it from the list/hash_table resulting in symbol being freed again in elf_close(). Fix it by just returning an error, leaving cleanup to elf_close(). Signed-off-by: Artem Savkov Signed-off-by: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 13810435b9a7 ("objtool: Support GCC 8's cold subfunctions") Link: http://lkml.kernel.org/r/beac5a9b7da9e8be90223459dcbe07766ae437dd.1542736240.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin --- tools/objtool/elf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index 0d1acb704f641..3616d626991e9 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -312,7 +312,7 @@ static int read_symbols(struct elf *elf) if (!pfunc) { WARN("%s(): can't find parent function", sym->name); - goto err; + return -1; } sym->pfunc = pfunc; -- GitLab From 5ce4164fff6735d7f26b3e465deb7133cea558e9 Mon Sep 17 00:00:00 2001 From: Artem Savkov Date: Tue, 20 Nov 2018 11:52:16 -0600 Subject: [PATCH 2135/2269] objtool: Fix segfault in .cold detection with -ffunction-sections [ Upstream commit 22566c1603030f0a036ad564634b064ad1a55db2 ] Because find_symbol_by_name() traverses the same lists as read_symbols(), changing sym->name in place without copying it affects the result of find_symbol_by_name(). In the case where a ".cold" function precedes its parent in sec->symbol_list, it can result in a function being considered a parent of itself. This leads to function length being set to 0 and other consequent side-effects including a segfault in add_switch_table(). The effects of this bug are only visible when building with -ffunction-sections in KCFLAGS. Fix by copying the search string instead of modifying it in place. Signed-off-by: Artem Savkov Signed-off-by: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 13810435b9a7 ("objtool: Support GCC 8's cold subfunctions") Link: http://lkml.kernel.org/r/910abd6b5a4945130fd44f787c24e07b9e07c8da.1542736240.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin --- tools/objtool/elf.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index 3616d626991e9..dd4ed7c3c0626 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -31,6 +31,8 @@ #include "elf.h" #include "warn.h" +#define MAX_NAME_LEN 128 + struct section *find_section_by_name(struct elf *elf, const char *name) { struct section *sec; @@ -298,6 +300,8 @@ static int read_symbols(struct elf *elf) /* Create parent/child links for any cold subfunctions */ list_for_each_entry(sec, &elf->sections, list) { list_for_each_entry(sym, &sec->symbol_list, list) { + char pname[MAX_NAME_LEN + 1]; + size_t pnamelen; if (sym->type != STT_FUNC) continue; sym->pfunc = sym->cfunc = sym; @@ -305,9 +309,16 @@ static int read_symbols(struct elf *elf) if (!coldstr) continue; - coldstr[0] = '\0'; - pfunc = find_symbol_by_name(elf, sym->name); - coldstr[0] = '.'; + pnamelen = coldstr - sym->name; + if (pnamelen > MAX_NAME_LEN) { + WARN("%s(): parent function name exceeds maximum length of %d characters", + sym->name, MAX_NAME_LEN); + return -1; + } + + strncpy(pname, sym->name, pnamelen); + pname[pnamelen] = '\0'; + pfunc = find_symbol_by_name(elf, pname); if (!pfunc) { WARN("%s(): can't find parent function", -- GitLab From b254f11cc9304b757357460643003ea7087e6c5c Mon Sep 17 00:00:00 2001 From: Romain Izard Date: Tue, 20 Nov 2018 17:57:37 +0100 Subject: [PATCH 2136/2269] ARM: dts: at91: sama5d2: use the divided clock for SMC [ Upstream commit 4ab7ca092c3c7ac8b16aa28eba723a8868f82f14 ] The SAMA5D2 is different from SAMA5D3 and SAMA5D4, as there are two different clocks for the peripherals in the SoC. The Static Memory controller is connected to the divided master clock. Unfortunately, the device tree does not correctly show this and uses the master clock directly. This clock is then used by the code for the NAND controller to calculate the timings for the controller, and we end up with slow NAND Flash access. Fix the device tree, and the performance of Flash access is improved. Signed-off-by: Romain Izard Signed-off-by: Alexandre Belloni Signed-off-by: Sasha Levin --- arch/arm/boot/dts/sama5d2.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/sama5d2.dtsi b/arch/arm/boot/dts/sama5d2.dtsi index b1a26b42d1904..a8e4b89097d9c 100644 --- a/arch/arm/boot/dts/sama5d2.dtsi +++ b/arch/arm/boot/dts/sama5d2.dtsi @@ -308,7 +308,7 @@ 0x1 0x0 0x60000000 0x10000000 0x2 0x0 0x70000000 0x10000000 0x3 0x0 0x80000000 0x10000000>; - clocks = <&mck>; + clocks = <&h32ck>; status = "disabled"; nand_controller: nand-controller { -- GitLab From 67080eb258b4329ff55eefcd11bdee295ce905e5 Mon Sep 17 00:00:00 2001 From: Robbie Ko Date: Wed, 14 Nov 2018 18:32:37 +0000 Subject: [PATCH 2137/2269] Btrfs: send, fix infinite loop due to directory rename dependencies [ Upstream commit a4390aee72713d9e73f1132bcdeb17d72fbbf974 ] When doing an incremental send, due to the need of delaying directory move (rename) operations we can end up in infinite loop at apply_children_dir_moves(). An example scenario that triggers this problem is described below, where directory names correspond to the numbers of their respective inodes. Parent snapshot: . |--- 261/ |--- 271/ |--- 266/ |--- 259/ |--- 260/ | |--- 267 | |--- 264/ | |--- 258/ | |--- 257/ | |--- 265/ |--- 268/ |--- 269/ | |--- 262/ | |--- 270/ |--- 272/ | |--- 263/ | |--- 275/ | |--- 274/ |--- 273/ Send snapshot: . |-- 275/ |-- 274/ |-- 273/ |-- 262/ |-- 269/ |-- 258/ |-- 271/ |-- 268/ |-- 267/ |-- 270/ |-- 259/ | |-- 265/ | |-- 272/ |-- 257/ |-- 260/ |-- 264/ |-- 263/ |-- 261/ |-- 266/ When processing inode 257 we delay its move (rename) operation because its new parent in the send snapshot, inode 272, was not yet processed. Then when processing inode 272, we delay the move operation for that inode because inode 274 is its ancestor in the send snapshot. Finally we delay the move operation for inode 274 when processing it because inode 275 is its new parent in the send snapshot and was not yet moved. When finishing processing inode 275, we start to do the move operations that were previously delayed (at apply_children_dir_moves()), resulting in the following iterations: 1) We issue the move operation for inode 274; 2) Because inode 262 depended on the move operation of inode 274 (it was delayed because 274 is its ancestor in the send snapshot), we issue the move operation for inode 262; 3) We issue the move operation for inode 272, because it was delayed by inode 274 too (ancestor of 272 in the send snapshot); 4) We issue the move operation for inode 269 (it was delayed by 262); 5) We issue the move operation for inode 257 (it was delayed by 272); 6) We issue the move operation for inode 260 (it was delayed by 272); 7) We issue the move operation for inode 258 (it was delayed by 269); 8) We issue the move operation for inode 264 (it was delayed by 257); 9) We issue the move operation for inode 271 (it was delayed by 258); 10) We issue the move operation for inode 263 (it was delayed by 264); 11) We issue the move operation for inode 268 (it was delayed by 271); 12) We verify if we can issue the move operation for inode 270 (it was delayed by 271). We detect a path loop in the current state, because inode 267 needs to be moved first before we can issue the move operation for inode 270. So we delay again the move operation for inode 270, this time we will attempt to do it after inode 267 is moved; 13) We issue the move operation for inode 261 (it was delayed by 263); 14) We verify if we can issue the move operation for inode 266 (it was delayed by 263). We detect a path loop in the current state, because inode 270 needs to be moved first before we can issue the move operation for inode 266. So we delay again the move operation for inode 266, this time we will attempt to do it after inode 270 is moved (its move operation was delayed in step 12); 15) We issue the move operation for inode 267 (it was delayed by 268); 16) We verify if we can issue the move operation for inode 266 (it was delayed by 270). We detect a path loop in the current state, because inode 270 needs to be moved first before we can issue the move operation for inode 266. So we delay again the move operation for inode 266, this time we will attempt to do it after inode 270 is moved (its move operation was delayed in step 12). So here we added again the same delayed move operation that we added in step 14; 17) We attempt again to see if we can issue the move operation for inode 266, and as in step 16, we realize we can not due to a path loop in the current state due to a dependency on inode 270. Again we delay inode's 266 rename to happen after inode's 270 move operation, adding the same dependency to the empty stack that we did in steps 14 and 16. The next iteration will pick the same move dependency on the stack (the only entry) and realize again there is still a path loop and then again the same dependency to the stack, over and over, resulting in an infinite loop. So fix this by preventing adding the same move dependency entries to the stack by removing each pending move record from the red black tree of pending moves. This way the next call to get_pending_dir_moves() will not return anything for the current parent inode. A test case for fstests, with this reproducer, follows soon. Signed-off-by: Robbie Ko Reviewed-by: Filipe Manana [Wrote changelog with example and more clear explanation] Signed-off-by: Filipe Manana Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/send.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index baf5a4cd7ffcf..3f22af96d63bd 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -3354,7 +3354,8 @@ static void free_pending_move(struct send_ctx *sctx, struct pending_dir_move *m) kfree(m); } -static void tail_append_pending_moves(struct pending_dir_move *moves, +static void tail_append_pending_moves(struct send_ctx *sctx, + struct pending_dir_move *moves, struct list_head *stack) { if (list_empty(&moves->list)) { @@ -3365,6 +3366,10 @@ static void tail_append_pending_moves(struct pending_dir_move *moves, list_add_tail(&moves->list, stack); list_splice_tail(&list, stack); } + if (!RB_EMPTY_NODE(&moves->node)) { + rb_erase(&moves->node, &sctx->pending_dir_moves); + RB_CLEAR_NODE(&moves->node); + } } static int apply_children_dir_moves(struct send_ctx *sctx) @@ -3379,7 +3384,7 @@ static int apply_children_dir_moves(struct send_ctx *sctx) return 0; INIT_LIST_HEAD(&stack); - tail_append_pending_moves(pm, &stack); + tail_append_pending_moves(sctx, pm, &stack); while (!list_empty(&stack)) { pm = list_first_entry(&stack, struct pending_dir_move, list); @@ -3390,7 +3395,7 @@ static int apply_children_dir_moves(struct send_ctx *sctx) goto out; pm = get_pending_dir_moves(sctx, parent_ino); if (pm) - tail_append_pending_moves(pm, &stack); + tail_append_pending_moves(sctx, pm, &stack); } return 0; -- GitLab From 9692eefbee777b18e22998f0dc2852cf8f34174e Mon Sep 17 00:00:00 2001 From: Majd Dibbiny Date: Mon, 5 Nov 2018 08:07:37 +0200 Subject: [PATCH 2138/2269] RDMA/mlx5: Fix fence type for IB_WR_LOCAL_INV WR [ Upstream commit 074fca3a18e7e1e0d4d7dcc9d7badc43b90232f4 ] Currently, for IB_WR_LOCAL_INV WR, when the next fence is None, the current fence will be SMALL instead of Normal Fence. Without this patch krping doesn't work on CX-5 devices and throws following error: The error messages are from CX5 driver are: (from server side) [ 710.434014] mlx5_0:dump_cqe:278:(pid 2712): dump error cqe [ 710.434016] 00000000 00000000 00000000 00000000 [ 710.434016] 00000000 00000000 00000000 00000000 [ 710.434017] 00000000 00000000 00000000 00000000 [ 710.434018] 00000000 93003204 100000b8 000524d2 [ 710.434019] krping: cq completion failed with wr_id 0 status 4 opcode 128 vender_err 32 Fixed the logic to set the correct fence type. Fixes: 6e8484c5cf07 ("RDMA/mlx5: set UMR wqe fence according to HCA cap") Signed-off-by: Majd Dibbiny Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/hw/mlx5/qp.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index dfc1900551673..964c3a0bbf166 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -3928,17 +3928,18 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, goto out; } - if (wr->opcode == IB_WR_LOCAL_INV || - wr->opcode == IB_WR_REG_MR) { + if (wr->opcode == IB_WR_REG_MR) { fence = dev->umr_fence; next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL; - } else if (wr->send_flags & IB_SEND_FENCE) { - if (qp->next_fence) - fence = MLX5_FENCE_MODE_SMALL_AND_FENCE; - else - fence = MLX5_FENCE_MODE_FENCE; - } else { - fence = qp->next_fence; + } else { + if (wr->send_flags & IB_SEND_FENCE) { + if (qp->next_fence) + fence = MLX5_FENCE_MODE_SMALL_AND_FENCE; + else + fence = MLX5_FENCE_MODE_FENCE; + } else { + fence = qp->next_fence; + } } switch (ibqp->qp_type) { -- GitLab From a625d3e3ce05d05f8f2e1e5d4546d12c94feec4d Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Thu, 15 Nov 2018 09:49:38 -0800 Subject: [PATCH 2139/2269] RDMA/rdmavt: Fix rvt_create_ah function signature [ Upstream commit 4f32fb921b153ae9ea280e02a3e91509fffc03d3 ] rdmavt uses a crazy system that looses the type checking when assinging functions to struct ib_device function pointers. Because of this the signature to this function was not changed when the below commit revised things. Fix the signature so we are not calling a function pointer with a mismatched signature. Fixes: 477864c8fcd9 ("IB/core: Let create_ah return extended response to user") Signed-off-by: Kamal Heib Reviewed-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/sw/rdmavt/ah.c | 4 +++- drivers/infiniband/sw/rdmavt/ah.h | 3 ++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/sw/rdmavt/ah.c b/drivers/infiniband/sw/rdmavt/ah.c index ba3639a0d77cf..48ea5b8207f0f 100644 --- a/drivers/infiniband/sw/rdmavt/ah.c +++ b/drivers/infiniband/sw/rdmavt/ah.c @@ -91,13 +91,15 @@ EXPORT_SYMBOL(rvt_check_ah); * rvt_create_ah - create an address handle * @pd: the protection domain * @ah_attr: the attributes of the AH + * @udata: pointer to user's input output buffer information. * * This may be called from interrupt context. * * Return: newly allocated ah */ struct ib_ah *rvt_create_ah(struct ib_pd *pd, - struct rdma_ah_attr *ah_attr) + struct rdma_ah_attr *ah_attr, + struct ib_udata *udata) { struct rvt_ah *ah; struct rvt_dev_info *dev = ib_to_rvt(pd->device); diff --git a/drivers/infiniband/sw/rdmavt/ah.h b/drivers/infiniband/sw/rdmavt/ah.h index 16105af991890..25271b48a6830 100644 --- a/drivers/infiniband/sw/rdmavt/ah.h +++ b/drivers/infiniband/sw/rdmavt/ah.h @@ -51,7 +51,8 @@ #include struct ib_ah *rvt_create_ah(struct ib_pd *pd, - struct rdma_ah_attr *ah_attr); + struct rdma_ah_attr *ah_attr, + struct ib_udata *udata); int rvt_destroy_ah(struct ib_ah *ibah); int rvt_modify_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr); int rvt_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr); -- GitLab From 4a3fa2a5fed133fd2f26e5f37acdb23ad6e0900c Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Wed, 14 Nov 2018 13:06:21 +0200 Subject: [PATCH 2140/2269] ASoC: omap-mcbsp: Fix latency value calculation for pm_qos [ Upstream commit dd2f52d8991af9fe0928d59ec502ba52be7bc38d ] The latency number is in usec for the pm_qos. Correct the calculation to give us the time in usec Signed-off-by: Peter Ujfalusi Acked-by: Jarkko Nikula Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/omap/omap-mcbsp.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sound/soc/omap/omap-mcbsp.c b/sound/soc/omap/omap-mcbsp.c index 6b40bdbef3369..47c2ed5ca492d 100644 --- a/sound/soc/omap/omap-mcbsp.c +++ b/sound/soc/omap/omap-mcbsp.c @@ -308,9 +308,9 @@ static int omap_mcbsp_dai_hw_params(struct snd_pcm_substream *substream, pkt_size = channels; } - latency = ((((buffer_size - pkt_size) / channels) * 1000) - / (params->rate_num / params->rate_den)); - + latency = (buffer_size - pkt_size) / channels; + latency = latency * USEC_PER_SEC / + (params->rate_num / params->rate_den); mcbsp->latency[substream->stream] = latency; omap_mcbsp_set_threshold(substream, pkt_size); -- GitLab From 997a8d6c47fae788131e3b113841169d89ecf803 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Wed, 14 Nov 2018 13:06:22 +0200 Subject: [PATCH 2141/2269] ASoC: omap-mcpdm: Add pm_qos handling to avoid under/overruns with CPU_IDLE [ Upstream commit 373a500e34aea97971c9d71e45edad458d3da98f ] We need to block sleep states which would require longer time to leave than the time the DMA must react to the DMA request in order to keep the FIFO serviced without under of overrun. Signed-off-by: Peter Ujfalusi Acked-by: Jarkko Nikula Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/omap/omap-mcpdm.c | 43 ++++++++++++++++++++++++++++++++++++- 1 file changed, 42 insertions(+), 1 deletion(-) diff --git a/sound/soc/omap/omap-mcpdm.c b/sound/soc/omap/omap-mcpdm.c index 64609c77a79d1..44ffeb71cd1d2 100644 --- a/sound/soc/omap/omap-mcpdm.c +++ b/sound/soc/omap/omap-mcpdm.c @@ -54,6 +54,8 @@ struct omap_mcpdm { unsigned long phys_base; void __iomem *io_base; int irq; + struct pm_qos_request pm_qos_req; + int latency[2]; struct mutex mutex; @@ -277,6 +279,9 @@ static void omap_mcpdm_dai_shutdown(struct snd_pcm_substream *substream, struct snd_soc_dai *dai) { struct omap_mcpdm *mcpdm = snd_soc_dai_get_drvdata(dai); + int tx = (substream->stream == SNDRV_PCM_STREAM_PLAYBACK); + int stream1 = tx ? SNDRV_PCM_STREAM_PLAYBACK : SNDRV_PCM_STREAM_CAPTURE; + int stream2 = tx ? SNDRV_PCM_STREAM_CAPTURE : SNDRV_PCM_STREAM_PLAYBACK; mutex_lock(&mcpdm->mutex); @@ -289,6 +294,14 @@ static void omap_mcpdm_dai_shutdown(struct snd_pcm_substream *substream, } } + if (mcpdm->latency[stream2]) + pm_qos_update_request(&mcpdm->pm_qos_req, + mcpdm->latency[stream2]); + else if (mcpdm->latency[stream1]) + pm_qos_remove_request(&mcpdm->pm_qos_req); + + mcpdm->latency[stream1] = 0; + mutex_unlock(&mcpdm->mutex); } @@ -300,7 +313,7 @@ static int omap_mcpdm_dai_hw_params(struct snd_pcm_substream *substream, int stream = substream->stream; struct snd_dmaengine_dai_dma_data *dma_data; u32 threshold; - int channels; + int channels, latency; int link_mask = 0; channels = params_channels(params); @@ -340,14 +353,25 @@ static int omap_mcpdm_dai_hw_params(struct snd_pcm_substream *substream, dma_data->maxburst = (MCPDM_DN_THRES_MAX - threshold) * channels; + latency = threshold; } else { /* If playback is not running assume a stereo stream to come */ if (!mcpdm->config[!stream].link_mask) mcpdm->config[!stream].link_mask = (0x3 << 3); dma_data->maxburst = threshold * channels; + latency = (MCPDM_DN_THRES_MAX - threshold); } + /* + * The DMA must act to a DMA request within latency time (usec) to avoid + * under/overflow + */ + mcpdm->latency[stream] = latency * USEC_PER_SEC / params_rate(params); + + if (!mcpdm->latency[stream]) + mcpdm->latency[stream] = 10; + /* Check if we need to restart McPDM with this stream */ if (mcpdm->config[stream].link_mask && mcpdm->config[stream].link_mask != link_mask) @@ -362,6 +386,20 @@ static int omap_mcpdm_prepare(struct snd_pcm_substream *substream, struct snd_soc_dai *dai) { struct omap_mcpdm *mcpdm = snd_soc_dai_get_drvdata(dai); + struct pm_qos_request *pm_qos_req = &mcpdm->pm_qos_req; + int tx = (substream->stream == SNDRV_PCM_STREAM_PLAYBACK); + int stream1 = tx ? SNDRV_PCM_STREAM_PLAYBACK : SNDRV_PCM_STREAM_CAPTURE; + int stream2 = tx ? SNDRV_PCM_STREAM_CAPTURE : SNDRV_PCM_STREAM_PLAYBACK; + int latency = mcpdm->latency[stream2]; + + /* Prevent omap hardware from hitting off between FIFO fills */ + if (!latency || mcpdm->latency[stream1] < latency) + latency = mcpdm->latency[stream1]; + + if (pm_qos_request_active(pm_qos_req)) + pm_qos_update_request(pm_qos_req, latency); + else if (latency) + pm_qos_add_request(pm_qos_req, PM_QOS_CPU_DMA_LATENCY, latency); if (!omap_mcpdm_active(mcpdm)) { omap_mcpdm_start(mcpdm); @@ -423,6 +461,9 @@ static int omap_mcpdm_remove(struct snd_soc_dai *dai) free_irq(mcpdm->irq, (void *)mcpdm); pm_runtime_disable(mcpdm->dev); + if (pm_qos_request_active(&mcpdm->pm_qos_req)) + pm_qos_remove_request(&mcpdm->pm_qos_req); + return 0; } -- GitLab From bdafd18e24c8fcf704950dc48849e271cceee19f Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Wed, 14 Nov 2018 13:06:23 +0200 Subject: [PATCH 2142/2269] ASoC: omap-dmic: Add pm_qos handling to avoid overruns with CPU_IDLE [ Upstream commit ffdcc3638c58d55a6fa68b6e5dfd4fb4109652eb ] We need to block sleep states which would require longer time to leave than the time the DMA must react to the DMA request in order to keep the FIFO serviced without overrun. Signed-off-by: Peter Ujfalusi Acked-by: Jarkko Nikula Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/omap/omap-dmic.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/sound/soc/omap/omap-dmic.c b/sound/soc/omap/omap-dmic.c index 09db2aec12a30..776e809a8aab0 100644 --- a/sound/soc/omap/omap-dmic.c +++ b/sound/soc/omap/omap-dmic.c @@ -48,6 +48,8 @@ struct omap_dmic { struct device *dev; void __iomem *io_base; struct clk *fclk; + struct pm_qos_request pm_qos_req; + int latency; int fclk_freq; int out_freq; int clk_div; @@ -124,6 +126,8 @@ static void omap_dmic_dai_shutdown(struct snd_pcm_substream *substream, mutex_lock(&dmic->mutex); + pm_qos_remove_request(&dmic->pm_qos_req); + if (!dai->active) dmic->active = 0; @@ -226,6 +230,8 @@ static int omap_dmic_dai_hw_params(struct snd_pcm_substream *substream, /* packet size is threshold * channels */ dma_data = snd_soc_dai_get_dma_data(dai, substream); dma_data->maxburst = dmic->threshold * channels; + dmic->latency = (OMAP_DMIC_THRES_MAX - dmic->threshold) * USEC_PER_SEC / + params_rate(params); return 0; } @@ -236,6 +242,9 @@ static int omap_dmic_dai_prepare(struct snd_pcm_substream *substream, struct omap_dmic *dmic = snd_soc_dai_get_drvdata(dai); u32 ctrl; + if (pm_qos_request_active(&dmic->pm_qos_req)) + pm_qos_update_request(&dmic->pm_qos_req, dmic->latency); + /* Configure uplink threshold */ omap_dmic_write(dmic, OMAP_DMIC_FIFO_CTRL_REG, dmic->threshold); -- GitLab From 42cb0b9efc9e18e8b81e9a296082381fc8b70071 Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Fri, 23 Nov 2018 15:56:33 +0800 Subject: [PATCH 2143/2269] exportfs: do not read dentry after free [ Upstream commit 2084ac6c505a58f7efdec13eba633c6aaa085ca5 ] The function dentry_connected calls dput(dentry) to drop the previously acquired reference to dentry. In this case, dentry can be released. After that, IS_ROOT(dentry) checks the condition (dentry == dentry->d_parent), which may result in a use-after-free bug. This patch directly compares dentry with its parent obtained before dropping the reference. Fixes: a056cc8934c("exportfs: stop retrying once we race with rename/remove") Signed-off-by: Pan Bian Signed-off-by: Al Viro Signed-off-by: Sasha Levin --- fs/exportfs/expfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index 329a5d1038461..c22cc9d2a5c9a 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c @@ -77,7 +77,7 @@ static bool dentry_connected(struct dentry *dentry) struct dentry *parent = dget_parent(dentry); dput(dentry); - if (IS_ROOT(dentry)) { + if (dentry == parent) { dput(parent); return false; } -- GitLab From 9209043b29881a7ede5e34610c4feadd29e59e40 Mon Sep 17 00:00:00 2001 From: Martynas Pumputis Date: Fri, 23 Nov 2018 17:43:26 +0100 Subject: [PATCH 2144/2269] bpf: fix check of allowed specifiers in bpf_trace_printk [ Upstream commit 1efb6ee3edea57f57f9fb05dba8dcb3f7333f61f ] A format string consisting of "%p" or "%s" followed by an invalid specifier (e.g. "%p%\n" or "%s%") could pass the check which would make format_decode (lib/vsprintf.c) to warn. Fixes: 9c959c863f82 ("tracing: Allow BPF programs to call bpf_trace_printk()") Reported-by: syzbot+1ec5c5ec949c4adaa0c4@syzkaller.appspotmail.com Signed-off-by: Martynas Pumputis Signed-off-by: Daniel Borkmann Signed-off-by: Sasha Levin --- kernel/trace/bpf_trace.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 6350f64d5aa40..f9dd8fd055a6e 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -161,11 +161,13 @@ BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1, i++; } else if (fmt[i] == 'p' || fmt[i] == 's') { mod[fmt_cnt]++; - i++; - if (!isspace(fmt[i]) && !ispunct(fmt[i]) && fmt[i] != 0) + /* disallow any further format extensions */ + if (fmt[i + 1] != 0 && + !isspace(fmt[i + 1]) && + !ispunct(fmt[i + 1])) return -EINVAL; fmt_cnt++; - if (fmt[i - 1] == 's') { + if (fmt[i] == 's') { if (str_seen) /* allow only one '%s' per fmt string */ return -EINVAL; -- GitLab From 57dea3802f389e38b4e3f4d439e0540f00359205 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 15 Nov 2018 15:14:30 +0800 Subject: [PATCH 2145/2269] ipvs: call ip_vs_dst_notifier earlier than ipv6_dev_notf [ Upstream commit 2a31e4bd9ad255ee40809b5c798c4b1c2b09703b ] ip_vs_dst_event is supposed to clean up all dst used in ipvs' destinations when a net dev is going down. But it works only when the dst's dev is the same as the dev from the event. Now with the same priority but late registration, ip_vs_dst_notifier is always called later than ipv6_dev_notf where the dst's dev is set to lo for NETDEV_DOWN event. As the dst's dev lo is not the same as the dev from the event in ip_vs_dst_event, ip_vs_dst_notifier doesn't actually work. Also as these dst have to wait for dest_trash_timer to clean them up. It would cause some non-permanent kernel warnings: unregister_netdevice: waiting for br0 to become free. Usage count = 3 To fix it, call ip_vs_dst_notifier earlier than ipv6_dev_notf by increasing its priority to ADDRCONF_NOTIFY_PRIORITY + 5. Note that for ipv4 route fib_netdev_notifier doesn't set dst's dev to lo in NETDEV_DOWN event, so this fix is only needed when IP_VS_IPV6 is defined. Fixes: 7a4f0761fce3 ("IPVS: init and cleanup restructuring") Reported-by: Li Shuang Signed-off-by: Xin Long Acked-by: Julian Anastasov Acked-by: Simon Horman Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/ipvs/ip_vs_ctl.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 327ebe786eebd..2f45c3ce77ef2 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -4012,6 +4012,9 @@ static void __net_exit ip_vs_control_net_cleanup_sysctl(struct netns_ipvs *ipvs) static struct notifier_block ip_vs_dst_notifier = { .notifier_call = ip_vs_dst_event, +#ifdef CONFIG_IP_VS_IPV6 + .priority = ADDRCONF_NOTIFY_PRIORITY + 5, +#endif }; int __net_init ip_vs_control_net_init(struct netns_ipvs *ipvs) -- GitLab From 11f8633c5a334850e9171f4808f20f5a2039060e Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Sun, 25 Nov 2018 00:17:04 +0200 Subject: [PATCH 2146/2269] USB: omap_udc: use devm_request_irq() [ Upstream commit 286afdde1640d8ea8916a0f05e811441fbbf4b9d ] The current code fails to release the third irq on the error path (observed by reading the code), and we get also multiple WARNs with failing gadget drivers due to duplicate IRQ releases. Fix by using devm_request_irq(). Signed-off-by: Aaro Koskinen Signed-off-by: Felipe Balbi Signed-off-by: Sasha Levin --- drivers/usb/gadget/udc/omap_udc.c | 37 +++++++++---------------------- 1 file changed, 10 insertions(+), 27 deletions(-) diff --git a/drivers/usb/gadget/udc/omap_udc.c b/drivers/usb/gadget/udc/omap_udc.c index f05ba6825bfe6..e515c85ef0c59 100644 --- a/drivers/usb/gadget/udc/omap_udc.c +++ b/drivers/usb/gadget/udc/omap_udc.c @@ -2886,8 +2886,8 @@ bad_on_1710: udc->clr_halt = UDC_RESET_EP; /* USB general purpose IRQ: ep0, state changes, dma, etc */ - status = request_irq(pdev->resource[1].start, omap_udc_irq, - 0, driver_name, udc); + status = devm_request_irq(&pdev->dev, pdev->resource[1].start, + omap_udc_irq, 0, driver_name, udc); if (status != 0) { ERR("can't get irq %d, err %d\n", (int) pdev->resource[1].start, status); @@ -2895,20 +2895,20 @@ bad_on_1710: } /* USB "non-iso" IRQ (PIO for all but ep0) */ - status = request_irq(pdev->resource[2].start, omap_udc_pio_irq, - 0, "omap_udc pio", udc); + status = devm_request_irq(&pdev->dev, pdev->resource[2].start, + omap_udc_pio_irq, 0, "omap_udc pio", udc); if (status != 0) { ERR("can't get irq %d, err %d\n", (int) pdev->resource[2].start, status); - goto cleanup2; + goto cleanup1; } #ifdef USE_ISO - status = request_irq(pdev->resource[3].start, omap_udc_iso_irq, - 0, "omap_udc iso", udc); + status = devm_request_irq(&pdev->dev, pdev->resource[3].start, + omap_udc_iso_irq, 0, "omap_udc iso", udc); if (status != 0) { ERR("can't get irq %d, err %d\n", (int) pdev->resource[3].start, status); - goto cleanup3; + goto cleanup1; } #endif if (cpu_is_omap16xx() || cpu_is_omap7xx()) { @@ -2921,22 +2921,11 @@ bad_on_1710: create_proc_file(); status = usb_add_gadget_udc_release(&pdev->dev, &udc->gadget, omap_udc_release); - if (status) - goto cleanup4; - - return 0; + if (!status) + return 0; -cleanup4: remove_proc_file(); -#ifdef USE_ISO -cleanup3: - free_irq(pdev->resource[2].start, udc); -#endif - -cleanup2: - free_irq(pdev->resource[1].start, udc); - cleanup1: kfree(udc); udc = NULL; @@ -2980,12 +2969,6 @@ static int omap_udc_remove(struct platform_device *pdev) remove_proc_file(); -#ifdef USE_ISO - free_irq(pdev->resource[3].start, udc); -#endif - free_irq(pdev->resource[2].start, udc); - free_irq(pdev->resource[1].start, udc); - if (udc->dc_clk) { if (udc->clk_requested) omap_udc_enable_clock(0); -- GitLab From 09eb2b90d9e00acf0aa314f59b69de3b0037bb21 Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Sun, 25 Nov 2018 00:17:05 +0200 Subject: [PATCH 2147/2269] USB: omap_udc: fix crashes on probe error and module removal [ Upstream commit 99f700366fcea1aa2fa3c49c99f371670c3c62f8 ] We currently crash if usb_add_gadget_udc_release() fails, since the udc->done is not initialized until in the remove function. Furthermore, on module removal the udc data is accessed although the release function is already triggered by usb_del_gadget_udc() early in the function. Fix by rewriting the release and remove functions, basically moving all the cleanup into the release function, and doing the completion only in the module removal case. The patch fixes omap_udc module probe with a failing gadged, and also allows the removal of omap_udc. Tested by running "modprobe omap_udc; modprobe -r omap_udc" in a loop. Signed-off-by: Aaro Koskinen Signed-off-by: Felipe Balbi Signed-off-by: Sasha Levin --- drivers/usb/gadget/udc/omap_udc.c | 50 ++++++++++++------------------- 1 file changed, 19 insertions(+), 31 deletions(-) diff --git a/drivers/usb/gadget/udc/omap_udc.c b/drivers/usb/gadget/udc/omap_udc.c index e515c85ef0c59..d45dc14ef0a2e 100644 --- a/drivers/usb/gadget/udc/omap_udc.c +++ b/drivers/usb/gadget/udc/omap_udc.c @@ -2612,9 +2612,22 @@ omap_ep_setup(char *name, u8 addr, u8 type, static void omap_udc_release(struct device *dev) { - complete(udc->done); + pullup_disable(udc); + if (!IS_ERR_OR_NULL(udc->transceiver)) { + usb_put_phy(udc->transceiver); + udc->transceiver = NULL; + } + omap_writew(0, UDC_SYSCON1); + remove_proc_file(); + if (udc->dc_clk) { + if (udc->clk_requested) + omap_udc_enable_clock(0); + clk_put(udc->hhc_clk); + clk_put(udc->dc_clk); + } + if (udc->done) + complete(udc->done); kfree(udc); - udc = NULL; } static int @@ -2919,12 +2932,8 @@ bad_on_1710: } create_proc_file(); - status = usb_add_gadget_udc_release(&pdev->dev, &udc->gadget, - omap_udc_release); - if (!status) - return 0; - - remove_proc_file(); + return usb_add_gadget_udc_release(&pdev->dev, &udc->gadget, + omap_udc_release); cleanup1: kfree(udc); @@ -2951,36 +2960,15 @@ static int omap_udc_remove(struct platform_device *pdev) { DECLARE_COMPLETION_ONSTACK(done); - if (!udc) - return -ENODEV; - - usb_del_gadget_udc(&udc->gadget); - if (udc->driver) - return -EBUSY; - udc->done = &done; - pullup_disable(udc); - if (!IS_ERR_OR_NULL(udc->transceiver)) { - usb_put_phy(udc->transceiver); - udc->transceiver = NULL; - } - omap_writew(0, UDC_SYSCON1); - - remove_proc_file(); + usb_del_gadget_udc(&udc->gadget); - if (udc->dc_clk) { - if (udc->clk_requested) - omap_udc_enable_clock(0); - clk_put(udc->hhc_clk); - clk_put(udc->dc_clk); - } + wait_for_completion(&done); release_mem_region(pdev->resource[0].start, pdev->resource[0].end - pdev->resource[0].start + 1); - wait_for_completion(&done); - return 0; } -- GitLab From e68f6cb901194b9aa662dbe65b69486c32e58615 Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Sun, 25 Nov 2018 00:17:06 +0200 Subject: [PATCH 2148/2269] USB: omap_udc: fix omap_udc_start() on 15xx machines [ Upstream commit 6ca6695f576b8453fe68865e84d25946d63b10ad ] On OMAP 15xx machines there are no transceivers, and omap_udc_start() always fails as it forgot to adjust the default return value. Signed-off-by: Aaro Koskinen Signed-off-by: Felipe Balbi Signed-off-by: Sasha Levin --- drivers/usb/gadget/udc/omap_udc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/gadget/udc/omap_udc.c b/drivers/usb/gadget/udc/omap_udc.c index d45dc14ef0a2e..9060b3af27ff5 100644 --- a/drivers/usb/gadget/udc/omap_udc.c +++ b/drivers/usb/gadget/udc/omap_udc.c @@ -2045,7 +2045,7 @@ static inline int machine_without_vbus_sense(void) static int omap_udc_start(struct usb_gadget *g, struct usb_gadget_driver *driver) { - int status = -ENODEV; + int status; struct omap_ep *ep; unsigned long flags; @@ -2083,6 +2083,7 @@ static int omap_udc_start(struct usb_gadget *g, goto done; } } else { + status = 0; if (can_pullup(udc)) pullup_enable(udc); else -- GitLab From 72c982a3b1dddf68c3d5e259447994fdbf23b75a Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Sun, 25 Nov 2018 00:17:07 +0200 Subject: [PATCH 2149/2269] USB: omap_udc: fix USB gadget functionality on Palm Tungsten E [ Upstream commit 2c2322fbcab8102b8cadc09d66714700a2da42c2 ] On Palm TE nothing happens when you try to use gadget drivers and plug the USB cable. Fix by adding the board to the vbus sense quirk list. Signed-off-by: Aaro Koskinen Signed-off-by: Felipe Balbi Signed-off-by: Sasha Levin --- drivers/usb/gadget/udc/omap_udc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/gadget/udc/omap_udc.c b/drivers/usb/gadget/udc/omap_udc.c index 9060b3af27ff5..c8facc8aa87ec 100644 --- a/drivers/usb/gadget/udc/omap_udc.c +++ b/drivers/usb/gadget/udc/omap_udc.c @@ -2037,6 +2037,7 @@ static inline int machine_without_vbus_sense(void) { return machine_is_omap_innovator() || machine_is_omap_osk() + || machine_is_omap_palmte() || machine_is_sx1() /* No known omap7xx boards with vbus sense */ || cpu_is_omap7xx(); -- GitLab From ca993dead6b5ca2181185064502ea6b22797a857 Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Sun, 25 Nov 2018 00:17:08 +0200 Subject: [PATCH 2150/2269] USB: omap_udc: fix rejection of out transfers when DMA is used [ Upstream commit 069caf5950dfa75d0526cd89c439ff9d9d3136d8 ] Commit 387f869d2579 ("usb: gadget: u_ether: conditionally align transfer size") started aligning transfer size only if requested, breaking omap_udc DMA mode. Set quirk_ep_out_aligned_size to restore the old behaviour. Fixes: 387f869d2579 ("usb: gadget: u_ether: conditionally align transfer size") Signed-off-by: Aaro Koskinen Signed-off-by: Felipe Balbi Signed-off-by: Sasha Levin --- drivers/usb/gadget/udc/omap_udc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/gadget/udc/omap_udc.c b/drivers/usb/gadget/udc/omap_udc.c index c8facc8aa87ec..ee0b87a0773cd 100644 --- a/drivers/usb/gadget/udc/omap_udc.c +++ b/drivers/usb/gadget/udc/omap_udc.c @@ -2661,6 +2661,7 @@ omap_udc_setup(struct platform_device *odev, struct usb_phy *xceiv) udc->gadget.speed = USB_SPEED_UNKNOWN; udc->gadget.max_speed = USB_SPEED_FULL; udc->gadget.name = driver_name; + udc->gadget.quirk_ep_out_aligned_size = 1; udc->transceiver = xceiv; /* ep0 is special; put it right after the SETUP buffer */ -- GitLab From f47b8824a83057e9f63d39df86332ffeb703311f Mon Sep 17 00:00:00 2001 From: Christian Hewitt Date: Wed, 21 Nov 2018 13:39:29 +0400 Subject: [PATCH 2151/2269] drm/meson: add support for 1080p25 mode [ Upstream commit 31e1ab494559fb46de304cc6c2aed1528f94b298 ] This essential mode for PAL users is missing, so add it. Fixes: 335e3713afb87 ("drm/meson: Add support for HDMI venc modes and settings") Signed-off-by: Christian Hewitt Acked-by: Neil Armstrong Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/1542793169-13008-1-git-send-email-christianshewitt@gmail.com Signed-off-by: Sean Paul Signed-off-by: Sasha Levin --- drivers/gpu/drm/meson/meson_venc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/meson/meson_venc.c b/drivers/gpu/drm/meson/meson_venc.c index 9509017dbded1..d5dfe7045cc68 100644 --- a/drivers/gpu/drm/meson/meson_venc.c +++ b/drivers/gpu/drm/meson/meson_venc.c @@ -714,6 +714,7 @@ struct meson_hdmi_venc_vic_mode { { 5, &meson_hdmi_encp_mode_1080i60 }, { 20, &meson_hdmi_encp_mode_1080i50 }, { 32, &meson_hdmi_encp_mode_1080p24 }, + { 33, &meson_hdmi_encp_mode_1080p50 }, { 34, &meson_hdmi_encp_mode_1080p30 }, { 31, &meson_hdmi_encp_mode_1080p50 }, { 16, &meson_hdmi_encp_mode_1080p60 }, -- GitLab From 97f048033a07b199b98d5e8f10f1022f10d42ed4 Mon Sep 17 00:00:00 2001 From: Alin Nastac Date: Wed, 21 Nov 2018 14:00:30 +0100 Subject: [PATCH 2152/2269] netfilter: ipv6: Preserve link scope traffic original oif [ Upstream commit 508b09046c0f21678652fb66fd1e9959d55591d2 ] When ip6_route_me_harder is invoked, it resets outgoing interface of: - link-local scoped packets sent by neighbor discovery - multicast packets sent by MLD host - multicast packets send by MLD proxy daemon that sets outgoing interface through IPV6_PKTINFO ipi6_ifindex Link-local and multicast packets must keep their original oif after ip6_route_me_harder is called. Signed-off-by: Alin Nastac Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/ipv6/netfilter.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index 9bf260459f833..1f8b1a433b5d6 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -25,7 +25,8 @@ int ip6_route_me_harder(struct net *net, struct sk_buff *skb) unsigned int hh_len; struct dst_entry *dst; struct flowi6 fl6 = { - .flowi6_oif = sk ? sk->sk_bound_dev_if : 0, + .flowi6_oif = sk && sk->sk_bound_dev_if ? sk->sk_bound_dev_if : + rt6_need_strict(&iph->daddr) ? skb_dst(skb)->dev->ifindex : 0, .flowi6_mark = skb->mark, .flowi6_uid = sock_net_uid(net, sk), .daddr = iph->daddr, -- GitLab From 8f859b2729e980d511f29a68c3474aa27ead72b7 Mon Sep 17 00:00:00 2001 From: Artemy Kovalyov Date: Sun, 25 Nov 2018 20:34:26 +0200 Subject: [PATCH 2153/2269] IB/mlx5: Fix page fault handling for MW [ Upstream commit 75b7b86bdb0df37e08e44b6c1f99010967f81944 ] Memory windows are implemented with an indirect MKey, when a page fault event comes for a MW Mkey we need to find the MR at the end of the list of the indirect MKeys by iterating on all items from the first to the last. The offset calculated during this process has to be zeroed after the first iteration or the next iteration will start from a wrong address, resulting incorrect ODP faulting behavior. Fixes: db570d7deafb ("IB/mlx5: Add ODP support to MW") Signed-off-by: Artemy Kovalyov Signed-off-by: Moni Shoua Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/hw/mlx5/odp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 3d701c7a4c914..1ed94b6c0b0a9 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -723,6 +723,7 @@ next_mr: head = frame; bcnt -= frame->bcnt; + offset = 0; } break; -- GitLab From bb3f86913bdad19cc9a0d0165ddd373ff8c930f7 Mon Sep 17 00:00:00 2001 From: Yi Wang Date: Thu, 8 Nov 2018 16:48:36 +0800 Subject: [PATCH 2154/2269] KVM: x86: fix empty-body warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 354cb410d87314e2eda344feea84809e4261570a ] We get the following warnings about empty statements when building with 'W=1': arch/x86/kvm/lapic.c:632:53: warning: suggest braces around empty body in an ‘if’ statement [-Wempty-body] arch/x86/kvm/lapic.c:1907:42: warning: suggest braces around empty body in an ‘if’ statement [-Wempty-body] arch/x86/kvm/lapic.c:1936:65: warning: suggest braces around empty body in an ‘if’ statement [-Wempty-body] arch/x86/kvm/lapic.c:1975:44: warning: suggest braces around empty body in an ‘if’ statement [-Wempty-body] Rework the debug helper macro to get rid of these warnings. Signed-off-by: Yi Wang Signed-off-by: Paolo Bonzini Signed-off-by: Sasha Levin --- arch/x86/kvm/lapic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 13dfb55b84db5..f7c34184342a5 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -55,7 +55,7 @@ #define PRIo64 "o" /* #define apic_debug(fmt,arg...) printk(KERN_WARNING fmt,##arg) */ -#define apic_debug(fmt, arg...) +#define apic_debug(fmt, arg...) do {} while (0) /* 14 is the version for Xeon and Pentium 8.4.8*/ #define APIC_VERSION (0x14UL | ((KVM_APIC_LVT_NUM - 1) << 16)) -- GitLab From 7fdd58deeae47d51ce528ef3bfd1f638f3c399be Mon Sep 17 00:00:00 2001 From: Yi Wang Date: Thu, 8 Nov 2018 11:22:21 +0800 Subject: [PATCH 2155/2269] x86/kvm/vmx: fix old-style function declaration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 1e4329ee2c52692ea42cc677fb2133519718b34a ] The inline keyword which is not at the beginning of the function declaration may trigger the following build warnings, so let's fix it: arch/x86/kvm/vmx.c:1309:1: warning: ‘inline’ is not at beginning of declaration [-Wold-style-declaration] arch/x86/kvm/vmx.c:5947:1: warning: ‘inline’ is not at beginning of declaration [-Wold-style-declaration] arch/x86/kvm/vmx.c:5985:1: warning: ‘inline’ is not at beginning of declaration [-Wold-style-declaration] arch/x86/kvm/vmx.c:6023:1: warning: ‘inline’ is not at beginning of declaration [-Wold-style-declaration] Signed-off-by: Yi Wang Signed-off-by: Paolo Bonzini Signed-off-by: Sasha Levin --- arch/x86/kvm/vmx.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index ec588cf4fe953..4353580b659a4 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -1089,7 +1089,7 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked); static bool nested_vmx_is_page_fault_vmexit(struct vmcs12 *vmcs12, u16 error_code); static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu); -static void __always_inline vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, +static __always_inline void vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, u32 msr, int type); static DEFINE_PER_CPU(struct vmcs *, vmxarea); @@ -5227,7 +5227,7 @@ static void free_vpid(int vpid) spin_unlock(&vmx_vpid_lock); } -static void __always_inline vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, +static __always_inline void vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, u32 msr, int type) { int f = sizeof(unsigned long); @@ -5262,7 +5262,7 @@ static void __always_inline vmx_disable_intercept_for_msr(unsigned long *msr_bit } } -static void __always_inline vmx_enable_intercept_for_msr(unsigned long *msr_bitmap, +static __always_inline void vmx_enable_intercept_for_msr(unsigned long *msr_bitmap, u32 msr, int type) { int f = sizeof(unsigned long); @@ -5297,7 +5297,7 @@ static void __always_inline vmx_enable_intercept_for_msr(unsigned long *msr_bitm } } -static void __always_inline vmx_set_intercept_for_msr(unsigned long *msr_bitmap, +static __always_inline void vmx_set_intercept_for_msr(unsigned long *msr_bitmap, u32 msr, int type, bool value) { if (value) -- GitLab From ff791c9ec159ce294f949505bd0fe77881e67e02 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Mon, 26 Nov 2018 15:07:16 +0100 Subject: [PATCH 2156/2269] net: thunderx: fix NULL pointer dereference in nic_remove [ Upstream commit 24a6d2dd263bc910de018c78d1148b3e33b94512 ] Fix a possible NULL pointer dereference in nic_remove routine removing the nicpf module if nic_probe fails. The issue can be triggered with the following reproducer: $rmmod nicvf $rmmod nicpf [ 521.412008] Unable to handle kernel access to user memory outside uaccess routines at virtual address 0000000000000014 [ 521.422777] Mem abort info: [ 521.425561] ESR = 0x96000004 [ 521.428624] Exception class = DABT (current EL), IL = 32 bits [ 521.434535] SET = 0, FnV = 0 [ 521.437579] EA = 0, S1PTW = 0 [ 521.440730] Data abort info: [ 521.443603] ISV = 0, ISS = 0x00000004 [ 521.447431] CM = 0, WnR = 0 [ 521.450417] user pgtable: 4k pages, 48-bit VAs, pgdp = 0000000072a3da42 [ 521.457022] [0000000000000014] pgd=0000000000000000 [ 521.461916] Internal error: Oops: 96000004 [#1] SMP [ 521.511801] Hardware name: GIGABYTE H270-T70/MT70-HD0, BIOS T49 02/02/2018 [ 521.518664] pstate: 80400005 (Nzcv daif +PAN -UAO) [ 521.523451] pc : nic_remove+0x24/0x88 [nicpf] [ 521.527808] lr : pci_device_remove+0x48/0xd8 [ 521.532066] sp : ffff000013433cc0 [ 521.535370] x29: ffff000013433cc0 x28: ffff810f6ac50000 [ 521.540672] x27: 0000000000000000 x26: 0000000000000000 [ 521.545974] x25: 0000000056000000 x24: 0000000000000015 [ 521.551274] x23: ffff8007ff89a110 x22: ffff000001667070 [ 521.556576] x21: ffff8007ffb170b0 x20: ffff8007ffb17000 [ 521.561877] x19: 0000000000000000 x18: 0000000000000025 [ 521.567178] x17: 0000000000000000 x16: 000000000000010ffc33ff98 x8 : 0000000000000000 [ 521.593683] x7 : 0000000000000000 x6 : 0000000000000001 [ 521.598983] x5 : 0000000000000002 x4 : 0000000000000003 [ 521.604284] x3 : ffff8007ffb17184 x2 : ffff8007ffb17184 [ 521.609585] x1 : ffff000001662118 x0 : ffff000008557be0 [ 521.614887] Process rmmod (pid: 1897, stack limit = 0x00000000859535c3) [ 521.621490] Call trace: [ 521.623928] nic_remove+0x24/0x88 [nicpf] [ 521.627927] pci_device_remove+0x48/0xd8 [ 521.631847] device_release_driver_internal+0x1b0/0x248 [ 521.637062] driver_detach+0x50/0xc0 [ 521.640628] bus_remove_driver+0x60/0x100 [ 521.644627] driver_unregister+0x34/0x60 [ 521.648538] pci_unregister_driver+0x24/0xd8 [ 521.652798] nic_cleanup_module+0x14/0x111c [nicpf] [ 521.657672] __arm64_sys_delete_module+0x150/0x218 [ 521.662460] el0_svc_handler+0x94/0x110 [ 521.666287] el0_svc+0x8/0xc [ 521.669160] Code: aa1e03e0 9102c295 d503201f f9404eb3 (b9401660) Fixes: 4863dea3fab0 ("net: Adding support for Cavium ThunderX network controller") Signed-off-by: Lorenzo Bianconi Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/cavium/thunder/nic_main.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/cavium/thunder/nic_main.c b/drivers/net/ethernet/cavium/thunder/nic_main.c index fb770b0182d37..d89ec4724efd6 100644 --- a/drivers/net/ethernet/cavium/thunder/nic_main.c +++ b/drivers/net/ethernet/cavium/thunder/nic_main.c @@ -1376,6 +1376,9 @@ static void nic_remove(struct pci_dev *pdev) { struct nicpf *nic = pci_get_drvdata(pdev); + if (!nic) + return; + if (nic->flags & NIC_SRIOV_ENABLED) pci_disable_sriov(pdev); -- GitLab From b641ee14cfc1fe4d665e8bca98de1d00ed043ba9 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Mon, 19 Nov 2018 16:49:05 +0100 Subject: [PATCH 2157/2269] usb: gadget: u_ether: fix unsafe list iteration [ Upstream commit c9287fa657b3328b4549c0ab39ea7f197a3d6a50 ] list_for_each_entry_safe() is not safe for deleting entries from the list if the spin lock, which protects it, is released and reacquired during the list iteration. Fix this issue by replacing this construction with a simple check if list is empty and removing the first entry in each iteration. This is almost equivalent to a revert of the commit mentioned in the Fixes: tag. This patch fixes following issue: --->8--- Unable to handle kernel NULL pointer dereference at virtual address 00000104 pgd = (ptrval) [00000104] *pgd=00000000 Internal error: Oops: 817 [#1] PREEMPT SMP ARM Modules linked in: CPU: 1 PID: 84 Comm: kworker/1:1 Not tainted 4.20.0-rc2-next-20181114-00009-g8266b35ec404 #1061 Hardware name: SAMSUNG EXYNOS (Flattened Device Tree) Workqueue: events eth_work PC is at rx_fill+0x60/0xac LR is at _raw_spin_lock_irqsave+0x50/0x5c pc : [] lr : [] psr: 80000093 sp : ee7fbee8 ip : 00000100 fp : 00000000 r10: 006000c0 r9 : c10b0ab0 r8 : ee7eb5c0 r7 : ee7eb614 r6 : ee7eb5ec r5 : 000000dc r4 : ee12ac00 r3 : ee12ac24 r2 : 00000200 r1 : 60000013 r0 : ee7eb5ec Flags: Nzcv IRQs off FIQs on Mode SVC_32 ISA ARM Segment none Control: 10c5387d Table: 6d5dc04a DAC: 00000051 Process kworker/1:1 (pid: 84, stack limit = 0x(ptrval)) Stack: (0xee7fbee8 to 0xee7fc000) ... [] (rx_fill) from [] (process_one_work+0x200/0x738) [] (process_one_work) from [] (worker_thread+0x2c/0x4c8) [] (worker_thread) from [] (kthread+0x128/0x164) [] (kthread) from [] (ret_from_fork+0x14/0x20) Exception stack(0xee7fbfb0 to 0xee7fbff8) ... ---[ end trace 64480bc835eba7d6 ]--- Fixes: fea14e68ff5e ("usb: gadget: u_ether: use better list accessors") Signed-off-by: Marek Szyprowski Signed-off-by: Felipe Balbi Signed-off-by: Sasha Levin --- drivers/usb/gadget/function/u_ether.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/usb/gadget/function/u_ether.c b/drivers/usb/gadget/function/u_ether.c index bdbc3fdc7c4ff..3a0e4f5d7b839 100644 --- a/drivers/usb/gadget/function/u_ether.c +++ b/drivers/usb/gadget/function/u_ether.c @@ -405,12 +405,12 @@ done: static void rx_fill(struct eth_dev *dev, gfp_t gfp_flags) { struct usb_request *req; - struct usb_request *tmp; unsigned long flags; /* fill unused rxq slots with some skb */ spin_lock_irqsave(&dev->req_lock, flags); - list_for_each_entry_safe(req, tmp, &dev->rx_reqs, list) { + while (!list_empty(&dev->rx_reqs)) { + req = list_first_entry(&dev->rx_reqs, struct usb_request, list); list_del_init(&req->list); spin_unlock_irqrestore(&dev->req_lock, flags); @@ -1125,7 +1125,6 @@ void gether_disconnect(struct gether *link) { struct eth_dev *dev = link->ioport; struct usb_request *req; - struct usb_request *tmp; WARN_ON(!dev); if (!dev) @@ -1142,7 +1141,8 @@ void gether_disconnect(struct gether *link) */ usb_ep_disable(link->in_ep); spin_lock(&dev->req_lock); - list_for_each_entry_safe(req, tmp, &dev->tx_reqs, list) { + while (!list_empty(&dev->tx_reqs)) { + req = list_first_entry(&dev->tx_reqs, struct usb_request, list); list_del(&req->list); spin_unlock(&dev->req_lock); @@ -1154,7 +1154,8 @@ void gether_disconnect(struct gether *link) usb_ep_disable(link->out_ep); spin_lock(&dev->req_lock); - list_for_each_entry_safe(req, tmp, &dev->rx_reqs, list) { + while (!list_empty(&dev->rx_reqs)) { + req = list_first_entry(&dev->rx_reqs, struct usb_request, list); list_del(&req->list); spin_unlock(&dev->req_lock); -- GitLab From 397727e77f2f32af322f850b457c1503acdff220 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Wed, 28 Nov 2018 11:27:28 +0900 Subject: [PATCH 2158/2269] netfilter: nf_tables: deactivate expressions in rule replecement routine [ Upstream commit ca08987885a147643817d02bf260bc4756ce8cd4 ] There is no expression deactivation call from the rule replacement path, hence, chain counter is not decremented. A few steps to reproduce the problem: %nft add table ip filter %nft add chain ip filter c1 %nft add chain ip filter c1 %nft add rule ip filter c1 jump c2 %nft replace rule ip filter c1 handle 3 accept %nft flush ruleset expression means immediate NFT_JUMP to chain c2. Reference count of chain c2 is increased when the rule is added. When rule is deleted or replaced, the reference counter of c2 should be decreased via nft_rule_expr_deactivate() which calls nft_immediate_deactivate(). Splat looks like: [ 214.396453] WARNING: CPU: 1 PID: 21 at net/netfilter/nf_tables_api.c:1432 nf_tables_chain_destroy.isra.38+0x2f9/0x3a0 [nf_tables] [ 214.398983] Modules linked in: nf_tables nfnetlink [ 214.398983] CPU: 1 PID: 21 Comm: kworker/1:1 Not tainted 4.20.0-rc2+ #44 [ 214.398983] Workqueue: events nf_tables_trans_destroy_work [nf_tables] [ 214.398983] RIP: 0010:nf_tables_chain_destroy.isra.38+0x2f9/0x3a0 [nf_tables] [ 214.398983] Code: 00 00 00 fc ff df 48 89 fa 48 c1 ea 03 80 3c 02 00 0f 85 8e 00 00 00 48 8b 7b 58 e8 e1 2c 4e c6 48 89 df e8 d9 2c 4e c6 eb 9a <0f> 0b eb 96 0f 0b e9 7e fe ff ff e8 a7 7e 4e c6 e9 a4 fe ff ff e8 [ 214.398983] RSP: 0018:ffff8881152874e8 EFLAGS: 00010202 [ 214.398983] RAX: 0000000000000001 RBX: ffff88810ef9fc28 RCX: ffff8881152876f0 [ 214.398983] RDX: dffffc0000000000 RSI: 1ffff11022a50ede RDI: ffff88810ef9fc78 [ 214.398983] RBP: 1ffff11022a50e9d R08: 0000000080000000 R09: 0000000000000000 [ 214.398983] R10: 0000000000000000 R11: 0000000000000000 R12: 1ffff11022a50eba [ 214.398983] R13: ffff888114446e08 R14: ffff8881152876f0 R15: ffffed1022a50ed6 [ 214.398983] FS: 0000000000000000(0000) GS:ffff888116400000(0000) knlGS:0000000000000000 [ 214.398983] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 214.398983] CR2: 00007fab9bb5f868 CR3: 000000012aa16000 CR4: 00000000001006e0 [ 214.398983] Call Trace: [ 214.398983] ? nf_tables_table_destroy.isra.37+0x100/0x100 [nf_tables] [ 214.398983] ? __kasan_slab_free+0x145/0x180 [ 214.398983] ? nf_tables_trans_destroy_work+0x439/0x830 [nf_tables] [ 214.398983] ? kfree+0xdb/0x280 [ 214.398983] nf_tables_trans_destroy_work+0x5f5/0x830 [nf_tables] [ ... ] Fixes: bb7b40aecbf7 ("netfilter: nf_tables: bogus EBUSY in chain deletions") Reported by: Christoph Anton Mitterer Link: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=914505 Link: https://bugzilla.kernel.org/show_bug.cgi?id=201791 Signed-off-by: Taehee Yoo Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nf_tables_api.c | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index ea1e57daf50ed..623ec29ade26b 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2400,21 +2400,14 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk, } if (nlh->nlmsg_flags & NLM_F_REPLACE) { - if (!nft_is_active_next(net, old_rule)) { - err = -ENOENT; - goto err2; - } - trans = nft_trans_rule_add(&ctx, NFT_MSG_DELRULE, - old_rule); + trans = nft_trans_rule_add(&ctx, NFT_MSG_NEWRULE, rule); if (trans == NULL) { err = -ENOMEM; goto err2; } - nft_deactivate_next(net, old_rule); - chain->use--; - - if (nft_trans_rule_add(&ctx, NFT_MSG_NEWRULE, rule) == NULL) { - err = -ENOMEM; + err = nft_delrule(&ctx, old_rule); + if (err < 0) { + nft_trans_destroy(trans); goto err2; } -- GitLab From 7b99a0d73be5fdbbdb0a0eef7de211f6359fc210 Mon Sep 17 00:00:00 2001 From: Kiran Kumar Modukuri Date: Mon, 24 Sep 2018 12:02:39 +1000 Subject: [PATCH 2159/2269] cachefiles: Fix page leak in cachefiles_read_backing_file while vmscan is active [ Upstream commit 9a24ce5b66f9c8190d63b15f4473600db4935f1f ] [Description] In a heavily loaded system where the system pagecache is nearing memory limits and fscache is enabled, pages can be leaked by fscache while trying read pages from cachefiles backend. This can happen because two applications can be reading same page from a single mount, two threads can be trying to read the backing page at same time. This results in one of the threads finding that a page for the backing file or netfs file is already in the radix tree. During the error handling cachefiles does not clean up the reference on backing page, leading to page leak. [Fix] The fix is straightforward, to decrement the reference when error is encountered. [dhowells: Note that I've removed the clearance and put of newpage as they aren't attested in the commit message and don't appear to actually achieve anything since a new page is only allocated is newpage!=NULL and any residual new page is cleared before returning.] [Testing] I have tested the fix using following method for 12+ hrs. 1) mkdir -p /mnt/nfs ; mount -o vers=3,fsc :/export /mnt/nfs 2) create 10000 files of 2.8MB in a NFS mount. 3) start a thread to simulate heavy VM presssure (while true ; do echo 3 > /proc/sys/vm/drop_caches ; sleep 1 ; done)& 4) start multiple parallel reader for data set at same time find /mnt/nfs -type f | xargs -P 80 cat > /dev/null & find /mnt/nfs -type f | xargs -P 80 cat > /dev/null & find /mnt/nfs -type f | xargs -P 80 cat > /dev/null & .. .. find /mnt/nfs -type f | xargs -P 80 cat > /dev/null & find /mnt/nfs -type f | xargs -P 80 cat > /dev/null & 5) finally check using cat /proc/fs/fscache/stats | grep -i pages ; free -h , cat /proc/meminfo and page-types -r -b lru to ensure all pages are freed. Reviewed-by: Daniel Axtens Signed-off-by: Shantanu Goel Signed-off-by: Kiran Kumar Modukuri [dja: forward ported to current upstream] Signed-off-by: Daniel Axtens Signed-off-by: David Howells Signed-off-by: Sasha Levin --- fs/cachefiles/rdwr.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c index 199eb396a1bbb..54379cf7db7f1 100644 --- a/fs/cachefiles/rdwr.c +++ b/fs/cachefiles/rdwr.c @@ -537,7 +537,10 @@ static int cachefiles_read_backing_file(struct cachefiles_object *object, netpage->index, cachefiles_gfp); if (ret < 0) { if (ret == -EEXIST) { + put_page(backpage); + backpage = NULL; put_page(netpage); + netpage = NULL; fscache_retrieval_complete(op, 1); continue; } @@ -610,7 +613,10 @@ static int cachefiles_read_backing_file(struct cachefiles_object *object, netpage->index, cachefiles_gfp); if (ret < 0) { if (ret == -EEXIST) { + put_page(backpage); + backpage = NULL; put_page(netpage); + netpage = NULL; fscache_retrieval_complete(op, 1); continue; } -- GitLab From 9966f78ab603ee58909f411c8b034cef4d1b76ae Mon Sep 17 00:00:00 2001 From: Yunjian Wang Date: Tue, 6 Nov 2018 16:27:12 +0800 Subject: [PATCH 2160/2269] igb: fix uninitialized variables [ Upstream commit e4c39f7926b4de355f7df75651d75003806aae09 ] This patch fixes the variable 'phy_word' may be used uninitialized. Signed-off-by: Yunjian Wang Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/igb/e1000_i210.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/intel/igb/e1000_i210.c b/drivers/net/ethernet/intel/igb/e1000_i210.c index 07d48f2e33699..6766081f5ab95 100644 --- a/drivers/net/ethernet/intel/igb/e1000_i210.c +++ b/drivers/net/ethernet/intel/igb/e1000_i210.c @@ -862,6 +862,7 @@ s32 igb_pll_workaround_i210(struct e1000_hw *hw) nvm_word = E1000_INVM_DEFAULT_AL; tmp_nvm = nvm_word | E1000_INVM_PLL_WO_VAL; igb_write_phy_reg_82580(hw, I347AT4_PAGE_SELECT, E1000_PHY_PLL_FREQ_PAGE); + phy_word = E1000_PHY_PLL_UNCONF; for (i = 0; i < E1000_MAX_PLL_TRIES; i++) { /* check current state directly from internal PHY */ igb_read_phy_reg_82580(hw, E1000_PHY_PLL_FREQ_REG, &phy_word); -- GitLab From 689dee4b72ff02c5e96afee94bac903b7bab5ed0 Mon Sep 17 00:00:00 2001 From: Josh Elsasser Date: Sat, 24 Nov 2018 12:57:33 -0800 Subject: [PATCH 2161/2269] ixgbe: recognize 1000BaseLX SFP modules as 1Gbps MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit a8bf879af7b1999eba36303ce9cc60e0e7dd816c ] Add the two 1000BaseLX enum values to the X550's check for 1Gbps modules, allowing the core driver code to establish a link over this SFP type. This is done by the out-of-tree driver but the fix wasn't in mainline. Fixes: e23f33367882 ("ixgbe: Fix 1G and 10G link stability for X550EM_x SFP+”) Fixes: 6a14ee0cfb19 ("ixgbe: Add X550 support function pointers") Signed-off-by: Josh Elsasser Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c index cf6a245db6d50..a37c951b07530 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c @@ -2257,7 +2257,9 @@ static s32 ixgbe_get_link_capabilities_X550em(struct ixgbe_hw *hw, *autoneg = false; if (hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core0 || - hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core1) { + hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core1 || + hw->phy.sfp_type == ixgbe_sfp_type_1g_lx_core0 || + hw->phy.sfp_type == ixgbe_sfp_type_1g_lx_core1) { *speed = IXGBE_LINK_SPEED_1GB_FULL; return 0; } -- GitLab From 8a69edb8f7db6908910334399a9829f98163252e Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Wed, 28 Nov 2018 15:30:24 +0800 Subject: [PATCH 2162/2269] net: hisilicon: remove unexpected free_netdev [ Upstream commit c758940158bf29fe14e9d0f89d5848f227b48134 ] The net device ndev is freed via free_netdev when failing to register the device. The control flow then jumps to the error handling code block. ndev is used and freed again. Resulting in a use-after-free bug. Signed-off-by: Pan Bian Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/hisilicon/hip04_eth.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hip04_eth.c b/drivers/net/ethernet/hisilicon/hip04_eth.c index 0cec06bec63ee..c27054b8ce81b 100644 --- a/drivers/net/ethernet/hisilicon/hip04_eth.c +++ b/drivers/net/ethernet/hisilicon/hip04_eth.c @@ -914,10 +914,8 @@ static int hip04_mac_probe(struct platform_device *pdev) } ret = register_netdev(ndev); - if (ret) { - free_netdev(ndev); + if (ret) goto alloc_fail; - } return 0; -- GitLab From cc92ade5f498bb56f2316588c954d7371849b27b Mon Sep 17 00:00:00 2001 From: shaoyunl Date: Thu, 22 Nov 2018 11:45:24 -0500 Subject: [PATCH 2163/2269] drm/amdgpu: Add delay after enable RLC ucode [ Upstream commit ad97d9de45835b6a0f71983b0ae0cffd7306730a ] Driver shouldn't try to access any GFX registers until RLC is idle. During the test, it took 12 seconds for RLC to clear the BUSY bit in RLC_GPM_STAT register which is un-acceptable for driver. As per RLC engineer, it would take RLC Ucode less than 10,000 GFXCLK cycles to finish its critical section. In a lowest 300M enginer clock setting(default from vbios), 50 us delay is enough. This commit fix the hang when RLC introduce the work around for XGMI which requires more cycles to setup more registers than normal Signed-off-by: shaoyunl Acked-by: Felix Kuehling Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 3981915e2311f..b2eecfc9042e1 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -1992,12 +1992,13 @@ static void gfx_v9_0_rlc_start(struct amdgpu_device *adev) #endif WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1); + udelay(50); /* carrizo do enable cp interrupt after cp inited */ - if (!(adev->flags & AMD_IS_APU)) + if (!(adev->flags & AMD_IS_APU)) { gfx_v9_0_enable_gui_idle_interrupt(adev, true); - - udelay(50); + udelay(50); + } #ifdef AMDGPU_RLC_DEBUG_RETRY /* RLC_GPM_GENERAL_6 : RLC Ucode version */ -- GitLab From 6ee9b4deecb18d75a533821864beb6fe598be4fa Mon Sep 17 00:00:00 2001 From: "Y.C. Chen" Date: Thu, 22 Nov 2018 11:56:28 +0800 Subject: [PATCH 2164/2269] drm/ast: fixed reading monitor EDID not stable issue [ Upstream commit 300625620314194d9e6d4f6dda71f2dc9cf62d9f ] v1: over-sample data to increase the stability with some specific monitors v2: refine to avoid infinite loop v3: remove un-necessary "volatile" declaration [airlied: fix two checkpatch warnings] Signed-off-by: Y.C. Chen Signed-off-by: Dave Airlie Link: https://patchwork.freedesktop.org/patch/msgid/1542858988-1127-1-git-send-email-yc_chen@aspeedtech.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/ast/ast_mode.c | 36 ++++++++++++++++++++++++++++------ 1 file changed, 30 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/ast/ast_mode.c b/drivers/gpu/drm/ast/ast_mode.c index fae1176b24722..343867b182dd8 100644 --- a/drivers/gpu/drm/ast/ast_mode.c +++ b/drivers/gpu/drm/ast/ast_mode.c @@ -973,9 +973,21 @@ static int get_clock(void *i2c_priv) { struct ast_i2c_chan *i2c = i2c_priv; struct ast_private *ast = i2c->dev->dev_private; - uint32_t val; + uint32_t val, val2, count, pass; + + count = 0; + pass = 0; + val = (ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb7, 0x10) >> 4) & 0x01; + do { + val2 = (ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb7, 0x10) >> 4) & 0x01; + if (val == val2) { + pass++; + } else { + pass = 0; + val = (ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb7, 0x10) >> 4) & 0x01; + } + } while ((pass < 5) && (count++ < 0x10000)); - val = ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb7, 0x10) >> 4; return val & 1 ? 1 : 0; } @@ -983,9 +995,21 @@ static int get_data(void *i2c_priv) { struct ast_i2c_chan *i2c = i2c_priv; struct ast_private *ast = i2c->dev->dev_private; - uint32_t val; + uint32_t val, val2, count, pass; + + count = 0; + pass = 0; + val = (ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb7, 0x20) >> 5) & 0x01; + do { + val2 = (ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb7, 0x20) >> 5) & 0x01; + if (val == val2) { + pass++; + } else { + pass = 0; + val = (ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb7, 0x20) >> 5) & 0x01; + } + } while ((pass < 5) && (count++ < 0x10000)); - val = ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb7, 0x20) >> 5; return val & 1 ? 1 : 0; } @@ -998,7 +1022,7 @@ static void set_clock(void *i2c_priv, int clock) for (i = 0; i < 0x10000; i++) { ujcrb7 = ((clock & 0x01) ? 0 : 1); - ast_set_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb7, 0xfe, ujcrb7); + ast_set_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb7, 0xf4, ujcrb7); jtemp = ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb7, 0x01); if (ujcrb7 == jtemp) break; @@ -1014,7 +1038,7 @@ static void set_data(void *i2c_priv, int data) for (i = 0; i < 0x10000; i++) { ujcrb7 = ((data & 0x01) ? 0 : 1) << 2; - ast_set_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb7, 0xfb, ujcrb7); + ast_set_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb7, 0xf1, ujcrb7); jtemp = ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb7, 0x04); if (ujcrb7 == jtemp) break; -- GitLab From f02e0d5fb6695f69c51c38ef5567a5940791521f Mon Sep 17 00:00:00 2001 From: Srikanth Boddepalli Date: Tue, 27 Nov 2018 19:53:27 +0530 Subject: [PATCH 2165/2269] xen: xlate_mmu: add missing header to fix 'W=1' warning [ Upstream commit 72791ac854fea36034fa7976b748fde585008e78 ] Add a missing header otherwise compiler warns about missed prototype: drivers/xen/xlate_mmu.c:183:5: warning: no previous prototype for 'xen_xlate_unmap_gfn_range?' [-Wmissing-prototypes] int xen_xlate_unmap_gfn_range(struct vm_area_struct *vma, ^~~~~~~~~~~~~~~~~~~~~~~~~ Signed-off-by: Srikanth Boddepalli Reviewed-by: Boris Ostrovsky Reviewed-by: Joey Pabalinas Signed-off-by: Juergen Gross Signed-off-by: Sasha Levin --- drivers/xen/xlate_mmu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/xen/xlate_mmu.c b/drivers/xen/xlate_mmu.c index 23f1387b3ef79..e7df65d32c918 100644 --- a/drivers/xen/xlate_mmu.c +++ b/drivers/xen/xlate_mmu.c @@ -36,6 +36,7 @@ #include #include +#include #include #include #include -- GitLab From b9c242b4fad864505f679db31531903d1b3779e9 Mon Sep 17 00:00:00 2001 From: Igor Druzhinin Date: Tue, 27 Nov 2018 20:58:21 +0000 Subject: [PATCH 2166/2269] Revert "xen/balloon: Mark unallocated host memory as UNUSABLE" [ Upstream commit 123664101aa2156d05251704fc63f9bcbf77741a ] This reverts commit b3cf8528bb21febb650a7ecbf080d0647be40b9f. That commit unintentionally broke Xen balloon memory hotplug with "hotplug_unpopulated" set to 1. As long as "System RAM" resource got assigned under a new "Unusable memory" resource in IO/Mem tree any attempt to online this memory would fail due to general kernel restrictions on having "System RAM" resources as 1st level only. The original issue that commit has tried to workaround fa564ad96366 ("x86/PCI: Enable a 64bit BAR on AMD Family 15h (Models 00-1f, 30-3f, 60-7f)") also got amended by the following 03a551734 ("x86/PCI: Move and shrink AMD 64-bit window to avoid conflict") which made the original fix to Xen ballooning unnecessary. Signed-off-by: Igor Druzhinin Reviewed-by: Boris Ostrovsky Signed-off-by: Juergen Gross Signed-off-by: Sasha Levin --- arch/x86/xen/enlighten.c | 78 ---------------------------------------- arch/x86/xen/setup.c | 6 ++-- drivers/xen/balloon.c | 65 +++++---------------------------- include/xen/balloon.h | 5 --- 4 files changed, 13 insertions(+), 141 deletions(-) diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index df208af3cd749..515d5e4414c29 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -7,7 +7,6 @@ #include #include -#include #include #include @@ -336,80 +335,3 @@ void xen_arch_unregister_cpu(int num) } EXPORT_SYMBOL(xen_arch_unregister_cpu); #endif - -#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG -void __init arch_xen_balloon_init(struct resource *hostmem_resource) -{ - struct xen_memory_map memmap; - int rc; - unsigned int i, last_guest_ram; - phys_addr_t max_addr = PFN_PHYS(max_pfn); - struct e820_table *xen_e820_table; - const struct e820_entry *entry; - struct resource *res; - - if (!xen_initial_domain()) - return; - - xen_e820_table = kmalloc(sizeof(*xen_e820_table), GFP_KERNEL); - if (!xen_e820_table) - return; - - memmap.nr_entries = ARRAY_SIZE(xen_e820_table->entries); - set_xen_guest_handle(memmap.buffer, xen_e820_table->entries); - rc = HYPERVISOR_memory_op(XENMEM_machine_memory_map, &memmap); - if (rc) { - pr_warn("%s: Can't read host e820 (%d)\n", __func__, rc); - goto out; - } - - last_guest_ram = 0; - for (i = 0; i < memmap.nr_entries; i++) { - if (xen_e820_table->entries[i].addr >= max_addr) - break; - if (xen_e820_table->entries[i].type == E820_TYPE_RAM) - last_guest_ram = i; - } - - entry = &xen_e820_table->entries[last_guest_ram]; - if (max_addr >= entry->addr + entry->size) - goto out; /* No unallocated host RAM. */ - - hostmem_resource->start = max_addr; - hostmem_resource->end = entry->addr + entry->size; - - /* - * Mark non-RAM regions between the end of dom0 RAM and end of host RAM - * as unavailable. The rest of that region can be used for hotplug-based - * ballooning. - */ - for (; i < memmap.nr_entries; i++) { - entry = &xen_e820_table->entries[i]; - - if (entry->type == E820_TYPE_RAM) - continue; - - if (entry->addr >= hostmem_resource->end) - break; - - res = kzalloc(sizeof(*res), GFP_KERNEL); - if (!res) - goto out; - - res->name = "Unavailable host RAM"; - res->start = entry->addr; - res->end = (entry->addr + entry->size < hostmem_resource->end) ? - entry->addr + entry->size : hostmem_resource->end; - rc = insert_resource(hostmem_resource, res); - if (rc) { - pr_warn("%s: Can't insert [%llx - %llx) (%d)\n", - __func__, res->start, res->end, rc); - kfree(res); - goto out; - } - } - - out: - kfree(xen_e820_table); -} -#endif /* CONFIG_XEN_BALLOON_MEMORY_HOTPLUG */ diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 6e0d2086eacbf..c114ca767b3b8 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -808,6 +808,7 @@ char * __init xen_memory_setup(void) addr = xen_e820_table.entries[0].addr; size = xen_e820_table.entries[0].size; while (i < xen_e820_table.nr_entries) { + bool discard = false; chunk_size = size; type = xen_e820_table.entries[i].type; @@ -823,10 +824,11 @@ char * __init xen_memory_setup(void) xen_add_extra_mem(pfn_s, n_pfns); xen_max_p2m_pfn = pfn_s + n_pfns; } else - type = E820_TYPE_UNUSABLE; + discard = true; } - xen_align_and_add_e820_region(addr, chunk_size, type); + if (!discard) + xen_align_and_add_e820_region(addr, chunk_size, type); addr += chunk_size; size -= chunk_size; diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index 065f0b6073734..f77e499afdddb 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -257,25 +257,10 @@ static void release_memory_resource(struct resource *resource) kfree(resource); } -/* - * Host memory not allocated to dom0. We can use this range for hotplug-based - * ballooning. - * - * It's a type-less resource. Setting IORESOURCE_MEM will make resource - * management algorithms (arch_remove_reservations()) look into guest e820, - * which we don't want. - */ -static struct resource hostmem_resource = { - .name = "Host RAM", -}; - -void __attribute__((weak)) __init arch_xen_balloon_init(struct resource *res) -{} - static struct resource *additional_memory_resource(phys_addr_t size) { - struct resource *res, *res_hostmem; - int ret = -ENOMEM; + struct resource *res; + int ret; res = kzalloc(sizeof(*res), GFP_KERNEL); if (!res) @@ -284,42 +269,13 @@ static struct resource *additional_memory_resource(phys_addr_t size) res->name = "System RAM"; res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; - res_hostmem = kzalloc(sizeof(*res), GFP_KERNEL); - if (res_hostmem) { - /* Try to grab a range from hostmem */ - res_hostmem->name = "Host memory"; - ret = allocate_resource(&hostmem_resource, res_hostmem, - size, 0, -1, - PAGES_PER_SECTION * PAGE_SIZE, NULL, NULL); - } - - if (!ret) { - /* - * Insert this resource into iomem. Because hostmem_resource - * tracks portion of guest e820 marked as UNUSABLE noone else - * should try to use it. - */ - res->start = res_hostmem->start; - res->end = res_hostmem->end; - ret = insert_resource(&iomem_resource, res); - if (ret < 0) { - pr_err("Can't insert iomem_resource [%llx - %llx]\n", - res->start, res->end); - release_memory_resource(res_hostmem); - res_hostmem = NULL; - res->start = res->end = 0; - } - } - - if (ret) { - ret = allocate_resource(&iomem_resource, res, - size, 0, -1, - PAGES_PER_SECTION * PAGE_SIZE, NULL, NULL); - if (ret < 0) { - pr_err("Cannot allocate new System RAM resource\n"); - kfree(res); - return NULL; - } + ret = allocate_resource(&iomem_resource, res, + size, 0, -1, + PAGES_PER_SECTION * PAGE_SIZE, NULL, NULL); + if (ret < 0) { + pr_err("Cannot allocate new System RAM resource\n"); + kfree(res); + return NULL; } #ifdef CONFIG_SPARSEMEM @@ -331,7 +287,6 @@ static struct resource *additional_memory_resource(phys_addr_t size) pr_err("New System RAM resource outside addressable RAM (%lu > %lu)\n", pfn, limit); release_memory_resource(res); - release_memory_resource(res_hostmem); return NULL; } } @@ -810,8 +765,6 @@ static int __init balloon_init(void) set_online_page_callback(&xen_online_page); register_memory_notifier(&xen_memory_nb); register_sysctl_table(xen_root); - - arch_xen_balloon_init(&hostmem_resource); #endif #ifdef CONFIG_XEN_PV diff --git a/include/xen/balloon.h b/include/xen/balloon.h index 61f410fd74e4c..4914b93a23f2b 100644 --- a/include/xen/balloon.h +++ b/include/xen/balloon.h @@ -44,8 +44,3 @@ static inline void xen_balloon_init(void) { } #endif - -#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG -struct resource; -void arch_xen_balloon_init(struct resource *hostmem_resource); -#endif -- GitLab From b718d6bede4bd6c8c6faac1369567f3d7587d4c1 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 1 Nov 2018 16:17:22 -0700 Subject: [PATCH 2167/2269] pstore/ram: Correctly calculate usable PRZ bytes [ Upstream commit 89d328f637b9904b6d4c9af73c8a608b8dd4d6f8 ] The actual number of bytes stored in a PRZ is smaller than the bytes requested by platform data, since there is a header on each PRZ. Additionally, if ECC is enabled, there are trailing bytes used as well. Normally this mismatch doesn't matter since PRZs are circular buffers and the leading "overflow" bytes are just thrown away. However, in the case of a compressed record, this rather badly corrupts the results. This corruption was visible with "ramoops.mem_size=204800 ramoops.ecc=1". Any stored crashes would not be uncompressable (producing a pstorefs "dmesg-*.enc.z" file), and triggering errors at boot: [ 2.790759] pstore: crypto_comp_decompress failed, ret = -22! Backporting this depends on commit 70ad35db3321 ("pstore: Convert console write to use ->write_buf") Reported-by: Joel Fernandes Fixes: b0aad7a99c1d ("pstore: Add compression support to pstore") Signed-off-by: Kees Cook Reviewed-by: Joel Fernandes (Google) Signed-off-by: Sasha Levin --- fs/pstore/ram.c | 15 ++++++--------- include/linux/pstore.h | 5 ++++- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c index 7125b398d312e..9f7e546d7050b 100644 --- a/fs/pstore/ram.c +++ b/fs/pstore/ram.c @@ -804,17 +804,14 @@ static int ramoops_probe(struct platform_device *pdev) cxt->pstore.data = cxt; /* - * Console can handle any buffer size, so prefer LOG_LINE_MAX. If we - * have to handle dumps, we must have at least record_size buffer. And - * for ftrace, bufsize is irrelevant (if bufsize is 0, buf will be - * ZERO_SIZE_PTR). + * Since bufsize is only used for dmesg crash dumps, it + * must match the size of the dprz record (after PRZ header + * and ECC bytes have been accounted for). */ - if (cxt->console_size) - cxt->pstore.bufsize = 1024; /* LOG_LINE_MAX */ - cxt->pstore.bufsize = max(cxt->record_size, cxt->pstore.bufsize); - cxt->pstore.buf = kmalloc(cxt->pstore.bufsize, GFP_KERNEL); + cxt->pstore.bufsize = cxt->dprzs[0]->buffer_size; + cxt->pstore.buf = kzalloc(cxt->pstore.bufsize, GFP_KERNEL); if (!cxt->pstore.buf) { - pr_err("cannot allocate pstore buffer\n"); + pr_err("cannot allocate pstore crash dump buffer\n"); err = -ENOMEM; goto fail_clear; } diff --git a/include/linux/pstore.h b/include/linux/pstore.h index 61f806a7fe29e..170bb981d2fd4 100644 --- a/include/linux/pstore.h +++ b/include/linux/pstore.h @@ -90,7 +90,10 @@ struct pstore_record { * * @buf_lock: spinlock to serialize access to @buf * @buf: preallocated crash dump buffer - * @bufsize: size of @buf available for crash dump writes + * @bufsize: size of @buf available for crash dump bytes (must match + * smallest number of bytes available for writing to a + * backend entry, since compressed bytes don't take kindly + * to being truncated) * * @read_mutex: serializes @open, @read, @close, and @erase callbacks * @flags: bitfield of frontends the backend can accept writes for -- GitLab From 38026d1afcc1153aa6efebaeac7fc2a31c2779d0 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 26 Oct 2018 17:16:29 +1100 Subject: [PATCH 2168/2269] fscache: fix race between enablement and dropping of object [ Upstream commit c5a94f434c82529afda290df3235e4d85873c5b4 ] It was observed that a process blocked indefintely in __fscache_read_or_alloc_page(), waiting for FSCACHE_COOKIE_LOOKING_UP to be cleared via fscache_wait_for_deferred_lookup(). At this time, ->backing_objects was empty, which would normaly prevent __fscache_read_or_alloc_page() from getting to the point of waiting. This implies that ->backing_objects was cleared *after* __fscache_read_or_alloc_page was was entered. When an object is "killed" and then "dropped", FSCACHE_COOKIE_LOOKING_UP is cleared in fscache_lookup_failure(), then KILL_OBJECT and DROP_OBJECT are "called" and only in DROP_OBJECT is ->backing_objects cleared. This leaves a window where something else can set FSCACHE_COOKIE_LOOKING_UP and __fscache_read_or_alloc_page() can start waiting, before ->backing_objects is cleared There is some uncertainty in this analysis, but it seems to be fit the observations. Adding the wake in this patch will be handled correctly by __fscache_read_or_alloc_page(), as it checks if ->backing_objects is empty again, after waiting. Customer which reported the hang, also report that the hang cannot be reproduced with this fix. The backtrace for the blocked process looked like: PID: 29360 TASK: ffff881ff2ac0f80 CPU: 3 COMMAND: "zsh" #0 [ffff881ff43efbf8] schedule at ffffffff815e56f1 #1 [ffff881ff43efc58] bit_wait at ffffffff815e64ed #2 [ffff881ff43efc68] __wait_on_bit at ffffffff815e61b8 #3 [ffff881ff43efca0] out_of_line_wait_on_bit at ffffffff815e625e #4 [ffff881ff43efd08] fscache_wait_for_deferred_lookup at ffffffffa04f2e8f [fscache] #5 [ffff881ff43efd18] __fscache_read_or_alloc_page at ffffffffa04f2ffe [fscache] #6 [ffff881ff43efd58] __nfs_readpage_from_fscache at ffffffffa0679668 [nfs] #7 [ffff881ff43efd78] nfs_readpage at ffffffffa067092b [nfs] #8 [ffff881ff43efda0] generic_file_read_iter at ffffffff81187a73 #9 [ffff881ff43efe50] nfs_file_read at ffffffffa066544b [nfs] #10 [ffff881ff43efe70] __vfs_read at ffffffff811fc756 #11 [ffff881ff43efee8] vfs_read at ffffffff811fccfa #12 [ffff881ff43eff18] sys_read at ffffffff811fda62 #13 [ffff881ff43eff50] entry_SYSCALL_64_fastpath at ffffffff815e986e Signed-off-by: NeilBrown Signed-off-by: David Howells Signed-off-by: Sasha Levin --- fs/fscache/object.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/fscache/object.c b/fs/fscache/object.c index 7a182c87f3780..ab1d7f35f6c2b 100644 --- a/fs/fscache/object.c +++ b/fs/fscache/object.c @@ -715,6 +715,9 @@ static const struct fscache_state *fscache_drop_object(struct fscache_object *ob if (awaken) wake_up_bit(&cookie->flags, FSCACHE_COOKIE_INVALIDATING); + if (test_and_clear_bit(FSCACHE_COOKIE_LOOKING_UP, &cookie->flags)) + wake_up_bit(&cookie->flags, FSCACHE_COOKIE_LOOKING_UP); + /* Prevent a race with our last child, which has to signal EV_CLEARED * before dropping our spinlock. -- GitLab From 0859bb2526fb3725d2b53142e67be38e12fca72b Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 17 Jul 2018 09:53:42 +0100 Subject: [PATCH 2169/2269] fscache, cachefiles: remove redundant variable 'cache' [ Upstream commit 31ffa563833576bd49a8bf53120568312755e6e2 ] Variable 'cache' is being assigned but is never used hence it is redundant and can be removed. Cleans up clang warning: warning: variable 'cache' set but not used [-Wunused-but-set-variable] Signed-off-by: Colin Ian King Signed-off-by: David Howells Signed-off-by: Sasha Levin --- fs/cachefiles/rdwr.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c index 54379cf7db7f1..5e9176ec0d3ae 100644 --- a/fs/cachefiles/rdwr.c +++ b/fs/cachefiles/rdwr.c @@ -969,11 +969,8 @@ error: void cachefiles_uncache_page(struct fscache_object *_object, struct page *page) { struct cachefiles_object *object; - struct cachefiles_cache *cache; object = container_of(_object, struct cachefiles_object, fscache); - cache = container_of(object->fscache.cache, - struct cachefiles_cache, cache); _enter("%p,{%lu}", object, page->index); -- GitLab From 9407fd14d0ee929ea21571055ccf4361b6e2323a Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 21 Nov 2018 15:17:37 -0800 Subject: [PATCH 2170/2269] nvme: flush namespace scanning work just before removing namespaces [ Upstream commit f6c8e432cb0479255322c5d0335b9f1699a0270c ] nvme_stop_ctrl can be called also for reset flow and there is no need to flush the scan_work as namespaces are not being removed. This can cause deadlock in rdma, fc and loop drivers since nvme_stop_ctrl barriers before controller teardown (and specifically I/O cancellation of the scan_work itself) takes place, but the scan_work will be blocked anyways so there is no need to flush it. Instead, move scan_work flush to nvme_remove_namespaces() where it really needs to flush. Reported-by: Ming Lei Signed-off-by: Sagi Grimberg Reviewed-by: Keith Busch Reviewed by: James Smart Tested-by: Ewan D. Milne Signed-off-by: Christoph Hellwig Signed-off-by: Sasha Levin --- drivers/nvme/host/core.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 3a63d58d2ca9f..65f3f1a34b6b5 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2572,6 +2572,9 @@ void nvme_remove_namespaces(struct nvme_ctrl *ctrl) { struct nvme_ns *ns, *next; + /* prevent racing with ns scanning */ + flush_work(&ctrl->scan_work); + /* * The dead states indicates the controller was not gracefully * disconnected. In that case, we won't be able to flush any data while @@ -2743,7 +2746,6 @@ void nvme_stop_ctrl(struct nvme_ctrl *ctrl) { nvme_stop_keep_alive(ctrl); flush_work(&ctrl->async_event_work); - flush_work(&ctrl->scan_work); cancel_work_sync(&ctrl->fw_act_work); } EXPORT_SYMBOL_GPL(nvme_stop_ctrl); -- GitLab From 8e3f3c06be02aaf0b09f11eafc3e2f567bdbd6d2 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Thu, 29 Nov 2018 09:55:59 +0000 Subject: [PATCH 2171/2269] ACPI/IORT: Fix iort_get_platform_device_domain() uninitialized pointer value [ Upstream commit ea2412dc21cc790335d319181dddc43682aef164 ] Running the Clang static analyzer on IORT code detected the following error: Logic error: Branch condition evaluates to a garbage value in iort_get_platform_device_domain() If the named component associated with a given device has no IORT mappings, iort_get_platform_device_domain() exits its MSI mapping loop with msi_parent pointer containing garbage, which can lead to erroneous code path execution. Initialize the msi_parent pointer, fixing the bug. Fixes: d4f54a186667 ("ACPI: platform: setup MSI domain for ACPI based platform device") Reported-by: Patrick Bellasi Reviewed-by: Hanjun Guo Acked-by: Will Deacon Cc: Sudeep Holla Cc: "Rafael J. Wysocki" Signed-off-by: Lorenzo Pieralisi Signed-off-by: Catalin Marinas Signed-off-by: Sasha Levin --- drivers/acpi/arm64/iort.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c index de56394dd161f..ca414910710ea 100644 --- a/drivers/acpi/arm64/iort.c +++ b/drivers/acpi/arm64/iort.c @@ -547,7 +547,7 @@ struct irq_domain *iort_get_device_domain(struct device *dev, u32 req_id) */ static struct irq_domain *iort_get_platform_device_domain(struct device *dev) { - struct acpi_iort_node *node, *msi_parent; + struct acpi_iort_node *node, *msi_parent = NULL; struct fwnode_handle *iort_fwnode; struct acpi_iort_its_group *its; int i; -- GitLab From df2055c0eb8af72790060d6797cac9be56622412 Mon Sep 17 00:00:00 2001 From: Larry Chen Date: Fri, 30 Nov 2018 14:08:56 -0800 Subject: [PATCH 2172/2269] ocfs2: fix deadlock caused by ocfs2_defrag_extent() [ Upstream commit e21e57445a64598b29a6f629688f9b9a39e7242a ] ocfs2_defrag_extent may fall into deadlock. ocfs2_ioctl_move_extents ocfs2_ioctl_move_extents ocfs2_move_extents ocfs2_defrag_extent ocfs2_lock_allocators_move_extents ocfs2_reserve_clusters inode_lock GLOBAL_BITMAP_SYSTEM_INODE __ocfs2_flush_truncate_log inode_lock GLOBAL_BITMAP_SYSTEM_INODE As backtrace shows above, ocfs2_reserve_clusters() will call inode_lock against the global bitmap if local allocator has not sufficient cluters. Once global bitmap could meet the demand, ocfs2_reserve_cluster will return success with global bitmap locked. After ocfs2_reserve_cluster(), if truncate log is full, __ocfs2_flush_truncate_log() will definitely fall into deadlock because it needs to inode_lock global bitmap, which has already been locked. To fix this bug, we could remove from ocfs2_lock_allocators_move_extents() the code which intends to lock global allocator, and put the removed code after __ocfs2_flush_truncate_log(). ocfs2_lock_allocators_move_extents() is referred by 2 places, one is here, the other does not need the data allocator context, which means this patch does not affect the caller so far. Link: http://lkml.kernel.org/r/20181101071422.14470-1-lchen@suse.com Signed-off-by: Larry Chen Reviewed-by: Changwei Ge Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Joseph Qi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- fs/ocfs2/move_extents.c | 47 +++++++++++++++++++++++------------------ 1 file changed, 26 insertions(+), 21 deletions(-) diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c index 7eb3b0a6347ef..f55f82ca34250 100644 --- a/fs/ocfs2/move_extents.c +++ b/fs/ocfs2/move_extents.c @@ -156,18 +156,14 @@ out: } /* - * lock allocators, and reserving appropriate number of bits for - * meta blocks and data clusters. - * - * in some cases, we don't need to reserve clusters, just let data_ac - * be NULL. + * lock allocator, and reserve appropriate number of bits for + * meta blocks. */ -static int ocfs2_lock_allocators_move_extents(struct inode *inode, +static int ocfs2_lock_meta_allocator_move_extents(struct inode *inode, struct ocfs2_extent_tree *et, u32 clusters_to_move, u32 extents_to_split, struct ocfs2_alloc_context **meta_ac, - struct ocfs2_alloc_context **data_ac, int extra_blocks, int *credits) { @@ -192,13 +188,6 @@ static int ocfs2_lock_allocators_move_extents(struct inode *inode, goto out; } - if (data_ac) { - ret = ocfs2_reserve_clusters(osb, clusters_to_move, data_ac); - if (ret) { - mlog_errno(ret); - goto out; - } - } *credits += ocfs2_calc_extend_credits(osb->sb, et->et_root_el); @@ -257,10 +246,10 @@ static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context, } } - ret = ocfs2_lock_allocators_move_extents(inode, &context->et, *len, 1, - &context->meta_ac, - &context->data_ac, - extra_blocks, &credits); + ret = ocfs2_lock_meta_allocator_move_extents(inode, &context->et, + *len, 1, + &context->meta_ac, + extra_blocks, &credits); if (ret) { mlog_errno(ret); goto out; @@ -283,6 +272,21 @@ static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context, } } + /* + * Make sure ocfs2_reserve_cluster is called after + * __ocfs2_flush_truncate_log, otherwise, dead lock may happen. + * + * If ocfs2_reserve_cluster is called + * before __ocfs2_flush_truncate_log, dead lock on global bitmap + * may happen. + * + */ + ret = ocfs2_reserve_clusters(osb, *len, &context->data_ac); + if (ret) { + mlog_errno(ret); + goto out_unlock_mutex; + } + handle = ocfs2_start_trans(osb, credits); if (IS_ERR(handle)) { ret = PTR_ERR(handle); @@ -600,9 +604,10 @@ static int ocfs2_move_extent(struct ocfs2_move_extents_context *context, } } - ret = ocfs2_lock_allocators_move_extents(inode, &context->et, len, 1, - &context->meta_ac, - NULL, extra_blocks, &credits); + ret = ocfs2_lock_meta_allocator_move_extents(inode, &context->et, + len, 1, + &context->meta_ac, + extra_blocks, &credits); if (ret) { mlog_errno(ret); goto out; -- GitLab From c7aafad098b066ceee0d6a3b2ba63573ef09dc38 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Fri, 30 Nov 2018 14:09:07 -0800 Subject: [PATCH 2173/2269] mm/page_alloc.c: fix calculation of pgdat->nr_zones [ Upstream commit 8f416836c0d50b198cad1225132e5abebf8980dc ] init_currently_empty_zone() will adjust pgdat->nr_zones and set it to 'zone_idx(zone) + 1' unconditionally. This is correct in the normal case, while not exact in hot-plug situation. This function is used in two places: * free_area_init_core() * move_pfn_range_to_zone() In the first case, we are sure zone index increase monotonically. While in the second one, this is under users control. One way to reproduce this is: ---------------------------- 1. create a virtual machine with empty node1 -m 4G,slots=32,maxmem=32G \ -smp 4,maxcpus=8 \ -numa node,nodeid=0,mem=4G,cpus=0-3 \ -numa node,nodeid=1,mem=0G,cpus=4-7 2. hot-add cpu 3-7 cpu-add [3-7] 2. hot-add memory to nod1 object_add memory-backend-ram,id=ram0,size=1G device_add pc-dimm,id=dimm0,memdev=ram0,node=1 3. online memory with following order echo online_movable > memory47/state echo online > memory40/state After this, node1 will have its nr_zones equals to (ZONE_NORMAL + 1) instead of (ZONE_MOVABLE + 1). Michal said: "Having an incorrect nr_zones might result in all sorts of problems which would be quite hard to debug (e.g. reclaim not considering the movable zone). I do not expect many users would suffer from this it but still this is trivial and obviously right thing to do so backporting to the stable tree shouldn't be harmful (last famous words)" Link: http://lkml.kernel.org/r/20181117022022.9956-1-richard.weiyang@gmail.com Fixes: f1dd2cd13c4b ("mm, memory_hotplug: do not associate hotadded memory to zones until online") Signed-off-by: Wei Yang Acked-by: Michal Hocko Reviewed-by: Oscar Salvador Cc: Anshuman Khandual Cc: Dave Hansen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- mm/page_alloc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 6be91a1a00d9f..a2f365f404336 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -5544,8 +5544,10 @@ void __meminit init_currently_empty_zone(struct zone *zone, unsigned long size) { struct pglist_data *pgdat = zone->zone_pgdat; + int zone_idx = zone_idx(zone) + 1; - pgdat->nr_zones = zone_idx(zone) + 1; + if (zone_idx > pgdat->nr_zones) + pgdat->nr_zones = zone_idx; zone->zone_start_pfn = zone_start_pfn; -- GitLab From 5ee5fa61d04e2bd8ab004f307bd6ad0808482746 Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Fri, 30 Nov 2018 14:09:14 -0800 Subject: [PATCH 2174/2269] hfs: do not free node before using [ Upstream commit ce96a407adef126870b3f4a1b73529dd8aa80f49 ] hfs_bmap_free() frees the node via hfs_bnode_put(node). However, it then reads node->this when dumping error message on an error path, which may result in a use-after-free bug. This patch frees the node only when it is never again used. Link: http://lkml.kernel.org/r/1542963889-128825-1-git-send-email-bianpan2016@163.com Fixes: a1185ffa2fc ("HFS rewrite") Signed-off-by: Pan Bian Reviewed-by: Andrew Morton Cc: Joe Perches Cc: Ernesto A. Fernandez Cc: Viacheslav Dubeyko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- fs/hfs/btree.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/hfs/btree.c b/fs/hfs/btree.c index 374b5688e29e5..9bdff5e406261 100644 --- a/fs/hfs/btree.c +++ b/fs/hfs/btree.c @@ -329,13 +329,14 @@ void hfs_bmap_free(struct hfs_bnode *node) nidx -= len * 8; i = node->next; - hfs_bnode_put(node); if (!i) { /* panic */; pr_crit("unable to free bnode %u. bmap not found!\n", node->this); + hfs_bnode_put(node); return; } + hfs_bnode_put(node); node = hfs_bnode_find(tree, i); if (IS_ERR(node)) return; -- GitLab From 95c8714e3fa0bee316600c46fd007fec2750206b Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Fri, 30 Nov 2018 14:09:18 -0800 Subject: [PATCH 2175/2269] hfsplus: do not free node before using [ Upstream commit c7d7d620dcbd2a1c595092280ca943f2fced7bbd ] hfs_bmap_free() frees node via hfs_bnode_put(node). However it then reads node->this when dumping error message on an error path, which may result in a use-after-free bug. This patch frees node only when it is never used. Link: http://lkml.kernel.org/r/1543053441-66942-1-git-send-email-bianpan2016@163.com Signed-off-by: Pan Bian Reviewed-by: Andrew Morton Cc: Ernesto A. Fernandez Cc: Joe Perches Cc: Viacheslav Dubeyko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- fs/hfsplus/btree.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/hfsplus/btree.c b/fs/hfsplus/btree.c index de14b2b6881bb..3de3bc4918b55 100644 --- a/fs/hfsplus/btree.c +++ b/fs/hfsplus/btree.c @@ -454,14 +454,15 @@ void hfs_bmap_free(struct hfs_bnode *node) nidx -= len * 8; i = node->next; - hfs_bnode_put(node); if (!i) { /* panic */; pr_crit("unable to free bnode %u. " "bmap not found!\n", node->this); + hfs_bnode_put(node); return; } + hfs_bnode_put(node); node = hfs_bnode_find(tree, i); if (IS_ERR(node)) return; -- GitLab From 225b11374625aa4827fc1ee0b810c27dfe851809 Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Fri, 30 Nov 2018 14:09:48 -0800 Subject: [PATCH 2176/2269] debugobjects: avoid recursive calls with kmemleak [ Upstream commit 8de456cf87ba863e028c4dd01bae44255ce3d835 ] CONFIG_DEBUG_OBJECTS_RCU_HEAD does not play well with kmemleak due to recursive calls. fill_pool kmemleak_ignore make_black_object put_object __call_rcu (kernel/rcu/tree.c) debug_rcu_head_queue debug_object_activate debug_object_init fill_pool kmemleak_ignore make_black_object ... So add SLAB_NOLEAKTRACE to kmem_cache_create() to not register newly allocated debug objects at all. Link: http://lkml.kernel.org/r/20181126165343.2339-1-cai@gmx.us Signed-off-by: Qian Cai Suggested-by: Catalin Marinas Acked-by: Waiman Long Acked-by: Catalin Marinas Cc: Thomas Gleixner Cc: Yang Shi Cc: Arnd Bergmann Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- lib/debugobjects.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/lib/debugobjects.c b/lib/debugobjects.c index 99308479b1c80..bacb00a9cd9f9 100644 --- a/lib/debugobjects.c +++ b/lib/debugobjects.c @@ -111,7 +111,6 @@ static void fill_pool(void) if (!new) return; - kmemleak_ignore(new); raw_spin_lock_irqsave(&pool_lock, flags); hlist_add_head(&new->node, &obj_pool); debug_objects_allocated++; @@ -1085,7 +1084,6 @@ static int __init debug_objects_replace_static_objects(void) obj = kmem_cache_zalloc(obj_cache, GFP_KERNEL); if (!obj) goto free; - kmemleak_ignore(obj); hlist_add_head(&obj->node, &objects); } @@ -1141,7 +1139,8 @@ void __init debug_objects_mem_init(void) obj_cache = kmem_cache_create("debug_objects_cache", sizeof (struct debug_obj), 0, - SLAB_DEBUG_OBJECTS, NULL); + SLAB_DEBUG_OBJECTS | SLAB_NOLEAKTRACE, + NULL); if (!obj_cache || debug_objects_replace_static_objects()) { debug_objects_enabled = 0; -- GitLab From 56926f913bf8284f0d33d79cdf0421f0caec8a1f Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Fri, 30 Nov 2018 14:10:54 -0800 Subject: [PATCH 2177/2269] ocfs2: fix potential use after free [ Upstream commit 164f7e586739d07eb56af6f6d66acebb11f315c8 ] ocfs2_get_dentry() calls iput(inode) to drop the reference count of inode, and if the reference count hits 0, inode is freed. However, in this function, it then reads inode->i_generation, which may result in a use after free bug. Move the put operation later. Link: http://lkml.kernel.org/r/1543109237-110227-1-git-send-email-bianpan2016@163.com Fixes: 781f200cb7a("ocfs2: Remove masklog ML_EXPORT.") Signed-off-by: Pan Bian Reviewed-by: Andrew Morton Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Joseph Qi Cc: Changwei Ge Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- fs/ocfs2/export.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c index 9f88188060db9..4bf8d5854b271 100644 --- a/fs/ocfs2/export.c +++ b/fs/ocfs2/export.c @@ -125,10 +125,10 @@ check_err: check_gen: if (handle->ih_generation != inode->i_generation) { - iput(inode); trace_ocfs2_get_dentry_generation((unsigned long long)blkno, handle->ih_generation, inode->i_generation); + iput(inode); result = ERR_PTR(-ESTALE); goto bail; } -- GitLab From 62582f67ff7617313b27b897ed02b569b712dcac Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Thu, 13 Dec 2018 09:24:35 -0500 Subject: [PATCH 2178/2269] Revert "printk: Never set console_may_schedule in console_trylock()" This reverts commit c9b8d580b3fb0ab65d37c372aef19a318fda3199. This is just a technical revert to make the printk fix apply cleanly, this patch will be re-picked in about 3 commits. --- kernel/printk/printk.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index a9cf2e15f6a37..7161312593ddc 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -1762,12 +1762,6 @@ asmlinkage int vprintk_emit(int facility, int level, /* If called from the scheduler, we can not call up(). */ if (!in_sched) { - /* - * Disable preemption to avoid being preempted while holding - * console_sem which would prevent anyone from printing to - * console - */ - preempt_disable(); /* * Try to acquire and then immediately release the console * semaphore. The release will print out buffers and wake up @@ -1775,7 +1769,6 @@ asmlinkage int vprintk_emit(int facility, int level, */ if (console_trylock()) console_unlock(); - preempt_enable(); } return printed_len; @@ -2090,7 +2083,20 @@ int console_trylock(void) return 0; } console_locked = 1; - console_may_schedule = 0; + /* + * When PREEMPT_COUNT disabled we can't reliably detect if it's + * safe to schedule (e.g. calling printk while holding a spin_lock), + * because preempt_disable()/preempt_enable() are just barriers there + * and preempt_count() is always 0. + * + * RCU read sections have a separate preemption counter when + * PREEMPT_RCU enabled thus we must take extra care and check + * rcu_preempt_depth(), otherwise RCU read sections modify + * preempt_count(). + */ + console_may_schedule = !oops_in_progress && + preemptible() && + !rcu_preempt_depth(); return 1; } EXPORT_SYMBOL(console_trylock); -- GitLab From 594231141b003939444a1202a02992b3d0adb23e Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 10 Jan 2018 14:24:17 +0100 Subject: [PATCH 2179/2269] printk: Add console owner and waiter logic to load balance console writes [ Upstream commit dbdda842fe96f8932bae554f0adf463c27c42bc7 ] This patch implements what I discussed in Kernel Summit. I added lockdep annotation (hopefully correctly), and it hasn't had any splats (since I fixed some bugs in the first iterations). It did catch problems when I had the owner covering too much. But now that the owner is only set when actively calling the consoles, lockdep has stayed quiet. Here's the design again: I added a "console_owner" which is set to a task that is actively writing to the consoles. It is *not* the same as the owner of the console_lock. It is only set when doing the calls to the console functions. It is protected by a console_owner_lock which is a raw spin lock. There is a console_waiter. This is set when there is an active console owner that is not current, and waiter is not set. This too is protected by console_owner_lock. In printk() when it tries to write to the consoles, we have: if (console_trylock()) console_unlock(); Now I added an else, which will check if there is an active owner, and no current waiter. If that is the case, then console_waiter is set, and the task goes into a spin until it is no longer set. When the active console owner finishes writing the current message to the consoles, it grabs the console_owner_lock and sees if there is a waiter, and clears console_owner. If there is a waiter, then it breaks out of the loop, clears the waiter flag (because that will release the waiter from its spin), and exits. Note, it does *not* release the console semaphore. Because it is a semaphore, there is no owner. Another task may release it. This means that the waiter is guaranteed to be the new console owner! Which it becomes. Then the waiter calls console_unlock() and continues to write to the consoles. If another task comes along and does a printk() it too can become the new waiter, and we wash rinse and repeat! By Petr Mladek about possible new deadlocks: The thing is that we move console_sem only to printk() call that normally calls console_unlock() as well. It means that the transferred owner should not bring new type of dependencies. As Steven said somewhere: "If there is a deadlock, it was there even before." We could look at it from this side. The possible deadlock would look like: CPU0 CPU1 console_unlock() console_owner = current; spin_lockA() printk() spin = true; while (...) call_console_drivers() spin_lockA() This would be a deadlock. CPU0 would wait for the lock A. While CPU1 would own the lockA and would wait for CPU0 to finish calling the console drivers and pass the console_sem owner. But if the above is true than the following scenario was already possible before: CPU0 spin_lockA() printk() console_unlock() call_console_drivers() spin_lockA() By other words, this deadlock was there even before. Such deadlocks are prevented by using printk_deferred() in the sections guarded by the lock A. By Steven Rostedt: To demonstrate the issue, this module has been shown to lock up a system with 4 CPUs and a slow console (like a serial console). It is also able to lock up a 8 CPU system with only a fast (VGA) console, by passing in "loops=100". The changes in this commit prevent this module from locking up the system. #include #include #include #include #include #include static bool stop_testing; static unsigned int loops = 1; static void preempt_printk_workfn(struct work_struct *work) { int i; while (!READ_ONCE(stop_testing)) { for (i = 0; i < loops && !READ_ONCE(stop_testing); i++) { preempt_disable(); pr_emerg("%5d%-75s\n", smp_processor_id(), " XXX NOPREEMPT"); preempt_enable(); } msleep(1); } } static struct work_struct __percpu *works; static void finish(void) { int cpu; WRITE_ONCE(stop_testing, true); for_each_online_cpu(cpu) flush_work(per_cpu_ptr(works, cpu)); free_percpu(works); } static int __init test_init(void) { int cpu; works = alloc_percpu(struct work_struct); if (!works) return -ENOMEM; /* * This is just a test module. This will break if you * do any CPU hot plugging between loading and * unloading the module. */ for_each_online_cpu(cpu) { struct work_struct *work = per_cpu_ptr(works, cpu); INIT_WORK(work, &preempt_printk_workfn); schedule_work_on(cpu, work); } return 0; } static void __exit test_exit(void) { finish(); } module_param(loops, uint, 0); module_init(test_init); module_exit(test_exit); MODULE_LICENSE("GPL"); Link: http://lkml.kernel.org/r/20180110132418.7080-2-pmladek@suse.com Cc: akpm@linux-foundation.org Cc: linux-mm@kvack.org Cc: Cong Wang Cc: Dave Hansen Cc: Johannes Weiner Cc: Mel Gorman Cc: Michal Hocko Cc: Vlastimil Babka Cc: Peter Zijlstra Cc: Linus Torvalds Cc: Jan Kara Cc: Mathieu Desnoyers Cc: Tetsuo Handa Cc: Byungchul Park Cc: Tejun Heo Cc: Pavel Machek Cc: linux-kernel@vger.kernel.org Signed-off-by: Steven Rostedt (VMware) [pmladek@suse.com: Commit message about possible deadlocks] Acked-by: Sergey Senozhatsky Signed-off-by: Petr Mladek Signed-off-by: Sasha Levin --- kernel/printk/printk.c | 108 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 107 insertions(+), 1 deletion(-) diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 7161312593ddc..b88b402444d66 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -86,8 +86,15 @@ EXPORT_SYMBOL_GPL(console_drivers); static struct lockdep_map console_lock_dep_map = { .name = "console_lock" }; +static struct lockdep_map console_owner_dep_map = { + .name = "console_owner" +}; #endif +static DEFINE_RAW_SPINLOCK(console_owner_lock); +static struct task_struct *console_owner; +static bool console_waiter; + enum devkmsg_log_bits { __DEVKMSG_LOG_BIT_ON = 0, __DEVKMSG_LOG_BIT_OFF, @@ -1767,8 +1774,56 @@ asmlinkage int vprintk_emit(int facility, int level, * semaphore. The release will print out buffers and wake up * /dev/kmsg and syslog() users. */ - if (console_trylock()) + if (console_trylock()) { console_unlock(); + } else { + struct task_struct *owner = NULL; + bool waiter; + bool spin = false; + + printk_safe_enter_irqsave(flags); + + raw_spin_lock(&console_owner_lock); + owner = READ_ONCE(console_owner); + waiter = READ_ONCE(console_waiter); + if (!waiter && owner && owner != current) { + WRITE_ONCE(console_waiter, true); + spin = true; + } + raw_spin_unlock(&console_owner_lock); + + /* + * If there is an active printk() writing to the + * consoles, instead of having it write our data too, + * see if we can offload that load from the active + * printer, and do some printing ourselves. + * Go into a spin only if there isn't already a waiter + * spinning, and there is an active printer, and + * that active printer isn't us (recursive printk?). + */ + if (spin) { + /* We spin waiting for the owner to release us */ + spin_acquire(&console_owner_dep_map, 0, 0, _THIS_IP_); + /* Owner will clear console_waiter on hand off */ + while (READ_ONCE(console_waiter)) + cpu_relax(); + + spin_release(&console_owner_dep_map, 1, _THIS_IP_); + printk_safe_exit_irqrestore(flags); + + /* + * The owner passed the console lock to us. + * Since we did not spin on console lock, annotate + * this as a trylock. Otherwise lockdep will + * complain. + */ + mutex_acquire(&console_lock_dep_map, 0, 1, _THIS_IP_); + console_unlock(); + printk_safe_enter_irqsave(flags); + } + printk_safe_exit_irqrestore(flags); + + } } return printed_len; @@ -2155,6 +2210,7 @@ void console_unlock(void) static u64 seen_seq; unsigned long flags; bool wake_klogd = false; + bool waiter = false; bool do_cond_resched, retry; if (console_suspended) { @@ -2243,14 +2299,64 @@ skip: console_seq++; raw_spin_unlock(&logbuf_lock); + /* + * While actively printing out messages, if another printk() + * were to occur on another CPU, it may wait for this one to + * finish. This task can not be preempted if there is a + * waiter waiting to take over. + */ + raw_spin_lock(&console_owner_lock); + console_owner = current; + raw_spin_unlock(&console_owner_lock); + + /* The waiter may spin on us after setting console_owner */ + spin_acquire(&console_owner_dep_map, 0, 0, _THIS_IP_); + stop_critical_timings(); /* don't trace print latency */ call_console_drivers(ext_text, ext_len, text, len); start_critical_timings(); + + raw_spin_lock(&console_owner_lock); + waiter = READ_ONCE(console_waiter); + console_owner = NULL; + raw_spin_unlock(&console_owner_lock); + + /* + * If there is a waiter waiting for us, then pass the + * rest of the work load over to that waiter. + */ + if (waiter) + break; + + /* There was no waiter, and nothing will spin on us here */ + spin_release(&console_owner_dep_map, 1, _THIS_IP_); + printk_safe_exit_irqrestore(flags); if (do_cond_resched) cond_resched(); } + + /* + * If there is an active waiter waiting on the console_lock. + * Pass off the printing to the waiter, and the waiter + * will continue printing on its CPU, and when all writing + * has finished, the last printer will wake up klogd. + */ + if (waiter) { + WRITE_ONCE(console_waiter, false); + /* The waiter is now free to continue */ + spin_release(&console_owner_dep_map, 1, _THIS_IP_); + /* + * Hand off console_lock to waiter. The waiter will perform + * the up(). After this, the waiter is the console_lock owner. + */ + mutex_release(&console_lock_dep_map, 1, _THIS_IP_); + printk_safe_exit_irqrestore(flags); + /* Note, if waiter is set, logbuf_lock is not held */ + return; + } + console_locked = 0; /* Release the exclusive_console once it is used */ -- GitLab From ef433725f5f054e44c11d40f2d80f35824351697 Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Fri, 12 Jan 2018 17:08:37 +0100 Subject: [PATCH 2180/2269] printk: Hide console waiter logic into helpers [ Upstream commit c162d5b4338d72deed61aa65ed0f2f4ba2bbc8ab ] The commit ("printk: Add console owner and waiter logic to load balance console writes") made vprintk_emit() and console_unlock() even more complicated. This patch extracts the new code into 3 helper functions. They should help to keep it rather self-contained. It will be easier to use and maintain. This patch just shuffles the existing code. It does not change the functionality. Link: http://lkml.kernel.org/r/20180112160837.GD24497@linux.suse Cc: akpm@linux-foundation.org Cc: linux-mm@kvack.org Cc: Cong Wang Cc: Dave Hansen Cc: Johannes Weiner Cc: Mel Gorman Cc: Michal Hocko Cc: Vlastimil Babka Cc: Peter Zijlstra Cc: Linus Torvalds Cc: Jan Kara Cc: Mathieu Desnoyers Cc: Tetsuo Handa Cc: rostedt@home.goodmis.org Cc: Byungchul Park Cc: Tejun Heo Cc: Pavel Machek Cc: linux-kernel@vger.kernel.org Reviewed-by: Steven Rostedt (VMware) Acked-by: Sergey Senozhatsky Signed-off-by: Petr Mladek Signed-off-by: Sasha Levin --- kernel/printk/printk.c | 245 +++++++++++++++++++++++++---------------- 1 file changed, 148 insertions(+), 97 deletions(-) diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index b88b402444d66..2d1c2700bd858 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -86,15 +86,8 @@ EXPORT_SYMBOL_GPL(console_drivers); static struct lockdep_map console_lock_dep_map = { .name = "console_lock" }; -static struct lockdep_map console_owner_dep_map = { - .name = "console_owner" -}; #endif -static DEFINE_RAW_SPINLOCK(console_owner_lock); -static struct task_struct *console_owner; -static bool console_waiter; - enum devkmsg_log_bits { __DEVKMSG_LOG_BIT_ON = 0, __DEVKMSG_LOG_BIT_OFF, @@ -1555,6 +1548,146 @@ SYSCALL_DEFINE3(syslog, int, type, char __user *, buf, int, len) return do_syslog(type, buf, len, SYSLOG_FROM_READER); } +/* + * Special console_lock variants that help to reduce the risk of soft-lockups. + * They allow to pass console_lock to another printk() call using a busy wait. + */ + +#ifdef CONFIG_LOCKDEP +static struct lockdep_map console_owner_dep_map = { + .name = "console_owner" +}; +#endif + +static DEFINE_RAW_SPINLOCK(console_owner_lock); +static struct task_struct *console_owner; +static bool console_waiter; + +/** + * console_lock_spinning_enable - mark beginning of code where another + * thread might safely busy wait + * + * This basically converts console_lock into a spinlock. This marks + * the section where the console_lock owner can not sleep, because + * there may be a waiter spinning (like a spinlock). Also it must be + * ready to hand over the lock at the end of the section. + */ +static void console_lock_spinning_enable(void) +{ + raw_spin_lock(&console_owner_lock); + console_owner = current; + raw_spin_unlock(&console_owner_lock); + + /* The waiter may spin on us after setting console_owner */ + spin_acquire(&console_owner_dep_map, 0, 0, _THIS_IP_); +} + +/** + * console_lock_spinning_disable_and_check - mark end of code where another + * thread was able to busy wait and check if there is a waiter + * + * This is called at the end of the section where spinning is allowed. + * It has two functions. First, it is a signal that it is no longer + * safe to start busy waiting for the lock. Second, it checks if + * there is a busy waiter and passes the lock rights to her. + * + * Important: Callers lose the lock if there was a busy waiter. + * They must not touch items synchronized by console_lock + * in this case. + * + * Return: 1 if the lock rights were passed, 0 otherwise. + */ +static int console_lock_spinning_disable_and_check(void) +{ + int waiter; + + raw_spin_lock(&console_owner_lock); + waiter = READ_ONCE(console_waiter); + console_owner = NULL; + raw_spin_unlock(&console_owner_lock); + + if (!waiter) { + spin_release(&console_owner_dep_map, 1, _THIS_IP_); + return 0; + } + + /* The waiter is now free to continue */ + WRITE_ONCE(console_waiter, false); + + spin_release(&console_owner_dep_map, 1, _THIS_IP_); + + /* + * Hand off console_lock to waiter. The waiter will perform + * the up(). After this, the waiter is the console_lock owner. + */ + mutex_release(&console_lock_dep_map, 1, _THIS_IP_); + return 1; +} + +/** + * console_trylock_spinning - try to get console_lock by busy waiting + * + * This allows to busy wait for the console_lock when the current + * owner is running in specially marked sections. It means that + * the current owner is running and cannot reschedule until it + * is ready to lose the lock. + * + * Return: 1 if we got the lock, 0 othrewise + */ +static int console_trylock_spinning(void) +{ + struct task_struct *owner = NULL; + bool waiter; + bool spin = false; + unsigned long flags; + + if (console_trylock()) + return 1; + + printk_safe_enter_irqsave(flags); + + raw_spin_lock(&console_owner_lock); + owner = READ_ONCE(console_owner); + waiter = READ_ONCE(console_waiter); + if (!waiter && owner && owner != current) { + WRITE_ONCE(console_waiter, true); + spin = true; + } + raw_spin_unlock(&console_owner_lock); + + /* + * If there is an active printk() writing to the + * consoles, instead of having it write our data too, + * see if we can offload that load from the active + * printer, and do some printing ourselves. + * Go into a spin only if there isn't already a waiter + * spinning, and there is an active printer, and + * that active printer isn't us (recursive printk?). + */ + if (!spin) { + printk_safe_exit_irqrestore(flags); + return 0; + } + + /* We spin waiting for the owner to release us */ + spin_acquire(&console_owner_dep_map, 0, 0, _THIS_IP_); + /* Owner will clear console_waiter on hand off */ + while (READ_ONCE(console_waiter)) + cpu_relax(); + spin_release(&console_owner_dep_map, 1, _THIS_IP_); + + printk_safe_exit_irqrestore(flags); + /* + * The owner passed the console lock to us. + * Since we did not spin on console lock, annotate + * this as a trylock. Otherwise lockdep will + * complain. + */ + mutex_acquire(&console_lock_dep_map, 0, 1, _THIS_IP_); + + return 1; +} + /* * Call the console drivers, asking them to write out * log_buf[start] to log_buf[end - 1]. @@ -1774,56 +1907,8 @@ asmlinkage int vprintk_emit(int facility, int level, * semaphore. The release will print out buffers and wake up * /dev/kmsg and syslog() users. */ - if (console_trylock()) { + if (console_trylock_spinning()) console_unlock(); - } else { - struct task_struct *owner = NULL; - bool waiter; - bool spin = false; - - printk_safe_enter_irqsave(flags); - - raw_spin_lock(&console_owner_lock); - owner = READ_ONCE(console_owner); - waiter = READ_ONCE(console_waiter); - if (!waiter && owner && owner != current) { - WRITE_ONCE(console_waiter, true); - spin = true; - } - raw_spin_unlock(&console_owner_lock); - - /* - * If there is an active printk() writing to the - * consoles, instead of having it write our data too, - * see if we can offload that load from the active - * printer, and do some printing ourselves. - * Go into a spin only if there isn't already a waiter - * spinning, and there is an active printer, and - * that active printer isn't us (recursive printk?). - */ - if (spin) { - /* We spin waiting for the owner to release us */ - spin_acquire(&console_owner_dep_map, 0, 0, _THIS_IP_); - /* Owner will clear console_waiter on hand off */ - while (READ_ONCE(console_waiter)) - cpu_relax(); - - spin_release(&console_owner_dep_map, 1, _THIS_IP_); - printk_safe_exit_irqrestore(flags); - - /* - * The owner passed the console lock to us. - * Since we did not spin on console lock, annotate - * this as a trylock. Otherwise lockdep will - * complain. - */ - mutex_acquire(&console_lock_dep_map, 0, 1, _THIS_IP_); - console_unlock(); - printk_safe_enter_irqsave(flags); - } - printk_safe_exit_irqrestore(flags); - - } } return printed_len; @@ -1924,6 +2009,8 @@ static ssize_t msg_print_ext_header(char *buf, size_t size, static ssize_t msg_print_ext_body(char *buf, size_t size, char *dict, size_t dict_len, char *text, size_t text_len) { return 0; } +static void console_lock_spinning_enable(void) { } +static int console_lock_spinning_disable_and_check(void) { return 0; } static void call_console_drivers(const char *ext_text, size_t ext_len, const char *text, size_t len) {} static size_t msg_print_text(const struct printk_log *msg, @@ -2210,7 +2297,6 @@ void console_unlock(void) static u64 seen_seq; unsigned long flags; bool wake_klogd = false; - bool waiter = false; bool do_cond_resched, retry; if (console_suspended) { @@ -2305,31 +2391,16 @@ skip: * finish. This task can not be preempted if there is a * waiter waiting to take over. */ - raw_spin_lock(&console_owner_lock); - console_owner = current; - raw_spin_unlock(&console_owner_lock); - - /* The waiter may spin on us after setting console_owner */ - spin_acquire(&console_owner_dep_map, 0, 0, _THIS_IP_); + console_lock_spinning_enable(); stop_critical_timings(); /* don't trace print latency */ call_console_drivers(ext_text, ext_len, text, len); start_critical_timings(); - raw_spin_lock(&console_owner_lock); - waiter = READ_ONCE(console_waiter); - console_owner = NULL; - raw_spin_unlock(&console_owner_lock); - - /* - * If there is a waiter waiting for us, then pass the - * rest of the work load over to that waiter. - */ - if (waiter) - break; - - /* There was no waiter, and nothing will spin on us here */ - spin_release(&console_owner_dep_map, 1, _THIS_IP_); + if (console_lock_spinning_disable_and_check()) { + printk_safe_exit_irqrestore(flags); + return; + } printk_safe_exit_irqrestore(flags); @@ -2337,26 +2408,6 @@ skip: cond_resched(); } - /* - * If there is an active waiter waiting on the console_lock. - * Pass off the printing to the waiter, and the waiter - * will continue printing on its CPU, and when all writing - * has finished, the last printer will wake up klogd. - */ - if (waiter) { - WRITE_ONCE(console_waiter, false); - /* The waiter is now free to continue */ - spin_release(&console_owner_dep_map, 1, _THIS_IP_); - /* - * Hand off console_lock to waiter. The waiter will perform - * the up(). After this, the waiter is the console_lock owner. - */ - mutex_release(&console_lock_dep_map, 1, _THIS_IP_); - printk_safe_exit_irqrestore(flags); - /* Note, if waiter is set, logbuf_lock is not held */ - return; - } - console_locked = 0; /* Release the exclusive_console once it is used */ -- GitLab From 08b7a8f880a27631e25e23ac2569f7c2d166469e Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Tue, 16 Jan 2018 13:47:16 +0900 Subject: [PATCH 2181/2269] printk: Never set console_may_schedule in console_trylock() [ Upstream commit fd5f7cde1b85d4c8e09ca46ce948e008a2377f64 ] This patch, basically, reverts commit 6b97a20d3a79 ("printk: set may_schedule for some of console_trylock() callers"). That commit was a mistake, it introduced a big dependency on the scheduler, by enabling preemption under console_sem in printk()->console_unlock() path, which is rather too critical. The patch did not significantly reduce the possibilities of printk() lockups, but made it possible to stall printk(), as has been reported by Tetsuo Handa [1]. Another issues is that preemption under console_sem also messes up with Steven Rostedt's hand off scheme, by making it possible to sleep with console_sem both in console_unlock() and in vprintk_emit(), after acquiring the console_sem ownership (anywhere between printk_safe_exit_irqrestore() in console_trylock_spinning() and printk_safe_enter_irqsave() in console_unlock()). This makes hand off less likely and, at the same time, may result in a significant amount of pending logbuf messages. Preempted console_sem owner makes it impossible for other CPUs to emit logbuf messages, but does not make it impossible for other CPUs to append new messages to the logbuf. Reinstate the old behavior and make printk() non-preemptible. Should any printk() lockup reports arrive they must be handled in a different way. [1] http://lkml.kernel.org/r/201603022101.CAH73907.OVOOMFHFFtQJSL%20()%20I-love%20!%20SAKURA%20!%20ne%20!%20jp Fixes: 6b97a20d3a79 ("printk: set may_schedule for some of console_trylock() callers") Link: http://lkml.kernel.org/r/20180116044716.GE6607@jagdpanzerIV To: Tetsuo Handa Cc: Sergey Senozhatsky Cc: Tejun Heo Cc: akpm@linux-foundation.org Cc: linux-mm@kvack.org Cc: Cong Wang Cc: Dave Hansen Cc: Johannes Weiner Cc: Mel Gorman Cc: Michal Hocko Cc: Vlastimil Babka Cc: Peter Zijlstra Cc: Linus Torvalds Cc: Jan Kara Cc: Mathieu Desnoyers Cc: Byungchul Park Cc: Pavel Machek Cc: linux-kernel@vger.kernel.org Signed-off-by: Sergey Senozhatsky Reported-by: Tetsuo Handa Reviewed-by: Steven Rostedt (VMware) Signed-off-by: Petr Mladek Signed-off-by: Sasha Levin --- kernel/printk/printk.c | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 2d1c2700bd858..2f654a79f80b1 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -1902,6 +1902,12 @@ asmlinkage int vprintk_emit(int facility, int level, /* If called from the scheduler, we can not call up(). */ if (!in_sched) { + /* + * Disable preemption to avoid being preempted while holding + * console_sem which would prevent anyone from printing to + * console + */ + preempt_disable(); /* * Try to acquire and then immediately release the console * semaphore. The release will print out buffers and wake up @@ -1909,6 +1915,7 @@ asmlinkage int vprintk_emit(int facility, int level, */ if (console_trylock_spinning()) console_unlock(); + preempt_enable(); } return printed_len; @@ -2225,20 +2232,7 @@ int console_trylock(void) return 0; } console_locked = 1; - /* - * When PREEMPT_COUNT disabled we can't reliably detect if it's - * safe to schedule (e.g. calling printk while holding a spin_lock), - * because preempt_disable()/preempt_enable() are just barriers there - * and preempt_count() is always 0. - * - * RCU read sections have a separate preemption counter when - * PREEMPT_RCU enabled thus we must take extra care and check - * rcu_preempt_depth(), otherwise RCU read sections modify - * preempt_count(). - */ - console_may_schedule = !oops_in_progress && - preemptible() && - !rcu_preempt_depth(); + console_may_schedule = 0; return 1; } EXPORT_SYMBOL(console_trylock); -- GitLab From 16c9a316f21f1bcde5ce0633848d61699d63bad3 Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Mon, 26 Feb 2018 15:44:20 +0100 Subject: [PATCH 2182/2269] printk: Wake klogd when passing console_lock owner [ Upstream commit c14376de3a1befa70d9811ca2872d47367b48767 ] wake_klogd is a local variable in console_unlock(). The information is lost when the console_lock owner using the busy wait added by the commit dbdda842fe96f8932 ("printk: Add console owner and waiter logic to load balance console writes"). The following race is possible: CPU0 CPU1 console_unlock() for (;;) /* calling console for last message */ printk() log_store() log_next_seq++; /* see new message */ if (seen_seq != log_next_seq) { wake_klogd = true; seen_seq = log_next_seq; } console_lock_spinning_enable(); if (console_trylock_spinning()) /* spinning */ if (console_lock_spinning_disable_and_check()) { printk_safe_exit_irqrestore(flags); return; console_unlock() if (seen_seq != log_next_seq) { /* already seen */ /* nothing to do */ Result: Nobody would wakeup klogd. One solution would be to make a global variable from wake_klogd. But then we would need to manipulate it under a lock or so. This patch wakes klogd also when console_lock is passed to the spinning waiter. It looks like the right way to go. Also userspace should have a chance to see and store any "flood" of messages. Note that the very late klogd wake up was a historic solution. It made sense on single CPU systems or when sys_syslog() operations were synchronized using the big kernel lock like in v2.1.113. But it is questionable these days. Fixes: dbdda842fe96f8932 ("printk: Add console owner and waiter logic to load balance console writes") Link: http://lkml.kernel.org/r/20180226155734.dzwg3aovqnwtvkoy@pathway.suse.cz Cc: Steven Rostedt Cc: linux-kernel@vger.kernel.org Cc: Tejun Heo Suggested-by: Sergey Senozhatsky Reviewed-by: Sergey Senozhatsky Signed-off-by: Petr Mladek Signed-off-by: Sasha Levin --- kernel/printk/printk.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 2f654a79f80b1..2e2c86dd226fe 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -2393,7 +2393,7 @@ skip: if (console_lock_spinning_disable_and_check()) { printk_safe_exit_irqrestore(flags); - return; + goto out; } printk_safe_exit_irqrestore(flags); @@ -2426,6 +2426,7 @@ skip: if (retry && console_trylock()) goto again; +out: if (wake_klogd) wake_up_klogd(); } -- GitLab From 891e5a89f0faf42766eefd476285d4779fd31a21 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Fri, 17 Nov 2017 15:28:27 -0800 Subject: [PATCH 2183/2269] lib/rbtree-test: lower default params commit 0b548e33e6cb2bff240fdaf1783783be15c29080 upstream. Fengguang reported soft lockups while running the rbtree and interval tree test modules. The logic for these tests all occur in init phase, and we currently are pounding with the default values for number of nodes and number of iterations of each test. Reduce the latter by two orders of magnitude. This does not influence the value of the tests in that one thousand times by default is enough to get the picture. Link: http://lkml.kernel.org/r/20171109161715.xai2dtwqw2frhkcm@linux-n805 Signed-off-by: Davidlohr Bueso Reported-by: Fengguang Wu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Cc: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- lib/interval_tree_test.c | 4 ++-- lib/rbtree_test.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/interval_tree_test.c b/lib/interval_tree_test.c index 0e343fd295701..835242e74aaa3 100644 --- a/lib/interval_tree_test.c +++ b/lib/interval_tree_test.c @@ -11,10 +11,10 @@ MODULE_PARM_DESC(name, msg); __param(int, nnodes, 100, "Number of nodes in the interval tree"); -__param(int, perf_loops, 100000, "Number of iterations modifying the tree"); +__param(int, perf_loops, 1000, "Number of iterations modifying the tree"); __param(int, nsearches, 100, "Number of searches to the interval tree"); -__param(int, search_loops, 10000, "Number of iterations searching the tree"); +__param(int, search_loops, 1000, "Number of iterations searching the tree"); __param(bool, search_all, false, "Searches will iterate all nodes in the tree"); __param(uint, max_endpoint, ~0, "Largest value for the interval's endpoint"); diff --git a/lib/rbtree_test.c b/lib/rbtree_test.c index 191a238e5a9d7..7d36c1e27ff65 100644 --- a/lib/rbtree_test.c +++ b/lib/rbtree_test.c @@ -11,7 +11,7 @@ MODULE_PARM_DESC(name, msg); __param(int, nnodes, 100, "Number of nodes in the rb-tree"); -__param(int, perf_loops, 100000, "Number of iterations modifying the rb-tree"); +__param(int, perf_loops, 1000, "Number of iterations modifying the rb-tree"); __param(int, check_loops, 100, "Number of iterations modifying and verifying the rb-tree"); struct test_node { -- GitLab From 5d2cc520d1f1ed929ef1f6b7ad57ddde9c42b651 Mon Sep 17 00:00:00 2001 From: Tigran Mkrtchyan Date: Mon, 26 Nov 2018 18:35:14 +0100 Subject: [PATCH 2184/2269] flexfiles: enforce per-mirror stateid only for v4 DSes commit 320f35b7bf8cccf1997ca3126843535e1b95e9c4 upstream. Since commit bb21ce0ad227 we always enforce per-mirror stateid. However, this makes sense only for v4+ servers. Signed-off-by: Tigran Mkrtchyan Signed-off-by: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- fs/nfs/flexfilelayout/flexfilelayout.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 13612a848378f..8dbde5ded0425 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -1725,7 +1725,8 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr) if (fh) hdr->args.fh = fh; - if (!nfs4_ff_layout_select_ds_stateid(lseg, idx, &hdr->args.stateid)) + if (vers == 4 && + !nfs4_ff_layout_select_ds_stateid(lseg, idx, &hdr->args.stateid)) goto out_failed; /* @@ -1790,7 +1791,8 @@ ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync) if (fh) hdr->args.fh = fh; - if (!nfs4_ff_layout_select_ds_stateid(lseg, idx, &hdr->args.stateid)) + if (vers == 4 && + !nfs4_ff_layout_select_ds_stateid(lseg, idx, &hdr->args.stateid)) goto out_failed; /* -- GitLab From 16906e5ad4cc0439d03dc294c0251f4c875ccef2 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sun, 1 Jul 2018 13:57:24 -0700 Subject: [PATCH 2185/2269] staging: speakup: Replace strncpy with memcpy commit fd29edc7232bc19f969e8f463138afc5472b3d5f upstream. gcc 8.1.0 generates the following warnings. drivers/staging/speakup/kobjects.c: In function 'punc_store': drivers/staging/speakup/kobjects.c:522:2: warning: 'strncpy' output truncated before terminating nul copying as many bytes from a string as its length drivers/staging/speakup/kobjects.c:504:6: note: length computed here drivers/staging/speakup/kobjects.c: In function 'synth_store': drivers/staging/speakup/kobjects.c:391:2: warning: 'strncpy' output truncated before terminating nul copying as many bytes from a string as its length drivers/staging/speakup/kobjects.c:388:8: note: length computed here Using strncpy() is indeed less than perfect since the length of data to be copied has already been determined with strlen(). Replace strncpy() with memcpy() to address the warning and optimize the code a little. Signed-off-by: Guenter Roeck Reviewed-by: Samuel Thibault Signed-off-by: Greg Kroah-Hartman --- drivers/staging/speakup/kobjects.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/staging/speakup/kobjects.c b/drivers/staging/speakup/kobjects.c index ca85476e3ff76..23256bbdba510 100644 --- a/drivers/staging/speakup/kobjects.c +++ b/drivers/staging/speakup/kobjects.c @@ -387,7 +387,7 @@ static ssize_t synth_store(struct kobject *kobj, struct kobj_attribute *attr, len = strlen(buf); if (len < 2 || len > 9) return -EINVAL; - strncpy(new_synth_name, buf, len); + memcpy(new_synth_name, buf, len); if (new_synth_name[len - 1] == '\n') len--; new_synth_name[len] = '\0'; @@ -518,7 +518,7 @@ static ssize_t punc_store(struct kobject *kobj, struct kobj_attribute *attr, return -EINVAL; } - strncpy(punc_buf, buf, x); + memcpy(punc_buf, buf, x); while (x && punc_buf[x - 1] == '\n') x--; -- GitLab From 62711dc6898b3186a8a3501e4a3829add8624b3f Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Sun, 9 Dec 2018 17:04:19 +0900 Subject: [PATCH 2186/2269] ALSA: fireface: fix reference to wrong register for clock configuration commit fa9c98e4b975bb3192ed6af09d9fa282ed3cd8a0 upstream. In an initial commit, 'SYNC_STATUS' register is referred to get clock configuration, however this is wrong, according to my local note at hand for reverse-engineering about packet dump. It should be 'CLOCK_CONFIG' register. Actually, ff400_dump_clock_config() is correctly programmed. This commit fixes the bug. Cc: # v4.12+ Fixes: 76fdb3a9e13a ('ALSA: fireface: add support for Fireface 400') Signed-off-by: Takashi Sakamoto Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/firewire/fireface/ff-protocol-ff400.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/firewire/fireface/ff-protocol-ff400.c b/sound/firewire/fireface/ff-protocol-ff400.c index 9f5036442ab96..b47954a6b8aba 100644 --- a/sound/firewire/fireface/ff-protocol-ff400.c +++ b/sound/firewire/fireface/ff-protocol-ff400.c @@ -30,7 +30,7 @@ static int ff400_get_clock(struct snd_ff *ff, unsigned int *rate, int err; err = snd_fw_transaction(ff->unit, TCODE_READ_QUADLET_REQUEST, - FF400_SYNC_STATUS, ®, sizeof(reg), 0); + FF400_CLOCK_CONFIG, ®, sizeof(reg), 0); if (err < 0) return err; data = le32_to_cpu(reg); -- GitLab From d655a1a6684e0d6cc2429caeb4c5dd0fed4efb79 Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Fri, 7 Dec 2018 15:14:59 +0800 Subject: [PATCH 2187/2269] ALSA: hda/realtek - Fixed headphone issue for ALC700 commit bde1a7459623a66c2abec4d0a841e4b06cc88d9a upstream. If it plugged headphone or headset into the jack, then do the reboot, it will have a chance to cause headphone no sound. It just need to run the headphone mode procedure after boot time. The issue will be fixed. It also suitable for ALC234 ALC274 and ALC294. Signed-off-by: Kailang Yang Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index f6136f041a810..31c91e0a815e1 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6965,6 +6965,37 @@ static void alc269_fill_coef(struct hda_codec *codec) alc_update_coef_idx(codec, 0x4, 0, 1<<11); } +static void alc294_hp_init(struct hda_codec *codec) +{ + struct alc_spec *spec = codec->spec; + hda_nid_t hp_pin = spec->gen.autocfg.hp_pins[0]; + int i, val; + + if (!hp_pin) + return; + + snd_hda_codec_write(codec, hp_pin, 0, + AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE); + + msleep(100); + + snd_hda_codec_write(codec, hp_pin, 0, + AC_VERB_SET_PIN_WIDGET_CONTROL, 0x0); + + alc_update_coef_idx(codec, 0x6f, 0x000f, 0);/* Set HP depop to manual mode */ + alc_update_coefex_idx(codec, 0x58, 0x00, 0x8000, 0x8000); /* HP depop procedure start */ + + /* Wait for depop procedure finish */ + val = alc_read_coefex_idx(codec, 0x58, 0x01); + for (i = 0; i < 20 && val & 0x0080; i++) { + msleep(50); + val = alc_read_coefex_idx(codec, 0x58, 0x01); + } + /* Set HP depop to auto mode */ + alc_update_coef_idx(codec, 0x6f, 0x000f, 0x000b); + msleep(50); +} + /* */ static int patch_alc269(struct hda_codec *codec) @@ -7101,6 +7132,7 @@ static int patch_alc269(struct hda_codec *codec) spec->codec_variant = ALC269_TYPE_ALC294; spec->gen.mixer_nid = 0; /* ALC2x4 does not have any loopback mixer path */ alc_update_coef_idx(codec, 0x6b, 0x0018, (1<<4) | (1<<3)); /* UAJ MIC Vref control by verb */ + alc294_hp_init(codec); break; case 0x10ec0300: spec->codec_variant = ALC269_TYPE_ALC300; @@ -7112,6 +7144,7 @@ static int patch_alc269(struct hda_codec *codec) spec->codec_variant = ALC269_TYPE_ALC700; spec->gen.mixer_nid = 0; /* ALC700 does not have any loopback mixer path */ alc_update_coef_idx(codec, 0x4a, 1 << 15, 0); /* Combo jack auto trigger control */ + alc294_hp_init(codec); break; } -- GitLab From 01a166012cd16b9ce147744c514f228df6cf495e Mon Sep 17 00:00:00 2001 From: Piotr Stankiewicz Date: Wed, 28 Nov 2018 06:44:46 -0800 Subject: [PATCH 2188/2269] IB/hfi1: Fix an out-of-bounds access in get_hw_stats commit 36d842194a57f1b21fbc6a6875f2fa2f9a7f8679 upstream. When running with KASAN, the following trace is produced: [ 62.535888] ================================================================== [ 62.544930] BUG: KASAN: slab-out-of-bounds in gut_hw_stats+0x122/0x230 [hfi1] [ 62.553856] Write of size 8 at addr ffff88080e8d6330 by task kworker/0:1/14 [ 62.565333] CPU: 0 PID: 14 Comm: kworker/0:1 Not tainted 4.19.0-test-build-kasan+ #8 [ 62.575087] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0019.101220160604 10/12/2016 [ 62.587951] Workqueue: events work_for_cpu_fn [ 62.594050] Call Trace: [ 62.598023] dump_stack+0xc6/0x14c [ 62.603089] ? dump_stack_print_info.cold.1+0x2f/0x2f [ 62.610041] ? kmsg_dump_rewind_nolock+0x59/0x59 [ 62.616615] ? get_hw_stats+0x122/0x230 [hfi1] [ 62.622985] print_address_description+0x6c/0x23c [ 62.629744] ? get_hw_stats+0x122/0x230 [hfi1] [ 62.636108] kasan_report.cold.6+0x241/0x308 [ 62.642365] get_hw_stats+0x122/0x230 [hfi1] [ 62.648703] ? hfi1_alloc_rn+0x40/0x40 [hfi1] [ 62.655088] ? __kmalloc+0x110/0x240 [ 62.660695] ? hfi1_alloc_rn+0x40/0x40 [hfi1] [ 62.667142] setup_hw_stats+0xd8/0x430 [ib_core] [ 62.673972] ? show_hfi+0x50/0x50 [hfi1] [ 62.680026] ib_device_register_sysfs+0x165/0x180 [ib_core] [ 62.687995] ib_register_device+0x5a2/0xa10 [ib_core] [ 62.695340] ? show_hfi+0x50/0x50 [hfi1] [ 62.701421] ? ib_unregister_device+0x2e0/0x2e0 [ib_core] [ 62.709222] ? __vmalloc_node_range+0x2d0/0x380 [ 62.716131] ? rvt_driver_mr_init+0x11f/0x2d0 [rdmavt] [ 62.723735] ? vmalloc_node+0x5c/0x70 [ 62.729697] ? rvt_driver_mr_init+0x11f/0x2d0 [rdmavt] [ 62.737347] ? rvt_driver_mr_init+0x1f5/0x2d0 [rdmavt] [ 62.744998] ? __rvt_alloc_mr+0x110/0x110 [rdmavt] [ 62.752315] ? rvt_rc_error+0x140/0x140 [rdmavt] [ 62.759434] ? rvt_vma_open+0x30/0x30 [rdmavt] [ 62.766364] ? mutex_unlock+0x1d/0x40 [ 62.772445] ? kmem_cache_create_usercopy+0x15d/0x230 [ 62.780115] rvt_register_device+0x1f6/0x360 [rdmavt] [ 62.787823] ? rvt_get_port_immutable+0x180/0x180 [rdmavt] [ 62.796058] ? __get_txreq+0x400/0x400 [hfi1] [ 62.802969] ? memcpy+0x34/0x50 [ 62.808611] hfi1_register_ib_device+0xde6/0xeb0 [hfi1] [ 62.816601] ? hfi1_get_npkeys+0x10/0x10 [hfi1] [ 62.823760] ? hfi1_init+0x89f/0x9a0 [hfi1] [ 62.830469] ? hfi1_setup_eagerbufs+0xad0/0xad0 [hfi1] [ 62.838204] ? pcie_capability_clear_and_set_word+0xcd/0xe0 [ 62.846429] ? pcie_capability_read_word+0xd0/0xd0 [ 62.853791] ? hfi1_pcie_init+0x187/0x4b0 [hfi1] [ 62.860958] init_one+0x67f/0xae0 [hfi1] [ 62.867301] ? hfi1_init+0x9a0/0x9a0 [hfi1] [ 62.873876] ? wait_woken+0x130/0x130 [ 62.879860] ? read_word_at_a_time+0xe/0x20 [ 62.886329] ? strscpy+0x14b/0x280 [ 62.891998] ? hfi1_init+0x9a0/0x9a0 [hfi1] [ 62.898405] local_pci_probe+0x70/0xd0 [ 62.904295] ? pci_device_shutdown+0x90/0x90 [ 62.910833] work_for_cpu_fn+0x29/0x40 [ 62.916750] process_one_work+0x584/0x960 [ 62.922974] ? rcu_work_rcufn+0x40/0x40 [ 62.928991] ? __schedule+0x396/0xdc0 [ 62.934806] ? __sched_text_start+0x8/0x8 [ 62.941020] ? pick_next_task_fair+0x68b/0xc60 [ 62.947674] ? run_rebalance_domains+0x260/0x260 [ 62.954471] ? __list_add_valid+0x29/0xa0 [ 62.960607] ? move_linked_works+0x1c7/0x230 [ 62.967077] ? trace_event_raw_event_workqueue_execute_start+0x140/0x140 [ 62.976248] ? mutex_lock+0xa6/0x100 [ 62.982029] ? __mutex_lock_slowpath+0x10/0x10 [ 62.988795] ? __switch_to+0x37a/0x710 [ 62.994731] worker_thread+0x62e/0x9d0 [ 63.000602] ? max_active_store+0xf0/0xf0 [ 63.006828] ? __switch_to_asm+0x40/0x70 [ 63.012932] ? __switch_to_asm+0x34/0x70 [ 63.019013] ? __switch_to_asm+0x40/0x70 [ 63.025042] ? __switch_to_asm+0x34/0x70 [ 63.031030] ? __switch_to_asm+0x40/0x70 [ 63.037006] ? __schedule+0x396/0xdc0 [ 63.042660] ? kmem_cache_alloc_trace+0xf3/0x1f0 [ 63.049323] ? kthread+0x59/0x1d0 [ 63.054594] ? ret_from_fork+0x35/0x40 [ 63.060257] ? __sched_text_start+0x8/0x8 [ 63.066212] ? schedule+0xcf/0x250 [ 63.071529] ? __wake_up_common+0x110/0x350 [ 63.077794] ? __schedule+0xdc0/0xdc0 [ 63.083348] ? wait_woken+0x130/0x130 [ 63.088963] ? finish_task_switch+0x1f1/0x520 [ 63.095258] ? kasan_unpoison_shadow+0x30/0x40 [ 63.101792] ? __init_waitqueue_head+0xa0/0xd0 [ 63.108183] ? replenish_dl_entity.cold.60+0x18/0x18 [ 63.115151] ? _raw_spin_lock_irqsave+0x25/0x50 [ 63.121754] ? max_active_store+0xf0/0xf0 [ 63.127753] kthread+0x1ae/0x1d0 [ 63.132894] ? kthread_bind+0x30/0x30 [ 63.138422] ret_from_fork+0x35/0x40 [ 63.146973] Allocated by task 14: [ 63.152077] kasan_kmalloc+0xbf/0xe0 [ 63.157471] __kmalloc+0x110/0x240 [ 63.162804] init_cntrs+0x34d/0xdf0 [hfi1] [ 63.168883] hfi1_init_dd+0x29a3/0x2f90 [hfi1] [ 63.175244] init_one+0x551/0xae0 [hfi1] [ 63.181065] local_pci_probe+0x70/0xd0 [ 63.186759] work_for_cpu_fn+0x29/0x40 [ 63.192310] process_one_work+0x584/0x960 [ 63.198163] worker_thread+0x62e/0x9d0 [ 63.203843] kthread+0x1ae/0x1d0 [ 63.208874] ret_from_fork+0x35/0x40 [ 63.217203] Freed by task 1: [ 63.221844] __kasan_slab_free+0x12e/0x180 [ 63.227844] kfree+0x92/0x1a0 [ 63.232570] single_release+0x3a/0x60 [ 63.238024] __fput+0x1d9/0x480 [ 63.242911] task_work_run+0x139/0x190 [ 63.248440] exit_to_usermode_loop+0x191/0x1a0 [ 63.254814] do_syscall_64+0x301/0x330 [ 63.260283] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 63.270199] The buggy address belongs to the object at ffff88080e8d5500 which belongs to the cache kmalloc-4096 of size 4096 [ 63.287247] The buggy address is located 3632 bytes inside of 4096-byte region [ffff88080e8d5500, ffff88080e8d6500) [ 63.303564] The buggy address belongs to the page: [ 63.310447] page:ffffea00203a3400 count:1 mapcount:0 mapping:ffff88081380e840 index:0x0 compound_mapcount: 0 [ 63.323102] flags: 0x2fffff80008100(slab|head) [ 63.329775] raw: 002fffff80008100 0000000000000000 0000000100000001 ffff88081380e840 [ 63.340175] raw: 0000000000000000 0000000000070007 00000001ffffffff 0000000000000000 [ 63.350564] page dumped because: kasan: bad access detected [ 63.361974] Memory state around the buggy address: [ 63.369137] ffff88080e8d6200: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 63.379082] ffff88080e8d6280: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 63.389032] >ffff88080e8d6300: 00 00 00 00 00 00 fc fc fc fc fc fc fc fc fc fc [ 63.398944] ^ [ 63.406141] ffff88080e8d6380: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 63.416109] ffff88080e8d6400: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 63.426099] ================================================================== The trace happens because get_hw_stats() assumes there is room in the memory allocated in init_cntrs() to accommodate the driver counters. Unfortunately, that routine only allocated space for the device counters. Fix by insuring the allocation has room for the additional driver counters. Cc: # v4.14+ Fixes: b7481944b06e9 ("IB/hfi1: Show statistics counters under IB stats interface") Reviewed-by: Mike Marciniczyn Reviewed-by: Mike Ruhl Signed-off-by: Piotr Stankiewicz Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/hfi1/chip.c | 3 ++- drivers/infiniband/hw/hfi1/hfi.h | 2 ++ drivers/infiniband/hw/hfi1/verbs.c | 2 +- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index f9faacce92509..db33ad985a12a 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -12449,7 +12449,8 @@ static int init_cntrs(struct hfi1_devdata *dd) } /* allocate space for the counter values */ - dd->cntrs = kcalloc(dd->ndevcntrs, sizeof(u64), GFP_KERNEL); + dd->cntrs = kcalloc(dd->ndevcntrs + num_driver_cntrs, sizeof(u64), + GFP_KERNEL); if (!dd->cntrs) goto bail; diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 13a7bcaa58e69..ee2859dcceabf 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -152,6 +152,8 @@ struct hfi1_ib_stats { extern struct hfi1_ib_stats hfi1_stats; extern const struct pci_error_handlers hfi1_pci_err_handler; +extern int num_driver_cntrs; + /* * First-cut criterion for "device is active" is * two thousand dwords combined Tx, Rx traffic per diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 63d404a6752a1..12cf0f7ca7bb3 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -1693,7 +1693,7 @@ static const char * const driver_cntr_names[] = { static DEFINE_MUTEX(cntr_names_lock); /* protects the *_cntr_names bufers */ static const char **dev_cntr_names; static const char **port_cntr_names; -static int num_driver_cntrs = ARRAY_SIZE(driver_cntr_names); +int num_driver_cntrs = ARRAY_SIZE(driver_cntr_names); static int num_dev_cntrs; static int num_port_cntrs; static int cntr_names_initialized; -- GitLab From 4465b31b75490c038777cfccfbdf2ba8e9e5286a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 6 Dec 2018 09:58:24 -0800 Subject: [PATCH 2189/2269] tcp: lack of available data can also cause TSO defer commit f9bfe4e6a9d08d405fe7b081ee9a13e649c97ecf upstream. tcp_tso_should_defer() can return true in three different cases : 1) We are cwnd-limited 2) We are rwnd-limited 3) We are application limited. Neal pointed out that my recent fix went too far, since it assumed that if we were not in 1) case, we must be rwnd-limited Fix this by properly populating the is_cwnd_limited and is_rwnd_limited booleans. After this change, we can finally move the silly check for FIN flag only for the application-limited case. The same move for EOR bit will be handled in net-next, since commit 1c09f7d073b1 ("tcp: do not try to defer skbs with eor mark (MSG_EOR)") is scheduled for linux-4.21 Tested by running 200 concurrent netperf -t TCP_RR -- -r 60000,100 and checking none of them was rwnd_limited in the chrono_stat output from "ss -ti" command. Fixes: 41727549de3e ("tcp: Do not underestimate rwnd_limited") Signed-off-by: Eric Dumazet Suggested-by: Neal Cardwell Reviewed-by: Neal Cardwell Acked-by: Soheil Hassas Yeganeh Reviewed-by: Yuchung Cheng Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_output.c | 35 ++++++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 1b31b0a1c7faf..24bad638c2ecf 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1885,7 +1885,9 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, * This algorithm is from John Heffner. */ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb, - bool *is_cwnd_limited, u32 max_segs) + bool *is_cwnd_limited, + bool *is_rwnd_limited, + u32 max_segs) { const struct inet_connection_sock *icsk = inet_csk(sk); u32 age, send_win, cong_win, limit, in_flight; @@ -1893,9 +1895,6 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb, struct sk_buff *head; int win_divisor; - if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) - goto send_now; - if (icsk->icsk_ca_state >= TCP_CA_Recovery) goto send_now; @@ -1951,10 +1950,27 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb, if (age < (tp->srtt_us >> 4)) goto send_now; - /* Ok, it looks like it is advisable to defer. */ + /* Ok, it looks like it is advisable to defer. + * Three cases are tracked : + * 1) We are cwnd-limited + * 2) We are rwnd-limited + * 3) We are application limited. + */ + if (cong_win < send_win) { + if (cong_win <= skb->len) { + *is_cwnd_limited = true; + return true; + } + } else { + if (send_win <= skb->len) { + *is_rwnd_limited = true; + return true; + } + } - if (cong_win < send_win && cong_win <= skb->len) - *is_cwnd_limited = true; + /* If this packet won't get more data, do not wait. */ + if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) + goto send_now; return true; @@ -2328,11 +2344,8 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, } else { if (!push_one && tcp_tso_should_defer(sk, skb, &is_cwnd_limited, - max_segs)) { - if (!is_cwnd_limited) - is_rwnd_limited = true; + &is_rwnd_limited, max_segs)) break; - } } limit = mss_now; -- GitLab From 3beeb2615681fe87f4e6291ade669d50c27ce59a Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 17 Dec 2018 09:28:56 +0100 Subject: [PATCH 2190/2269] Linux 4.14.89 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 3fdee40861a16..b83477be8d0c1 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 88 +SUBLEVEL = 89 EXTRAVERSION = NAME = Petit Gorille -- GitLab From c6ced91f3c12de5906a1b9c898ceaf334424a9d1 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Fri, 14 Dec 2018 09:05:20 +0000 Subject: [PATCH 2191/2269] ANDROID: sched/events: Fix out of bound memory access MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GCC 8 provides the following warning: ./include/trace/events/sched.h:736:3: warning: ‘memcpy’ forming offset [8, 16] is out of the bounds [0, 7] [-Warray-bounds] memcpy(__entry->comm, p ? p->comm : "(null)", TASK_COMM_LEN); Indeed, in the case where p==NULL, we copy TASK_COMM_LEN bytes from the memory location where "(null)" is stored, which is incorrect. Fix this by making sure to pass the right size parameter to memcpy in all cases. Bug: 120440300 Test: Compilation warning gone, no changes noticed in traces Fixes: acfe25da3551 ("ANDROID: sched/events: Introduce sched_entity load tracking trace event") Change-Id: Id93c9c0265f10c09b731daca25401696785b4b1e Suggested-by: Dietmar Eggemann Signed-off-by: Quentin Perret --- include/trace/events/sched.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index 76c6ae3d422e2..ccc96c0738bb6 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -748,7 +748,8 @@ TRACE_EVENT(sched_load_se, __entry->cpu = __trace_sched_cpu(gcfs_rq, se); __trace_sched_path(gcfs_rq, __get_dynamic_array(path), __get_dynamic_array_len(path)); - memcpy(__entry->comm, p ? p->comm : "(null)", TASK_COMM_LEN); + memcpy(__entry->comm, p ? p->comm : "(null)", + p ? TASK_COMM_LEN : sizeof("(null)")); __entry->pid = p ? p->pid : -1; __entry->load = se->avg.load_avg; __entry->util = se->avg.util_avg; -- GitLab From a6424a3d08fb2ae65d738548c943dd0515a7c926 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Wed, 5 Dec 2018 17:58:19 +0000 Subject: [PATCH 2192/2269] ANDROID: sched: Clean-up SchedTune documentation SchedTune's documentation file hasn't been updated in a while and occasionally contains stale informations in a few places. Clean-up the doc file by: - removing the references to the now defunct sched-freq; - replacing references to SCHED_LOAD_SCALE by SCHED_CAPACITY_SCALE; - removing the statement that the wake-up path is not boost-aware; - removing reference to negative boosting; - removing 'motivation' paragraphs that aren't really relevant anymore; - and making sure to fit all the text in 80 chars. No fundamental changes about the core of the explanations. Bug: 120440300 Fixes: 159c14f03977 ("ANDROID: sched: fair/tune: Add schedtune with cgroups interface") Change-Id: I8ad92a93082e2efe92bc3a7526960e50032be909 Signed-off-by: Quentin Perret --- Documentation/scheduler/sched-tune.txt | 97 ++++++++++---------------- 1 file changed, 36 insertions(+), 61 deletions(-) diff --git a/Documentation/scheduler/sched-tune.txt b/Documentation/scheduler/sched-tune.txt index 5df0ea3613115..1a103715f7bdc 100644 --- a/Documentation/scheduler/sched-tune.txt +++ b/Documentation/scheduler/sched-tune.txt @@ -30,11 +30,9 @@ Table of Contents 1. Motivation ============= -Sched-DVFS [3] was a new event-driven cpufreq governor which allows the +Schedutil [3] is a utilization-driven cpufreq governor which allows the scheduler to select the optimal DVFS operating point (OPP) for running a task -allocated to a CPU. Later, the cpufreq maintainers introduced a similar -governor, schedutil. The introduction of schedutil also enables running -workloads at the most energy efficient OPPs. +allocated to a CPU. However, sometimes it may be desired to intentionally boost the performance of a workload even if that could imply a reasonable increase in energy @@ -44,16 +42,16 @@ by it's CPU bandwidth demand. This last requirement is especially important if we consider that one of the main goals of the utilization-driven governor component is to replace all -currently available CPUFreq policies. Since sched-DVFS and schedutil are event -based, as opposed to the sampling driven governors we currently have, they are -already more responsive at selecting the optimal OPP to run tasks allocated to -a CPU. However, just tracking the actual task load demand may not be enough -from a performance standpoint. For example, it is not possible to get -behaviors similar to those provided by the "performance" and "interactive" -CPUFreq governors. +currently available CPUFreq policies. Since schedutil is event-based, as +opposed to the sampling driven governors we currently have, they are already +more responsive at selecting the optimal OPP to run tasks allocated to a CPU. +However, just tracking the actual task utilization may not be enough from a +performance standpoint. For example, it is not possible to get behaviors +similar to those provided by the "performance" and "interactive" CPUFreq +governors. This document describes an implementation of a tunable, stacked on top of the -utilization-driven governors which extends their functionality to support task +utilization-driven governor which extends its functionality to support task performance boosting. By "performance boosting" we mean the reduction of the time required to @@ -63,17 +61,6 @@ example, if we consider a simple periodic task which executes the same workload for 5[s] every 20[s] while running at a certain OPP, a boosted execution of that task must complete each of its activations in less than 5[s]. -A previous attempt [5] to introduce such a boosting feature has not been -successful mainly because of the complexity of the proposed solution. Previous -versions of the approach described in this document exposed a single simple -interface to user-space. This single tunable knob allowed the tuning of -system wide scheduler behaviours ranging from energy efficiency at one end -through to incremental performance boosting at the other end. This first -tunable affects all tasks. However, that is not useful for Android products -so in this version only a more advanced extension of the concept is provided -which uses CGroups to boost the performance of only selected tasks while using -the energy efficient default for all others. - The rest of this document introduces in more details the proposed solution which has been named SchedTune. @@ -97,25 +84,22 @@ More details are given in section 5. 2.1 Boosting ============ -The boost value is expressed as an integer in the range [-100..0..100]. +The boost value is expressed as an integer in the range [0..100]. A value of 0 (default) configures the CFS scheduler for maximum energy -efficiency. This means that sched-DVFS runs the tasks at the minimum OPP +efficiency. This means that schedutil runs the tasks at the minimum OPP required to satisfy their workload demand. A value of 100 configures scheduler for maximum performance, which translates to the selection of the maximum OPP on that CPU. -A value of -100 configures scheduler for minimum performance, which translates -to the selection of the minimum OPP on that CPU. - -The range between -100, 0 and 100 can be set to satisfy other scenarios suitably. -For example to satisfy interactive response or depending on other system events +The range between 0 and 100 can be set to satisfy other scenarios suitably. For +example to satisfy interactive response or depending on other system events (battery level etc). The overall design of the SchedTune module is built on top of "Per-Entity Load -Tracking" (PELT) signals and sched-DVFS by introducing a bias on the Operating -Performance Point (OPP) selection. +Tracking" (PELT) signals and schedutil by introducing a bias on the OPP +selection. Each time a task is allocated on a CPU, cpufreq is given the opportunity to tune the operating frequency of that CPU to better match the workload demand. The @@ -141,9 +125,6 @@ can be placed according to the energy-aware wakeup strategy. A value of 1 signals to the CFS scheduler that tasks in this group should be placed to minimise wakeup latency. -The value is combined with the boost value - task placement will not be -boost aware however CPU OPP selection is still boost aware. - Android platforms typically use this flag for application tasks which the user is currently interacting with. @@ -169,21 +150,16 @@ to a signal to get its inflated value: margin := boosting_strategy(sched_cfs_boost, signal) boosted_signal := signal + margin -Different boosting strategies were identified and analyzed before selecting the -one found to be most effective. - -Signal Proportional Compensation (SPC) --------------------------------------- - -In this boosting strategy the sched_cfs_boost value is used to compute a -margin which is proportional to the complement of the original signal. +The boosting strategy currently implemented in SchedTune is called 'Signal +Proportional Compensation' (SPC). With SPC, the sched_cfs_boost value is used to +compute a margin which is proportional to the complement of the original signal. When a signal has a maximum possible value, its complement is defined as the delta from the actual value and its possible maximum. -Since the tunable implementation uses signals which have SCHED_LOAD_SCALE as +Since the tunable implementation uses signals which have SCHED_CAPACITY_SCALE as the maximum possible value, the margin becomes: - margin := sched_cfs_boost * (SCHED_LOAD_SCALE - signal) + margin := sched_cfs_boost * (SCHED_CAPACITY_SCALE - signal) Using this boosting strategy: - a 100% sched_cfs_boost means that the signal is scaled to the maximum value @@ -209,7 +185,7 @@ following figure where: ^ - | SCHED_LOAD_SCALE + | SCHED_CAPACITY_SCALE +-----------------------------------------------------------------+ |pppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppp | @@ -250,7 +226,7 @@ one, depending on the value of sched_cfs_boost. This is a clean an non invasive modification of the existing existing code paths. The signal representing a CPU's utilization is boosted according to the -previously described SPC boosting strategy. To sched-DVFS, this allows a CPU +previously described SPC boosting strategy. To schedutil, this allows a CPU (ie CFS run-queue) to appear more used then it actually is. Thus, with the sched_cfs_boost enabled we have the following main functions to @@ -262,10 +238,9 @@ get the current utilization of a CPU: The new boosted_cpu_util() is similar to the first but returns a boosted utilization signal which is a function of the sched_cfs_boost value. -This function is used in the CFS scheduler code paths where sched-DVFS needs to -decide the OPP to run a CPU at. -For example, this allows selecting the highest OPP for a CPU which has -the boost value set to 100%. +This function is used in the CFS scheduler code paths where schedutil needs to +decide the OPP to run a CPU at. For example, this allows selecting the highest +OPP for a CPU which has the boost value set to 100%. 5. Per task group boosting @@ -305,16 +280,16 @@ main characteristics: This number is defined at compile time and by default configured to 16. This is a design decision motivated by two main reasons: - a) In a real system we do not expect utilization scenarios with more then few - boost groups. For example, a reasonable collection of groups could be - just "background", "interactive" and "performance". + a) In a real system we do not expect utilization scenarios with more than + a few boost groups. For example, a reasonable collection of groups could + be just "background", "interactive" and "performance". b) It simplifies the implementation considerably, especially for the code which has to compute the per CPU boosting once there are multiple RUNNABLE tasks with different boost values. -Such a simple design should allow servicing the main utilization scenarios identified -so far. It provides a simple interface which can be used to manage the -power-performance of all tasks or only selected tasks. +Such a simple design should allow servicing the main utilization scenarios +identified so far. It provides a simple interface which can be used to manage +the power-performance of all tasks or only selected tasks. Moreover, this interface can be easily integrated by user-space run-times (e.g. Android, ChromeOS) to implement a QoS solution for task boosting based on tasks classification, which has been a long standing requirement. @@ -397,9 +372,9 @@ How are multiple groups of tasks with different boost values managed? --------------------------------------------------------------------- The current SchedTune implementation keeps track of the boosted RUNNABLE tasks -on a CPU. The CPU utilization seen by the scheduler-driven cpufreq governors -(and used to select an appropriate OPP) is boosted with a value which is the -maximum of the boost values of the currently RUNNABLE tasks in its RQ. +on a CPU. The CPU utilization seen by schedutil (and used to select an +appropriate OPP) is boosted with a value which is the maximum of the boost +values of the currently RUNNABLE tasks in its RQ. This allows cpufreq to boost a CPU only while there are boosted tasks ready to run and switch back to the energy efficient mode as soon as the last boosted @@ -410,4 +385,4 @@ task is dequeued. ============= [1] http://lwn.net/Articles/552889 [2] http://lkml.org/lkml/2012/5/18/91 -[3] http://lkml.org/lkml/2015/6/26/620 +[3] https://lkml.org/lkml/2016/3/29/1041 -- GitLab From 505d1d4021d07216ad55406ff599cb1d6038cc78 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 5 Dec 2018 20:53:00 -0800 Subject: [PATCH 2193/2269] FROMGIT: dm crypt: log the encryption algorithm implementation Log the encryption algorithm's driver name when a dm-crypt target is created. This will help people determine whether the expected implementation is being used. In some cases we've seen people do benchmarks and reject using encryption for performance reasons, when in fact they used a much slower implementation than was possible on the hardware. It can make an enormous difference; e.g., AES-XTS on ARM can be over 10x faster with the crypto extensions than without. It can also be useful to know if an implementation using an external crypto accelerator is being used instead of a software implementation. Example message: [ 29.307629] device-mapper: crypt: xts(aes) using implementation "xts-aes-ce" We've already found the similar message in fs/crypto/keyinfo.c to be very useful. Signed-off-by: Eric Biggers Signed-off-by: Mike Snitzer (cherry picked from commit 32bbca787931a371256ac51219a6e9c9dbc86960 https://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm.git for-next) Test: Verified that the message is logged when dm-crypt is used. Change-Id: I20857a5350266431924e177dea50418aeb95009d Signed-off-by: Eric Biggers --- drivers/md/dm-crypt.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index c60d29d09687d..47b59682d9f6d 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -1893,6 +1893,13 @@ static int crypt_alloc_tfms_skcipher(struct crypt_config *cc, char *ciphermode) } } + /* + * dm-crypt performance can vary greatly depending on which crypto + * algorithm implementation is used. Help people debug performance + * problems by logging the ->cra_driver_name. + */ + DMINFO("%s using implementation \"%s\"", ciphermode, + crypto_skcipher_alg(any_tfm(cc))->base.cra_driver_name); return 0; } @@ -1911,6 +1918,8 @@ static int crypt_alloc_tfms_aead(struct crypt_config *cc, char *ciphermode) return err; } + DMINFO("%s using implementation \"%s\"", ciphermode, + crypto_aead_alg(any_tfm_aead(cc))->base.cra_driver_name); return 0; } -- GitLab From 3c9f3e0b62ad4ceca4da628708597f4cb0afe54a Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 5 Dec 2018 20:54:13 -0800 Subject: [PATCH 2194/2269] FROMGIT: dm verity: log the hash algorithm implementation Log the hash algorithm's driver name when a dm-verity target is created. This will help people determine whether the expected implementation is being used. It can make an enormous difference; e.g., SHA-256 on ARM can be 8x faster with the crypto extensions than without. It can also be useful to know if an implementation using an external crypto accelerator is being used instead of a software implementation. Example message: [ 35.281945] device-mapper: verity: sha256 using implementation "sha256-ce" We've already found the similar message in fs/crypto/keyinfo.c to be very useful. Signed-off-by: Eric Biggers Signed-off-by: Mike Snitzer (cherry picked from commit 8ec60e93af574f0426dba17208084f1038b1418e https://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm.git for-next) Test: Verified that the message is logged when dm-verity is used. Change-Id: I78a803eb8e0db8eda7b4790cd338d5d5555486e5 Signed-off-by: Eric Biggers --- drivers/md/dm-verity-target.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c index be1e888811229..75c577c437baa 100644 --- a/drivers/md/dm-verity-target.c +++ b/drivers/md/dm-verity-target.c @@ -1076,6 +1076,15 @@ int verity_ctr(struct dm_target *ti, unsigned argc, char **argv) v->tfm = NULL; goto bad; } + + /* + * dm-verity performance can vary greatly depending on which hash + * algorithm implementation is used. Help people debug performance + * problems by logging the ->cra_driver_name. + */ + DMINFO("%s using implementation \"%s\"", v->alg_name, + crypto_hash_alg_common(v->tfm)->base.cra_driver_name); + v->digest_size = crypto_ahash_digestsize(v->tfm); if ((1 << v->hash_dev_block_bits) < v->digest_size * 2) { ti->error = "Digest size too big"; -- GitLab From 2eaa69bd84cf14406f10514b70cb034d4d011ba6 Mon Sep 17 00:00:00 2001 From: Joel Fernandes Date: Wed, 19 Dec 2018 09:54:40 -0800 Subject: [PATCH 2195/2269] BACKPORT: mm: Add an F_SEAL_FUTURE_WRITE seal to memfd Android uses ashmem for sharing memory regions. We are looking forward to migrating all usecases of ashmem to memfd so that we can possibly remove the ashmem driver in the future from staging while also benefiting from using memfd and contributing to it. Note staging drivers are also not ABI and generally can be removed at anytime. One of the main usecases Android has is the ability to create a region and mmap it as writeable, then add protection against making any "future" writes while keeping the existing already mmap'ed writeable-region active. This allows us to implement a usecase where receivers of the shared memory buffer can get a read-only view, while the sender continues to write to the buffer. See CursorWindow documentation in Android for more details: https://developer.android.com/reference/android/database/CursorWindow This usecase cannot be implemented with the existing F_SEAL_WRITE seal. To support the usecase, this patch adds a new F_SEAL_FUTURE_WRITE seal which prevents any future mmap and write syscalls from succeeding while keeping the existing mmap active. Verified with test program at: https://lore.kernel.org/patchwork/patch/1008117/ Backport link: https://lore.kernel.org/patchwork/patch/1014892/ Bug: 113362644 Change-Id: If7424db3b64372932d455f0219cd9df613fec1d4 Signed-off-by: Joel Fernandes (Google) Signed-off-by: Joel Fernandes --- include/uapi/linux/fcntl.h | 1 + mm/shmem.c | 29 +++++++++++++++++++++++++---- tools/include/uapi/linux/fcntl.h | 1 + 3 files changed, 27 insertions(+), 4 deletions(-) diff --git a/include/uapi/linux/fcntl.h b/include/uapi/linux/fcntl.h index 6448cdd9a350d..586bf89f6b873 100644 --- a/include/uapi/linux/fcntl.h +++ b/include/uapi/linux/fcntl.h @@ -41,6 +41,7 @@ #define F_SEAL_SHRINK 0x0002 /* prevent file from shrinking */ #define F_SEAL_GROW 0x0004 /* prevent file from growing */ #define F_SEAL_WRITE 0x0008 /* prevent writes */ +#define F_SEAL_FUTURE_WRITE 0x0010 /* prevent writes */ /* (1U << 31) is reserved for signed error codes */ /* diff --git a/mm/shmem.c b/mm/shmem.c index e0951e1f7d824..0d5144565aa57 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2149,6 +2149,25 @@ out_nomem: static int shmem_mmap(struct file *file, struct vm_area_struct *vma) { + struct shmem_inode_info *info = SHMEM_I(file_inode(file)); + + + if (info->seals & F_SEAL_FUTURE_WRITE) { + /* + * New PROT_WRITE and MAP_SHARED mmaps are not allowed when + * "future write" seal active. + */ + if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_WRITE)) + return -EPERM; + + /* + * Since the F_SEAL_FUTURE_WRITE seals allow for a MAP_SHARED + * read-only mapping, take care to not allow mprotect to revert + * protections. + */ + vma->vm_flags &= ~(VM_MAYWRITE); + } + file_accessed(file); vma->vm_ops = &shmem_vm_ops; if (IS_ENABLED(CONFIG_TRANSPARENT_HUGE_PAGECACHE) && @@ -2402,8 +2421,9 @@ shmem_write_begin(struct file *file, struct address_space *mapping, pgoff_t index = pos >> PAGE_SHIFT; /* i_mutex is held by caller */ - if (unlikely(info->seals & (F_SEAL_WRITE | F_SEAL_GROW))) { - if (info->seals & F_SEAL_WRITE) + if (unlikely(info->seals & (F_SEAL_GROW | + F_SEAL_WRITE | F_SEAL_FUTURE_WRITE))) { + if (info->seals & (F_SEAL_WRITE | F_SEAL_FUTURE_WRITE)) return -EPERM; if ((info->seals & F_SEAL_GROW) && pos + len > inode->i_size) return -EPERM; @@ -2760,7 +2780,8 @@ continue_resched: #define F_ALL_SEALS (F_SEAL_SEAL | \ F_SEAL_SHRINK | \ F_SEAL_GROW | \ - F_SEAL_WRITE) + F_SEAL_WRITE | \ + F_SEAL_FUTURE_WRITE) int shmem_add_seals(struct file *file, unsigned int seals) { @@ -2887,7 +2908,7 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset, DECLARE_WAIT_QUEUE_HEAD_ONSTACK(shmem_falloc_waitq); /* protected by i_mutex */ - if (info->seals & F_SEAL_WRITE) { + if (info->seals & (F_SEAL_WRITE | F_SEAL_FUTURE_WRITE)) { error = -EPERM; goto out; } diff --git a/tools/include/uapi/linux/fcntl.h b/tools/include/uapi/linux/fcntl.h index 6448cdd9a350d..44cdb652e0da9 100644 --- a/tools/include/uapi/linux/fcntl.h +++ b/tools/include/uapi/linux/fcntl.h @@ -41,6 +41,7 @@ #define F_SEAL_SHRINK 0x0002 /* prevent file from shrinking */ #define F_SEAL_GROW 0x0004 /* prevent file from growing */ #define F_SEAL_WRITE 0x0008 /* prevent writes */ +#define F_SEAL_FUTURE_WRITE 0x0010 /* prevent future writes */ /* (1U << 31) is reserved for signed error codes */ /* -- GitLab From e9c7ae0eb4cb933bac041a6ffe53694c2666b433 Mon Sep 17 00:00:00 2001 From: Alistair Strachan Date: Wed, 19 Dec 2018 15:37:41 -0800 Subject: [PATCH 2196/2269] ANDROID: Revert fs/squashfs back to linux-4.14.y Bug: 29521202 Test: local build test only Change-Id: I117e6e733f0ece85fd4d90604efc4d59fa545464 Signed-off-by: Alistair Strachan --- fs/squashfs/Kconfig | 28 ++ fs/squashfs/Makefile | 3 +- fs/squashfs/block.c | 550 +++++++++-------------------------- fs/squashfs/cache.c | 73 +++-- fs/squashfs/decompressor.c | 55 ++-- fs/squashfs/file.c | 139 +++------ fs/squashfs/file_cache.c | 38 +++ fs/squashfs/file_direct.c | 240 ++++++++------- fs/squashfs/lz4_wrapper.c | 30 +- fs/squashfs/lzo_wrapper.c | 40 ++- fs/squashfs/page_actor.c | 175 ++++------- fs/squashfs/page_actor.h | 84 +++--- fs/squashfs/squashfs.h | 9 +- fs/squashfs/squashfs_fs_sb.h | 2 +- fs/squashfs/super.c | 7 - fs/squashfs/xz_wrapper.c | 15 +- fs/squashfs/zlib_wrapper.c | 14 +- 17 files changed, 624 insertions(+), 878 deletions(-) create mode 100644 fs/squashfs/file_cache.c diff --git a/fs/squashfs/Kconfig b/fs/squashfs/Kconfig index 6c81bf6200671..1adb3346b9d6a 100644 --- a/fs/squashfs/Kconfig +++ b/fs/squashfs/Kconfig @@ -25,6 +25,34 @@ config SQUASHFS If unsure, say N. +choice + prompt "File decompression options" + depends on SQUASHFS + help + Squashfs now supports two options for decompressing file + data. Traditionally Squashfs has decompressed into an + intermediate buffer and then memcopied it into the page cache. + Squashfs now supports the ability to decompress directly into + the page cache. + + If unsure, select "Decompress file data into an intermediate buffer" + +config SQUASHFS_FILE_CACHE + bool "Decompress file data into an intermediate buffer" + help + Decompress file data into an intermediate buffer and then + memcopy it into the page cache. + +config SQUASHFS_FILE_DIRECT + bool "Decompress files directly into the page cache" + help + Directly decompress file data into the page cache. + Doing so can significantly improve performance because + it eliminates a memcpy and it also removes the lock contention + on the single buffer. + +endchoice + choice prompt "Decompressor parallelisation options" depends on SQUASHFS diff --git a/fs/squashfs/Makefile b/fs/squashfs/Makefile index b9a2990a6c19f..7bd9b8b856d0b 100644 --- a/fs/squashfs/Makefile +++ b/fs/squashfs/Makefile @@ -6,7 +6,8 @@ obj-$(CONFIG_SQUASHFS) += squashfs.o squashfs-y += block.o cache.o dir.o export.o file.o fragment.o id.o inode.o squashfs-y += namei.o super.o symlink.o decompressor.o -squashfs-y += file_direct.o page_actor.o +squashfs-$(CONFIG_SQUASHFS_FILE_CACHE) += file_cache.o +squashfs-$(CONFIG_SQUASHFS_FILE_DIRECT) += file_direct.o page_actor.o squashfs-$(CONFIG_SQUASHFS_DECOMP_SINGLE) += decompressor_single.o squashfs-$(CONFIG_SQUASHFS_DECOMP_MULTI) += decompressor_multi.o squashfs-$(CONFIG_SQUASHFS_DECOMP_MULTI_PERCPU) += decompressor_multi_percpu.o diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c index dfc2e7641c309..f098b9f1c3963 100644 --- a/fs/squashfs/block.c +++ b/fs/squashfs/block.c @@ -28,13 +28,10 @@ #include #include -#include #include #include -#include #include #include -#include #include "squashfs_fs.h" #include "squashfs_fs_sb.h" @@ -42,382 +39,45 @@ #include "decompressor.h" #include "page_actor.h" -static struct workqueue_struct *squashfs_read_wq; - -struct squashfs_read_request { - struct super_block *sb; - u64 index; - int length; - int compressed; - int offset; - u64 read_end; - struct squashfs_page_actor *output; - enum { - SQUASHFS_COPY, - SQUASHFS_DECOMPRESS, - SQUASHFS_METADATA, - } data_processing; - bool synchronous; - - /* - * If the read is synchronous, it is possible to retrieve information - * about the request by setting these pointers. - */ - int *res; - int *bytes_read; - int *bytes_uncompressed; - - int nr_buffers; - struct buffer_head **bh; - struct work_struct offload; -}; - -struct squashfs_bio_request { - struct buffer_head **bh; - int nr_buffers; -}; - -static int squashfs_bio_submit(struct squashfs_read_request *req); - -int squashfs_init_read_wq(void) -{ - squashfs_read_wq = create_workqueue("SquashFS read wq"); - return !!squashfs_read_wq; -} - -void squashfs_destroy_read_wq(void) -{ - flush_workqueue(squashfs_read_wq); - destroy_workqueue(squashfs_read_wq); -} - -static void free_read_request(struct squashfs_read_request *req, int error) -{ - if (!req->synchronous) - squashfs_page_actor_free(req->output, error); - if (req->res) - *(req->res) = error; - kfree(req->bh); - kfree(req); -} - -static void squashfs_process_blocks(struct squashfs_read_request *req) -{ - int error = 0; - int bytes, i, length; - struct squashfs_sb_info *msblk = req->sb->s_fs_info; - struct squashfs_page_actor *actor = req->output; - struct buffer_head **bh = req->bh; - int nr_buffers = req->nr_buffers; - - for (i = 0; i < nr_buffers; ++i) { - if (!bh[i]) - continue; - wait_on_buffer(bh[i]); - if (!buffer_uptodate(bh[i])) - error = -EIO; - } - if (error) - goto cleanup; - - if (req->data_processing == SQUASHFS_METADATA) { - /* Extract the length of the metadata block */ - if (req->offset != msblk->devblksize - 1) { - length = le16_to_cpup((__le16 *) - (bh[0]->b_data + req->offset)); - } else { - length = (unsigned char)bh[0]->b_data[req->offset]; - length |= (unsigned char)bh[1]->b_data[0] << 8; - } - req->compressed = SQUASHFS_COMPRESSED(length); - req->data_processing = req->compressed ? SQUASHFS_DECOMPRESS - : SQUASHFS_COPY; - length = SQUASHFS_COMPRESSED_SIZE(length); - if (req->index + length + 2 > req->read_end) { - for (i = 0; i < nr_buffers; ++i) - put_bh(bh[i]); - kfree(bh); - req->length = length; - req->index += 2; - squashfs_bio_submit(req); - return; - } - req->length = length; - req->offset = (req->offset + 2) % PAGE_SIZE; - if (req->offset < 2) { - put_bh(bh[0]); - ++bh; - --nr_buffers; - } - } - if (req->bytes_read) - *(req->bytes_read) = req->length; - - if (req->data_processing == SQUASHFS_COPY) { - squashfs_bh_to_actor(bh, nr_buffers, req->output, req->offset, - req->length, msblk->devblksize); - } else if (req->data_processing == SQUASHFS_DECOMPRESS) { - req->length = squashfs_decompress(msblk, bh, nr_buffers, - req->offset, req->length, actor); - if (req->length < 0) { - error = -EIO; - goto cleanup; - } - } - - /* Last page may have trailing bytes not filled */ - bytes = req->length % PAGE_SIZE; - if (bytes && actor->page[actor->pages - 1]) - zero_user_segment(actor->page[actor->pages - 1], bytes, - PAGE_SIZE); - -cleanup: - if (req->bytes_uncompressed) - *(req->bytes_uncompressed) = req->length; - if (error) { - for (i = 0; i < nr_buffers; ++i) - if (bh[i]) - put_bh(bh[i]); - } - free_read_request(req, error); -} - -static void read_wq_handler(struct work_struct *work) -{ - squashfs_process_blocks(container_of(work, - struct squashfs_read_request, offload)); -} - -static void squashfs_bio_end_io(struct bio *bio) -{ - int i; - blk_status_t error = bio->bi_status; - struct squashfs_bio_request *bio_req = bio->bi_private; - - bio_put(bio); - - for (i = 0; i < bio_req->nr_buffers; ++i) { - if (!bio_req->bh[i]) - continue; - if (!error) - set_buffer_uptodate(bio_req->bh[i]); - else - clear_buffer_uptodate(bio_req->bh[i]); - unlock_buffer(bio_req->bh[i]); - } - kfree(bio_req); -} - -static int bh_is_optional(struct squashfs_read_request *req, int idx) -{ - int start_idx, end_idx; - struct squashfs_sb_info *msblk = req->sb->s_fs_info; - - start_idx = (idx * msblk->devblksize - req->offset) >> PAGE_SHIFT; - end_idx = ((idx + 1) * msblk->devblksize - req->offset + 1) >> PAGE_SHIFT; - if (start_idx >= req->output->pages) - return 1; - if (start_idx < 0) - start_idx = end_idx; - if (end_idx >= req->output->pages) - end_idx = start_idx; - return !req->output->page[start_idx] && !req->output->page[end_idx]; -} - -static int actor_getblks(struct squashfs_read_request *req, u64 block) -{ - int i; - - req->bh = kmalloc_array(req->nr_buffers, sizeof(*(req->bh)), GFP_NOIO); - if (!req->bh) - return -ENOMEM; - - for (i = 0; i < req->nr_buffers; ++i) { - /* - * When dealing with an uncompressed block, the actor may - * contains NULL pages. There's no need to read the buffers - * associated with these pages. - */ - if (!req->compressed && bh_is_optional(req, i)) { - req->bh[i] = NULL; - continue; - } - req->bh[i] = sb_getblk(req->sb, block + i); - if (!req->bh[i]) { - while (--i) { - if (req->bh[i]) - put_bh(req->bh[i]); - } - return -1; - } - } - return 0; -} - -static int squashfs_bio_submit(struct squashfs_read_request *req) +/* + * Read the metadata block length, this is stored in the first two + * bytes of the metadata block. + */ +static struct buffer_head *get_block_length(struct super_block *sb, + u64 *cur_index, int *offset, int *length) { - struct bio *bio = NULL; + struct squashfs_sb_info *msblk = sb->s_fs_info; struct buffer_head *bh; - struct squashfs_bio_request *bio_req = NULL; - int b = 0, prev_block = 0; - struct squashfs_sb_info *msblk = req->sb->s_fs_info; - - u64 read_start = round_down(req->index, msblk->devblksize); - u64 read_end = round_up(req->index + req->length, msblk->devblksize); - sector_t block = read_start >> msblk->devblksize_log2; - sector_t block_end = read_end >> msblk->devblksize_log2; - int offset = read_start - round_down(req->index, PAGE_SIZE); - int nr_buffers = block_end - block; - int blksz = msblk->devblksize; - int bio_max_pages = nr_buffers > BIO_MAX_PAGES ? BIO_MAX_PAGES - : nr_buffers; - /* Setup the request */ - req->read_end = read_end; - req->offset = req->index - read_start; - req->nr_buffers = nr_buffers; - if (actor_getblks(req, block) < 0) - goto getblk_failed; - - /* Create and submit the BIOs */ - for (b = 0; b < nr_buffers; ++b, offset += blksz) { - bh = req->bh[b]; - if (!bh || !trylock_buffer(bh)) - continue; - if (buffer_uptodate(bh)) { - unlock_buffer(bh); - continue; + bh = sb_bread(sb, *cur_index); + if (bh == NULL) + return NULL; + + if (msblk->devblksize - *offset == 1) { + *length = (unsigned char) bh->b_data[*offset]; + put_bh(bh); + bh = sb_bread(sb, ++(*cur_index)); + if (bh == NULL) + return NULL; + *length |= (unsigned char) bh->b_data[0] << 8; + *offset = 1; + } else { + *length = (unsigned char) bh->b_data[*offset] | + (unsigned char) bh->b_data[*offset + 1] << 8; + *offset += 2; + + if (*offset == msblk->devblksize) { + put_bh(bh); + bh = sb_bread(sb, ++(*cur_index)); + if (bh == NULL) + return NULL; + *offset = 0; } - offset %= PAGE_SIZE; - - /* Append the buffer to the current BIO if it is contiguous */ - if (bio && bio_req && prev_block + 1 == b) { - if (bio_add_page(bio, bh->b_page, blksz, offset)) { - bio_req->nr_buffers += 1; - prev_block = b; - continue; - } - } - - /* Otherwise, submit the current BIO and create a new one */ - if (bio) - submit_bio(bio); - bio_req = kcalloc(1, sizeof(struct squashfs_bio_request), - GFP_NOIO); - if (!bio_req) - goto req_alloc_failed; - bio_req->bh = &req->bh[b]; - bio = bio_alloc(GFP_NOIO, bio_max_pages); - if (!bio) - goto bio_alloc_failed; - bio_set_dev(bio, req->sb->s_bdev); - bio->bi_iter.bi_sector = (block + b) - << (msblk->devblksize_log2 - 9); - bio_set_op_attrs(bio, REQ_OP_READ, 0); - bio->bi_private = bio_req; - bio->bi_end_io = squashfs_bio_end_io; - - bio_add_page(bio, bh->b_page, blksz, offset); - bio_req->nr_buffers += 1; - prev_block = b; } - if (bio) - submit_bio(bio); - if (req->synchronous) - squashfs_process_blocks(req); - else { - INIT_WORK(&req->offload, read_wq_handler); - schedule_work(&req->offload); - } - return 0; - -bio_alloc_failed: - kfree(bio_req); -req_alloc_failed: - unlock_buffer(bh); - while (--nr_buffers >= b) - if (req->bh[nr_buffers]) - put_bh(req->bh[nr_buffers]); - while (--b >= 0) - if (req->bh[b]) - wait_on_buffer(req->bh[b]); -getblk_failed: - free_read_request(req, -ENOMEM); - return -ENOMEM; + return bh; } -static int read_metadata_block(struct squashfs_read_request *req, - u64 *next_index) -{ - int ret, error, bytes_read = 0, bytes_uncompressed = 0; - struct squashfs_sb_info *msblk = req->sb->s_fs_info; - - if (req->index + 2 > msblk->bytes_used) { - free_read_request(req, -EINVAL); - return -EINVAL; - } - req->length = 2; - - /* Do not read beyond the end of the device */ - if (req->index + req->length > msblk->bytes_used) - req->length = msblk->bytes_used - req->index; - req->data_processing = SQUASHFS_METADATA; - - /* - * Reading metadata is always synchronous because we don't know the - * length in advance and the function is expected to update - * 'next_index' and return the length. - */ - req->synchronous = true; - req->res = &error; - req->bytes_read = &bytes_read; - req->bytes_uncompressed = &bytes_uncompressed; - - TRACE("Metadata block @ 0x%llx, %scompressed size %d, src size %d\n", - req->index, req->compressed ? "" : "un", bytes_read, - req->output->length); - - ret = squashfs_bio_submit(req); - if (ret) - return ret; - if (error) - return error; - if (next_index) - *next_index += 2 + bytes_read; - return bytes_uncompressed; -} - -static int read_data_block(struct squashfs_read_request *req, int length, - u64 *next_index, bool synchronous) -{ - int ret, error = 0, bytes_uncompressed = 0, bytes_read = 0; - - req->compressed = SQUASHFS_COMPRESSED_BLOCK(length); - req->length = length = SQUASHFS_COMPRESSED_SIZE_BLOCK(length); - req->data_processing = req->compressed ? SQUASHFS_DECOMPRESS - : SQUASHFS_COPY; - - req->synchronous = synchronous; - if (synchronous) { - req->res = &error; - req->bytes_read = &bytes_read; - req->bytes_uncompressed = &bytes_uncompressed; - } - - TRACE("Data block @ 0x%llx, %scompressed size %d, src size %d\n", - req->index, req->compressed ? "" : "un", req->length, - req->output->length); - - ret = squashfs_bio_submit(req); - if (ret) - return ret; - if (synchronous) - ret = error ? error : bytes_uncompressed; - if (next_index) - *next_index += length; - return ret; -} /* * Read and decompress a metadata block or datablock. Length is non-zero @@ -428,50 +88,130 @@ static int read_data_block(struct squashfs_read_request *req, int length, * generated a larger block - this does occasionally happen with compression * algorithms). */ -static int __squashfs_read_data(struct super_block *sb, u64 index, int length, - u64 *next_index, struct squashfs_page_actor *output, bool sync) +int squashfs_read_data(struct super_block *sb, u64 index, int length, + u64 *next_index, struct squashfs_page_actor *output) { - struct squashfs_read_request *req; + struct squashfs_sb_info *msblk = sb->s_fs_info; + struct buffer_head **bh; + int offset = index & ((1 << msblk->devblksize_log2) - 1); + u64 cur_index = index >> msblk->devblksize_log2; + int bytes, compressed, b = 0, k = 0, avail, i; - req = kcalloc(1, sizeof(struct squashfs_read_request), GFP_KERNEL); - if (!req) { - if (!sync) - squashfs_page_actor_free(output, -ENOMEM); + bh = kcalloc(((output->length + msblk->devblksize - 1) + >> msblk->devblksize_log2) + 1, sizeof(*bh), GFP_KERNEL); + if (bh == NULL) return -ENOMEM; - } - req->sb = sb; - req->index = index; - req->output = output; + if (length) { + /* + * Datablock. + */ + bytes = -offset; + compressed = SQUASHFS_COMPRESSED_BLOCK(length); + length = SQUASHFS_COMPRESSED_SIZE_BLOCK(length); + if (next_index) + *next_index = index + length; + + TRACE("Block @ 0x%llx, %scompressed size %d, src size %d\n", + index, compressed ? "" : "un", length, output->length); + + if (length < 0 || length > output->length || + (index + length) > msblk->bytes_used) + goto read_failure; + + for (b = 0; bytes < length; b++, cur_index++) { + bh[b] = sb_getblk(sb, cur_index); + if (bh[b] == NULL) + goto block_release; + bytes += msblk->devblksize; + } + ll_rw_block(REQ_OP_READ, 0, b, bh); + } else { + /* + * Metadata block. + */ + if ((index + 2) > msblk->bytes_used) + goto read_failure; + + bh[0] = get_block_length(sb, &cur_index, &offset, &length); + if (bh[0] == NULL) + goto read_failure; + b = 1; + + bytes = msblk->devblksize - offset; + compressed = SQUASHFS_COMPRESSED(length); + length = SQUASHFS_COMPRESSED_SIZE(length); + if (next_index) + *next_index = index + length + 2; - if (next_index) - *next_index = index; + TRACE("Block @ 0x%llx, %scompressed size %d\n", index, + compressed ? "" : "un", length); - if (length) - length = read_data_block(req, length, next_index, sync); - else - length = read_metadata_block(req, next_index); + if (length < 0 || length > output->length || + (index + length) > msblk->bytes_used) + goto block_release; - if (length < 0) { - ERROR("squashfs_read_data failed to read block 0x%llx\n", - (unsigned long long)index); - return -EIO; + for (; bytes < length; b++) { + bh[b] = sb_getblk(sb, ++cur_index); + if (bh[b] == NULL) + goto block_release; + bytes += msblk->devblksize; + } + ll_rw_block(REQ_OP_READ, 0, b - 1, bh + 1); } - return length; -} + for (i = 0; i < b; i++) { + wait_on_buffer(bh[i]); + if (!buffer_uptodate(bh[i])) + goto block_release; + } -int squashfs_read_data(struct super_block *sb, u64 index, int length, - u64 *next_index, struct squashfs_page_actor *output) -{ - return __squashfs_read_data(sb, index, length, next_index, output, - true); -} + if (compressed) { + if (!msblk->stream) + goto read_failure; + length = squashfs_decompress(msblk, bh, b, offset, length, + output); + if (length < 0) + goto read_failure; + } else { + /* + * Block is uncompressed. + */ + int in, pg_offset = 0; + void *data = squashfs_first_page(output); + + for (bytes = length; k < b; k++) { + in = min(bytes, msblk->devblksize - offset); + bytes -= in; + while (in) { + if (pg_offset == PAGE_SIZE) { + data = squashfs_next_page(output); + pg_offset = 0; + } + avail = min_t(int, in, PAGE_SIZE - + pg_offset); + memcpy(data + pg_offset, bh[k]->b_data + offset, + avail); + in -= avail; + pg_offset += avail; + offset += avail; + } + offset = 0; + put_bh(bh[k]); + } + squashfs_finish_page(output); + } -int squashfs_read_data_async(struct super_block *sb, u64 index, int length, - u64 *next_index, struct squashfs_page_actor *output) -{ + kfree(bh); + return length; + +block_release: + for (; k < b; k++) + put_bh(bh[k]); - return __squashfs_read_data(sb, index, length, next_index, output, - false); +read_failure: + ERROR("squashfs_read_data failed to read block 0x%llx\n", + (unsigned long long) index); + kfree(bh); + return -EIO; } diff --git a/fs/squashfs/cache.c b/fs/squashfs/cache.c index 9d9d4aa9a7ba1..0839efa720b3b 100644 --- a/fs/squashfs/cache.c +++ b/fs/squashfs/cache.c @@ -209,14 +209,17 @@ void squashfs_cache_put(struct squashfs_cache_entry *entry) */ void squashfs_cache_delete(struct squashfs_cache *cache) { - int i; + int i, j; if (cache == NULL) return; for (i = 0; i < cache->entries; i++) { - if (cache->entry[i].page) - free_page_array(cache->entry[i].page, cache->pages); + if (cache->entry[i].data) { + for (j = 0; j < cache->pages; j++) + kfree(cache->entry[i].data[j]); + kfree(cache->entry[i].data); + } kfree(cache->entry[i].actor); } @@ -233,7 +236,7 @@ void squashfs_cache_delete(struct squashfs_cache *cache) struct squashfs_cache *squashfs_cache_init(char *name, int entries, int block_size) { - int i; + int i, j; struct squashfs_cache *cache = kzalloc(sizeof(*cache), GFP_KERNEL); if (cache == NULL) { @@ -265,13 +268,22 @@ struct squashfs_cache *squashfs_cache_init(char *name, int entries, init_waitqueue_head(&cache->entry[i].wait_queue); entry->cache = cache; entry->block = SQUASHFS_INVALID_BLK; - entry->page = alloc_page_array(cache->pages, GFP_KERNEL); - if (!entry->page) { + entry->data = kcalloc(cache->pages, sizeof(void *), GFP_KERNEL); + if (entry->data == NULL) { ERROR("Failed to allocate %s cache entry\n", name); goto cleanup; } - entry->actor = squashfs_page_actor_init(entry->page, - cache->pages, 0, NULL); + + for (j = 0; j < cache->pages; j++) { + entry->data[j] = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (entry->data[j] == NULL) { + ERROR("Failed to allocate %s buffer\n", name); + goto cleanup; + } + } + + entry->actor = squashfs_page_actor_init(entry->data, + cache->pages, 0); if (entry->actor == NULL) { ERROR("Failed to allocate %s cache entry\n", name); goto cleanup; @@ -302,20 +314,18 @@ int squashfs_copy_data(void *buffer, struct squashfs_cache_entry *entry, return min(length, entry->length - offset); while (offset < entry->length) { - void *buff = kmap_atomic(entry->page[offset / PAGE_SIZE]) - + (offset % PAGE_SIZE); + void *buff = entry->data[offset / PAGE_SIZE] + + (offset % PAGE_SIZE); int bytes = min_t(int, entry->length - offset, PAGE_SIZE - (offset % PAGE_SIZE)); if (bytes >= remaining) { memcpy(buffer, buff, remaining); - kunmap_atomic(buff); remaining = 0; break; } memcpy(buffer, buff, bytes); - kunmap_atomic(buff); buffer += bytes; remaining -= bytes; offset += bytes; @@ -409,38 +419,43 @@ struct squashfs_cache_entry *squashfs_get_datablock(struct super_block *sb, void *squashfs_read_table(struct super_block *sb, u64 block, int length) { int pages = (length + PAGE_SIZE - 1) >> PAGE_SHIFT; - struct page **page; - void *buff; - int res; + int i, res; + void *table, *buffer, **data; struct squashfs_page_actor *actor; - page = alloc_page_array(pages, GFP_KERNEL); - if (!page) + table = buffer = kmalloc(length, GFP_KERNEL); + if (table == NULL) return ERR_PTR(-ENOMEM); - actor = squashfs_page_actor_init(page, pages, length, NULL); - if (actor == NULL) { + data = kcalloc(pages, sizeof(void *), GFP_KERNEL); + if (data == NULL) { res = -ENOMEM; goto failed; } + actor = squashfs_page_actor_init(data, pages, length); + if (actor == NULL) { + res = -ENOMEM; + goto failed2; + } + + for (i = 0; i < pages; i++, buffer += PAGE_SIZE) + data[i] = buffer; + res = squashfs_read_data(sb, block, length | SQUASHFS_COMPRESSED_BIT_BLOCK, NULL, actor); + kfree(data); + kfree(actor); + if (res < 0) - goto failed2; + goto failed; - buff = kmalloc(length, GFP_KERNEL); - if (!buff) - goto failed2; - squashfs_actor_to_buf(actor, buff, length); - squashfs_page_actor_free(actor, 0); - free_page_array(page, pages); - return buff; + return table; failed2: - squashfs_page_actor_free(actor, 0); + kfree(data); failed: - free_page_array(page, pages); + kfree(table); return ERR_PTR(res); } diff --git a/fs/squashfs/decompressor.c b/fs/squashfs/decompressor.c index 86831aaedb9e8..836639810ea01 100644 --- a/fs/squashfs/decompressor.c +++ b/fs/squashfs/decompressor.c @@ -24,8 +24,7 @@ #include #include #include -#include -#include +#include #include "squashfs_fs.h" #include "squashfs_fs_sb.h" @@ -102,44 +101,40 @@ const struct squashfs_decompressor *squashfs_lookup_decompressor(int id) static void *get_comp_opts(struct super_block *sb, unsigned short flags) { struct squashfs_sb_info *msblk = sb->s_fs_info; - void *comp_opts, *buffer = NULL; - struct page *page; + void *buffer = NULL, *comp_opts; struct squashfs_page_actor *actor = NULL; int length = 0; - if (!SQUASHFS_COMP_OPTS(flags)) - return squashfs_comp_opts(msblk, buffer, length); - /* * Read decompressor specific options from file system if present */ - - page = alloc_page(GFP_KERNEL); - if (!page) - return ERR_PTR(-ENOMEM); - - actor = squashfs_page_actor_init(&page, 1, 0, NULL); - if (actor == NULL) { - comp_opts = ERR_PTR(-ENOMEM); - goto actor_error; - } - - length = squashfs_read_data(sb, - sizeof(struct squashfs_super_block), 0, NULL, actor); - - if (length < 0) { - comp_opts = ERR_PTR(length); - goto read_error; + if (SQUASHFS_COMP_OPTS(flags)) { + buffer = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (buffer == NULL) { + comp_opts = ERR_PTR(-ENOMEM); + goto out; + } + + actor = squashfs_page_actor_init(&buffer, 1, 0); + if (actor == NULL) { + comp_opts = ERR_PTR(-ENOMEM); + goto out; + } + + length = squashfs_read_data(sb, + sizeof(struct squashfs_super_block), 0, NULL, actor); + + if (length < 0) { + comp_opts = ERR_PTR(length); + goto out; + } } - buffer = kmap_atomic(page); comp_opts = squashfs_comp_opts(msblk, buffer, length); - kunmap_atomic(buffer); -read_error: - squashfs_page_actor_free(actor, 0); -actor_error: - __free_page(page); +out: + kfree(actor); + kfree(buffer); return comp_opts; } diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c index 9236e7d45eb32..f1c1430ae7213 100644 --- a/fs/squashfs/file.c +++ b/fs/squashfs/file.c @@ -47,7 +47,6 @@ #include #include #include -#include #include "squashfs_fs.h" #include "squashfs_fs_sb.h" @@ -452,129 +451,63 @@ static int squashfs_readpage_fragment(struct page *page, int expected) return res; } -static int squashfs_readpages_fragment(struct page *page, - struct list_head *readahead_pages, struct address_space *mapping, - int expected) -{ - if (!page) { - page = lru_to_page(readahead_pages); - list_del(&page->lru); - if (add_to_page_cache_lru(page, mapping, page->index, - mapping_gfp_constraint(mapping, GFP_KERNEL))) { - put_page(page); - return 0; - } - } - return squashfs_readpage_fragment(page, expected); -} - static int squashfs_readpage_sparse(struct page *page, int expected) { squashfs_copy_cache(page, NULL, expected, 0); return 0; } -static int squashfs_readpages_sparse(struct page *page, - struct list_head *readahead_pages, struct address_space *mapping, - int expected) -{ - if (!page) { - page = lru_to_page(readahead_pages); - list_del(&page->lru); - if (add_to_page_cache_lru(page, mapping, page->index, - mapping_gfp_constraint(mapping, GFP_KERNEL))) { - put_page(page); - return 0; - } - } - return squashfs_readpage_sparse(page, expected); -} - -static int __squashfs_readpages(struct file *file, struct page *page, - struct list_head *readahead_pages, unsigned int nr_pages, - struct address_space *mapping) +static int squashfs_readpage(struct file *file, struct page *page) { - struct inode *inode = mapping->host; + struct inode *inode = page->mapping->host; struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; + int index = page->index >> (msblk->block_log - PAGE_SHIFT); int file_end = i_size_read(inode) >> msblk->block_log; - int res; - - do { - struct page *cur_page = page ? page - : lru_to_page(readahead_pages); - int page_index = cur_page->index; - int index = page_index >> (msblk->block_log - PAGE_SHIFT); - int expected = index == file_end ? + int expected = index == file_end ? (i_size_read(inode) & (msblk->block_size - 1)) : msblk->block_size; - - if (page_index >= ((i_size_read(inode) + PAGE_SIZE - 1) >> - PAGE_SHIFT)) - return 1; - - if (index < file_end || squashfs_i(inode)->fragment_block == - SQUASHFS_INVALID_BLK) { - u64 block = 0; - int bsize = read_blocklist(inode, index, &block); - - if (bsize < 0) - return -1; - - if (bsize == 0) { - res = squashfs_readpages_sparse(page, - readahead_pages, mapping, expected); - } else { - res = squashfs_readpages_block(page, - readahead_pages, &nr_pages, mapping, - page_index, block, bsize); - } - } else { - res = squashfs_readpages_fragment(page, - readahead_pages, mapping, expected); - } - if (res) - return 0; - page = NULL; - } while (readahead_pages && !list_empty(readahead_pages)); - - return 0; -} - -static int squashfs_readpage(struct file *file, struct page *page) -{ - int ret; + int res; + void *pageaddr; TRACE("Entered squashfs_readpage, page index %lx, start block %llx\n", - page->index, squashfs_i(page->mapping->host)->start); + page->index, squashfs_i(inode)->start); + + if (page->index >= ((i_size_read(inode) + PAGE_SIZE - 1) >> + PAGE_SHIFT)) + goto out; - get_page(page); + if (index < file_end || squashfs_i(inode)->fragment_block == + SQUASHFS_INVALID_BLK) { + u64 block = 0; + int bsize = read_blocklist(inode, index, &block); + if (bsize < 0) + goto error_out; - ret = __squashfs_readpages(file, page, NULL, 1, page->mapping); - if (ret) { - flush_dcache_page(page); - if (ret < 0) - SetPageError(page); + if (bsize == 0) + res = squashfs_readpage_sparse(page, expected); else - SetPageUptodate(page); - zero_user_segment(page, 0, PAGE_SIZE); - unlock_page(page); - put_page(page); - } + res = squashfs_readpage_block(page, block, bsize, expected); + } else + res = squashfs_readpage_fragment(page, expected); - return 0; -} + if (!res) + return 0; + +error_out: + SetPageError(page); +out: + pageaddr = kmap_atomic(page); + memset(pageaddr, 0, PAGE_SIZE); + kunmap_atomic(pageaddr); + flush_dcache_page(page); + if (!PageError(page)) + SetPageUptodate(page); + unlock_page(page); -static int squashfs_readpages(struct file *file, struct address_space *mapping, - struct list_head *pages, unsigned int nr_pages) -{ - TRACE("Entered squashfs_readpages, %u pages, first page index %lx\n", - nr_pages, lru_to_page(pages)->index); - __squashfs_readpages(file, NULL, pages, nr_pages, mapping); return 0; } const struct address_space_operations squashfs_aops = { - .readpage = squashfs_readpage, - .readpages = squashfs_readpages, + .readpage = squashfs_readpage }; diff --git a/fs/squashfs/file_cache.c b/fs/squashfs/file_cache.c new file mode 100644 index 0000000000000..a9ba8d96776ac --- /dev/null +++ b/fs/squashfs/file_cache.c @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2013 + * Phillip Lougher + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "squashfs_fs.h" +#include "squashfs_fs_sb.h" +#include "squashfs_fs_i.h" +#include "squashfs.h" + +/* Read separately compressed datablock and memcopy into page cache */ +int squashfs_readpage_block(struct page *page, u64 block, int bsize, int expected) +{ + struct inode *i = page->mapping->host; + struct squashfs_cache_entry *buffer = squashfs_get_datablock(i->i_sb, + block, bsize); + int res = buffer->error; + + if (res) + ERROR("Unable to read page, block %llx, size %x\n", block, + bsize); + else + squashfs_copy_cache(page, buffer, expected, 0); + + squashfs_cache_put(buffer); + return res; +} diff --git a/fs/squashfs/file_direct.c b/fs/squashfs/file_direct.c index dc87f77ce11e4..80db1b86a27c6 100644 --- a/fs/squashfs/file_direct.c +++ b/fs/squashfs/file_direct.c @@ -13,7 +13,6 @@ #include #include #include -#include #include "squashfs_fs.h" #include "squashfs_fs_sb.h" @@ -21,136 +20,157 @@ #include "squashfs.h" #include "page_actor.h" -static void release_actor_pages(struct page **page, int pages, int error) -{ - int i; +static int squashfs_read_cache(struct page *target_page, u64 block, int bsize, + int pages, struct page **page, int bytes); - for (i = 0; i < pages; i++) { - if (!page[i]) - continue; - flush_dcache_page(page[i]); - if (!error) - SetPageUptodate(page[i]); - else { - SetPageError(page[i]); - zero_user_segment(page[i], 0, PAGE_SIZE); - } - unlock_page(page[i]); - put_page(page[i]); - } - kfree(page); -} +/* Read separately compressed datablock directly into page cache */ +int squashfs_readpage_block(struct page *target_page, u64 block, int bsize, + int expected) -/* - * Create a "page actor" which will kmap and kunmap the - * page cache pages appropriately within the decompressor - */ -static struct squashfs_page_actor *actor_from_page_cache( - unsigned int actor_pages, struct page *target_page, - struct list_head *rpages, unsigned int *nr_pages, int start_index, - struct address_space *mapping) { + struct inode *inode = target_page->mapping->host; + struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; + + int file_end = (i_size_read(inode) - 1) >> PAGE_SHIFT; + int mask = (1 << (msblk->block_log - PAGE_SHIFT)) - 1; + int start_index = target_page->index & ~mask; + int end_index = start_index | mask; + int i, n, pages, missing_pages, bytes, res = -ENOMEM; struct page **page; struct squashfs_page_actor *actor; - int i, n; - gfp_t gfp = mapping_gfp_constraint(mapping, GFP_KERNEL); - - page = kmalloc_array(actor_pages, sizeof(void *), GFP_KERNEL); - if (!page) - return NULL; - - for (i = 0, n = start_index; i < actor_pages; i++, n++) { - if (target_page == NULL && rpages && !list_empty(rpages)) { - struct page *cur_page = lru_to_page(rpages); - - if (cur_page->index < start_index + actor_pages) { - list_del(&cur_page->lru); - --(*nr_pages); - if (add_to_page_cache_lru(cur_page, mapping, - cur_page->index, gfp)) - put_page(cur_page); - else - target_page = cur_page; - } else - rpages = NULL; - } + void *pageaddr; + + if (end_index > file_end) + end_index = file_end; + + pages = end_index - start_index + 1; + + page = kmalloc_array(pages, sizeof(void *), GFP_KERNEL); + if (page == NULL) + return res; + + /* + * Create a "page actor" which will kmap and kunmap the + * page cache pages appropriately within the decompressor + */ + actor = squashfs_page_actor_init_special(page, pages, 0); + if (actor == NULL) + goto out; - if (target_page && target_page->index == n) { - page[i] = target_page; - target_page = NULL; - } else { - page[i] = grab_cache_page_nowait(mapping, n); - if (page[i] == NULL) - continue; + /* Try to grab all the pages covered by the Squashfs block */ + for (missing_pages = 0, i = 0, n = start_index; i < pages; i++, n++) { + page[i] = (n == target_page->index) ? target_page : + grab_cache_page_nowait(target_page->mapping, n); + + if (page[i] == NULL) { + missing_pages++; + continue; } if (PageUptodate(page[i])) { unlock_page(page[i]); put_page(page[i]); page[i] = NULL; + missing_pages++; } } - actor = squashfs_page_actor_init(page, actor_pages, 0, - release_actor_pages); - if (!actor) { - release_actor_pages(page, actor_pages, -ENOMEM); - kfree(page); - return NULL; + if (missing_pages) { + /* + * Couldn't get one or more pages, this page has either + * been VM reclaimed, but others are still in the page cache + * and uptodate, or we're racing with another thread in + * squashfs_readpage also trying to grab them. Fall back to + * using an intermediate buffer. + */ + res = squashfs_read_cache(target_page, block, bsize, pages, + page, expected); + if (res < 0) + goto mark_errored; + + goto out; } - return actor; -} -int squashfs_readpages_block(struct page *target_page, - struct list_head *readahead_pages, - unsigned int *nr_pages, - struct address_space *mapping, - int page_index, u64 block, int bsize) + /* Decompress directly into the page cache buffers */ + res = squashfs_read_data(inode->i_sb, block, bsize, NULL, actor); + if (res < 0) + goto mark_errored; -{ - struct squashfs_page_actor *actor; - struct inode *inode = mapping->host; - struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; - int start_index, end_index, file_end, actor_pages, res; - int mask = (1 << (msblk->block_log - PAGE_SHIFT)) - 1; + if (res != expected) { + res = -EIO; + goto mark_errored; + } - /* - * If readpage() is called on an uncompressed datablock, we can just - * read the pages instead of fetching the whole block. - * This greatly improves the performance when a process keep doing - * random reads because we only fetch the necessary data. - * The readahead algorithm will take care of doing speculative reads - * if necessary. - * We can't read more than 1 block even if readahead provides use more - * pages because we don't know yet if the next block is compressed or - * not. + /* Last page may have trailing bytes not filled */ + bytes = res % PAGE_SIZE; + if (bytes) { + pageaddr = kmap_atomic(page[pages - 1]); + memset(pageaddr + bytes, 0, PAGE_SIZE - bytes); + kunmap_atomic(pageaddr); + } + + /* Mark pages as uptodate, unlock and release */ + for (i = 0; i < pages; i++) { + flush_dcache_page(page[i]); + SetPageUptodate(page[i]); + unlock_page(page[i]); + if (page[i] != target_page) + put_page(page[i]); + } + + kfree(actor); + kfree(page); + + return 0; + +mark_errored: + /* Decompression failed, mark pages as errored. Target_page is + * dealt with by the caller */ - if (bsize && !SQUASHFS_COMPRESSED_BLOCK(bsize)) { - u64 block_end = block + msblk->block_size; - - block += (page_index & mask) * PAGE_SIZE; - actor_pages = (block_end - block) / PAGE_SIZE; - if (*nr_pages < actor_pages) - actor_pages = *nr_pages; - start_index = page_index; - bsize = min_t(int, bsize, (PAGE_SIZE * actor_pages) - | SQUASHFS_COMPRESSED_BIT_BLOCK); - } else { - file_end = (i_size_read(inode) - 1) >> PAGE_SHIFT; - start_index = page_index & ~mask; - end_index = start_index | mask; - if (end_index > file_end) - end_index = file_end; - actor_pages = end_index - start_index + 1; + for (i = 0; i < pages; i++) { + if (page[i] == NULL || page[i] == target_page) + continue; + flush_dcache_page(page[i]); + SetPageError(page[i]); + unlock_page(page[i]); + put_page(page[i]); + } + +out: + kfree(actor); + kfree(page); + return res; +} + + +static int squashfs_read_cache(struct page *target_page, u64 block, int bsize, + int pages, struct page **page, int bytes) +{ + struct inode *i = target_page->mapping->host; + struct squashfs_cache_entry *buffer = squashfs_get_datablock(i->i_sb, + block, bsize); + int res = buffer->error, n, offset = 0; + + if (res) { + ERROR("Unable to read page, block %llx, size %x\n", block, + bsize); + goto out; } - actor = actor_from_page_cache(actor_pages, target_page, - readahead_pages, nr_pages, start_index, - mapping); - if (!actor) - return -ENOMEM; + for (n = 0; n < pages && bytes > 0; n++, + bytes -= PAGE_SIZE, offset += PAGE_SIZE) { + int avail = min_t(int, bytes, PAGE_SIZE); + + if (page[n] == NULL) + continue; + + squashfs_fill_page(page[n], buffer, offset, avail); + unlock_page(page[n]); + if (page[n] != target_page) + put_page(page[n]); + } - res = squashfs_read_data_async(inode->i_sb, block, bsize, NULL, - actor); - return res < 0 ? res : 0; +out: + squashfs_cache_put(buffer); + return res; } diff --git a/fs/squashfs/lz4_wrapper.c b/fs/squashfs/lz4_wrapper.c index 5d8512534d935..95da653665485 100644 --- a/fs/squashfs/lz4_wrapper.c +++ b/fs/squashfs/lz4_wrapper.c @@ -94,17 +94,39 @@ static int lz4_uncompress(struct squashfs_sb_info *msblk, void *strm, struct buffer_head **bh, int b, int offset, int length, struct squashfs_page_actor *output) { - int res; struct squashfs_lz4 *stream = strm; + void *buff = stream->input, *data; + int avail, i, bytes = length, res; + + for (i = 0; i < b; i++) { + avail = min(bytes, msblk->devblksize - offset); + memcpy(buff, bh[i]->b_data + offset, avail); + buff += avail; + bytes -= avail; + offset = 0; + put_bh(bh[i]); + } - squashfs_bh_to_buf(bh, b, stream->input, offset, length, - msblk->devblksize); res = LZ4_decompress_safe(stream->input, stream->output, length, output->length); if (res < 0) return -EIO; - squashfs_buf_to_actor(stream->output, output, res); + + bytes = res; + data = squashfs_first_page(output); + buff = stream->output; + while (data) { + if (bytes <= PAGE_SIZE) { + memcpy(data, buff, bytes); + break; + } + memcpy(data, buff, PAGE_SIZE); + buff += PAGE_SIZE; + bytes -= PAGE_SIZE; + data = squashfs_next_page(output); + } + squashfs_finish_page(output); return res; } diff --git a/fs/squashfs/lzo_wrapper.c b/fs/squashfs/lzo_wrapper.c index 2c844d53a59e0..934c17e965908 100644 --- a/fs/squashfs/lzo_wrapper.c +++ b/fs/squashfs/lzo_wrapper.c @@ -79,19 +79,45 @@ static int lzo_uncompress(struct squashfs_sb_info *msblk, void *strm, struct buffer_head **bh, int b, int offset, int length, struct squashfs_page_actor *output) { - int res; - size_t out_len = output->length; struct squashfs_lzo *stream = strm; + void *buff = stream->input, *data; + int avail, i, bytes = length, res; + size_t out_len = output->length; + + for (i = 0; i < b; i++) { + avail = min(bytes, msblk->devblksize - offset); + memcpy(buff, bh[i]->b_data + offset, avail); + buff += avail; + bytes -= avail; + offset = 0; + put_bh(bh[i]); + } - squashfs_bh_to_buf(bh, b, stream->input, offset, length, - msblk->devblksize); res = lzo1x_decompress_safe(stream->input, (size_t)length, stream->output, &out_len); if (res != LZO_E_OK) - return -EIO; - squashfs_buf_to_actor(stream->output, output, out_len); + goto failed; - return out_len; + res = bytes = (int)out_len; + data = squashfs_first_page(output); + buff = stream->output; + while (data) { + if (bytes <= PAGE_SIZE) { + memcpy(data, buff, bytes); + break; + } else { + memcpy(data, buff, PAGE_SIZE); + buff += PAGE_SIZE; + bytes -= PAGE_SIZE; + data = squashfs_next_page(output); + } + } + squashfs_finish_page(output); + + return res; + +failed: + return -EIO; } const struct squashfs_decompressor squashfs_lzo_comp_ops = { diff --git a/fs/squashfs/page_actor.c b/fs/squashfs/page_actor.c index e348f5647fbdc..9b7b1b6a78926 100644 --- a/fs/squashfs/page_actor.c +++ b/fs/squashfs/page_actor.c @@ -9,145 +9,92 @@ #include #include #include -#include #include "page_actor.h" -struct squashfs_page_actor *squashfs_page_actor_init(struct page **page, - int pages, int length, void (*release_pages)(struct page **, int, int)) -{ - struct squashfs_page_actor *actor = kmalloc(sizeof(*actor), GFP_KERNEL); - - if (actor == NULL) - return NULL; +/* + * This file contains implementations of page_actor for decompressing into + * an intermediate buffer, and for decompressing directly into the + * page cache. + * + * Calling code should avoid sleeping between calls to squashfs_first_page() + * and squashfs_finish_page(). + */ - actor->length = length ? : pages * PAGE_SIZE; - actor->page = page; - actor->pages = pages; - actor->next_page = 0; - actor->pageaddr = NULL; - actor->release_pages = release_pages; - return actor; +/* Implementation of page_actor for decompressing into intermediate buffer */ +static void *cache_first_page(struct squashfs_page_actor *actor) +{ + actor->next_page = 1; + return actor->buffer[0]; } -void squashfs_page_actor_free(struct squashfs_page_actor *actor, int error) +static void *cache_next_page(struct squashfs_page_actor *actor) { - if (!actor) - return; + if (actor->next_page == actor->pages) + return NULL; - if (actor->release_pages) - actor->release_pages(actor->page, actor->pages, error); - kfree(actor); + return actor->buffer[actor->next_page++]; } -void squashfs_actor_to_buf(struct squashfs_page_actor *actor, void *buf, - int length) +static void cache_finish_page(struct squashfs_page_actor *actor) { - void *pageaddr; - int pos = 0, avail, i; - - for (i = 0; i < actor->pages && pos < length; ++i) { - avail = min_t(int, length - pos, PAGE_SIZE); - if (actor->page[i]) { - pageaddr = kmap_atomic(actor->page[i]); - memcpy(buf + pos, pageaddr, avail); - kunmap_atomic(pageaddr); - } - pos += avail; - } + /* empty */ } -void squashfs_buf_to_actor(void *buf, struct squashfs_page_actor *actor, - int length) +struct squashfs_page_actor *squashfs_page_actor_init(void **buffer, + int pages, int length) { - void *pageaddr; - int pos = 0, avail, i; - - for (i = 0; i < actor->pages && pos < length; ++i) { - avail = min_t(int, length - pos, PAGE_SIZE); - if (actor->page[i]) { - pageaddr = kmap_atomic(actor->page[i]); - memcpy(pageaddr, buf + pos, avail); - kunmap_atomic(pageaddr); - } - pos += avail; - } + struct squashfs_page_actor *actor = kmalloc(sizeof(*actor), GFP_KERNEL); + + if (actor == NULL) + return NULL; + + actor->length = length ? : pages * PAGE_SIZE; + actor->buffer = buffer; + actor->pages = pages; + actor->next_page = 0; + actor->squashfs_first_page = cache_first_page; + actor->squashfs_next_page = cache_next_page; + actor->squashfs_finish_page = cache_finish_page; + return actor; } -void squashfs_bh_to_actor(struct buffer_head **bh, int nr_buffers, - struct squashfs_page_actor *actor, int offset, int length, int blksz) +/* Implementation of page_actor for decompressing directly into page cache. */ +static void *direct_first_page(struct squashfs_page_actor *actor) { - void *kaddr = NULL; - int bytes = 0, pgoff = 0, b = 0, p = 0, avail, i; - - while (bytes < length) { - if (actor->page[p]) { - kaddr = kmap_atomic(actor->page[p]); - while (pgoff < PAGE_SIZE && bytes < length) { - avail = min_t(int, blksz - offset, - PAGE_SIZE - pgoff); - memcpy(kaddr + pgoff, bh[b]->b_data + offset, - avail); - pgoff += avail; - bytes += avail; - offset = (offset + avail) % blksz; - if (!offset) { - put_bh(bh[b]); - ++b; - } - } - kunmap_atomic(kaddr); - pgoff = 0; - } else { - for (i = 0; i < PAGE_SIZE / blksz; ++i) { - if (bh[b]) - put_bh(bh[b]); - ++b; - } - bytes += PAGE_SIZE; - } - ++p; - } + actor->next_page = 1; + return actor->pageaddr = kmap_atomic(actor->page[0]); } -void squashfs_bh_to_buf(struct buffer_head **bh, int nr_buffers, void *buf, - int offset, int length, int blksz) +static void *direct_next_page(struct squashfs_page_actor *actor) { - int i, avail, bytes = 0; - - for (i = 0; i < nr_buffers && bytes < length; ++i) { - avail = min_t(int, length - bytes, blksz - offset); - if (bh[i]) { - memcpy(buf + bytes, bh[i]->b_data + offset, avail); - put_bh(bh[i]); - } - bytes += avail; - offset = 0; - } + if (actor->pageaddr) + kunmap_atomic(actor->pageaddr); + + return actor->pageaddr = actor->next_page == actor->pages ? NULL : + kmap_atomic(actor->page[actor->next_page++]); } -void free_page_array(struct page **page, int nr_pages) +static void direct_finish_page(struct squashfs_page_actor *actor) { - int i; - - for (i = 0; i < nr_pages; ++i) - __free_page(page[i]); - kfree(page); + if (actor->pageaddr) + kunmap_atomic(actor->pageaddr); } -struct page **alloc_page_array(int nr_pages, int gfp_mask) +struct squashfs_page_actor *squashfs_page_actor_init_special(struct page **page, + int pages, int length) { - int i; - struct page **page; + struct squashfs_page_actor *actor = kmalloc(sizeof(*actor), GFP_KERNEL); - page = kcalloc(nr_pages, sizeof(struct page *), gfp_mask); - if (!page) + if (actor == NULL) return NULL; - for (i = 0; i < nr_pages; ++i) { - page[i] = alloc_page(gfp_mask); - if (!page[i]) { - free_page_array(page, i); - return NULL; - } - } - return page; + + actor->length = length ? : pages * PAGE_SIZE; + actor->page = page; + actor->pages = pages; + actor->next_page = 0; + actor->pageaddr = NULL; + actor->squashfs_first_page = direct_first_page; + actor->squashfs_next_page = direct_next_page; + actor->squashfs_finish_page = direct_finish_page; + return actor; } diff --git a/fs/squashfs/page_actor.h b/fs/squashfs/page_actor.h index aa1ed790b5a38..98537eab27e27 100644 --- a/fs/squashfs/page_actor.h +++ b/fs/squashfs/page_actor.h @@ -5,61 +5,77 @@ * Phillip Lougher * * This work is licensed under the terms of the GNU GPL, version 2. See - * the COPYING file in the top-level squashfsory. + * the COPYING file in the top-level directory. */ +#ifndef CONFIG_SQUASHFS_FILE_DIRECT struct squashfs_page_actor { - struct page **page; - void *pageaddr; + void **page; int pages; int length; int next_page; - void (*release_pages)(struct page **, int, int); }; -extern struct squashfs_page_actor *squashfs_page_actor_init(struct page **, - int, int, void (*)(struct page **, int, int)); -extern void squashfs_page_actor_free(struct squashfs_page_actor *, int); +static inline struct squashfs_page_actor *squashfs_page_actor_init(void **page, + int pages, int length) +{ + struct squashfs_page_actor *actor = kmalloc(sizeof(*actor), GFP_KERNEL); -extern void squashfs_actor_to_buf(struct squashfs_page_actor *, void *, int); -extern void squashfs_buf_to_actor(void *, struct squashfs_page_actor *, int); -extern void squashfs_bh_to_actor(struct buffer_head **, int, - struct squashfs_page_actor *, int, int, int); -extern void squashfs_bh_to_buf(struct buffer_head **, int, void *, int, int, - int); + if (actor == NULL) + return NULL; + + actor->length = length ? : pages * PAGE_SIZE; + actor->page = page; + actor->pages = pages; + actor->next_page = 0; + return actor; +} -/* - * Calling code should avoid sleeping between calls to squashfs_first_page() - * and squashfs_finish_page(). - */ static inline void *squashfs_first_page(struct squashfs_page_actor *actor) { actor->next_page = 1; - return actor->pageaddr = actor->page[0] ? kmap_atomic(actor->page[0]) - : NULL; + return actor->page[0]; } static inline void *squashfs_next_page(struct squashfs_page_actor *actor) { - if (!IS_ERR_OR_NULL(actor->pageaddr)) - kunmap_atomic(actor->pageaddr); - - if (actor->next_page == actor->pages) - return actor->pageaddr = ERR_PTR(-ENODATA); - - actor->pageaddr = actor->page[actor->next_page] ? - kmap_atomic(actor->page[actor->next_page]) : NULL; - ++actor->next_page; - return actor->pageaddr; + return actor->next_page == actor->pages ? NULL : + actor->page[actor->next_page++]; } static inline void squashfs_finish_page(struct squashfs_page_actor *actor) { - if (!IS_ERR_OR_NULL(actor->pageaddr)) - kunmap_atomic(actor->pageaddr); + /* empty */ } +#else +struct squashfs_page_actor { + union { + void **buffer; + struct page **page; + }; + void *pageaddr; + void *(*squashfs_first_page)(struct squashfs_page_actor *); + void *(*squashfs_next_page)(struct squashfs_page_actor *); + void (*squashfs_finish_page)(struct squashfs_page_actor *); + int pages; + int length; + int next_page; +}; -extern struct page **alloc_page_array(int, int); -extern void free_page_array(struct page **, int); - +extern struct squashfs_page_actor *squashfs_page_actor_init(void **, int, int); +extern struct squashfs_page_actor *squashfs_page_actor_init_special(struct page + **, int, int); +static inline void *squashfs_first_page(struct squashfs_page_actor *actor) +{ + return actor->squashfs_first_page(actor); +} +static inline void *squashfs_next_page(struct squashfs_page_actor *actor) +{ + return actor->squashfs_next_page(actor); +} +static inline void squashfs_finish_page(struct squashfs_page_actor *actor) +{ + actor->squashfs_finish_page(actor); +} +#endif #endif diff --git a/fs/squashfs/squashfs.h b/fs/squashfs/squashfs.h index a9ea6ee34ec23..f89f8a74c6cea 100644 --- a/fs/squashfs/squashfs.h +++ b/fs/squashfs/squashfs.h @@ -28,12 +28,8 @@ #define WARNING(s, args...) pr_warn("SQUASHFS: "s, ## args) /* block.c */ -extern int squashfs_init_read_wq(void); -extern void squashfs_destroy_read_wq(void); extern int squashfs_read_data(struct super_block *, u64, int, u64 *, struct squashfs_page_actor *); -extern int squashfs_read_data_async(struct super_block *, u64, int, u64 *, - struct squashfs_page_actor *); /* cache.c */ extern struct squashfs_cache *squashfs_cache_init(char *, int, int); @@ -75,9 +71,8 @@ void squashfs_fill_page(struct page *, struct squashfs_cache_entry *, int, int); void squashfs_copy_cache(struct page *, struct squashfs_cache_entry *, int, int); -/* file_direct.c */ -extern int squashfs_readpages_block(struct page *, struct list_head *, - unsigned int *, struct address_space *, int, u64, int); +/* file_xxx.c */ +extern int squashfs_readpage_block(struct page *, u64, int, int); /* id.c */ extern int squashfs_get_id(struct super_block *, unsigned int, unsigned int *); diff --git a/fs/squashfs/squashfs_fs_sb.h b/fs/squashfs/squashfs_fs_sb.h index 3b767ce1e46db..ef69c31947bf8 100644 --- a/fs/squashfs/squashfs_fs_sb.h +++ b/fs/squashfs/squashfs_fs_sb.h @@ -49,7 +49,7 @@ struct squashfs_cache_entry { int num_waiters; wait_queue_head_t wait_queue; struct squashfs_cache *cache; - struct page **page; + void **data; struct squashfs_page_actor *actor; }; diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c index 445ce580f06db..1516bb779b8d4 100644 --- a/fs/squashfs/super.c +++ b/fs/squashfs/super.c @@ -445,15 +445,9 @@ static int __init init_squashfs_fs(void) if (err) return err; - if (!squashfs_init_read_wq()) { - destroy_inodecache(); - return -ENOMEM; - } - err = register_filesystem(&squashfs_fs_type); if (err) { destroy_inodecache(); - squashfs_destroy_read_wq(); return err; } @@ -467,7 +461,6 @@ static void __exit exit_squashfs_fs(void) { unregister_filesystem(&squashfs_fs_type); destroy_inodecache(); - squashfs_destroy_read_wq(); } diff --git a/fs/squashfs/xz_wrapper.c b/fs/squashfs/xz_wrapper.c index 2f7be1fb167cf..6bfaef73d0652 100644 --- a/fs/squashfs/xz_wrapper.c +++ b/fs/squashfs/xz_wrapper.c @@ -55,7 +55,7 @@ static void *squashfs_xz_comp_opts(struct squashfs_sb_info *msblk, struct comp_opts *opts; int err = 0, n; - opts = kmalloc(sizeof(*opts), GFP_ATOMIC); + opts = kmalloc(sizeof(*opts), GFP_KERNEL); if (opts == NULL) { err = -ENOMEM; goto out2; @@ -136,7 +136,6 @@ static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void *strm, enum xz_ret xz_err; int avail, total = 0, k = 0; struct squashfs_xz *stream = strm; - void *buf = NULL; xz_dec_reset(stream->state); stream->buf.in_pos = 0; @@ -157,20 +156,12 @@ static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void *strm, if (stream->buf.out_pos == stream->buf.out_size) { stream->buf.out = squashfs_next_page(output); - if (!IS_ERR(stream->buf.out)) { + if (stream->buf.out != NULL) { stream->buf.out_pos = 0; total += PAGE_SIZE; } } - if (!stream->buf.out) { - if (!buf) { - buf = kmalloc(PAGE_SIZE, GFP_ATOMIC); - if (!buf) - goto out; - } - stream->buf.out = buf; - } xz_err = xz_dec_run(stream->state, &stream->buf); if (stream->buf.in_pos == stream->buf.in_size && k < b) @@ -182,13 +173,11 @@ static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void *strm, if (xz_err != XZ_STREAM_END || k < b) goto out; - kfree(buf); return total + stream->buf.out_pos; out: for (; k < b; k++) put_bh(bh[k]); - kfree(buf); return -EIO; } diff --git a/fs/squashfs/zlib_wrapper.c b/fs/squashfs/zlib_wrapper.c index d917c728422b5..2ec24d128bce0 100644 --- a/fs/squashfs/zlib_wrapper.c +++ b/fs/squashfs/zlib_wrapper.c @@ -66,7 +66,6 @@ static int zlib_uncompress(struct squashfs_sb_info *msblk, void *strm, struct buffer_head **bh, int b, int offset, int length, struct squashfs_page_actor *output) { - void *buf = NULL; int zlib_err, zlib_init = 0, k = 0; z_stream *stream = strm; @@ -85,19 +84,10 @@ static int zlib_uncompress(struct squashfs_sb_info *msblk, void *strm, if (stream->avail_out == 0) { stream->next_out = squashfs_next_page(output); - if (!IS_ERR(stream->next_out)) + if (stream->next_out != NULL) stream->avail_out = PAGE_SIZE; } - if (!stream->next_out) { - if (!buf) { - buf = kmalloc(PAGE_SIZE, GFP_ATOMIC); - if (!buf) - goto out; - } - stream->next_out = buf; - } - if (!zlib_init) { zlib_err = zlib_inflateInit(stream); if (zlib_err != Z_OK) { @@ -125,13 +115,11 @@ static int zlib_uncompress(struct squashfs_sb_info *msblk, void *strm, if (k < b) goto out; - kfree(buf); return stream->total_out; out: for (; k < b; k++) put_bh(bh[k]); - kfree(buf); return -EIO; } -- GitLab From 30c64b5a4f2b88de1c69fcc556eb4a1238ad5ac3 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 13 Nov 2017 07:15:41 +0100 Subject: [PATCH 2197/2269] timer/debug: Change /proc/timer_list from 0444 to 0400 [ Upstream commit 8e7df2b5b7f245c9bd11064712db5cb69044a362 ] While it uses %pK, there's still few reasons to read this file as non-root. Suggested-by: Linus Torvalds Acked-by: Thomas Gleixner Cc: Peter Zijlstra Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin --- kernel/time/timer_list.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c index 0e7f5428a1484..0ed768b56c606 100644 --- a/kernel/time/timer_list.c +++ b/kernel/time/timer_list.c @@ -389,7 +389,7 @@ static int __init init_timer_list_procfs(void) { struct proc_dir_entry *pe; - pe = proc_create("timer_list", 0444, NULL, &timer_list_fops); + pe = proc_create("timer_list", 0400, NULL, &timer_list_fops); if (!pe) return -ENOMEM; return 0; -- GitLab From 7ff0bcb2cb31602dd347b216f5f70851102d5067 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Tue, 4 Dec 2018 17:04:57 +0800 Subject: [PATCH 2198/2269] pinctrl: sunxi: a83t: Fix IRQ offset typo for PH11 commit 478b6767ad26ab86d9ecc341027dd09a87b1f997 upstream. Pin PH11 is used on various A83T board to detect a change in the OTG port's ID pin, as in when an OTG host cable is plugged in. The incorrect offset meant the gpiochip/irqchip was activating the wrong pin for interrupts. Fixes: 4730f33f0d82 ("pinctrl: sunxi: add allwinner A83T PIO controller support") Cc: Signed-off-by: Chen-Yu Tsai Acked-by: Maxime Ripard Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/sunxi/pinctrl-sun8i-a83t.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/sunxi/pinctrl-sun8i-a83t.c b/drivers/pinctrl/sunxi/pinctrl-sun8i-a83t.c index 6624499eae72f..4ada80317a3bd 100644 --- a/drivers/pinctrl/sunxi/pinctrl-sun8i-a83t.c +++ b/drivers/pinctrl/sunxi/pinctrl-sun8i-a83t.c @@ -568,7 +568,7 @@ static const struct sunxi_desc_pin sun8i_a83t_pins[] = { SUNXI_PIN(SUNXI_PINCTRL_PIN(H, 11), SUNXI_FUNCTION(0x0, "gpio_in"), SUNXI_FUNCTION(0x1, "gpio_out"), - SUNXI_FUNCTION_IRQ_BANK(0x6, 2, 1)), /* PH_EINT11 */ + SUNXI_FUNCTION_IRQ_BANK(0x6, 2, 11)), /* PH_EINT11 */ }; static const struct sunxi_pinctrl_desc sun8i_a83t_pinctrl_data = { -- GitLab From 5f4610fe2ed4351b45c0d82b3246f18b886d26f9 Mon Sep 17 00:00:00 2001 From: Jeff Moyer Date: Tue, 11 Dec 2018 12:37:49 -0500 Subject: [PATCH 2199/2269] aio: fix spectre gadget in lookup_ioctx commit a538e3ff9dabcdf6c3f477a373c629213d1c3066 upstream. Matthew pointed out that the ioctx_table is susceptible to spectre v1, because the index can be controlled by an attacker. The below patch should mitigate the attack for all of the aio system calls. Cc: stable@vger.kernel.org Reported-by: Matthew Wilcox Reported-by: Dan Carpenter Signed-off-by: Jeff Moyer Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- fs/aio.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/aio.c b/fs/aio.c index 3a749c3a92e32..a2de58f77338d 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -43,6 +43,7 @@ #include #include +#include #include "internal.h" @@ -1084,6 +1085,7 @@ static struct kioctx *lookup_ioctx(unsigned long ctx_id) if (!table || id >= table->nr) goto out; + id = array_index_nospec(id, table->nr); ctx = rcu_dereference(table->table[id]); if (ctx && ctx->user_id == ctx_id) { if (percpu_ref_tryget_live(&ctx->users)) -- GitLab From b99eaefb6850b38abdd8a2029bbfde08170ec6c9 Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Fri, 14 Dec 2018 14:17:17 -0800 Subject: [PATCH 2200/2269] userfaultfd: check VM_MAYWRITE was set after verifying the uffd is registered commit 01e881f5a1fca4677e82733061868c6d6ea05ca7 upstream. Calling UFFDIO_UNREGISTER on virtual ranges not yet registered in uffd could trigger an harmless false positive WARN_ON. Check the vma is already registered before checking VM_MAYWRITE to shut off the false positive warning. Link: http://lkml.kernel.org/r/20181206212028.18726-2-aarcange@redhat.com Cc: Fixes: 29ec90660d68 ("userfaultfd: shmem/hugetlbfs: only allow to register VM_MAYWRITE vmas") Signed-off-by: Andrea Arcangeli Reported-by: syzbot+06c7092e7d71218a2c16@syzkaller.appspotmail.com Acked-by: Mike Rapoport Acked-by: Hugh Dickins Acked-by: Peter Xu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/userfaultfd.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index f92e1f2fc8460..5f10052d2671f 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -1567,7 +1567,6 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx, cond_resched(); BUG_ON(!vma_can_userfault(vma)); - WARN_ON(!(vma->vm_flags & VM_MAYWRITE)); /* * Nothing to do: this vma is already registered into this @@ -1576,6 +1575,8 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx, if (!vma->vm_userfaultfd_ctx.ctx) goto skip; + WARN_ON(!(vma->vm_flags & VM_MAYWRITE)); + if (vma->vm_start > start) start = vma->vm_start; vma_end = min(end, vma->vm_end); -- GitLab From 87d143de94e24bc209b10750b1937d7b5b9ee648 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 10 Dec 2018 19:33:31 +0000 Subject: [PATCH 2201/2269] arm64: dma-mapping: Fix FORCE_CONTIGUOUS buffer clearing commit 3238c359acee4ab57f15abb5a82b8ab38a661ee7 upstream. We need to invalidate the caches *before* clearing the buffer via the non-cacheable alias, else in the worst case __dma_flush_area() may write back dirty lines over the top of our nice new zeros. Fixes: dd65a941f6ba ("arm64: dma-mapping: clear buffers allocated with FORCE_CONTIGUOUS flag") Cc: # 4.18.x- Acked-by: Will Deacon Signed-off-by: Robin Murphy Signed-off-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman --- arch/arm64/mm/dma-mapping.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 58470b151bc3f..ba88b5b68db69 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -633,9 +633,9 @@ static void *__iommu_alloc_attrs(struct device *dev, size_t size, prot, __builtin_return_address(0)); if (addr) { - memset(addr, 0, size); if (!coherent) __dma_flush_area(page_to_virt(page), iosize); + memset(addr, 0, size); } else { iommu_dma_unmap_page(dev, *handle, iosize, 0, attrs); dma_release_from_contiguous(dev, page, -- GitLab From 30d358d804dacc04c095a95ba8584fabca155e83 Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Tue, 20 Nov 2018 01:14:00 +0200 Subject: [PATCH 2202/2269] MMC: OMAP: fix broken MMC on OMAP15XX/OMAP5910/OMAP310 commit e8cde625bfe8a714a856e1366bcbb259d7346095 upstream. Since v2.6.22 or so there has been reports [1] about OMAP MMC being broken on OMAP15XX based hardware (OMAP5910 and OMAP310). The breakage seems to have been caused by commit 46a6730e3ff9 ("mmc-omap: Fix omap to use MMC_POWER_ON") that changed clock enabling to be done on MMC_POWER_ON. This can happen multiple times in a row, and on 15XX the hardware doesn't seem to like it and the MMC just stops responding. Fix by memorizing the power mode and do the init only when necessary. Before the patch (on Palm TE): mmc0: new SD card at address b368 mmcblk0: mmc0:b368 SDC 977 MiB mmci-omap mmci-omap.0: command timeout (CMD18) mmci-omap mmci-omap.0: command timeout (CMD13) mmci-omap mmci-omap.0: command timeout (CMD13) mmci-omap mmci-omap.0: command timeout (CMD12) [x 6] mmci-omap mmci-omap.0: command timeout (CMD13) [x 6] mmcblk0: error -110 requesting status mmci-omap mmci-omap.0: command timeout (CMD8) mmci-omap mmci-omap.0: command timeout (CMD18) mmci-omap mmci-omap.0: command timeout (CMD13) mmci-omap mmci-omap.0: command timeout (CMD13) mmci-omap mmci-omap.0: command timeout (CMD12) [x 6] mmci-omap mmci-omap.0: command timeout (CMD13) [x 6] mmcblk0: error -110 requesting status mmcblk0: recovery failed! print_req_error: I/O error, dev mmcblk0, sector 0 Buffer I/O error on dev mmcblk0, logical block 0, async page read mmcblk0: unable to read partition table After the patch: mmc0: new SD card at address b368 mmcblk0: mmc0:b368 SDC 977 MiB mmcblk0: p1 The patch is based on a fix and analysis done by Ladislav Michl. Tested on OMAP15XX/OMAP310 (Palm TE), OMAP1710 (Nokia 770) and OMAP2420 (Nokia N810). [1] https://marc.info/?t=123175197000003&r=1&w=2 Fixes: 46a6730e3ff9 ("mmc-omap: Fix omap to use MMC_POWER_ON") Reported-by: Ladislav Michl Reported-by: Andrzej Zaborowski Tested-by: Ladislav Michl Acked-by: Tony Lindgren Signed-off-by: Aaro Koskinen Cc: stable@vger.kernel.org Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/omap.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/mmc/host/omap.c b/drivers/mmc/host/omap.c index bd49f34d76546..c28c51ad650f4 100644 --- a/drivers/mmc/host/omap.c +++ b/drivers/mmc/host/omap.c @@ -104,6 +104,7 @@ struct mmc_omap_slot { unsigned int vdd; u16 saved_con; u16 bus_mode; + u16 power_mode; unsigned int fclk_freq; struct tasklet_struct cover_tasklet; @@ -1157,7 +1158,7 @@ static void mmc_omap_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) struct mmc_omap_slot *slot = mmc_priv(mmc); struct mmc_omap_host *host = slot->host; int i, dsor; - int clk_enabled; + int clk_enabled, init_stream; mmc_omap_select_slot(slot, 0); @@ -1167,6 +1168,7 @@ static void mmc_omap_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) slot->vdd = ios->vdd; clk_enabled = 0; + init_stream = 0; switch (ios->power_mode) { case MMC_POWER_OFF: mmc_omap_set_power(slot, 0, ios->vdd); @@ -1174,13 +1176,17 @@ static void mmc_omap_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) case MMC_POWER_UP: /* Cannot touch dsor yet, just power up MMC */ mmc_omap_set_power(slot, 1, ios->vdd); + slot->power_mode = ios->power_mode; goto exit; case MMC_POWER_ON: mmc_omap_fclk_enable(host, 1); clk_enabled = 1; dsor |= 1 << 11; + if (slot->power_mode != MMC_POWER_ON) + init_stream = 1; break; } + slot->power_mode = ios->power_mode; if (slot->bus_mode != ios->bus_mode) { if (slot->pdata->set_bus_mode != NULL) @@ -1196,7 +1202,7 @@ static void mmc_omap_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) for (i = 0; i < 2; i++) OMAP_MMC_WRITE(host, CON, dsor); slot->saved_con = dsor; - if (ios->power_mode == MMC_POWER_ON) { + if (init_stream) { /* worst case at 400kHz, 80 cycles makes 200 microsecs */ int usecs = 250; @@ -1234,6 +1240,7 @@ static int mmc_omap_new_slot(struct mmc_omap_host *host, int id) slot->host = host; slot->mmc = mmc; slot->id = id; + slot->power_mode = MMC_POWER_UNDEFINED; slot->pdata = &host->pdata->slots[id]; host->slots[id] = slot; -- GitLab From 38ef9c5a931163eb8abf8f1c654d1b13de26f792 Mon Sep 17 00:00:00 2001 From: Alek Du Date: Thu, 6 Dec 2018 17:24:59 +0800 Subject: [PATCH 2203/2269] mmc: sdhci: fix the timeout check window for clock and reset commit b704441e38f645dcfba1348ca3cc1ba43d1a9f31 upstream. We observed some premature timeouts on a virtualization platform, the log is like this: case 1: [159525.255629] mmc1: Internal clock never stabilised. [159525.255818] mmc1: sdhci: ============ SDHCI REGISTER DUMP =========== [159525.256049] mmc1: sdhci: Sys addr: 0x00000000 | Version: 0x00001002 ... [159525.257205] mmc1: sdhci: Wake-up: 0x00000000 | Clock: 0x0000fa03 From the clock control register dump, we are pretty sure the clock was stablized. case 2: [ 914.550127] mmc1: Reset 0x2 never completed. [ 914.550321] mmc1: sdhci: ============ SDHCI REGISTER DUMP =========== [ 914.550608] mmc1: sdhci: Sys addr: 0x00000010 | Version: 0x00001002 After checking the sdhci code, we found the timeout check actually has a little window that the CPU can be scheduled out and when it comes back, the original time set or check is not valid. Fixes: 5a436cc0af62 ("mmc: sdhci: Optimize delay loops") Cc: stable@vger.kernel.org # v4.12+ Signed-off-by: Alek Du Acked-by: Adrian Hunter Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index f063fe569339b..0edcc2763f3ca 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -193,8 +193,12 @@ void sdhci_reset(struct sdhci_host *host, u8 mask) timeout = ktime_add_ms(ktime_get(), 100); /* hw clears the bit when it's done */ - while (sdhci_readb(host, SDHCI_SOFTWARE_RESET) & mask) { - if (ktime_after(ktime_get(), timeout)) { + while (1) { + bool timedout = ktime_after(ktime_get(), timeout); + + if (!(sdhci_readb(host, SDHCI_SOFTWARE_RESET) & mask)) + break; + if (timedout) { pr_err("%s: Reset 0x%x never completed.\n", mmc_hostname(host->mmc), (int)mask); sdhci_dumpregs(host); @@ -1417,9 +1421,13 @@ void sdhci_enable_clk(struct sdhci_host *host, u16 clk) /* Wait max 20 ms */ timeout = ktime_add_ms(ktime_get(), 20); - while (!((clk = sdhci_readw(host, SDHCI_CLOCK_CONTROL)) - & SDHCI_CLOCK_INT_STABLE)) { - if (ktime_after(ktime_get(), timeout)) { + while (1) { + bool timedout = ktime_after(ktime_get(), timeout); + + clk = sdhci_readw(host, SDHCI_CLOCK_CONTROL); + if (clk & SDHCI_CLOCK_INT_STABLE) + break; + if (timedout) { pr_err("%s: Internal clock never stabilised.\n", mmc_hostname(host->mmc)); sdhci_dumpregs(host); -- GitLab From c1149b873482a1f2a85659692376103e7d052dde Mon Sep 17 00:00:00 2001 From: Chad Austin Date: Mon, 10 Dec 2018 10:54:52 -0800 Subject: [PATCH 2204/2269] fuse: continue to send FUSE_RELEASEDIR when FUSE_OPEN returns ENOSYS commit 2e64ff154ce6ce9a8dc0f9556463916efa6ff460 upstream. When FUSE_OPEN returns ENOSYS, the no_open bit is set on the connection. Because the FUSE_RELEASE and FUSE_RELEASEDIR paths share code, this incorrectly caused the FUSE_RELEASEDIR request to be dropped and never sent to userspace. Pass an isdir bool to distinguish between FUSE_RELEASE and FUSE_RELEASEDIR inside of fuse_file_put. Fixes: 7678ac50615d ("fuse: support clients that don't implement 'open'") Cc: # v3.14 Signed-off-by: Chad Austin Signed-off-by: Miklos Szeredi Signed-off-by: Greg Kroah-Hartman --- fs/fuse/dir.c | 2 +- fs/fuse/file.c | 21 +++++++++++---------- fs/fuse/fuse_i.h | 2 +- 3 files changed, 13 insertions(+), 12 deletions(-) diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 29868c35c19a6..d933ecb7a08c1 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -1424,7 +1424,7 @@ static int fuse_dir_open(struct inode *inode, struct file *file) static int fuse_dir_release(struct inode *inode, struct file *file) { - fuse_release_common(file, FUSE_RELEASEDIR); + fuse_release_common(file, true); return 0; } diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 47d7a510be5be..52514a64dcd67 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -86,12 +86,12 @@ static void fuse_release_end(struct fuse_conn *fc, struct fuse_req *req) iput(req->misc.release.inode); } -static void fuse_file_put(struct fuse_file *ff, bool sync) +static void fuse_file_put(struct fuse_file *ff, bool sync, bool isdir) { if (refcount_dec_and_test(&ff->count)) { struct fuse_req *req = ff->reserved_req; - if (ff->fc->no_open) { + if (ff->fc->no_open && !isdir) { /* * Drop the release request when client does not * implement 'open' @@ -244,10 +244,11 @@ static void fuse_prepare_release(struct fuse_file *ff, int flags, int opcode) req->in.args[0].value = inarg; } -void fuse_release_common(struct file *file, int opcode) +void fuse_release_common(struct file *file, bool isdir) { struct fuse_file *ff = file->private_data; struct fuse_req *req = ff->reserved_req; + int opcode = isdir ? FUSE_RELEASEDIR : FUSE_RELEASE; fuse_prepare_release(ff, file->f_flags, opcode); @@ -269,7 +270,7 @@ void fuse_release_common(struct file *file, int opcode) * synchronous RELEASE is allowed (and desirable) in this case * because the server can be trusted not to screw up. */ - fuse_file_put(ff, ff->fc->destroy_req != NULL); + fuse_file_put(ff, ff->fc->destroy_req != NULL, isdir); } static int fuse_open(struct inode *inode, struct file *file) @@ -285,7 +286,7 @@ static int fuse_release(struct inode *inode, struct file *file) if (fc->writeback_cache) write_inode_now(inode, 1); - fuse_release_common(file, FUSE_RELEASE); + fuse_release_common(file, false); /* return value is ignored by VFS */ return 0; @@ -299,7 +300,7 @@ void fuse_sync_release(struct fuse_file *ff, int flags) * iput(NULL) is a no-op and since the refcount is 1 and everything's * synchronous, we are fine with not doing igrab() here" */ - fuse_file_put(ff, true); + fuse_file_put(ff, true, false); } EXPORT_SYMBOL_GPL(fuse_sync_release); @@ -804,7 +805,7 @@ static void fuse_readpages_end(struct fuse_conn *fc, struct fuse_req *req) put_page(page); } if (req->ff) - fuse_file_put(req->ff, false); + fuse_file_put(req->ff, false, false); } static void fuse_send_readpages(struct fuse_req *req, struct file *file) @@ -1458,7 +1459,7 @@ static void fuse_writepage_free(struct fuse_conn *fc, struct fuse_req *req) __free_page(req->pages[i]); if (req->ff) - fuse_file_put(req->ff, false); + fuse_file_put(req->ff, false, false); } static void fuse_writepage_finish(struct fuse_conn *fc, struct fuse_req *req) @@ -1615,7 +1616,7 @@ int fuse_write_inode(struct inode *inode, struct writeback_control *wbc) ff = __fuse_write_file_get(fc, fi); err = fuse_flush_times(inode, ff); if (ff) - fuse_file_put(ff, 0); + fuse_file_put(ff, false, false); return err; } @@ -1929,7 +1930,7 @@ static int fuse_writepages(struct address_space *mapping, err = 0; } if (data.ff) - fuse_file_put(data.ff, false); + fuse_file_put(data.ff, false, false); kfree(data.orig_pages); out: diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index e105640153ce2..e682f2eff6c0d 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -739,7 +739,7 @@ void fuse_sync_release(struct fuse_file *ff, int flags); /** * Send RELEASE or RELEASEDIR request */ -void fuse_release_common(struct file *file, int opcode); +void fuse_release_common(struct file *file, bool isdir); /** * Send FSYNC or FSYNCDIR request -- GitLab From 529571392b7a3efb91d12d70551ece5c0e60ece6 Mon Sep 17 00:00:00 2001 From: Lubomir Rintel Date: Sun, 2 Dec 2018 12:12:24 +0100 Subject: [PATCH 2205/2269] ARM: mmp/mmp2: fix cpu_is_mmp2() on mmp2-dt commit 76f4e2c3b6a560cdd7a75b87df543e04d05a9e5f upstream. cpu_is_mmp2() was equivalent to cpu_is_pj4(), wouldn't be correct for multiplatform kernels. Fix it by also considering mmp_chip_id, as is done for cpu_is_pxa168() and cpu_is_pxa910() above. Moreover, it is only available with CONFIG_CPU_MMP2 and thus doesn't work on DT-based MMP2 machines. Enable it on CONFIG_MACH_MMP2_DT too. Note: CONFIG_CPU_MMP2 is only used for machines that use board files instead of DT. It should perhaps be renamed. I'm not doing it now, because I don't have a better idea. Signed-off-by: Lubomir Rintel Acked-by: Arnd Bergmann Cc: stable@vger.kernel.org Signed-off-by: Olof Johansson Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-mmp/cputype.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/arm/mach-mmp/cputype.h b/arch/arm/mach-mmp/cputype.h index 446edaeb78a71..a96abcf521b4b 100644 --- a/arch/arm/mach-mmp/cputype.h +++ b/arch/arm/mach-mmp/cputype.h @@ -44,10 +44,12 @@ static inline int cpu_is_pxa910(void) #define cpu_is_pxa910() (0) #endif -#ifdef CONFIG_CPU_MMP2 +#if defined(CONFIG_CPU_MMP2) || defined(CONFIG_MACH_MMP2_DT) static inline int cpu_is_mmp2(void) { - return (((read_cpuid_id() >> 8) & 0xff) == 0x58); + return (((read_cpuid_id() >> 8) & 0xff) == 0x58) && + (((mmp_chip_id & 0xfff) == 0x410) || + ((mmp_chip_id & 0xfff) == 0x610)); } #else #define cpu_is_mmp2() (0) -- GitLab From cd5d8a9203ed16bfb16a03c570fb4cf7e7899125 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Tue, 11 Dec 2018 13:31:40 -0500 Subject: [PATCH 2206/2269] dm thin: send event about thin-pool state change _after_ making it commit f6c367585d0d851349d3a9e607c43e5bea993fa1 upstream. Sending a DM event before a thin-pool state change is about to happen is a bug. It wasn't realized until it became clear that userspace response to the event raced with the actual state change that the event was meant to notify about. Fix this by first updating internal thin-pool state to reflect what the DM event is being issued about. This fixes a long-standing racey/buggy userspace device-mapper-test-suite 'resize_io' test that would get an event but not find the state it was looking for -- so it would just go on to hang because no other events caused the test to reevaluate the thin-pool's state. Cc: stable@vger.kernel.org Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-thin.c | 68 +++++++++++++++++++++++--------------------- 1 file changed, 35 insertions(+), 33 deletions(-) diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 699c40c7fe608..da98fc7b995cd 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -195,7 +195,7 @@ static void throttle_unlock(struct throttle *t) struct dm_thin_new_mapping; /* - * The pool runs in 4 modes. Ordered in degraded order for comparisons. + * The pool runs in various modes. Ordered in degraded order for comparisons. */ enum pool_mode { PM_WRITE, /* metadata may be changed */ @@ -281,9 +281,38 @@ struct pool { struct dm_bio_prison_cell **cell_sort_array; }; -static enum pool_mode get_pool_mode(struct pool *pool); static void metadata_operation_failed(struct pool *pool, const char *op, int r); +static enum pool_mode get_pool_mode(struct pool *pool) +{ + return pool->pf.mode; +} + +static void notify_of_pool_mode_change(struct pool *pool) +{ + const char *descs[] = { + "write", + "out-of-data-space", + "read-only", + "read-only", + "fail" + }; + const char *extra_desc = NULL; + enum pool_mode mode = get_pool_mode(pool); + + if (mode == PM_OUT_OF_DATA_SPACE) { + if (!pool->pf.error_if_no_space) + extra_desc = " (queue IO)"; + else + extra_desc = " (error IO)"; + } + + dm_table_event(pool->ti->table); + DMINFO("%s: switching pool to %s%s mode", + dm_device_name(pool->pool_md), + descs[(int)mode], extra_desc ? : ""); +} + /* * Target context for a pool. */ @@ -2362,8 +2391,6 @@ static void do_waker(struct work_struct *ws) queue_delayed_work(pool->wq, &pool->waker, COMMIT_PERIOD); } -static void notify_of_pool_mode_change_to_oods(struct pool *pool); - /* * We're holding onto IO to allow userland time to react. After the * timeout either the pool will have been resized (and thus back in @@ -2376,7 +2403,7 @@ static void do_no_space_timeout(struct work_struct *ws) if (get_pool_mode(pool) == PM_OUT_OF_DATA_SPACE && !pool->pf.error_if_no_space) { pool->pf.error_if_no_space = true; - notify_of_pool_mode_change_to_oods(pool); + notify_of_pool_mode_change(pool); error_retry_list_with_code(pool, BLK_STS_NOSPC); } } @@ -2444,26 +2471,6 @@ static void noflush_work(struct thin_c *tc, void (*fn)(struct work_struct *)) /*----------------------------------------------------------------*/ -static enum pool_mode get_pool_mode(struct pool *pool) -{ - return pool->pf.mode; -} - -static void notify_of_pool_mode_change(struct pool *pool, const char *new_mode) -{ - dm_table_event(pool->ti->table); - DMINFO("%s: switching pool to %s mode", - dm_device_name(pool->pool_md), new_mode); -} - -static void notify_of_pool_mode_change_to_oods(struct pool *pool) -{ - if (!pool->pf.error_if_no_space) - notify_of_pool_mode_change(pool, "out-of-data-space (queue IO)"); - else - notify_of_pool_mode_change(pool, "out-of-data-space (error IO)"); -} - static bool passdown_enabled(struct pool_c *pt) { return pt->adjusted_pf.discard_passdown; @@ -2512,8 +2519,6 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode) switch (new_mode) { case PM_FAIL: - if (old_mode != new_mode) - notify_of_pool_mode_change(pool, "failure"); dm_pool_metadata_read_only(pool->pmd); pool->process_bio = process_bio_fail; pool->process_discard = process_bio_fail; @@ -2527,8 +2532,6 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode) case PM_OUT_OF_METADATA_SPACE: case PM_READ_ONLY: - if (!is_read_only_pool_mode(old_mode)) - notify_of_pool_mode_change(pool, "read-only"); dm_pool_metadata_read_only(pool->pmd); pool->process_bio = process_bio_read_only; pool->process_discard = process_bio_success; @@ -2549,8 +2552,6 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode) * alarming rate. Adjust your low water mark if you're * frequently seeing this mode. */ - if (old_mode != new_mode) - notify_of_pool_mode_change_to_oods(pool); pool->out_of_data_space = true; pool->process_bio = process_bio_read_only; pool->process_discard = process_discard_bio; @@ -2563,8 +2564,6 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode) break; case PM_WRITE: - if (old_mode != new_mode) - notify_of_pool_mode_change(pool, "write"); if (old_mode == PM_OUT_OF_DATA_SPACE) cancel_delayed_work_sync(&pool->no_space_timeout); pool->out_of_data_space = false; @@ -2584,6 +2583,9 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode) * doesn't cause an unexpected mode transition on resume. */ pt->adjusted_pf.mode = new_mode; + + if (old_mode != new_mode) + notify_of_pool_mode_change(pool); } static void abort_transaction(struct pool *pool) -- GitLab From d17cc664f1ebc528b11a8db74c5b84910bf4ab14 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Fri, 9 Nov 2018 11:56:03 -0500 Subject: [PATCH 2207/2269] dm cache metadata: verify cache has blocks in blocks_are_clean_separate_dirty() commit 687cf4412a343a63928a5c9d91bdc0f522939d43 upstream. Otherwise dm_bitset_cursor_begin() return -ENODATA. Other calls to dm_bitset_cursor_begin() have similar negative checks. Fixes inability to create a cache in passthrough mode (even though doing so makes no sense). Fixes: 0d963b6e65 ("dm cache metadata: fix metadata2 format's blocks_are_clean_separate_dirty") Cc: stable@vger.kernel.org Reported-by: David Teigland Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-cache-metadata.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c index 7f1c64c4ad24c..bc60db87e6f14 100644 --- a/drivers/md/dm-cache-metadata.c +++ b/drivers/md/dm-cache-metadata.c @@ -929,6 +929,10 @@ static int blocks_are_clean_separate_dirty(struct dm_cache_metadata *cmd, bool dirty_flag; *result = true; + if (from_cblock(cmd->cache_blocks) == 0) + /* Nothing to do */ + return 0; + r = dm_bitset_cursor_begin(&cmd->dirty_info, cmd->dirty_root, from_cblock(cmd->cache_blocks), &cmd->dirty_cursor); if (r) { -- GitLab From b2e08ad9d4cb0d2c6776ff0a73903084ddc7588b Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Sun, 9 Dec 2018 21:17:30 -0500 Subject: [PATCH 2208/2269] tracing: Fix memory leak in set_trigger_filter() commit 3cec638b3d793b7cacdec5b8072364b41caeb0e1 upstream. When create_event_filter() fails in set_trigger_filter(), the filter may still be allocated and needs to be freed. The caller expects the data->filter to be updated with the new filter, even if the new filter failed (we could add an error message by setting set_str parameter of create_event_filter(), but that's another update). But because the error would just exit, filter was left hanging and nothing could free it. Found by kmemleak detector. Cc: stable@vger.kernel.org Fixes: bac5fb97a173a ("tracing: Add and use generic set_trigger_filter() implementation") Reviewed-by: Tom Zanussi Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace_events_trigger.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c index 43254c5e7e164..e2da180ca172a 100644 --- a/kernel/trace/trace_events_trigger.c +++ b/kernel/trace/trace_events_trigger.c @@ -744,8 +744,10 @@ int set_trigger_filter(char *filter_str, /* The filter is for the 'trigger' event, not the triggered event */ ret = create_event_filter(file->event_call, filter_str, false, &filter); - if (ret) - goto out; + /* + * If create_event_filter() fails, filter still needs to be freed. + * Which the calling code will do with data->filter. + */ assign: tmp = rcu_access_pointer(data->filter); -- GitLab From a28d5f3d9c08a160801754804490dbac4ef9e2e1 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Mon, 10 Dec 2018 23:58:01 -0500 Subject: [PATCH 2209/2269] tracing: Fix memory leak of instance function hash filters commit 2840f84f74035e5a535959d5f17269c69fa6edc5 upstream. The following commands will cause a memory leak: # cd /sys/kernel/tracing # mkdir instances/foo # echo schedule > instance/foo/set_ftrace_filter # rmdir instances/foo The reason is that the hashes that hold the filters to set_ftrace_filter and set_ftrace_notrace are not freed if they contain any data on the instance and the instance is removed. Found by kmemleak detector. Cc: stable@vger.kernel.org Fixes: 591dffdade9f ("ftrace: Allow for function tracing instance to filter functions") Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/ftrace.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 7379bcf3baa05..9937d7cf2a644 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -5534,6 +5534,7 @@ void ftrace_destroy_filter_files(struct ftrace_ops *ops) if (ops->flags & FTRACE_OPS_FL_ENABLED) ftrace_shutdown(ops, 0); ops->flags |= FTRACE_OPS_FL_DELETED; + ftrace_free_filter(ops); mutex_unlock(&ftrace_lock); } -- GitLab From e89aa818353bccf626c317f0dee0d1cd7cdc6772 Mon Sep 17 00:00:00 2001 From: Radu Rendec Date: Tue, 27 Nov 2018 22:20:48 -0500 Subject: [PATCH 2210/2269] powerpc/msi: Fix NULL pointer access in teardown code commit 78e7b15e17ac175e7eed9e21c6f92d03d3b0a6fa upstream. The arch_teardown_msi_irqs() function assumes that controller ops pointers were already checked in arch_setup_msi_irqs(), but this assumption is wrong: arch_teardown_msi_irqs() can be called even when arch_setup_msi_irqs() returns an error (-ENOSYS). This can happen in the following scenario: - msi_capability_init() calls pci_msi_setup_msi_irqs() - pci_msi_setup_msi_irqs() returns -ENOSYS - msi_capability_init() notices the error and calls free_msi_irqs() - free_msi_irqs() calls pci_msi_teardown_msi_irqs() This is easier to see when CONFIG_PCI_MSI_IRQ_DOMAIN is not set and pci_msi_setup_msi_irqs() and pci_msi_teardown_msi_irqs() are just aliases to arch_setup_msi_irqs() and arch_teardown_msi_irqs(). The call to free_msi_irqs() upon pci_msi_setup_msi_irqs() failure seems legit, as it does additional cleanup; e.g. list_del(&entry->list) and kfree(entry) inside free_msi_irqs() do happen (MSI descriptors are allocated before pci_msi_setup_msi_irqs() is called and need to be cleaned up if that fails). Fixes: 6b2fd7efeb88 ("PCI/MSI/PPC: Remove arch_msi_check_device()") Cc: stable@vger.kernel.org # v3.18+ Signed-off-by: Radu Rendec Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/msi.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/msi.c b/arch/powerpc/kernel/msi.c index dab616a33b8db..f2197654be070 100644 --- a/arch/powerpc/kernel/msi.c +++ b/arch/powerpc/kernel/msi.c @@ -34,5 +34,10 @@ void arch_teardown_msi_irqs(struct pci_dev *dev) { struct pci_controller *phb = pci_bus_to_host(dev->bus); - phb->controller_ops.teardown_msi_irqs(dev); + /* + * We can be called even when arch_setup_msi_irqs() returns -ENOSYS, + * so check the pointer again. + */ + if (phb->controller_ops.teardown_msi_irqs) + phb->controller_ops.teardown_msi_irqs(dev); } -- GitLab From d0a954cbfbd09c9106192f880dbe52bf448f11a8 Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Tue, 11 Dec 2018 18:56:20 -0500 Subject: [PATCH 2211/2269] drm/nouveau/kms: Fix memory leak in nv50_mstm_del() commit 24199c5436f267399afed0c4f1f57663c0408f57 upstream. Noticed this while working on redoing the reference counting scheme in the DP MST helpers. Nouveau doesn't attempt to call drm_dp_mst_topology_mgr_destroy() at all, which leaves it leaking all of the resources for drm_dp_mst_topology_mgr and it's children mstbs+ports. Fixes: f479c0ba4a17 ("drm/nouveau/kms/nv50: initial support for DP 1.2 multi-stream") Signed-off-by: Lyude Paul Cc: # v4.10+ Signed-off-by: Ben Skeggs Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/nouveau/nv50_display.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/nouveau/nv50_display.c b/drivers/gpu/drm/nouveau/nv50_display.c index 926ec51ba5be1..25f6a1f6ce204 100644 --- a/drivers/gpu/drm/nouveau/nv50_display.c +++ b/drivers/gpu/drm/nouveau/nv50_display.c @@ -3378,6 +3378,7 @@ nv50_mstm_del(struct nv50_mstm **pmstm) { struct nv50_mstm *mstm = *pmstm; if (mstm) { + drm_dp_mst_topology_mgr_destroy(&mstm->mgr); kfree(*pmstm); *pmstm = NULL; } -- GitLab From 8b1bdb94166862ac57b5a1f39d8e293265326e58 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Wed, 5 Dec 2018 10:16:57 -0800 Subject: [PATCH 2212/2269] Revert "drm/rockchip: Allow driver to be shutdown on reboot/kexec" commit 63238173b2faf3d6b85a416f1c69af6c7be2413f upstream. This reverts commit 7f3ef5dedb146e3d5063b6845781ad1bb59b92b5. It causes new warnings [1] on shutdown when running the Google Kevin or Scarlet (RK3399) boards under Chrome OS. Presumably our usage of DRM is different than what Marc and Heiko test. We're looking at a different approach (e.g., [2]) to replace this, but IMO the revert should be taken first, as it already propagated to -stable. [1] Report here: http://lkml.kernel.org/lkml/20181205030127.GA200921@google.com WARNING: CPU: 4 PID: 2035 at drivers/gpu/drm/drm_mode_config.c:477 drm_mode_config_cleanup+0x1c4/0x294 ... Call trace: drm_mode_config_cleanup+0x1c4/0x294 rockchip_drm_unbind+0x4c/0x8c component_master_del+0x88/0xb8 rockchip_drm_platform_remove+0x2c/0x44 rockchip_drm_platform_shutdown+0x20/0x2c platform_drv_shutdown+0x2c/0x38 device_shutdown+0x164/0x1b8 kernel_restart_prepare+0x40/0x48 kernel_restart+0x20/0x68 ... Memory manager not clean during takedown. WARNING: CPU: 4 PID: 2035 at drivers/gpu/drm/drm_mm.c:950 drm_mm_takedown+0x34/0x44 ... drm_mm_takedown+0x34/0x44 rockchip_drm_unbind+0x64/0x8c component_master_del+0x88/0xb8 rockchip_drm_platform_remove+0x2c/0x44 rockchip_drm_platform_shutdown+0x20/0x2c platform_drv_shutdown+0x2c/0x38 device_shutdown+0x164/0x1b8 kernel_restart_prepare+0x40/0x48 kernel_restart+0x20/0x68 ... [2] https://patchwork.kernel.org/patch/10556151/ https://www.spinics.net/lists/linux-rockchip/msg21342.html [PATCH] drm/rockchip: shutdown drm subsystem on shutdown Fixes: 7f3ef5dedb14 ("drm/rockchip: Allow driver to be shutdown on reboot/kexec") Cc: Jeffy Chen Cc: Robin Murphy Cc: Vicente Bergas Cc: Marc Zyngier Cc: Heiko Stuebner Cc: stable@vger.kernel.org Signed-off-by: Brian Norris Signed-off-by: Heiko Stuebner Link: https://patchwork.freedesktop.org/patch/msgid/20181205181657.177703-1-briannorris@chromium.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/rockchip/rockchip_drm_drv.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_drv.c b/drivers/gpu/drm/rockchip/rockchip_drm_drv.c index 4cacb03f67332..ff3d0f5efbb14 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_drv.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_drv.c @@ -425,11 +425,6 @@ static int rockchip_drm_platform_remove(struct platform_device *pdev) return 0; } -static void rockchip_drm_platform_shutdown(struct platform_device *pdev) -{ - rockchip_drm_platform_remove(pdev); -} - static const struct of_device_id rockchip_drm_dt_ids[] = { { .compatible = "rockchip,display-subsystem", }, { /* sentinel */ }, @@ -439,7 +434,6 @@ MODULE_DEVICE_TABLE(of, rockchip_drm_dt_ids); static struct platform_driver rockchip_drm_platform_driver = { .probe = rockchip_drm_platform_probe, .remove = rockchip_drm_platform_remove, - .shutdown = rockchip_drm_platform_shutdown, .driver = { .name = "rockchip-drm", .of_match_table = rockchip_drm_dt_ids, -- GitLab From 8a20046277e31905a1be1dc159f81668bc871d8c Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 6 Dec 2018 08:44:31 +0000 Subject: [PATCH 2213/2269] drm/i915/execlists: Apply a full mb before execution for Braswell commit cf66b8a0ba142fbd1bf10ac8f3ae92d1b0cb7b8f upstream. Braswell is really picky about having our writes posted to memory before we execute or else the GPU may see stale values. A wmb() is insufficient as it only ensures the writes are visible to other cores, we need a full mb() to ensure the writes are in memory and visible to the GPU. The most frequent failure in flushing before execution is that we see stale PTE values and execute the wrong pages. References: 987abd5c62f9 ("drm/i915/execlists: Force write serialisation into context image vs execution") Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Tvrtko Ursulin Cc: Joonas Lahtinen Cc: stable@vger.kernel.org Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20181206084431.9805-3-chris@chris-wilson.co.uk (cherry picked from commit 490b8c65b9db45896769e1095e78725775f47b3e) Signed-off-by: Joonas Lahtinen Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/intel_lrc.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 0775e71ea95b9..e0483c068d231 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -343,8 +343,13 @@ static u64 execlists_update_context(struct drm_i915_gem_request *rq) * may not be visible to the HW prior to the completion of the UC * register write and that we may begin execution from the context * before its image is complete leading to invalid PD chasing. + * + * Furthermore, Braswell, at least, wants a full mb to be sure that + * the writes are coherent in memory (visible to the GPU) prior to + * execution, and not just visible to other CPUs (as is the result of + * wmb). */ - wmb(); + mb(); return ce->lrc_desc; } -- GitLab From c8626858b2d873e2528d3099b896459947996247 Mon Sep 17 00:00:00 2001 From: Junwei Zhang Date: Fri, 7 Dec 2018 15:15:03 +0800 Subject: [PATCH 2214/2269] drm/amdgpu: update SMC firmware image for polaris10 variants commit d55d8be0747c96db28a1d08fc24d22ccd9b448ac upstream. Some new variants require different firmwares. Signed-off-by: Junwei Zhang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c index e8d9479615c95..bb4b804255a80 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c @@ -723,7 +723,8 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device, (adev->pdev->revision == 0xe7) || (adev->pdev->revision == 0xef))) || ((adev->pdev->device == 0x6fdf) && - (adev->pdev->revision == 0xef))) { + ((adev->pdev->revision == 0xef) || + (adev->pdev->revision == 0xff)))) { info->is_kicker = true; strcpy(fw_name, "amdgpu/polaris10_k_smc.bin"); } else -- GitLab From f02ef68bdad39ebf881f64cab027e8696fe57ade Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 5 Dec 2018 15:27:19 +0900 Subject: [PATCH 2215/2269] x86/build: Fix compiler support check for CONFIG_RETPOLINE commit 25896d073d8a0403b07e6dec56f58e6c33678207 upstream. It is troublesome to add a diagnostic like this to the Makefile parse stage because the top-level Makefile could be parsed with a stale include/config/auto.conf. Once you are hit by the error about non-retpoline compiler, the compilation still breaks even after disabling CONFIG_RETPOLINE. The easiest fix is to move this check to the "archprepare" like this commit did: 829fe4aa9ac1 ("x86: Allow generating user-space headers without a compiler") Reported-by: Meelis Roos Tested-by: Meelis Roos Signed-off-by: Masahiro Yamada Acked-by: Zhenzhong Duan Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Zhenzhong Duan Fixes: 4cd24de3a098 ("x86/retpoline: Make CONFIG_RETPOLINE depend on compiler support") Link: http://lkml.kernel.org/r/1543991239-18476-1-git-send-email-yamada.masahiro@socionext.com Link: https://lkml.org/lkml/2018/12/4/206 Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin Cc: Gi-Oh Kim Signed-off-by: Greg Kroah-Hartman --- arch/x86/Makefile | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/arch/x86/Makefile b/arch/x86/Makefile index ce3658dd98e82..c5290aecdf06e 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -241,9 +241,6 @@ KBUILD_CFLAGS += -fno-asynchronous-unwind-tables # Avoid indirect branches in kernel to deal with Spectre ifdef CONFIG_RETPOLINE -ifeq ($(RETPOLINE_CFLAGS),) - $(error You are building kernel with non-retpoline compiler, please update your compiler.) -endif KBUILD_CFLAGS += $(RETPOLINE_CFLAGS) endif @@ -260,6 +257,13 @@ archprepare: ifeq ($(CONFIG_KEXEC_FILE),y) $(Q)$(MAKE) $(build)=arch/x86/purgatory arch/x86/purgatory/kexec-purgatory.c endif +ifdef CONFIG_RETPOLINE +ifeq ($(RETPOLINE_CFLAGS),) + @echo "You are building kernel with non-retpoline compiler." >&2 + @echo "Please update your compiler." >&2 + @false +endif +endif ### # Kernel objects -- GitLab From a9febd662cee68712aea69012944df00033d2dd8 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 18 Dec 2018 18:13:51 +0100 Subject: [PATCH 2216/2269] locking: Remove smp_read_barrier_depends() from queued_spin_lock_slowpath() commit 548095dea63ffc016d39c35b32c628d033638aca upstream. Queued spinlocks are not used by DEC Alpha, and furthermore operations such as READ_ONCE() and release/relaxed RMW atomics are being changed to imply smp_read_barrier_depends(). This commit therefore removes the now-redundant smp_read_barrier_depends() from queued_spin_lock_slowpath(), and adjusts the comments accordingly. Signed-off-by: Paul E. McKenney Cc: Peter Zijlstra Cc: Ingo Molnar Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Sasha Levin --- kernel/locking/qspinlock.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c index 50dc42aeaa569..5541acb79e152 100644 --- a/kernel/locking/qspinlock.c +++ b/kernel/locking/qspinlock.c @@ -170,7 +170,7 @@ static __always_inline void clear_pending_set_locked(struct qspinlock *lock) * @tail : The new queue tail code word * Return: The previous queue tail code word * - * xchg(lock, tail) + * xchg(lock, tail), which heads an address dependency * * p,*,* -> n,*,* ; prev = xchg(lock, node) */ @@ -417,13 +417,11 @@ queue: if (old & _Q_TAIL_MASK) { prev = decode_tail(old); /* - * The above xchg_tail() is also a load of @lock which generates, - * through decode_tail(), a pointer. - * - * The address dependency matches the RELEASE of xchg_tail() - * such that the access to @prev must happen after. + * The above xchg_tail() is also a load of @lock which + * generates, through decode_tail(), a pointer. The address + * dependency matches the RELEASE of xchg_tail() such that + * the subsequent access to @prev happens after. */ - smp_read_barrier_depends(); WRITE_ONCE(prev->next, node); -- GitLab From 13f14c36323d71e2178d7d97dc65808c0e5c872e Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 18 Dec 2018 18:13:52 +0100 Subject: [PATCH 2217/2269] locking/qspinlock: Ensure node is initialised before updating prev->next commit 95bcade33a8af38755c9b0636e36a36ad3789fe6 upstream. When a locker ends up queuing on the qspinlock locking slowpath, we initialise the relevant mcs node and publish it indirectly by updating the tail portion of the lock word using xchg_tail. If we find that there was a pre-existing locker in the queue, we subsequently update their ->next field to point at our node so that we are notified when it's our turn to take the lock. This can be roughly illustrated as follows: /* Initialise the fields in node and encode a pointer to node in tail */ tail = initialise_node(node); /* * Exchange tail into the lockword using an atomic read-modify-write * operation with release semantics */ old = xchg_tail(lock, tail); /* If there was a pre-existing waiter ... */ if (old & _Q_TAIL_MASK) { prev = decode_tail(old); smp_read_barrier_depends(); /* ... then update their ->next field to point to node. WRITE_ONCE(prev->next, node); } The conditional update of prev->next therefore relies on the address dependency from the result of xchg_tail ensuring order against the prior initialisation of node. However, since the release semantics of the xchg_tail operation apply only to the write portion of the RmW, then this ordering is not guaranteed and it is possible for the CPU to return old before the writes to node have been published, consequently allowing us to point prev->next to an uninitialised node. This patch fixes the problem by making the update of prev->next a RELEASE operation, which also removes the reliance on dependency ordering. Signed-off-by: Will Deacon Acked-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1518528177-19169-2-git-send-email-will.deacon@arm.com Signed-off-by: Ingo Molnar Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Sasha Levin --- kernel/locking/qspinlock.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c index 5541acb79e152..d880296245c59 100644 --- a/kernel/locking/qspinlock.c +++ b/kernel/locking/qspinlock.c @@ -416,14 +416,15 @@ queue: */ if (old & _Q_TAIL_MASK) { prev = decode_tail(old); + /* - * The above xchg_tail() is also a load of @lock which - * generates, through decode_tail(), a pointer. The address - * dependency matches the RELEASE of xchg_tail() such that - * the subsequent access to @prev happens after. + * We must ensure that the stores to @node are observed before + * the write to prev->next. The address dependency from + * xchg_tail is not sufficient to ensure this because the read + * component of xchg_tail is unordered with respect to the + * initialisation of @node. */ - - WRITE_ONCE(prev->next, node); + smp_store_release(&prev->next, node); pv_wait_node(node, prev); arch_mcs_spin_lock_contended(&node->locked); -- GitLab From 3dab30f33814dc0b97c001921d3d724004c09b5f Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 18 Dec 2018 18:13:53 +0100 Subject: [PATCH 2218/2269] locking/qspinlock: Bound spinning on pending->locked transition in slowpath commit 6512276d97b160d90b53285bd06f7f201459a7e3 upstream. If a locker taking the qspinlock slowpath reads a lock value indicating that only the pending bit is set, then it will spin whilst the concurrent pending->locked transition takes effect. Unfortunately, there is no guarantee that such a transition will ever be observed since concurrent lockers could continuously set pending and hand over the lock amongst themselves, leading to starvation. Whilst this would probably resolve in practice, it means that it is not possible to prove liveness properties about the lock and means that lock acquisition time is unbounded. Rather than removing the pending->locked spinning from the slowpath altogether (which has been shown to heavily penalise a 2-threaded locking stress test on x86), this patch replaces the explicit spinning with a call to atomic_cond_read_relaxed and allows the architecture to provide a bound on the number of spins. For architectures that can respond to changes in cacheline state in their smp_cond_load implementation, it should be sufficient to use the default bound of 1. Suggested-by: Waiman Long Signed-off-by: Will Deacon Acked-by: Peter Zijlstra (Intel) Acked-by: Waiman Long Cc: Linus Torvalds Cc: Thomas Gleixner Cc: boqun.feng@gmail.com Cc: linux-arm-kernel@lists.infradead.org Cc: paulmck@linux.vnet.ibm.com Link: http://lkml.kernel.org/r/1524738868-31318-4-git-send-email-will.deacon@arm.com Signed-off-by: Ingo Molnar Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Sasha Levin --- kernel/locking/qspinlock.c | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c index d880296245c59..18161264227a3 100644 --- a/kernel/locking/qspinlock.c +++ b/kernel/locking/qspinlock.c @@ -76,6 +76,18 @@ #define MAX_NODES 4 #endif +/* + * The pending bit spinning loop count. + * This heuristic is used to limit the number of lockword accesses + * made by atomic_cond_read_relaxed when waiting for the lock to + * transition out of the "== _Q_PENDING_VAL" state. We don't spin + * indefinitely because there's no guarantee that we'll make forward + * progress. + */ +#ifndef _Q_PENDING_LOOPS +#define _Q_PENDING_LOOPS 1 +#endif + /* * Per-CPU queue node structures; we can never have more than 4 nested * contexts: task, softirq, hardirq, nmi. @@ -306,13 +318,15 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val) return; /* - * wait for in-progress pending->locked hand-overs + * Wait for in-progress pending->locked hand-overs with a bounded + * number of spins so that we guarantee forward progress. * * 0,1,0 -> 0,0,1 */ if (val == _Q_PENDING_VAL) { - while ((val = atomic_read(&lock->val)) == _Q_PENDING_VAL) - cpu_relax(); + int cnt = _Q_PENDING_LOOPS; + val = smp_cond_load_acquire(&lock->val.counter, + (VAL != _Q_PENDING_VAL) || !cnt--); } /* -- GitLab From 5261ad70e2d673b9cfea7c81f309cabc847b7a27 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 18 Dec 2018 18:13:54 +0100 Subject: [PATCH 2219/2269] locking/qspinlock: Merge 'struct __qspinlock' into 'struct qspinlock' commit 625e88be1f41b53cec55827c984e4a89ea8ee9f9 upstream. 'struct __qspinlock' provides a handy union of fields so that subcomponents of the lockword can be accessed by name, without having to manage shifts and masks explicitly and take endianness into account. This is useful in qspinlock.h and also potentially in arch headers, so move the 'struct __qspinlock' into 'struct qspinlock' and kill the extra definition. Signed-off-by: Will Deacon Acked-by: Peter Zijlstra (Intel) Acked-by: Waiman Long Acked-by: Boqun Feng Cc: Linus Torvalds Cc: Thomas Gleixner Cc: linux-arm-kernel@lists.infradead.org Cc: paulmck@linux.vnet.ibm.com Link: http://lkml.kernel.org/r/1524738868-31318-3-git-send-email-will.deacon@arm.com Signed-off-by: Ingo Molnar Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Sasha Levin --- arch/x86/include/asm/qspinlock.h | 2 +- arch/x86/include/asm/qspinlock_paravirt.h | 3 +- include/asm-generic/qspinlock_types.h | 32 +++++++++++++++- kernel/locking/qspinlock.c | 46 ++--------------------- kernel/locking/qspinlock_paravirt.h | 34 ++++++----------- 5 files changed, 46 insertions(+), 71 deletions(-) diff --git a/arch/x86/include/asm/qspinlock.h b/arch/x86/include/asm/qspinlock.h index 9982dd96f093c..cf4cdf508ef42 100644 --- a/arch/x86/include/asm/qspinlock.h +++ b/arch/x86/include/asm/qspinlock.h @@ -15,7 +15,7 @@ */ static inline void native_queued_spin_unlock(struct qspinlock *lock) { - smp_store_release((u8 *)lock, 0); + smp_store_release(&lock->locked, 0); } #ifdef CONFIG_PARAVIRT_SPINLOCKS diff --git a/arch/x86/include/asm/qspinlock_paravirt.h b/arch/x86/include/asm/qspinlock_paravirt.h index 923307ea11c71..9ef5ee03d2d79 100644 --- a/arch/x86/include/asm/qspinlock_paravirt.h +++ b/arch/x86/include/asm/qspinlock_paravirt.h @@ -22,8 +22,7 @@ PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock_slowpath); * * void __pv_queued_spin_unlock(struct qspinlock *lock) * { - * struct __qspinlock *l = (void *)lock; - * u8 lockval = cmpxchg(&l->locked, _Q_LOCKED_VAL, 0); + * u8 lockval = cmpxchg(&lock->locked, _Q_LOCKED_VAL, 0); * * if (likely(lockval == _Q_LOCKED_VAL)) * return; diff --git a/include/asm-generic/qspinlock_types.h b/include/asm-generic/qspinlock_types.h index 034acd0c4956b..0763f065b975a 100644 --- a/include/asm-generic/qspinlock_types.h +++ b/include/asm-generic/qspinlock_types.h @@ -29,13 +29,41 @@ #endif typedef struct qspinlock { - atomic_t val; + union { + atomic_t val; + + /* + * By using the whole 2nd least significant byte for the + * pending bit, we can allow better optimization of the lock + * acquisition for the pending bit holder. + */ +#ifdef __LITTLE_ENDIAN + struct { + u8 locked; + u8 pending; + }; + struct { + u16 locked_pending; + u16 tail; + }; +#else + struct { + u16 tail; + u16 locked_pending; + }; + struct { + u8 reserved[2]; + u8 pending; + u8 locked; + }; +#endif + }; } arch_spinlock_t; /* * Initializier */ -#define __ARCH_SPIN_LOCK_UNLOCKED { ATOMIC_INIT(0) } +#define __ARCH_SPIN_LOCK_UNLOCKED { .val = ATOMIC_INIT(0) } /* * Bitfields in the atomic value: diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c index 18161264227a3..e60e618287b49 100644 --- a/kernel/locking/qspinlock.c +++ b/kernel/locking/qspinlock.c @@ -126,40 +126,6 @@ static inline __pure struct mcs_spinlock *decode_tail(u32 tail) #define _Q_LOCKED_PENDING_MASK (_Q_LOCKED_MASK | _Q_PENDING_MASK) -/* - * By using the whole 2nd least significant byte for the pending bit, we - * can allow better optimization of the lock acquisition for the pending - * bit holder. - * - * This internal structure is also used by the set_locked function which - * is not restricted to _Q_PENDING_BITS == 8. - */ -struct __qspinlock { - union { - atomic_t val; -#ifdef __LITTLE_ENDIAN - struct { - u8 locked; - u8 pending; - }; - struct { - u16 locked_pending; - u16 tail; - }; -#else - struct { - u16 tail; - u16 locked_pending; - }; - struct { - u8 reserved[2]; - u8 pending; - u8 locked; - }; -#endif - }; -}; - #if _Q_PENDING_BITS == 8 /** * clear_pending_set_locked - take ownership and clear the pending bit. @@ -171,9 +137,7 @@ struct __qspinlock { */ static __always_inline void clear_pending_set_locked(struct qspinlock *lock) { - struct __qspinlock *l = (void *)lock; - - WRITE_ONCE(l->locked_pending, _Q_LOCKED_VAL); + WRITE_ONCE(lock->locked_pending, _Q_LOCKED_VAL); } /* @@ -188,13 +152,11 @@ static __always_inline void clear_pending_set_locked(struct qspinlock *lock) */ static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail) { - struct __qspinlock *l = (void *)lock; - /* * Use release semantics to make sure that the MCS node is properly * initialized before changing the tail code. */ - return (u32)xchg_release(&l->tail, + return (u32)xchg_release(&lock->tail, tail >> _Q_TAIL_OFFSET) << _Q_TAIL_OFFSET; } @@ -249,9 +211,7 @@ static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail) */ static __always_inline void set_locked(struct qspinlock *lock) { - struct __qspinlock *l = (void *)lock; - - WRITE_ONCE(l->locked, _Q_LOCKED_VAL); + WRITE_ONCE(lock->locked, _Q_LOCKED_VAL); } diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h index 15b6a39366c62..1435ba7954c3d 100644 --- a/kernel/locking/qspinlock_paravirt.h +++ b/kernel/locking/qspinlock_paravirt.h @@ -70,10 +70,8 @@ struct pv_node { #define queued_spin_trylock(l) pv_queued_spin_steal_lock(l) static inline bool pv_queued_spin_steal_lock(struct qspinlock *lock) { - struct __qspinlock *l = (void *)lock; - if (!(atomic_read(&lock->val) & _Q_LOCKED_PENDING_MASK) && - (cmpxchg_acquire(&l->locked, 0, _Q_LOCKED_VAL) == 0)) { + (cmpxchg_acquire(&lock->locked, 0, _Q_LOCKED_VAL) == 0)) { qstat_inc(qstat_pv_lock_stealing, true); return true; } @@ -88,16 +86,12 @@ static inline bool pv_queued_spin_steal_lock(struct qspinlock *lock) #if _Q_PENDING_BITS == 8 static __always_inline void set_pending(struct qspinlock *lock) { - struct __qspinlock *l = (void *)lock; - - WRITE_ONCE(l->pending, 1); + WRITE_ONCE(lock->pending, 1); } static __always_inline void clear_pending(struct qspinlock *lock) { - struct __qspinlock *l = (void *)lock; - - WRITE_ONCE(l->pending, 0); + WRITE_ONCE(lock->pending, 0); } /* @@ -107,10 +101,8 @@ static __always_inline void clear_pending(struct qspinlock *lock) */ static __always_inline int trylock_clear_pending(struct qspinlock *lock) { - struct __qspinlock *l = (void *)lock; - - return !READ_ONCE(l->locked) && - (cmpxchg_acquire(&l->locked_pending, _Q_PENDING_VAL, + return !READ_ONCE(lock->locked) && + (cmpxchg_acquire(&lock->locked_pending, _Q_PENDING_VAL, _Q_LOCKED_VAL) == _Q_PENDING_VAL); } #else /* _Q_PENDING_BITS == 8 */ @@ -355,7 +347,6 @@ static void pv_wait_node(struct mcs_spinlock *node, struct mcs_spinlock *prev) static void pv_kick_node(struct qspinlock *lock, struct mcs_spinlock *node) { struct pv_node *pn = (struct pv_node *)node; - struct __qspinlock *l = (void *)lock; /* * If the vCPU is indeed halted, advance its state to match that of @@ -384,7 +375,7 @@ static void pv_kick_node(struct qspinlock *lock, struct mcs_spinlock *node) * the hash table later on at unlock time, no atomic instruction is * needed. */ - WRITE_ONCE(l->locked, _Q_SLOW_VAL); + WRITE_ONCE(lock->locked, _Q_SLOW_VAL); (void)pv_hash(lock, pn); } @@ -399,7 +390,6 @@ static u32 pv_wait_head_or_lock(struct qspinlock *lock, struct mcs_spinlock *node) { struct pv_node *pn = (struct pv_node *)node; - struct __qspinlock *l = (void *)lock; struct qspinlock **lp = NULL; int waitcnt = 0; int loop; @@ -450,13 +440,13 @@ pv_wait_head_or_lock(struct qspinlock *lock, struct mcs_spinlock *node) * * Matches the smp_rmb() in __pv_queued_spin_unlock(). */ - if (xchg(&l->locked, _Q_SLOW_VAL) == 0) { + if (xchg(&lock->locked, _Q_SLOW_VAL) == 0) { /* * The lock was free and now we own the lock. * Change the lock value back to _Q_LOCKED_VAL * and unhash the table. */ - WRITE_ONCE(l->locked, _Q_LOCKED_VAL); + WRITE_ONCE(lock->locked, _Q_LOCKED_VAL); WRITE_ONCE(*lp, NULL); goto gotlock; } @@ -464,7 +454,7 @@ pv_wait_head_or_lock(struct qspinlock *lock, struct mcs_spinlock *node) WRITE_ONCE(pn->state, vcpu_hashed); qstat_inc(qstat_pv_wait_head, true); qstat_inc(qstat_pv_wait_again, waitcnt); - pv_wait(&l->locked, _Q_SLOW_VAL); + pv_wait(&lock->locked, _Q_SLOW_VAL); /* * Because of lock stealing, the queue head vCPU may not be @@ -489,7 +479,6 @@ gotlock: __visible void __pv_queued_spin_unlock_slowpath(struct qspinlock *lock, u8 locked) { - struct __qspinlock *l = (void *)lock; struct pv_node *node; if (unlikely(locked != _Q_SLOW_VAL)) { @@ -518,7 +507,7 @@ __pv_queued_spin_unlock_slowpath(struct qspinlock *lock, u8 locked) * Now that we have a reference to the (likely) blocked pv_node, * release the lock. */ - smp_store_release(&l->locked, 0); + smp_store_release(&lock->locked, 0); /* * At this point the memory pointed at by lock can be freed/reused, @@ -544,7 +533,6 @@ __pv_queued_spin_unlock_slowpath(struct qspinlock *lock, u8 locked) #ifndef __pv_queued_spin_unlock __visible void __pv_queued_spin_unlock(struct qspinlock *lock) { - struct __qspinlock *l = (void *)lock; u8 locked; /* @@ -552,7 +540,7 @@ __visible void __pv_queued_spin_unlock(struct qspinlock *lock) * unhash. Otherwise it would be possible to have multiple @lock * entries, which would be BAD. */ - locked = cmpxchg_release(&l->locked, _Q_LOCKED_VAL, 0); + locked = cmpxchg_release(&lock->locked, _Q_LOCKED_VAL, 0); if (likely(locked == _Q_LOCKED_VAL)) return; -- GitLab From 7a617996cba577806960e87882e60d7444d01df4 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 18 Dec 2018 18:13:55 +0100 Subject: [PATCH 2220/2269] locking/qspinlock: Remove unbounded cmpxchg() loop from locking slowpath commit 59fb586b4a07b4e1a0ee577140ab4842ba451acd upstream. The qspinlock locking slowpath utilises a "pending" bit as a simple form of an embedded test-and-set lock that can avoid the overhead of explicit queuing in cases where the lock is held but uncontended. This bit is managed using a cmpxchg() loop which tries to transition the uncontended lock word from (0,0,0) -> (0,0,1) or (0,0,1) -> (0,1,1). Unfortunately, the cmpxchg() loop is unbounded and lockers can be starved indefinitely if the lock word is seen to oscillate between unlocked (0,0,0) and locked (0,0,1). This could happen if concurrent lockers are able to take the lock in the cmpxchg() loop without queuing and pass it around amongst themselves. This patch fixes the problem by unconditionally setting _Q_PENDING_VAL using atomic_fetch_or, and then inspecting the old value to see whether we need to spin on the current lock owner, or whether we now effectively hold the lock. The tricky scenario is when concurrent lockers end up queuing on the lock and the lock becomes available, causing us to see a lockword of (n,0,0). With pending now set, simply queuing could lead to deadlock as the head of the queue may not have observed the pending flag being cleared. Conversely, if the head of the queue did observe pending being cleared, then it could transition the lock from (n,0,0) -> (0,0,1) meaning that any attempt to "undo" our setting of the pending bit could race with a concurrent locker trying to set it. We handle this race by preserving the pending bit when taking the lock after reaching the head of the queue and leaving the tail entry intact if we saw pending set, because we know that the tail is going to be updated shortly. Signed-off-by: Will Deacon Acked-by: Peter Zijlstra (Intel) Acked-by: Waiman Long Cc: Linus Torvalds Cc: Thomas Gleixner Cc: boqun.feng@gmail.com Cc: linux-arm-kernel@lists.infradead.org Cc: paulmck@linux.vnet.ibm.com Link: http://lkml.kernel.org/r/1524738868-31318-6-git-send-email-will.deacon@arm.com Signed-off-by: Ingo Molnar Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Sasha Levin --- kernel/locking/qspinlock.c | 102 ++++++++++++++++------------ kernel/locking/qspinlock_paravirt.h | 5 -- 2 files changed, 58 insertions(+), 49 deletions(-) diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c index e60e618287b49..7bd053e528c22 100644 --- a/kernel/locking/qspinlock.c +++ b/kernel/locking/qspinlock.c @@ -127,6 +127,17 @@ static inline __pure struct mcs_spinlock *decode_tail(u32 tail) #define _Q_LOCKED_PENDING_MASK (_Q_LOCKED_MASK | _Q_PENDING_MASK) #if _Q_PENDING_BITS == 8 +/** + * clear_pending - clear the pending bit. + * @lock: Pointer to queued spinlock structure + * + * *,1,* -> *,0,* + */ +static __always_inline void clear_pending(struct qspinlock *lock) +{ + WRITE_ONCE(lock->pending, 0); +} + /** * clear_pending_set_locked - take ownership and clear the pending bit. * @lock: Pointer to queued spinlock structure @@ -162,6 +173,17 @@ static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail) #else /* _Q_PENDING_BITS == 8 */ +/** + * clear_pending - clear the pending bit. + * @lock: Pointer to queued spinlock structure + * + * *,1,* -> *,0,* + */ +static __always_inline void clear_pending(struct qspinlock *lock) +{ + atomic_andnot(_Q_PENDING_VAL, &lock->val); +} + /** * clear_pending_set_locked - take ownership and clear the pending bit. * @lock: Pointer to queued spinlock structure @@ -266,7 +288,7 @@ static __always_inline u32 __pv_wait_head_or_lock(struct qspinlock *lock, void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val) { struct mcs_spinlock *prev, *next, *node; - u32 new, old, tail; + u32 old, tail; int idx; BUILD_BUG_ON(CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS)); @@ -289,59 +311,51 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val) (VAL != _Q_PENDING_VAL) || !cnt--); } + /* + * If we observe any contention; queue. + */ + if (val & ~_Q_LOCKED_MASK) + goto queue; + /* * trylock || pending * * 0,0,0 -> 0,0,1 ; trylock * 0,0,1 -> 0,1,1 ; pending */ - for (;;) { + val = atomic_fetch_or_acquire(_Q_PENDING_VAL, &lock->val); + if (!(val & ~_Q_LOCKED_MASK)) { /* - * If we observe any contention; queue. + * We're pending, wait for the owner to go away. + * + * *,1,1 -> *,1,0 + * + * this wait loop must be a load-acquire such that we match the + * store-release that clears the locked bit and create lock + * sequentiality; this is because not all + * clear_pending_set_locked() implementations imply full + * barriers. */ - if (val & ~_Q_LOCKED_MASK) - goto queue; - - new = _Q_LOCKED_VAL; - if (val == new) - new |= _Q_PENDING_VAL; + if (val & _Q_LOCKED_MASK) { + smp_cond_load_acquire(&lock->val.counter, + !(VAL & _Q_LOCKED_MASK)); + } /* - * Acquire semantic is required here as the function may - * return immediately if the lock was free. + * take ownership and clear the pending bit. + * + * *,1,0 -> *,0,1 */ - old = atomic_cmpxchg_acquire(&lock->val, val, new); - if (old == val) - break; - - val = old; - } - - /* - * we won the trylock - */ - if (new == _Q_LOCKED_VAL) + clear_pending_set_locked(lock); return; + } /* - * we're pending, wait for the owner to go away. - * - * *,1,1 -> *,1,0 - * - * this wait loop must be a load-acquire such that we match the - * store-release that clears the locked bit and create lock - * sequentiality; this is because not all clear_pending_set_locked() - * implementations imply full barriers. - */ - smp_cond_load_acquire(&lock->val.counter, !(VAL & _Q_LOCKED_MASK)); - - /* - * take ownership and clear the pending bit. - * - * *,1,0 -> *,0,1 + * If pending was clear but there are waiters in the queue, then + * we need to undo our setting of pending before we queue ourselves. */ - clear_pending_set_locked(lock); - return; + if (!(val & _Q_PENDING_MASK)) + clear_pending(lock); /* * End of pending bit optimistic spinning and beginning of MCS @@ -445,15 +459,15 @@ locked: * claim the lock: * * n,0,0 -> 0,0,1 : lock, uncontended - * *,0,0 -> *,0,1 : lock, contended + * *,*,0 -> *,*,1 : lock, contended * - * If the queue head is the only one in the queue (lock value == tail), - * clear the tail code and grab the lock. Otherwise, we only need - * to grab the lock. + * If the queue head is the only one in the queue (lock value == tail) + * and nobody is pending, clear the tail code and grab the lock. + * Otherwise, we only need to grab the lock. */ for (;;) { /* In the PV case we might already have _Q_LOCKED_VAL set */ - if ((val & _Q_TAIL_MASK) != tail) { + if ((val & _Q_TAIL_MASK) != tail || (val & _Q_PENDING_MASK)) { set_locked(lock); break; } diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h index 1435ba7954c3d..854443f7b60b6 100644 --- a/kernel/locking/qspinlock_paravirt.h +++ b/kernel/locking/qspinlock_paravirt.h @@ -89,11 +89,6 @@ static __always_inline void set_pending(struct qspinlock *lock) WRITE_ONCE(lock->pending, 1); } -static __always_inline void clear_pending(struct qspinlock *lock) -{ - WRITE_ONCE(lock->pending, 0); -} - /* * The pending bit check in pv_queued_spin_steal_lock() isn't a memory * barrier. Therefore, an atomic cmpxchg_acquire() is used to acquire the -- GitLab From 075703d79c3a0a603a69e09167f96c1a2502eabe Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 18 Dec 2018 18:13:56 +0100 Subject: [PATCH 2221/2269] locking/qspinlock: Remove duplicate clear_pending() function from PV code commit 3bea9adc96842b8a7345c7fb202c16ae9c8d5b25 upstream. The native clear_pending() function is identical to the PV version, so the latter can simply be removed. This fixes the build for systems with >= 16K CPUs using the PV lock implementation. Reported-by: Waiman Long Signed-off-by: Will Deacon Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: boqun.feng@gmail.com Cc: linux-arm-kernel@lists.infradead.org Cc: paulmck@linux.vnet.ibm.com Link: http://lkml.kernel.org/r/20180427101619.GB21705@arm.com Signed-off-by: Ingo Molnar Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Sasha Levin --- kernel/locking/qspinlock_paravirt.h | 5 ----- 1 file changed, 5 deletions(-) diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h index 854443f7b60b6..1e882dfc8b797 100644 --- a/kernel/locking/qspinlock_paravirt.h +++ b/kernel/locking/qspinlock_paravirt.h @@ -106,11 +106,6 @@ static __always_inline void set_pending(struct qspinlock *lock) atomic_or(_Q_PENDING_VAL, &lock->val); } -static __always_inline void clear_pending(struct qspinlock *lock) -{ - atomic_andnot(_Q_PENDING_VAL, &lock->val); -} - static __always_inline int trylock_clear_pending(struct qspinlock *lock) { int val = atomic_read(&lock->val); -- GitLab From f2f76a2c666e9b09fa1fb903bdb20c756dc90e21 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 18 Dec 2018 18:13:57 +0100 Subject: [PATCH 2222/2269] locking/qspinlock: Kill cmpxchg() loop when claiming lock from head of queue commit c61da58d8a9ba9238250a548f00826eaf44af0f7 upstream. When a queued locker reaches the head of the queue, it claims the lock by setting _Q_LOCKED_VAL in the lockword. If there isn't contention, it must also clear the tail as part of this operation so that subsequent lockers can avoid taking the slowpath altogether. Currently this is expressed as a cmpxchg() loop that practically only runs up to two iterations. This is confusing to the reader and unhelpful to the compiler. Rewrite the cmpxchg() loop without the loop, so that a failed cmpxchg() implies that there is contention and we just need to write to _Q_LOCKED_VAL without considering the rest of the lockword. Signed-off-by: Will Deacon Acked-by: Peter Zijlstra (Intel) Acked-by: Waiman Long Cc: Linus Torvalds Cc: Thomas Gleixner Cc: boqun.feng@gmail.com Cc: linux-arm-kernel@lists.infradead.org Cc: paulmck@linux.vnet.ibm.com Link: http://lkml.kernel.org/r/1524738868-31318-7-git-send-email-will.deacon@arm.com Signed-off-by: Ingo Molnar Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Sasha Levin --- kernel/locking/qspinlock.c | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c index 7bd053e528c22..841550dfb7b85 100644 --- a/kernel/locking/qspinlock.c +++ b/kernel/locking/qspinlock.c @@ -465,24 +465,21 @@ locked: * and nobody is pending, clear the tail code and grab the lock. * Otherwise, we only need to grab the lock. */ - for (;;) { - /* In the PV case we might already have _Q_LOCKED_VAL set */ - if ((val & _Q_TAIL_MASK) != tail || (val & _Q_PENDING_MASK)) { - set_locked(lock); - break; - } + + /* In the PV case we might already have _Q_LOCKED_VAL set */ + if ((val & _Q_TAIL_MASK) == tail) { /* * The smp_cond_load_acquire() call above has provided the - * necessary acquire semantics required for locking. At most - * two iterations of this loop may be ran. + * necessary acquire semantics required for locking. */ old = atomic_cmpxchg_relaxed(&lock->val, val, _Q_LOCKED_VAL); if (old == val) - goto release; /* No contention */ - - val = old; + goto release; /* No contention */ } + /* Either somebody is queued behind us or _Q_PENDING_VAL is set */ + set_locked(lock); + /* * contended path; wait for next if not observed yet, release. */ -- GitLab From 49849a651b8456263391ae767be6f7d02af8ef26 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 18 Dec 2018 18:13:58 +0100 Subject: [PATCH 2223/2269] locking/qspinlock: Re-order code commit 53bf57fab7321fb42b703056a4c80fc9d986d170 upstream. Flip the branch condition after atomic_fetch_or_acquire(_Q_PENDING_VAL) such that we loose the indent. This also result in a more natural code flow IMO. Signed-off-by: Peter Zijlstra (Intel) Acked-by: Will Deacon Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: andrea.parri@amarulasolutions.com Cc: longman@redhat.com Link: https://lkml.kernel.org/r/20181003130257.156322446@infradead.org Signed-off-by: Ingo Molnar Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Sasha Levin --- kernel/locking/qspinlock.c | 54 ++++++++++++++++++-------------------- 1 file changed, 26 insertions(+), 28 deletions(-) diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c index 841550dfb7b85..9ffc2f9af8b8f 100644 --- a/kernel/locking/qspinlock.c +++ b/kernel/locking/qspinlock.c @@ -324,38 +324,36 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val) * 0,0,1 -> 0,1,1 ; pending */ val = atomic_fetch_or_acquire(_Q_PENDING_VAL, &lock->val); - if (!(val & ~_Q_LOCKED_MASK)) { - /* - * We're pending, wait for the owner to go away. - * - * *,1,1 -> *,1,0 - * - * this wait loop must be a load-acquire such that we match the - * store-release that clears the locked bit and create lock - * sequentiality; this is because not all - * clear_pending_set_locked() implementations imply full - * barriers. - */ - if (val & _Q_LOCKED_MASK) { - smp_cond_load_acquire(&lock->val.counter, - !(VAL & _Q_LOCKED_MASK)); - } - - /* - * take ownership and clear the pending bit. - * - * *,1,0 -> *,0,1 - */ - clear_pending_set_locked(lock); - return; + /* + * If we observe any contention; undo and queue. + */ + if (unlikely(val & ~_Q_LOCKED_MASK)) { + if (!(val & _Q_PENDING_MASK)) + clear_pending(lock); + goto queue; } /* - * If pending was clear but there are waiters in the queue, then - * we need to undo our setting of pending before we queue ourselves. + * We're pending, wait for the owner to go away. + * + * 0,1,1 -> 0,1,0 + * + * this wait loop must be a load-acquire such that we match the + * store-release that clears the locked bit and create lock + * sequentiality; this is because not all + * clear_pending_set_locked() implementations imply full + * barriers. + */ + if (val & _Q_LOCKED_MASK) + smp_cond_load_acquire(&lock->val.counter, !(VAL & _Q_LOCKED_MASK)); + + /* + * take ownership and clear the pending bit. + * + * 0,1,0 -> 0,0,1 */ - if (!(val & _Q_PENDING_MASK)) - clear_pending(lock); + clear_pending_set_locked(lock); + return; /* * End of pending bit optimistic spinning and beginning of MCS -- GitLab From 4e21502d37a503d4311e2bdb46a66aef783a7cfe Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 18 Dec 2018 18:13:59 +0100 Subject: [PATCH 2224/2269] locking/qspinlock/x86: Increase _Q_PENDING_LOOPS upper bound commit b247be3fe89b6aba928bf80f4453d1c4ba8d2063 upstream. On x86, atomic_cond_read_relaxed will busy-wait with a cpu_relax() loop, so it is desirable to increase the number of times we spin on the qspinlock lockword when it is found to be transitioning from pending to locked. According to Waiman Long: | Ideally, the spinning times should be at least a few times the typical | cacheline load time from memory which I think can be down to 100ns or | so for each cacheline load with the newest systems or up to several | hundreds ns for older systems. which in his benchmarking corresponded to 512 iterations. Suggested-by: Waiman Long Signed-off-by: Will Deacon Acked-by: Peter Zijlstra (Intel) Acked-by: Waiman Long Cc: Linus Torvalds Cc: Thomas Gleixner Cc: boqun.feng@gmail.com Cc: linux-arm-kernel@lists.infradead.org Cc: paulmck@linux.vnet.ibm.com Link: http://lkml.kernel.org/r/1524738868-31318-5-git-send-email-will.deacon@arm.com Signed-off-by: Ingo Molnar Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Sasha Levin --- arch/x86/include/asm/qspinlock.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/include/asm/qspinlock.h b/arch/x86/include/asm/qspinlock.h index cf4cdf508ef42..2cb6624acaec6 100644 --- a/arch/x86/include/asm/qspinlock.h +++ b/arch/x86/include/asm/qspinlock.h @@ -6,6 +6,8 @@ #include #include +#define _Q_PENDING_LOOPS (1 << 9) + #define queued_spin_unlock queued_spin_unlock /** * queued_spin_unlock - release a queued spinlock -- GitLab From 5d01e063296a80e93abe93803602e5cf37309865 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 18 Dec 2018 18:14:00 +0100 Subject: [PATCH 2225/2269] locking/qspinlock, x86: Provide liveness guarantee commit 7aa54be2976550f17c11a1c3e3630002dea39303 upstream. On x86 we cannot do fetch_or() with a single instruction and thus end up using a cmpxchg loop, this reduces determinism. Replace the fetch_or() with a composite operation: tas-pending + load. Using two instructions of course opens a window we previously did not have. Consider the scenario: CPU0 CPU1 CPU2 1) lock trylock -> (0,0,1) 2) lock trylock /* fail */ 3) unlock -> (0,0,0) 4) lock trylock -> (0,0,1) 5) tas-pending -> (0,1,1) load-val <- (0,1,0) from 3 6) clear-pending-set-locked -> (0,0,1) FAIL: _2_ owners where 5) is our new composite operation. When we consider each part of the qspinlock state as a separate variable (as we can when _Q_PENDING_BITS == 8) then the above is entirely possible, because tas-pending will only RmW the pending byte, so the later load is able to observe prior tail and lock state (but not earlier than its own trylock, which operates on the whole word, due to coherence). To avoid this we need 2 things: - the load must come after the tas-pending (obviously, otherwise it can trivially observe prior state). - the tas-pending must be a full word RmW instruction, it cannot be an XCHGB for example, such that we cannot observe other state prior to setting pending. On x86 we can realize this by using "LOCK BTS m32, r32" for tas-pending followed by a regular load. Note that observing later state is not a problem: - if we fail to observe a later unlock, we'll simply spin-wait for that store to become visible. - if we observe a later xchg_tail(), there is no difference from that xchg_tail() having taken place before the tas-pending. Suggested-by: Will Deacon Reported-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Will Deacon Cc: Linus Torvalds Cc: Peter Zijlstra Cc: andrea.parri@amarulasolutions.com Cc: longman@redhat.com Fixes: 59fb586b4a07 ("locking/qspinlock: Remove unbounded cmpxchg() loop from locking slowpath") Link: https://lkml.kernel.org/r/20181003130957.183726335@infradead.org Signed-off-by: Ingo Molnar [bigeasy: GEN_BINARY_RMWcc macro redo] Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Sasha Levin --- arch/x86/include/asm/qspinlock.h | 21 +++++++++++++++++++++ kernel/locking/qspinlock.c | 17 ++++++++++++++++- 2 files changed, 37 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/qspinlock.h b/arch/x86/include/asm/qspinlock.h index 2cb6624acaec6..f784b95e44df1 100644 --- a/arch/x86/include/asm/qspinlock.h +++ b/arch/x86/include/asm/qspinlock.h @@ -5,9 +5,30 @@ #include #include #include +#include #define _Q_PENDING_LOOPS (1 << 9) +#define queued_fetch_set_pending_acquire queued_fetch_set_pending_acquire + +static __always_inline bool __queued_RMW_btsl(struct qspinlock *lock) +{ + GEN_BINARY_RMWcc(LOCK_PREFIX "btsl", lock->val.counter, + "I", _Q_PENDING_OFFSET, "%0", c); +} + +static __always_inline u32 queued_fetch_set_pending_acquire(struct qspinlock *lock) +{ + u32 val = 0; + + if (__queued_RMW_btsl(lock)) + val |= _Q_PENDING_VAL; + + val |= atomic_read(&lock->val) & ~_Q_PENDING_MASK; + + return val; +} + #define queued_spin_unlock queued_spin_unlock /** * queued_spin_unlock - release a queued spinlock diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c index 9ffc2f9af8b8f..1011a1b292ac4 100644 --- a/kernel/locking/qspinlock.c +++ b/kernel/locking/qspinlock.c @@ -225,6 +225,20 @@ static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail) } #endif /* _Q_PENDING_BITS == 8 */ +/** + * queued_fetch_set_pending_acquire - fetch the whole lock value and set pending + * @lock : Pointer to queued spinlock structure + * Return: The previous lock value + * + * *,*,* -> *,1,* + */ +#ifndef queued_fetch_set_pending_acquire +static __always_inline u32 queued_fetch_set_pending_acquire(struct qspinlock *lock) +{ + return atomic_fetch_or_acquire(_Q_PENDING_VAL, &lock->val); +} +#endif + /** * set_locked - Set the lock bit and own the lock * @lock: Pointer to queued spinlock structure @@ -323,7 +337,8 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val) * 0,0,0 -> 0,0,1 ; trylock * 0,0,1 -> 0,1,1 ; pending */ - val = atomic_fetch_or_acquire(_Q_PENDING_VAL, &lock->val); + val = queued_fetch_set_pending_acquire(lock); + /* * If we observe any contention; undo and queue. */ -- GitLab From 2a35d21a4d12fe774c46d17831e2938781179c83 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 25 Oct 2017 12:33:42 -0600 Subject: [PATCH 2226/2269] elevator: lookup mq vs non-mq elevators [ Upstream commit 2527d99789e248576ac8081530cd4fd88730f8c7 ] If an IO scheduler is selected via elevator= and it doesn't match the driver in question wrt blk-mq support, then we fail to boot. The elevator= parameter is deprecated and only supported for non-mq devices. Augment the elevator lookup API so that we pass in if we're looking for an mq capable scheduler or not, so that we only ever return a valid type for the queue in question. Fixes: https://bugzilla.kernel.org/show_bug.cgi?id=196695 Reviewed-by: Omar Sandoval Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- block/elevator.c | 44 +++++++++++++++++++++----------------------- 1 file changed, 21 insertions(+), 23 deletions(-) diff --git a/block/elevator.c b/block/elevator.c index 153926a909011..8320d97240bec 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -83,12 +83,15 @@ bool elv_bio_merge_ok(struct request *rq, struct bio *bio) } EXPORT_SYMBOL(elv_bio_merge_ok); -static struct elevator_type *elevator_find(const char *name) +/* + * Return scheduler with name 'name' and with matching 'mq capability + */ +static struct elevator_type *elevator_find(const char *name, bool mq) { struct elevator_type *e; list_for_each_entry(e, &elv_list, list) { - if (!strcmp(e->elevator_name, name)) + if (!strcmp(e->elevator_name, name) && (mq == e->uses_mq)) return e; } @@ -100,25 +103,25 @@ static void elevator_put(struct elevator_type *e) module_put(e->elevator_owner); } -static struct elevator_type *elevator_get(const char *name, bool try_loading) +static struct elevator_type *elevator_get(struct request_queue *q, + const char *name, bool try_loading) { struct elevator_type *e; spin_lock(&elv_list_lock); - e = elevator_find(name); + e = elevator_find(name, q->mq_ops != NULL); if (!e && try_loading) { spin_unlock(&elv_list_lock); request_module("%s-iosched", name); spin_lock(&elv_list_lock); - e = elevator_find(name); + e = elevator_find(name, q->mq_ops != NULL); } if (e && !try_module_get(e->elevator_owner)) e = NULL; spin_unlock(&elv_list_lock); - return e; } @@ -144,8 +147,12 @@ void __init load_default_elevator_module(void) if (!chosen_elevator[0]) return; + /* + * Boot parameter is deprecated, we haven't supported that for MQ. + * Only look for non-mq schedulers from here. + */ spin_lock(&elv_list_lock); - e = elevator_find(chosen_elevator); + e = elevator_find(chosen_elevator, false); spin_unlock(&elv_list_lock); if (!e) @@ -202,7 +209,7 @@ int elevator_init(struct request_queue *q, char *name) q->boundary_rq = NULL; if (name) { - e = elevator_get(name, true); + e = elevator_get(q, name, true); if (!e) return -EINVAL; } @@ -214,7 +221,7 @@ int elevator_init(struct request_queue *q, char *name) * allowed from async. */ if (!e && !q->mq_ops && *chosen_elevator) { - e = elevator_get(chosen_elevator, false); + e = elevator_get(q, chosen_elevator, false); if (!e) printk(KERN_ERR "I/O scheduler %s not found\n", chosen_elevator); @@ -229,17 +236,17 @@ int elevator_init(struct request_queue *q, char *name) */ if (q->mq_ops) { if (q->nr_hw_queues == 1) - e = elevator_get("mq-deadline", false); + e = elevator_get(q, "mq-deadline", false); if (!e) return 0; } else - e = elevator_get(CONFIG_DEFAULT_IOSCHED, false); + e = elevator_get(q, CONFIG_DEFAULT_IOSCHED, false); if (!e) { printk(KERN_ERR "Default I/O scheduler not found. " \ "Using noop.\n"); - e = elevator_get("noop", false); + e = elevator_get(q, "noop", false); } } @@ -905,7 +912,7 @@ int elv_register(struct elevator_type *e) /* register, don't allow duplicate names */ spin_lock(&elv_list_lock); - if (elevator_find(e->elevator_name)) { + if (elevator_find(e->elevator_name, e->uses_mq)) { spin_unlock(&elv_list_lock); if (e->icq_cache) kmem_cache_destroy(e->icq_cache); @@ -1066,7 +1073,7 @@ static int __elevator_change(struct request_queue *q, const char *name) return elevator_switch(q, NULL); strlcpy(elevator_name, name, sizeof(elevator_name)); - e = elevator_get(strstrip(elevator_name), true); + e = elevator_get(q, strstrip(elevator_name), true); if (!e) return -EINVAL; @@ -1076,15 +1083,6 @@ static int __elevator_change(struct request_queue *q, const char *name) return 0; } - if (!e->uses_mq && q->mq_ops) { - elevator_put(e); - return -EINVAL; - } - if (e->uses_mq && !q->mq_ops) { - elevator_put(e); - return -EINVAL; - } - return elevator_switch(q, e); } -- GitLab From 5515c5bd3f560e89222cce52ea2f71c71687a2e8 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Fri, 12 Jan 2018 11:16:50 +0100 Subject: [PATCH 2227/2269] netfilter: ipset: Fix wraparound in hash:*net* types [ Upstream commit 0b8d9073539e217f79ec1bff65eb205ac796723d ] Fix wraparound bug which could lead to memory exhaustion when adding an x.x.x.x-255.255.255.255 range to any hash:*net* types. Fixes Netfilter's bugzilla id #1212, reported by Thomas Schwark. Fixes: 48596a8ddc46 ("netfilter: ipset: Fix adding an IPv4 range containing more than 2^31 addresses") Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/ipset/ip_set_hash_ipportnet.c | 26 +++++++-------- net/netfilter/ipset/ip_set_hash_net.c | 9 +++-- net/netfilter/ipset/ip_set_hash_netiface.c | 9 +++-- net/netfilter/ipset/ip_set_hash_netnet.c | 28 ++++++++-------- net/netfilter/ipset/ip_set_hash_netport.c | 19 ++++++----- net/netfilter/ipset/ip_set_hash_netportnet.c | 35 ++++++++++---------- 6 files changed, 63 insertions(+), 63 deletions(-) diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c index a2f19b9906e90..543518384aa7e 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportnet.c +++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c @@ -168,7 +168,7 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], struct hash_ipportnet4_elem e = { .cidr = HOST_MASK - 1 }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 ip = 0, ip_to = 0, p = 0, port, port_to; - u32 ip2_from = 0, ip2_to = 0, ip2_last, ip2; + u32 ip2_from = 0, ip2_to = 0, ip2; bool with_ports = false; u8 cidr; int ret; @@ -269,22 +269,21 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], ip_set_mask_from_to(ip2_from, ip2_to, e.cidr + 1); } - if (retried) + if (retried) { ip = ntohl(h->next.ip); + p = ntohs(h->next.port); + ip2 = ntohl(h->next.ip2); + } else { + p = port; + ip2 = ip2_from; + } for (; ip <= ip_to; ip++) { e.ip = htonl(ip); - p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port) - : port; for (; p <= port_to; p++) { e.port = htons(p); - ip2 = retried && - ip == ntohl(h->next.ip) && - p == ntohs(h->next.port) - ? ntohl(h->next.ip2) : ip2_from; - while (ip2 <= ip2_to) { + do { e.ip2 = htonl(ip2); - ip2_last = ip_set_range_to_cidr(ip2, ip2_to, - &cidr); + ip2 = ip_set_range_to_cidr(ip2, ip2_to, &cidr); e.cidr = cidr - 1; ret = adtfn(set, &e, &ext, &ext, flags); @@ -292,9 +291,10 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], return ret; ret = 0; - ip2 = ip2_last + 1; - } + } while (ip2++ < ip2_to); + ip2 = ip2_from; } + p = port; } return ret; } diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c index 1c67a1761e458..5449e23af13ac 100644 --- a/net/netfilter/ipset/ip_set_hash_net.c +++ b/net/netfilter/ipset/ip_set_hash_net.c @@ -143,7 +143,7 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[], ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_net4_elem e = { .cidr = HOST_MASK }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); - u32 ip = 0, ip_to = 0, last; + u32 ip = 0, ip_to = 0; int ret; if (tb[IPSET_ATTR_LINENO]) @@ -193,16 +193,15 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[], } if (retried) ip = ntohl(h->next.ip); - while (ip <= ip_to) { + do { e.ip = htonl(ip); - last = ip_set_range_to_cidr(ip, ip_to, &e.cidr); + ip = ip_set_range_to_cidr(ip, ip_to, &e.cidr); ret = adtfn(set, &e, &ext, &ext, flags); if (ret && !ip_set_eexist(ret, flags)) return ret; ret = 0; - ip = last + 1; - } + } while (ip++ < ip_to); return ret; } diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c index d417074f1c1a2..f5164c1efce2d 100644 --- a/net/netfilter/ipset/ip_set_hash_netiface.c +++ b/net/netfilter/ipset/ip_set_hash_netiface.c @@ -200,7 +200,7 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[], ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netiface4_elem e = { .cidr = HOST_MASK, .elem = 1 }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); - u32 ip = 0, ip_to = 0, last; + u32 ip = 0, ip_to = 0; int ret; if (tb[IPSET_ATTR_LINENO]) @@ -255,17 +255,16 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[], if (retried) ip = ntohl(h->next.ip); - while (ip <= ip_to) { + do { e.ip = htonl(ip); - last = ip_set_range_to_cidr(ip, ip_to, &e.cidr); + ip = ip_set_range_to_cidr(ip, ip_to, &e.cidr); ret = adtfn(set, &e, &ext, &ext, flags); if (ret && !ip_set_eexist(ret, flags)) return ret; ret = 0; - ip = last + 1; - } + } while (ip++ < ip_to); return ret; } diff --git a/net/netfilter/ipset/ip_set_hash_netnet.c b/net/netfilter/ipset/ip_set_hash_netnet.c index 7f9ae2e9645be..5a2b923bd81fc 100644 --- a/net/netfilter/ipset/ip_set_hash_netnet.c +++ b/net/netfilter/ipset/ip_set_hash_netnet.c @@ -169,8 +169,8 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[], ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netnet4_elem e = { }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); - u32 ip = 0, ip_to = 0, last; - u32 ip2 = 0, ip2_from = 0, ip2_to = 0, last2; + u32 ip = 0, ip_to = 0; + u32 ip2 = 0, ip2_from = 0, ip2_to = 0; int ret; if (tb[IPSET_ATTR_LINENO]) @@ -247,27 +247,27 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[], ip_set_mask_from_to(ip2_from, ip2_to, e.cidr[1]); } - if (retried) + if (retried) { ip = ntohl(h->next.ip[0]); + ip2 = ntohl(h->next.ip[1]); + } else { + ip2 = ip2_from; + } - while (ip <= ip_to) { + do { e.ip[0] = htonl(ip); - last = ip_set_range_to_cidr(ip, ip_to, &e.cidr[0]); - ip2 = (retried && - ip == ntohl(h->next.ip[0])) ? ntohl(h->next.ip[1]) - : ip2_from; - while (ip2 <= ip2_to) { + ip = ip_set_range_to_cidr(ip, ip_to, &e.cidr[0]); + do { e.ip[1] = htonl(ip2); - last2 = ip_set_range_to_cidr(ip2, ip2_to, &e.cidr[1]); + ip2 = ip_set_range_to_cidr(ip2, ip2_to, &e.cidr[1]); ret = adtfn(set, &e, &ext, &ext, flags); if (ret && !ip_set_eexist(ret, flags)) return ret; ret = 0; - ip2 = last2 + 1; - } - ip = last + 1; - } + } while (ip2++ < ip2_to); + ip2 = ip2_from; + } while (ip++ < ip_to); return ret; } diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c index e6ef382febe46..1a187be9ebc8d 100644 --- a/net/netfilter/ipset/ip_set_hash_netport.c +++ b/net/netfilter/ipset/ip_set_hash_netport.c @@ -161,7 +161,7 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[], ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netport4_elem e = { .cidr = HOST_MASK - 1 }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); - u32 port, port_to, p = 0, ip = 0, ip_to = 0, last; + u32 port, port_to, p = 0, ip = 0, ip_to = 0; bool with_ports = false; u8 cidr; int ret; @@ -239,25 +239,26 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[], ip_set_mask_from_to(ip, ip_to, e.cidr + 1); } - if (retried) + if (retried) { ip = ntohl(h->next.ip); - while (ip <= ip_to) { + p = ntohs(h->next.port); + } else { + p = port; + } + do { e.ip = htonl(ip); - last = ip_set_range_to_cidr(ip, ip_to, &cidr); + ip = ip_set_range_to_cidr(ip, ip_to, &cidr); e.cidr = cidr - 1; - p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port) - : port; for (; p <= port_to; p++) { e.port = htons(p); ret = adtfn(set, &e, &ext, &ext, flags); - if (ret && !ip_set_eexist(ret, flags)) return ret; ret = 0; } - ip = last + 1; - } + p = port; + } while (ip++ < ip_to); return ret; } diff --git a/net/netfilter/ipset/ip_set_hash_netportnet.c b/net/netfilter/ipset/ip_set_hash_netportnet.c index 0e6e40c6f6520..613e18e720a44 100644 --- a/net/netfilter/ipset/ip_set_hash_netportnet.c +++ b/net/netfilter/ipset/ip_set_hash_netportnet.c @@ -184,8 +184,8 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[], ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netportnet4_elem e = { }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); - u32 ip = 0, ip_to = 0, ip_last, p = 0, port, port_to; - u32 ip2_from = 0, ip2_to = 0, ip2_last, ip2; + u32 ip = 0, ip_to = 0, p = 0, port, port_to; + u32 ip2_from = 0, ip2_to = 0, ip2; bool with_ports = false; int ret; @@ -288,33 +288,34 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[], ip_set_mask_from_to(ip2_from, ip2_to, e.cidr[1]); } - if (retried) + if (retried) { ip = ntohl(h->next.ip[0]); + p = ntohs(h->next.port); + ip2 = ntohl(h->next.ip[1]); + } else { + p = port; + ip2 = ip2_from; + } - while (ip <= ip_to) { + do { e.ip[0] = htonl(ip); - ip_last = ip_set_range_to_cidr(ip, ip_to, &e.cidr[0]); - p = retried && ip == ntohl(h->next.ip[0]) ? ntohs(h->next.port) - : port; + ip = ip_set_range_to_cidr(ip, ip_to, &e.cidr[0]); for (; p <= port_to; p++) { e.port = htons(p); - ip2 = (retried && ip == ntohl(h->next.ip[0]) && - p == ntohs(h->next.port)) ? ntohl(h->next.ip[1]) - : ip2_from; - while (ip2 <= ip2_to) { + do { e.ip[1] = htonl(ip2); - ip2_last = ip_set_range_to_cidr(ip2, ip2_to, - &e.cidr[1]); + ip2 = ip_set_range_to_cidr(ip2, ip2_to, + &e.cidr[1]); ret = adtfn(set, &e, &ext, &ext, flags); if (ret && !ip_set_eexist(ret, flags)) return ret; ret = 0; - ip2 = ip2_last + 1; - } + } while (ip2++ < ip2_to); + ip2 = ip2_from; } - ip = ip_last + 1; - } + p = port; + } while (ip++ < ip_to); return ret; } -- GitLab From 603bd4dc73594c8ac39a578d9fae997ea04e3a17 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Mon, 26 Mar 2018 16:21:04 +0300 Subject: [PATCH 2228/2269] mac80211: don't WARN on bad WMM parameters from buggy APs [ Upstream commit c470bdc1aaf36669e04ba65faf1092b2d1c6cabe ] Apparently, some APs are buggy enough to send a zeroed WMM IE. Don't WARN on this since this is not caused by a bug on the client's system. This aligns the condition of the WARNING in drv_conf_tx with the validity check in ieee80211_sta_wmm_params. We will now pick the default values whenever we get a zeroed WMM IE. This has been reported here: https://bugzilla.kernel.org/show_bug.cgi?id=199161 Fixes: f409079bb678 ("mac80211: sanity check CW_min/CW_max towards driver") Signed-off-by: Emmanuel Grumbach Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/mac80211/mlme.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 328ac10084e46..75909a744121b 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1861,7 +1861,8 @@ static bool ieee80211_sta_wmm_params(struct ieee80211_local *local, params[ac].acm = acm; params[ac].uapsd = uapsd; - if (params[ac].cw_min > params[ac].cw_max) { + if (params->cw_min == 0 || + params[ac].cw_min > params[ac].cw_max) { sdata_info(sdata, "AP has invalid WMM params (CWmin/max=%d/%d for ACI %d), using defaults\n", params[ac].cw_min, params[ac].cw_max, aci); -- GitLab From 8f8a5a9be2b422af6a7ea12a8fe2bdefdab754c5 Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Tue, 3 Apr 2018 11:35:22 +0300 Subject: [PATCH 2229/2269] mac80211: Fix condition validating WMM IE [ Upstream commit 911a26484c33e10de6237228ca1d7293548e9f49 ] Commit c470bdc1aaf3 ("mac80211: don't WARN on bad WMM parameters from buggy APs") handled cases where an AP reports a zeroed WMM IE. However, the condition that checks the validity accessed the wrong index in the ieee80211_tx_queue_params array, thus wrongly deducing that the parameters are invalid. Fix it. Fixes: c470bdc1aaf3 ("mac80211: don't WARN on bad WMM parameters from buggy APs") Signed-off-by: Ilan Peer Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/mac80211/mlme.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 75909a744121b..4c59b5507e7ac 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1861,7 +1861,7 @@ static bool ieee80211_sta_wmm_params(struct ieee80211_local *local, params[ac].acm = acm; params[ac].uapsd = uapsd; - if (params->cw_min == 0 || + if (params[ac].cw_min == 0 || params[ac].cw_min > params[ac].cw_max) { sdata_info(sdata, "AP has invalid WMM params (CWmin/max=%d/%d for ACI %d), using defaults\n", -- GitLab From 12f75e8ad273df5b4e0bdab97d55cc23be1a0d7f Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Tue, 18 Dec 2018 16:04:18 -0500 Subject: [PATCH 2230/2269] IB/hfi1: Remove race conditions in user_sdma send path commit 28a9a9e83ceae2cee25b9af9ad20d53aaa9ab951 upstream Packet queue state is over used to determine SDMA descriptor availablitity and packet queue request state. cpu 0 ret = user_sdma_send_pkts(req, pcount); cpu 0 if (atomic_read(&pq->n_reqs)) cpu 1 IRQ user_sdma_txreq_cb calls pq_update() (state to _INACTIVE) cpu 0 xchg(&pq->state, SDMA_PKT_Q_ACTIVE); At this point pq->n_reqs == 0 and pq->state is incorrectly SDMA_PKT_Q_ACTIVE. The close path will hang waiting for the state to return to _INACTIVE. This can also change the state from _DEFERRED to _ACTIVE. However, this is a mostly benign race. Remove the racy code path. Use n_reqs to determine if a packet queue is active or not. Cc: # 4.14.0> Reviewed-by: Mitko Haralanov Reviewed-by: Mike Marciniszyn Signed-off-by: Michael J. Ruhl Signed-off-by: Sasha Levin --- drivers/infiniband/hw/hfi1/user_sdma.c | 24 ++++++++++-------------- drivers/infiniband/hw/hfi1/user_sdma.h | 9 +++++---- 2 files changed, 15 insertions(+), 18 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index c14ec04f2a895..cbe5ab26d95bc 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -187,7 +187,6 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, pq->ctxt = uctxt->ctxt; pq->subctxt = fd->subctxt; pq->n_max_reqs = hfi1_sdma_comp_ring_size; - pq->state = SDMA_PKT_Q_INACTIVE; atomic_set(&pq->n_reqs, 0); init_waitqueue_head(&pq->wait); atomic_set(&pq->n_locked, 0); @@ -276,7 +275,7 @@ int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd, /* Wait until all requests have been freed. */ wait_event_interruptible( pq->wait, - (ACCESS_ONCE(pq->state) == SDMA_PKT_Q_INACTIVE)); + !atomic_read(&pq->n_reqs)); kfree(pq->reqs); kfree(pq->req_in_use); kmem_cache_destroy(pq->txreq_cache); @@ -312,6 +311,13 @@ static u8 dlid_to_selector(u16 dlid) return mapping[hash]; } +/** + * hfi1_user_sdma_process_request() - Process and start a user sdma request + * @fd: valid file descriptor + * @iovec: array of io vectors to process + * @dim: overall iovec array size + * @count: number of io vector array entries processed + */ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd, struct iovec *iovec, unsigned long dim, unsigned long *count) @@ -560,20 +566,12 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd, req->ahg_idx = sdma_ahg_alloc(req->sde); set_comp_state(pq, cq, info.comp_idx, QUEUED, 0); + pq->state = SDMA_PKT_Q_ACTIVE; /* Send the first N packets in the request to buy us some time */ ret = user_sdma_send_pkts(req, pcount); if (unlikely(ret < 0 && ret != -EBUSY)) goto free_req; - /* - * It is possible that the SDMA engine would have processed all the - * submitted packets by the time we get here. Therefore, only set - * packet queue state to ACTIVE if there are still uncompleted - * requests. - */ - if (atomic_read(&pq->n_reqs)) - xchg(&pq->state, SDMA_PKT_Q_ACTIVE); - /* * This is a somewhat blocking send implementation. * The driver will block the caller until all packets of the @@ -1391,10 +1389,8 @@ static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status) static inline void pq_update(struct hfi1_user_sdma_pkt_q *pq) { - if (atomic_dec_and_test(&pq->n_reqs)) { - xchg(&pq->state, SDMA_PKT_Q_INACTIVE); + if (atomic_dec_and_test(&pq->n_reqs)) wake_up(&pq->wait); - } } static void user_sdma_free_request(struct user_sdma_request *req, bool unpin) diff --git a/drivers/infiniband/hw/hfi1/user_sdma.h b/drivers/infiniband/hw/hfi1/user_sdma.h index 5af52334b7dca..2b5326d6db53a 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.h +++ b/drivers/infiniband/hw/hfi1/user_sdma.h @@ -94,9 +94,10 @@ #define TXREQ_FLAGS_REQ_ACK BIT(0) /* Set the ACK bit in the header */ #define TXREQ_FLAGS_REQ_DISABLE_SH BIT(1) /* Disable header suppression */ -#define SDMA_PKT_Q_INACTIVE BIT(0) -#define SDMA_PKT_Q_ACTIVE BIT(1) -#define SDMA_PKT_Q_DEFERRED BIT(2) +enum pkt_q_sdma_state { + SDMA_PKT_Q_ACTIVE, + SDMA_PKT_Q_DEFERRED, +}; /* * Maximum retry attempts to submit a TX request @@ -124,7 +125,7 @@ struct hfi1_user_sdma_pkt_q { struct user_sdma_request *reqs; unsigned long *req_in_use; struct iowait busy; - unsigned state; + enum pkt_q_sdma_state state; wait_queue_head_t wait; unsigned long unpinned; struct mmu_rb_handler *handler; -- GitLab From 1f972505011c27d09423a46707789192a039b66a Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 21 Jun 2018 20:35:26 -0400 Subject: [PATCH 2231/2269] locking/qspinlock: Fix build for anonymous union in older GCC compilers [ Upstream commit 6cc65be4f6f2a7186af8f3e09900787c7912dad2 ] One of my tests compiles the kernel with gcc 4.5.3, and I hit the following build error: include/linux/semaphore.h: In function 'sema_init': include/linux/semaphore.h:35:17: error: unknown field 'val' specified in initializer include/linux/semaphore.h:35:17: warning: missing braces around initializer include/linux/semaphore.h:35:17: warning: (near initialization for '(anonymous).raw_lock..val') I bisected it down to: 625e88be1f41 ("locking/qspinlock: Merge 'struct __qspinlock' into 'struct qspinlock'") ... which makes qspinlock have an anonymous union, which makes initializing it special for older compilers. By adding strategic brackets, it makes the build happy again. Signed-off-by: Steven Rostedt (VMware) Acked-by: Waiman Long Cc: Andrew Morton Cc: Boqun Feng Cc: Linus Torvalds Cc: Peter Zijlstra (Intel) Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Fixes: 625e88be1f41 ("locking/qspinlock: Merge 'struct __qspinlock' into 'struct qspinlock'") Link: http://lkml.kernel.org/r/20180621203526.172ab5c4@vmware.local.home Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin --- include/asm-generic/qspinlock_types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/asm-generic/qspinlock_types.h b/include/asm-generic/qspinlock_types.h index 0763f065b975a..d10f1e7d6ba8c 100644 --- a/include/asm-generic/qspinlock_types.h +++ b/include/asm-generic/qspinlock_types.h @@ -63,7 +63,7 @@ typedef struct qspinlock { /* * Initializier */ -#define __ARCH_SPIN_LOCK_UNLOCKED { .val = ATOMIC_INIT(0) } +#define __ARCH_SPIN_LOCK_UNLOCKED { { .val = ATOMIC_INIT(0) } } /* * Bitfields in the atomic value: -- GitLab From 0d2d1629780025966c3fc793c201e387da18f5be Mon Sep 17 00:00:00 2001 From: Alexey Khoroshilov Date: Fri, 5 Oct 2018 23:22:06 +0300 Subject: [PATCH 2232/2269] mac80211_hwsim: fix module init error paths for netlink [ Upstream commit 05cc09de4c017663a217630682041066f2f9a5cd ] There is no unregister netlink notifier and family on error paths in init_mac80211_hwsim(). Also there is an error path where hwsim_class is not destroyed. Found by Linux Driver Verification project (linuxtesting.org). Signed-off-by: Alexey Khoroshilov Fixes: 62759361eb49 ("mac80211-hwsim: Provide multicast event for HWSIM_CMD_NEW_RADIO") Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- drivers/net/wireless/mac80211_hwsim.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 670224be3c8b4..8f57ca969c9f4 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -3472,16 +3472,16 @@ static int __init init_mac80211_hwsim(void) if (err) goto out_unregister_pernet; + err = hwsim_init_netlink(); + if (err) + goto out_unregister_driver; + hwsim_class = class_create(THIS_MODULE, "mac80211_hwsim"); if (IS_ERR(hwsim_class)) { err = PTR_ERR(hwsim_class); - goto out_unregister_driver; + goto out_exit_netlink; } - err = hwsim_init_netlink(); - if (err < 0) - goto out_unregister_driver; - for (i = 0; i < radios; i++) { struct hwsim_new_radio_params param = { 0 }; @@ -3587,6 +3587,8 @@ out_free_mon: free_netdev(hwsim_mon); out_free_radios: mac80211_hwsim_free(); +out_exit_netlink: + hwsim_exit_netlink(); out_unregister_driver: platform_driver_unregister(&mac80211_hwsim_driver); out_unregister_pernet: -- GitLab From ce28c745aafcd5ae6a5a85731c8585e1cf0e3e2e Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Thu, 15 Nov 2018 11:05:10 -0800 Subject: [PATCH 2233/2269] Input: hyper-v - fix wakeup from suspend-to-idle [ Upstream commit 10f91c73cc41ceead210a905dbd196398e99c7d2 ] It makes little sense but still possible to put Hyper-V guests into suspend-to-idle state. To wake them up two wakeup sources were registered in the past: hyperv-keyboard and hid-hyperv. However, since commit eed4d47efe95 ("ACPI / sleep: Ignore spurious SCI wakeups from suspend-to-idle") pm_wakeup_event() from these devices is ignored. Switch to pm_wakeup_hard_event() API as these devices are actually the only possible way to wakeup Hyper-V guests. Fixes: eed4d47efe95 (ACPI / sleep: Ignore spurious SCI wakeups from suspend-to-idle) Reviewed-by: Rafael J. Wysocki Acked-by: K. Y. Srinivasan Acked-by: Jiri Kosina Signed-off-by: Vitaly Kuznetsov Signed-off-by: Dmitry Torokhov Signed-off-by: Sasha Levin --- drivers/hid/hid-hyperv.c | 2 +- drivers/input/serio/hyperv-keyboard.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/hid/hid-hyperv.c b/drivers/hid/hid-hyperv.c index 6039f071fab13..5f1de24206ab2 100644 --- a/drivers/hid/hid-hyperv.c +++ b/drivers/hid/hid-hyperv.c @@ -309,7 +309,7 @@ static void mousevsc_on_receive(struct hv_device *device, hid_input_report(input_dev->hid_device, HID_INPUT_REPORT, input_dev->input_buf, len, 1); - pm_wakeup_event(&input_dev->device->device, 0); + pm_wakeup_hard_event(&input_dev->device->device); break; default: diff --git a/drivers/input/serio/hyperv-keyboard.c b/drivers/input/serio/hyperv-keyboard.c index 25151d9214e05..55288a026e4e2 100644 --- a/drivers/input/serio/hyperv-keyboard.c +++ b/drivers/input/serio/hyperv-keyboard.c @@ -177,7 +177,7 @@ static void hv_kbd_on_receive(struct hv_device *hv_dev, * state because the Enter-UP can trigger a wakeup at once. */ if (!(info & IS_BREAK)) - pm_wakeup_event(&hv_dev->device, 0); + pm_wakeup_hard_event(&hv_dev->device); break; -- GitLab From 1e30cdb191150be718bb01d0eb21c790ac24a1c3 Mon Sep 17 00:00:00 2001 From: Fred Herard Date: Tue, 20 Nov 2018 20:22:45 -0500 Subject: [PATCH 2234/2269] scsi: libiscsi: Fix NULL pointer dereference in iscsi_eh_session_reset [ Upstream commit 5db6dd14b31397e8cccaaddab2ff44ebec1acf25 ] This commit addresses NULL pointer dereference in iscsi_eh_session_reset. Reference should not be made to session->leadconn when session->state is set to ISCSI_STATE_TERMINATE. Signed-off-by: Fred Herard Reviewed-by: Konrad Rzeszutek Wilk Reviewed-by: Lee Duncan Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/libiscsi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index cf8a15e54d83f..3ff536b350a1c 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -2416,8 +2416,8 @@ int iscsi_eh_session_reset(struct scsi_cmnd *sc) failed: ISCSI_DBG_EH(session, "failing session reset: Could not log back into " - "%s, %s [age %d]\n", session->targetname, - conn->persistent_address, session->age); + "%s [age %d]\n", session->targetname, + session->age); spin_unlock_bh(&session->frwd_lock); mutex_unlock(&session->eh_mutex); return FAILED; -- GitLab From 3a2c2aae1bca2e176bcc93f02ce4cc8fe8bda326 Mon Sep 17 00:00:00 2001 From: Cathy Avery Date: Tue, 27 Nov 2018 14:28:53 -0500 Subject: [PATCH 2235/2269] scsi: vmw_pscsi: Rearrange code to avoid multiple calls to free_irq during unload [ Upstream commit 02f425f811cefcc4d325d7a72272651e622dc97e ] Currently pvscsi_remove calls free_irq more than once as pvscsi_release_resources and __pvscsi_shutdown both call pvscsi_shutdown_intr. This results in a 'Trying to free already-free IRQ' warning and stack trace. To solve the problem pvscsi_shutdown_intr has been moved out of pvscsi_release_resources. Signed-off-by: Cathy Avery Reviewed-by: Ewan D. Milne Reviewed-by: Dan Carpenter Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/vmw_pvscsi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/vmw_pvscsi.c b/drivers/scsi/vmw_pvscsi.c index 0cd947f78b5bf..890b8aaf95e10 100644 --- a/drivers/scsi/vmw_pvscsi.c +++ b/drivers/scsi/vmw_pvscsi.c @@ -1202,8 +1202,6 @@ static void pvscsi_shutdown_intr(struct pvscsi_adapter *adapter) static void pvscsi_release_resources(struct pvscsi_adapter *adapter) { - pvscsi_shutdown_intr(adapter); - if (adapter->workqueue) destroy_workqueue(adapter->workqueue); @@ -1535,6 +1533,7 @@ static int pvscsi_probe(struct pci_dev *pdev, const struct pci_device_id *id) out_reset_adapter: ll_adapter_reset(adapter); out_release_resources: + pvscsi_shutdown_intr(adapter); pvscsi_release_resources(adapter); scsi_host_put(host); out_disable_device: @@ -1543,6 +1542,7 @@ out_disable_device: return error; out_release_resources_and_disable: + pvscsi_shutdown_intr(adapter); pvscsi_release_resources(adapter); goto out_disable_device; } -- GitLab From 81710cedad2fa7ea3221cdf77bcb1d39683f07ac Mon Sep 17 00:00:00 2001 From: YiFei Zhu Date: Thu, 29 Nov 2018 18:12:30 +0100 Subject: [PATCH 2236/2269] x86/earlyprintk/efi: Fix infinite loop on some screen widths [ Upstream commit 79c2206d369b87b19ac29cb47601059b6bf5c291 ] An affected screen resolution is 1366 x 768, which width is not divisible by 8, the default font width. On such screens, when longer lines are earlyprintk'ed, overflow-to-next-line can never trigger, due to the left-most x-coordinate of the next character always less than the screen width. Earlyprintk will infinite loop in trying to print the rest of the string but unable to, due to the line being full. This patch makes the trigger consider the right-most x-coordinate, instead of left-most, as the value to compare against the screen width threshold. Signed-off-by: YiFei Zhu Signed-off-by: Ard Biesheuvel Cc: Andy Lutomirski Cc: Arend van Spriel Cc: Bhupesh Sharma Cc: Borislav Petkov Cc: Dave Hansen Cc: Eric Snowberg Cc: Hans de Goede Cc: Joe Perches Cc: Jon Hunter Cc: Julien Thierry Cc: Linus Torvalds Cc: Marc Zyngier Cc: Matt Fleming Cc: Nathan Chancellor Cc: Peter Zijlstra Cc: Sai Praneeth Prakhya Cc: Sedat Dilek Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/20181129171230.18699-12-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin --- arch/x86/platform/efi/early_printk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/platform/efi/early_printk.c b/arch/x86/platform/efi/early_printk.c index 5fdacb322ceb4..c3e6be110b7d0 100644 --- a/arch/x86/platform/efi/early_printk.c +++ b/arch/x86/platform/efi/early_printk.c @@ -179,7 +179,7 @@ early_efi_write(struct console *con, const char *str, unsigned int num) num--; } - if (efi_x >= si->lfb_width) { + if (efi_x + font->width > si->lfb_width) { efi_x = 0; efi_y += font->height; } -- GitLab From 5094dea5acab748453e75fcb53db6fe9ce629ab7 Mon Sep 17 00:00:00 2001 From: Sean Paul Date: Wed, 3 Oct 2018 16:22:31 -0400 Subject: [PATCH 2237/2269] drm/msm: Grab a vblank reference when waiting for commit_done [ Upstream commit 3b712e43e3876b42b38321ecf790a1f5fe59c834 ] Similar to the atomic helpers, we should enable vblank while we're waiting for the commit to finish. DPU needs this, MDP5 seems to work fine without it. Reviewed-by: Abhinav Kumar Signed-off-by: Sean Paul Signed-off-by: Rob Clark Signed-off-by: Sasha Levin --- drivers/gpu/drm/msm/msm_atomic.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/msm/msm_atomic.c b/drivers/gpu/drm/msm/msm_atomic.c index 025d454163b04..8f77047a226de 100644 --- a/drivers/gpu/drm/msm/msm_atomic.c +++ b/drivers/gpu/drm/msm/msm_atomic.c @@ -93,7 +93,12 @@ static void msm_atomic_wait_for_commit_done(struct drm_device *dev, if (!new_crtc_state->active) continue; + if (drm_crtc_vblank_get(crtc)) + continue; + kms->funcs->wait_for_crtc_commit_done(kms, crtc); + + drm_crtc_vblank_put(crtc); } } -- GitLab From 32f91e02d0e93535b914bb8ed7111e5004dfdecd Mon Sep 17 00:00:00 2001 From: Jose Abreu Date: Fri, 30 Nov 2018 09:47:31 +0000 Subject: [PATCH 2238/2269] ARC: io.h: Implement reads{x}()/writes{x}() [ Upstream commit 10d443431dc2bb733cf7add99b453e3fb9047a2e ] Some ARC CPU's do not support unaligned loads/stores. Currently, generic implementation of reads{b/w/l}()/writes{b/w/l}() is being used with ARC. This can lead to misfunction of some drivers as generic functions do a plain dereference of a pointer that can be unaligned. Let's use {get/put}_unaligned() helpers instead of plain dereference of pointer in order to fix. The helpers allow to get and store data from an unaligned address whilst preserving the CPU internal alignment. According to [1], the use of these helpers are costly in terms of performance so we added an initial check for a buffer already aligned so that the usage of the helpers can be avoided, when possible. [1] Documentation/unaligned-memory-access.txt Cc: Alexey Brodkin Cc: Joao Pinto Cc: David Laight Tested-by: Vitor Soares Signed-off-by: Jose Abreu Signed-off-by: Vineet Gupta Signed-off-by: Sasha Levin --- arch/arc/include/asm/io.h | 72 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) diff --git a/arch/arc/include/asm/io.h b/arch/arc/include/asm/io.h index c22b181e8206f..2f39d9b3886e4 100644 --- a/arch/arc/include/asm/io.h +++ b/arch/arc/include/asm/io.h @@ -12,6 +12,7 @@ #include #include #include +#include #ifdef CONFIG_ISA_ARCV2 #include @@ -94,6 +95,42 @@ static inline u32 __raw_readl(const volatile void __iomem *addr) return w; } +/* + * {read,write}s{b,w,l}() repeatedly access the same IO address in + * native endianness in 8-, 16-, 32-bit chunks {into,from} memory, + * @count times + */ +#define __raw_readsx(t,f) \ +static inline void __raw_reads##f(const volatile void __iomem *addr, \ + void *ptr, unsigned int count) \ +{ \ + bool is_aligned = ((unsigned long)ptr % ((t) / 8)) == 0; \ + u##t *buf = ptr; \ + \ + if (!count) \ + return; \ + \ + /* Some ARC CPU's don't support unaligned accesses */ \ + if (is_aligned) { \ + do { \ + u##t x = __raw_read##f(addr); \ + *buf++ = x; \ + } while (--count); \ + } else { \ + do { \ + u##t x = __raw_read##f(addr); \ + put_unaligned(x, buf++); \ + } while (--count); \ + } \ +} + +#define __raw_readsb __raw_readsb +__raw_readsx(8, b) +#define __raw_readsw __raw_readsw +__raw_readsx(16, w) +#define __raw_readsl __raw_readsl +__raw_readsx(32, l) + #define __raw_writeb __raw_writeb static inline void __raw_writeb(u8 b, volatile void __iomem *addr) { @@ -126,6 +163,35 @@ static inline void __raw_writel(u32 w, volatile void __iomem *addr) } +#define __raw_writesx(t,f) \ +static inline void __raw_writes##f(volatile void __iomem *addr, \ + const void *ptr, unsigned int count) \ +{ \ + bool is_aligned = ((unsigned long)ptr % ((t) / 8)) == 0; \ + const u##t *buf = ptr; \ + \ + if (!count) \ + return; \ + \ + /* Some ARC CPU's don't support unaligned accesses */ \ + if (is_aligned) { \ + do { \ + __raw_write##f(*buf++, addr); \ + } while (--count); \ + } else { \ + do { \ + __raw_write##f(get_unaligned(buf++), addr); \ + } while (--count); \ + } \ +} + +#define __raw_writesb __raw_writesb +__raw_writesx(8, b) +#define __raw_writesw __raw_writesw +__raw_writesx(16, w) +#define __raw_writesl __raw_writesl +__raw_writesx(32, l) + /* * MMIO can also get buffered/optimized in micro-arch, so barriers needed * Based on ARM model for the typical use case @@ -141,10 +207,16 @@ static inline void __raw_writel(u32 w, volatile void __iomem *addr) #define readb(c) ({ u8 __v = readb_relaxed(c); __iormb(); __v; }) #define readw(c) ({ u16 __v = readw_relaxed(c); __iormb(); __v; }) #define readl(c) ({ u32 __v = readl_relaxed(c); __iormb(); __v; }) +#define readsb(p,d,l) ({ __raw_readsb(p,d,l); __iormb(); }) +#define readsw(p,d,l) ({ __raw_readsw(p,d,l); __iormb(); }) +#define readsl(p,d,l) ({ __raw_readsl(p,d,l); __iormb(); }) #define writeb(v,c) ({ __iowmb(); writeb_relaxed(v,c); }) #define writew(v,c) ({ __iowmb(); writew_relaxed(v,c); }) #define writel(v,c) ({ __iowmb(); writel_relaxed(v,c); }) +#define writesb(p,d,l) ({ __iowmb(); __raw_writesb(p,d,l); }) +#define writesw(p,d,l) ({ __iowmb(); __raw_writesw(p,d,l); }) +#define writesl(p,d,l) ({ __iowmb(); __raw_writesl(p,d,l); }) /* * Relaxed API for drivers which can handle barrier ordering themselves -- GitLab From 87c1a07b0e7f7127aa4be68864ef9cf6112e0535 Mon Sep 17 00:00:00 2001 From: Toni Peltonen Date: Tue, 27 Nov 2018 16:56:57 +0200 Subject: [PATCH 2239/2269] bonding: fix 802.3ad state sent to partner when unbinding slave [ Upstream commit 3b5b3a3331d141e8f2a7aaae3a94dfa1e61ecbe4 ] Previously when unbinding a slave the 802.3ad implementation only told partner that the port is not suitable for aggregation by setting the port aggregation state from aggregatable to individual. This is not enough. If the physical layer still stays up and we only unbinded this port from the bond there is nothing in the aggregation status alone to prevent the partner from sending traffic towards us. To ensure that the partner doesn't consider this port at all anymore we should also disable collecting and distributing to signal that this actor is going away. Also clear AD_STATE_SYNCHRONIZATION to ensure partner exits collecting + distributing state. I have tested this behaviour againts Arista EOS switches with mlx5 cards (physical link stays up even when interface is down) and simulated the same situation virtually Linux <-> Linux with two network namespaces running two veth device pairs. In both cases setting aggregation to individual doesn't alone prevent traffic from being to sent towards this port given that the link stays up in partners end. Partner still keeps it's end in collecting + distributing state and continues until timeout is reached. In most cases this means we are losing the traffic partner sends towards our port while we wait for timeout. This is most visible with slow periodic time (LACP rate slow). Other open source implementations like Open VSwitch and libreswitch, and vendor implementations like Arista EOS, seem to disable collecting + distributing to when doing similar port disabling/detaching/removing change. With this patch kernel implementation would behave the same way and ensure partner doesn't consider our actor viable anymore. Signed-off-by: Toni Peltonen Signed-off-by: Jay Vosburgh Acked-by: Jonathan Toppins Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/bonding/bond_3ad.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c index f43fb2f958a54..93dfcef8afc4b 100644 --- a/drivers/net/bonding/bond_3ad.c +++ b/drivers/net/bonding/bond_3ad.c @@ -2086,6 +2086,9 @@ void bond_3ad_unbind_slave(struct slave *slave) aggregator->aggregator_identifier); /* Tell the partner that this port is not suitable for aggregation */ + port->actor_oper_port_state &= ~AD_STATE_SYNCHRONIZATION; + port->actor_oper_port_state &= ~AD_STATE_COLLECTING; + port->actor_oper_port_state &= ~AD_STATE_DISTRIBUTING; port->actor_oper_port_state &= ~AD_STATE_AGGREGATION; __update_lacpdu_from_port(port); ad_lacpdu_send(port); -- GitLab From 27be23532ed8cdc4dbe872b9b3b16887d3252e17 Mon Sep 17 00:00:00 2001 From: David Miller Date: Wed, 28 Nov 2018 22:33:53 -0800 Subject: [PATCH 2240/2269] bpf: Fix verifier log string check for bad alignment. [ Upstream commit c01ac66b38660f2b507ccd0b75d28e3002d56fbb ] The message got changed a lot time ago. This was responsible for 36 test case failures on sparc64. Fixes: f1174f77b50c ("bpf/verifier: rework value tracking") Signed-off-by: David S. Miller Signed-off-by: Alexei Starovoitov Signed-off-by: Sasha Levin --- tools/testing/selftests/bpf/test_verifier.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 041dbbb30ff0a..a0591d06c61bf 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -8070,7 +8070,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv, reject_from_alignment = fd_prog < 0 && (test->flags & F_NEEDS_EFFICIENT_UNALIGNED_ACCESS) && - strstr(bpf_vlog, "Unknown alignment."); + strstr(bpf_vlog, "misaligned"); #ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS if (reject_from_alignment) { printf("FAIL\nFailed due to alignment despite having efficient unaligned access: '%s'!\n", -- GitLab From 76413fbfc7c1193b9b5b2a0fde8a90003adb32d3 Mon Sep 17 00:00:00 2001 From: Dave Kleikamp Date: Tue, 27 Nov 2018 19:31:30 +0000 Subject: [PATCH 2241/2269] nfs: don't dirty kernel pages read by direct-io [ Upstream commit ad3cba223ac02dc769c3bbe88efe277bbb457566 ] When we use direct_IO with an NFS backing store, we can trigger a WARNING in __set_page_dirty(), as below, since we're dirtying the page unnecessarily in nfs_direct_read_completion(). To fix, replicate the logic in commit 53cbf3b157a0 ("fs: direct-io: don't dirtying pages for ITER_BVEC/ITER_KVEC direct read"). Other filesystems that implement direct_IO handle this; most use blockdev_direct_IO(). ceph and cifs have similar logic. mount 127.0.0.1:/export /nfs dd if=/dev/zero of=/nfs/image bs=1M count=200 losetup --direct-io=on -f /nfs/image mkfs.btrfs /dev/loop0 mount -t btrfs /dev/loop0 /mnt/ kernel: WARNING: CPU: 0 PID: 8067 at fs/buffer.c:580 __set_page_dirty+0xaf/0xd0 kernel: Modules linked in: loop(E) nfsv3(E) rpcsec_gss_krb5(E) nfsv4(E) dns_resolver(E) nfs(E) fscache(E) nfsd(E) auth_rpcgss(E) nfs_acl(E) lockd(E) grace(E) fuse(E) tun(E) ip6t_rpfilter(E) ipt_REJECT(E) nf_ kernel: snd_seq(E) snd_seq_device(E) snd_pcm(E) video(E) snd_timer(E) snd(E) soundcore(E) ip_tables(E) xfs(E) libcrc32c(E) sd_mod(E) sr_mod(E) cdrom(E) ata_generic(E) pata_acpi(E) crc32c_intel(E) ahci(E) li kernel: CPU: 0 PID: 8067 Comm: kworker/0:2 Tainted: G E 4.20.0-rc1.master.20181111.ol7.x86_64 #1 kernel: Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 kernel: Workqueue: nfsiod rpc_async_release [sunrpc] kernel: RIP: 0010:__set_page_dirty+0xaf/0xd0 kernel: Code: c3 48 8b 02 f6 c4 04 74 d4 48 89 df e8 ba 05 f7 ff 48 89 c6 eb cb 48 8b 43 08 a8 01 75 1f 48 89 d8 48 8b 00 a8 04 74 02 eb 87 <0f> 0b eb 83 48 83 e8 01 eb 9f 48 83 ea 01 0f 1f 00 eb 8b 48 83 e8 kernel: RSP: 0000:ffffc1c8825b7d78 EFLAGS: 00013046 kernel: RAX: 000fffffc0020089 RBX: fffff2b603308b80 RCX: 0000000000000001 kernel: RDX: 0000000000000001 RSI: ffff9d11478115c8 RDI: ffff9d11478115d0 kernel: RBP: ffffc1c8825b7da0 R08: 0000646f6973666e R09: 8080808080808080 kernel: R10: 0000000000000001 R11: 0000000000000000 R12: ffff9d11478115d0 kernel: R13: ffff9d11478115c8 R14: 0000000000003246 R15: 0000000000000001 kernel: FS: 0000000000000000(0000) GS:ffff9d115ba00000(0000) knlGS:0000000000000000 kernel: CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 kernel: CR2: 00007f408686f640 CR3: 0000000104d8e004 CR4: 00000000000606f0 kernel: DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 kernel: DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 kernel: Call Trace: kernel: __set_page_dirty_buffers+0xb6/0x110 kernel: set_page_dirty+0x52/0xb0 kernel: nfs_direct_read_completion+0xc4/0x120 [nfs] kernel: nfs_pgio_release+0x10/0x20 [nfs] kernel: rpc_free_task+0x30/0x70 [sunrpc] kernel: rpc_async_release+0x12/0x20 [sunrpc] kernel: process_one_work+0x174/0x390 kernel: worker_thread+0x4f/0x3e0 kernel: kthread+0x102/0x140 kernel: ? drain_workqueue+0x130/0x130 kernel: ? kthread_stop+0x110/0x110 kernel: ret_from_fork+0x35/0x40 kernel: ---[ end trace 01341980905412c9 ]--- Signed-off-by: Dave Kleikamp Signed-off-by: Santosh Shilimkar [forward-ported to v4.20] Signed-off-by: Calum Mackay Reviewed-by: Dave Kleikamp Reviewed-by: Chuck Lever Signed-off-by: Trond Myklebust Signed-off-by: Sasha Levin --- fs/nfs/direct.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 621c517b325c6..89c03a507dd9d 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -98,8 +98,11 @@ struct nfs_direct_req { struct pnfs_ds_commit_info ds_cinfo; /* Storage for cinfo */ struct work_struct work; int flags; + /* for write */ #define NFS_ODIRECT_DO_COMMIT (1) /* an unstable reply was received */ #define NFS_ODIRECT_RESCHED_WRITES (2) /* write verification failed */ + /* for read */ +#define NFS_ODIRECT_SHOULD_DIRTY (3) /* dirty user-space page after read */ struct nfs_writeverf verf; /* unstable write verifier */ }; @@ -412,7 +415,8 @@ static void nfs_direct_read_completion(struct nfs_pgio_header *hdr) struct nfs_page *req = nfs_list_entry(hdr->pages.next); struct page *page = req->wb_page; - if (!PageCompound(page) && bytes < hdr->good_bytes) + if (!PageCompound(page) && bytes < hdr->good_bytes && + (dreq->flags == NFS_ODIRECT_SHOULD_DIRTY)) set_page_dirty(page); bytes += req->wb_bytes; nfs_list_remove_request(req); @@ -587,6 +591,9 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter) if (!is_sync_kiocb(iocb)) dreq->iocb = iocb; + if (iter_is_iovec(iter)) + dreq->flags = NFS_ODIRECT_SHOULD_DIRTY; + nfs_start_io_direct(inode); NFS_I(inode)->read_io += count; -- GitLab From c3f68415c9871e4828255ffb52b01ed4fe28dca1 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 1 Dec 2018 23:18:00 -0500 Subject: [PATCH 2242/2269] SUNRPC: Fix a potential race in xprt_connect() [ Upstream commit 0a9a4304f3614e25d9de9b63502ca633c01c0d70 ] If an asynchronous connection attempt completes while another task is in xprt_connect(), then the call to rpc_sleep_on() could end up racing with the call to xprt_wake_pending_tasks(). So add a second test of the connection state after we've put the task to sleep and set the XPRT_CONNECTING flag, when we know that there can be no asynchronous connection attempts still in progress. Fixes: 0b9e79431377d ("SUNRPC: Move the test for XPRT_CONNECTING into...") Signed-off-by: Trond Myklebust Signed-off-by: Sasha Levin --- net/sunrpc/xprt.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 8eb0c4f3b3e96..d0282cc88b145 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -780,8 +780,15 @@ void xprt_connect(struct rpc_task *task) return; if (xprt_test_and_set_connecting(xprt)) return; - xprt->stat.connect_start = jiffies; - xprt->ops->connect(xprt, task); + /* Race breaker */ + if (!xprt_connected(xprt)) { + xprt->stat.connect_start = jiffies; + xprt->ops->connect(xprt, task); + } else { + xprt_clear_connecting(xprt); + task->tk_status = 0; + rpc_wake_up_queued_task(&xprt->pending, task); + } } xprt_release_write(xprt, task); } -- GitLab From 923e09c74a9ca2761d3fa5f2c31b497360138244 Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Tue, 20 Nov 2018 08:30:40 -0500 Subject: [PATCH 2243/2269] sbus: char: add of_node_put() [ Upstream commit 87d81a23e24f24ebe014891e8bdf3ff8785031e8 ] use of_node_put() to release the refcount. Signed-off-by: Yangtao Li Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/sbus/char/display7seg.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/sbus/char/display7seg.c b/drivers/sbus/char/display7seg.c index f32765d3cbd89..db761aca86676 100644 --- a/drivers/sbus/char/display7seg.c +++ b/drivers/sbus/char/display7seg.c @@ -221,6 +221,7 @@ static int d7s_probe(struct platform_device *op) dev_set_drvdata(&op->dev, p); d7s_device = p; err = 0; + of_node_put(opts); out: return err; -- GitLab From 6b37106bc20ab5265b113a67563d4fb0b5191153 Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Tue, 20 Nov 2018 08:38:26 -0500 Subject: [PATCH 2244/2269] drivers/sbus/char: add of_node_put() [ Upstream commit 6bd520ab7cf69486ea81fd3cdfd2d5a390ad1100 ] use of_node_put() to release the refcount. Signed-off-by: Yangtao Li Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/sbus/char/envctrl.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/sbus/char/envctrl.c b/drivers/sbus/char/envctrl.c index 56e962a014939..b8481927bfe40 100644 --- a/drivers/sbus/char/envctrl.c +++ b/drivers/sbus/char/envctrl.c @@ -910,8 +910,10 @@ static void envctrl_init_i2c_child(struct device_node *dp, for (len = 0; len < PCF8584_MAX_CHANNELS; ++len) { pchild->mon_type[len] = ENVCTRL_NOMON; } + of_node_put(root_node); return; } + of_node_put(root_node); } /* Get the monitor channels. */ -- GitLab From a19e57563ce7416727c7b318162972d780334020 Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Wed, 21 Nov 2018 10:22:54 -0500 Subject: [PATCH 2245/2269] drivers/tty: add missing of_node_put() [ Upstream commit dac097c4546e4c5b16dd303a1e97c1d319c8ab3e ] of_find_node_by_path() acquires a reference to the node returned by it and that reference needs to be dropped by its caller. This place is not doing this, so fix it. Signed-off-by: Yangtao Li Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/tty/serial/suncore.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/tty/serial/suncore.c b/drivers/tty/serial/suncore.c index 127472bd6a7cf..209f314745ab4 100644 --- a/drivers/tty/serial/suncore.c +++ b/drivers/tty/serial/suncore.c @@ -111,6 +111,7 @@ void sunserial_console_termios(struct console *con, struct device_node *uart_dp) mode = of_get_property(dp, mode_prop, NULL); if (!mode) mode = "9600,8,n,1,-"; + of_node_put(dp); } cflag = CREAD | HUPCL | CLOCAL; -- GitLab From 8b9860ad1efa031be8b37bbd60cdb699d0fdb64c Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Tue, 20 Nov 2018 08:02:49 -0500 Subject: [PATCH 2246/2269] ide: pmac: add of_node_put() [ Upstream commit a51921c0db3fd26c4ed83dc0ec5d32988fa02aa5 ] use of_node_put() to release the refcount. Signed-off-by: Yangtao Li Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/ide/pmac.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/ide/pmac.c b/drivers/ide/pmac.c index c5b902b86b444..203ed4adc04ae 100644 --- a/drivers/ide/pmac.c +++ b/drivers/ide/pmac.c @@ -920,6 +920,7 @@ static u8 pmac_ide_cable_detect(ide_hwif_t *hwif) struct device_node *root = of_find_node_by_path("/"); const char *model = of_get_property(root, "model", NULL); + of_node_put(root); /* Get cable type from device-tree. */ if (cable && !strncmp(cable, "80-", 3)) { /* Some drives fail to detect 80c cable in PowerBook */ -- GitLab From 308d65a18461c90b2e4e14dac55478376382a5d3 Mon Sep 17 00:00:00 2001 From: Wen Yang Date: Thu, 29 Nov 2018 14:01:50 +0800 Subject: [PATCH 2247/2269] drm/msm: Fix error return checking [ Upstream commit 098336deb946f37a70afc0979af388b615c378bf ] The error checks on ret for a negative error return always fails because the return value of iommu_map_sg() is unsigned and can never be negative. Detected with Coccinelle: drivers/gpu/drm/msm/msm_iommu.c:69:9-12: WARNING: Unsigned expression compared with zero: ret < 0 Signed-off-by: Wen Yang CC: Rob Clark CC: David Airlie CC: Julia Lawall CC: linux-arm-msm@vger.kernel.org CC: dri-devel@lists.freedesktop.org CC: freedreno@lists.freedesktop.org CC: linux-kernel@vger.kernel.org Signed-off-by: Sean Paul Signed-off-by: Sasha Levin --- drivers/gpu/drm/msm/msm_iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/msm_iommu.c b/drivers/gpu/drm/msm/msm_iommu.c index b23d33622f374..2a90aa4caec08 100644 --- a/drivers/gpu/drm/msm/msm_iommu.c +++ b/drivers/gpu/drm/msm/msm_iommu.c @@ -66,7 +66,7 @@ static int msm_iommu_map(struct msm_mmu *mmu, uint64_t iova, // pm_runtime_get_sync(mmu->dev); ret = iommu_map_sg(iommu->domain, iova, sgt->sgl, sgt->nents, prot); // pm_runtime_put_sync(mmu->dev); - WARN_ON(ret < 0); + WARN_ON(!ret); return (ret == len) ? 0 : -EINVAL; } -- GitLab From d5f2432339ba2827d75f682887eac604afb06535 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 3 Dec 2018 17:50:55 +0300 Subject: [PATCH 2248/2269] clk: mvebu: Off by one bugs in cp110_of_clk_get() [ Upstream commit d9f5b7f5dd0fa74a89de5a7ac1e26366f211ccee ] These > comparisons should be >= to prevent reading beyond the end of of the clk_data->hws[] buffer. The clk_data->hws[] array is allocated in cp110_syscon_common_probe() when we do: cp110_clk_data = devm_kzalloc(dev, sizeof(*cp110_clk_data) + sizeof(struct clk_hw *) * CP110_CLK_NUM, GFP_KERNEL); As you can see, it has CP110_CLK_NUM elements which is equivalent to CP110_MAX_CORE_CLOCKS + CP110_MAX_GATABLE_CLOCKS. Fixes: d3da3eaef7f4 ("clk: mvebu: new driver for Armada CP110 system controller") Signed-off-by: Dan Carpenter Signed-off-by: Stephen Boyd Signed-off-by: Sasha Levin --- drivers/clk/mvebu/cp110-system-controller.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/clk/mvebu/cp110-system-controller.c b/drivers/clk/mvebu/cp110-system-controller.c index ca9a0a5361747..05c127cafa468 100644 --- a/drivers/clk/mvebu/cp110-system-controller.c +++ b/drivers/clk/mvebu/cp110-system-controller.c @@ -203,11 +203,11 @@ static struct clk_hw *cp110_of_clk_get(struct of_phandle_args *clkspec, unsigned int idx = clkspec->args[1]; if (type == CP110_CLK_TYPE_CORE) { - if (idx > CP110_MAX_CORE_CLOCKS) + if (idx >= CP110_MAX_CORE_CLOCKS) return ERR_PTR(-EINVAL); return clk_data->hws[idx]; } else if (type == CP110_CLK_TYPE_GATABLE) { - if (idx > CP110_MAX_GATABLE_CLOCKS) + if (idx >= CP110_MAX_GATABLE_CLOCKS) return ERR_PTR(-EINVAL); return clk_data->hws[CP110_MAX_CORE_CLOCKS + idx]; } -- GitLab From 2f74717d0237cde6fa63a70be9de7b275e2e8cd0 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 3 Dec 2018 17:51:43 +0300 Subject: [PATCH 2249/2269] clk: mmp: Off by one in mmp_clk_add() [ Upstream commit 2e85c57493e391b93445c1e0d530b36b95becc64 ] The > comparison should be >= or we write one element beyond the end of the unit->clk_table[] array. (The unit->clk_table[] array is allocated in the mmp_clk_init() function and it has unit->nr_clks elements). Fixes: 4661fda10f8b ("clk: mmp: add basic support functions for DT support") Signed-off-by: Dan Carpenter Signed-off-by: Stephen Boyd Signed-off-by: Sasha Levin --- drivers/clk/mmp/clk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/mmp/clk.c b/drivers/clk/mmp/clk.c index ad8d483a35cd5..ca7d37e2c7be6 100644 --- a/drivers/clk/mmp/clk.c +++ b/drivers/clk/mmp/clk.c @@ -183,7 +183,7 @@ void mmp_clk_add(struct mmp_clk_unit *unit, unsigned int id, pr_err("CLK %d has invalid pointer %p\n", id, clk); return; } - if (id > unit->nr_clks) { + if (id >= unit->nr_clks) { pr_err("CLK %d is invalid\n", id); return; } -- GitLab From 2dc84c590cea6b1852e866132edb0079d477dbf6 Mon Sep 17 00:00:00 2001 From: Teika Kazura Date: Mon, 3 Dec 2018 11:26:03 -0800 Subject: [PATCH 2250/2269] Input: synaptics - enable SMBus for HP 15-ay000 [ Upstream commit 5a6dab15f7a79817cab4af612ddd99eda793fce6 ] SMBus works fine for the touchpad with id SYN3221, used in the HP 15-ay000 series, This device has been reported in these messages in the "linux-input" mailing list: * https://marc.info/?l=linux-input&m=152016683003369&w=2 * https://www.spinics.net/lists/linux-input/msg52525.html Reported-by: Nitesh Debnath Reported-by: Teika Kazura Signed-off-by: Teika Kazura Reviewed-by: Benjamin Tissoires Signed-off-by: Dmitry Torokhov Signed-off-by: Sasha Levin --- drivers/input/mouse/synaptics.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c index 65c9095eb5177..54f0d037b5b69 100644 --- a/drivers/input/mouse/synaptics.c +++ b/drivers/input/mouse/synaptics.c @@ -178,6 +178,7 @@ static const char * const smbus_pnp_ids[] = { "LEN0096", /* X280 */ "LEN0097", /* X280 -> ALPS trackpoint */ "LEN200f", /* T450s */ + "SYN3221", /* HP 15-ay000 */ NULL }; -- GitLab From 647492ad7341cc76a9ebafeb67cea1664f674cca Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Mon, 3 Dec 2018 11:24:30 -0800 Subject: [PATCH 2251/2269] Input: omap-keypad - fix keyboard debounce configuration [ Upstream commit 6c3516fed7b61a3527459ccfa67fab130d910610 ] I noticed that the Android v3.0.8 kernel on droid4 is using different keypad values from the mainline kernel and does not have issues with keys occasionally being stuck until pressed again. Turns out there was an earlier patch posted to fix this as "Input: omap-keypad: errata i689: Correct debounce time", but it was never reposted to fix use macros for timing calculations. This updated version is using macros, and also fixes the use of the input clock rate to use 32768KiHz instead of 32000KiHz. And we want to use the known good Android kernel values of 3 and 6 instead of 2 and 6 in the earlier patch. Reported-by: Pavel Machek Signed-off-by: Tony Lindgren Signed-off-by: Dmitry Torokhov Signed-off-by: Sasha Levin --- drivers/input/keyboard/omap4-keypad.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/drivers/input/keyboard/omap4-keypad.c b/drivers/input/keyboard/omap4-keypad.c index 940d38b08e6b5..ce8e2baf31bbb 100644 --- a/drivers/input/keyboard/omap4-keypad.c +++ b/drivers/input/keyboard/omap4-keypad.c @@ -60,8 +60,18 @@ /* OMAP4 values */ #define OMAP4_VAL_IRQDISABLE 0x0 -#define OMAP4_VAL_DEBOUNCINGTIME 0x7 -#define OMAP4_VAL_PVT 0x7 + +/* + * Errata i689: If a key is released for a time shorter than debounce time, + * the keyboard will idle and never detect the key release. The workaround + * is to use at least a 12ms debounce time. See omap5432 TRM chapter + * "26.4.6.2 Keyboard Controller Timer" for more information. + */ +#define OMAP4_KEYPAD_PTV_DIV_128 0x6 +#define OMAP4_KEYPAD_DEBOUNCINGTIME_MS(dbms, ptv) \ + ((((dbms) * 1000) / ((1 << ((ptv) + 1)) * (1000000 / 32768))) - 1) +#define OMAP4_VAL_DEBOUNCINGTIME_16MS \ + OMAP4_KEYPAD_DEBOUNCINGTIME_MS(16, OMAP4_KEYPAD_PTV_DIV_128) enum { KBD_REVISION_OMAP4 = 0, @@ -181,9 +191,9 @@ static int omap4_keypad_open(struct input_dev *input) kbd_writel(keypad_data, OMAP4_KBD_CTRL, OMAP4_DEF_CTRL_NOSOFTMODE | - (OMAP4_VAL_PVT << OMAP4_DEF_CTRL_PTV_SHIFT)); + (OMAP4_KEYPAD_PTV_DIV_128 << OMAP4_DEF_CTRL_PTV_SHIFT)); kbd_writel(keypad_data, OMAP4_KBD_DEBOUNCINGTIME, - OMAP4_VAL_DEBOUNCINGTIME); + OMAP4_VAL_DEBOUNCINGTIME_16MS); /* clear pending interrupts */ kbd_write_irqreg(keypad_data, OMAP4_KBD_IRQSTATUS, kbd_read_irqreg(keypad_data, OMAP4_KBD_IRQSTATUS)); -- GitLab From 4b427e57f4f6eb3ad1c02d3faa3d551336f2fd59 Mon Sep 17 00:00:00 2001 From: Juha-Matti Tilli Date: Sun, 2 Dec 2018 12:47:08 +0200 Subject: [PATCH 2252/2269] libata: whitelist all SAMSUNG MZ7KM* solid-state disks [ Upstream commit fd6f32f78645db32b6b95a42e45da2ddd6de0e67 ] These devices support read zero after trim (RZAT), as they advertise to the OS. However, the OS doesn't believe the SSDs unless they are explicitly whitelisted. Acked-by: Martin K. Petersen Signed-off-by: Juha-Matti Tilli Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/ata/libata-core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 6938bd86ff1c8..04f406d7e9734 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -4593,6 +4593,7 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { { "SSD*INTEL*", NULL, ATA_HORKAGE_ZERO_AFTER_TRIM, }, { "Samsung*SSD*", NULL, ATA_HORKAGE_ZERO_AFTER_TRIM, }, { "SAMSUNG*SSD*", NULL, ATA_HORKAGE_ZERO_AFTER_TRIM, }, + { "SAMSUNG*MZ7KM*", NULL, ATA_HORKAGE_ZERO_AFTER_TRIM, }, { "ST[1248][0248]0[FH]*", NULL, ATA_HORKAGE_ZERO_AFTER_TRIM, }, /* -- GitLab From 1a130bc711d3931567575c0449cdd255859c1e58 Mon Sep 17 00:00:00 2001 From: Anderson Luiz Alves Date: Fri, 30 Nov 2018 21:58:36 -0200 Subject: [PATCH 2253/2269] mv88e6060: disable hardware level MAC learning [ Upstream commit a74515604a7b171f2702bdcbd1e231225fb456d0 ] Disable hardware level MAC learning because it breaks station roaming. When enabled it drops all frames that arrive from a MAC address that is on a different port at learning table. Signed-off-by: Anderson Luiz Alves Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/dsa/mv88e6060.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/net/dsa/mv88e6060.c b/drivers/net/dsa/mv88e6060.c index f123ed57630d5..86b41840f41ad 100644 --- a/drivers/net/dsa/mv88e6060.c +++ b/drivers/net/dsa/mv88e6060.c @@ -114,8 +114,7 @@ static int mv88e6060_switch_reset(struct dsa_switch *ds) /* Reset the switch. */ REG_WRITE(REG_GLOBAL, GLOBAL_ATU_CONTROL, GLOBAL_ATU_CONTROL_SWRESET | - GLOBAL_ATU_CONTROL_ATUSIZE_1024 | - GLOBAL_ATU_CONTROL_ATE_AGE_5MIN); + GLOBAL_ATU_CONTROL_LEARNDIS); /* Wait up to one second for reset to complete. */ timeout = jiffies + 1 * HZ; @@ -140,13 +139,10 @@ static int mv88e6060_setup_global(struct dsa_switch *ds) */ REG_WRITE(REG_GLOBAL, GLOBAL_CONTROL, GLOBAL_CONTROL_MAX_FRAME_1536); - /* Enable automatic address learning, set the address - * database size to 1024 entries, and set the default aging - * time to 5 minutes. + /* Disable automatic address learning. */ REG_WRITE(REG_GLOBAL, GLOBAL_ATU_CONTROL, - GLOBAL_ATU_CONTROL_ATUSIZE_1024 | - GLOBAL_ATU_CONTROL_ATE_AGE_5MIN); + GLOBAL_ATU_CONTROL_LEARNDIS); return 0; } -- GitLab From e7c36edac701bad12b54f182de7b63b137307e62 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Sun, 2 Dec 2018 14:34:37 +0200 Subject: [PATCH 2254/2269] net/mlx4_en: Fix build break when CONFIG_INET is off MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 1b603f9e4313348608f256b564ed6e3d9e67f377 ] MLX4_EN depends on NETDEVICES, ETHERNET and INET Kconfigs. Make sure they are listed in MLX4_EN Kconfig dependencies. This fixes the following build break: drivers/net/ethernet/mellanox/mlx4/en_rx.c:582:18: warning: ‘struct iphdr’ declared inside parameter list [enabled by default] struct iphdr *iph) ^ drivers/net/ethernet/mellanox/mlx4/en_rx.c:582:18: warning: its scope is only this definition or declaration, which is probably not what you want [enabled by default] drivers/net/ethernet/mellanox/mlx4/en_rx.c: In function ‘get_fixed_ipv4_csum’: drivers/net/ethernet/mellanox/mlx4/en_rx.c:586:20: error: dereferencing pointer to incomplete type _u8 ipproto = iph->protocol; Signed-off-by: Saeed Mahameed Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx4/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/Kconfig b/drivers/net/ethernet/mellanox/mlx4/Kconfig index 22b1cc012bc92..c1a39be0dbe7a 100644 --- a/drivers/net/ethernet/mellanox/mlx4/Kconfig +++ b/drivers/net/ethernet/mellanox/mlx4/Kconfig @@ -5,7 +5,7 @@ config MLX4_EN tristate "Mellanox Technologies 1/10/40Gbit Ethernet support" depends on MAY_USE_DEVLINK - depends on PCI + depends on PCI && NETDEVICES && ETHERNET && INET select MLX4_CORE imply PTP_1588_CLOCK ---help--- -- GitLab From 394f9a113b4c42672c24ce03697712643dc87dee Mon Sep 17 00:00:00 2001 From: Chris Cole Date: Fri, 23 Nov 2018 12:20:45 +0100 Subject: [PATCH 2255/2269] ARM: 8814/1: mm: improve/fix ARM v7_dma_inv_range() unaligned address handling [ Upstream commit a1208f6a822ac29933e772ef1f637c5d67838da9 ] This patch addresses possible memory corruption when v7_dma_inv_range(start_address, end_address) address parameters are not aligned to whole cache lines. This function issues "invalidate" cache management operations to all cache lines from start_address (inclusive) to end_address (exclusive). When start_address and/or end_address are not aligned, the start and/or end cache lines are first issued "clean & invalidate" operation. The assumption is this is done to ensure that any dirty data addresses outside the address range (but part of the first or last cache lines) are cleaned/flushed so that data is not lost, which could happen if just an invalidate is issued. The problem is that these first/last partial cache lines are issued "clean & invalidate" and then "invalidate". This second "invalidate" is not required and worse can cause "lost" writes to addresses outside the address range but part of the cache line. If another component writes to its part of the cache line between the "clean & invalidate" and "invalidate" operations, the write can get lost. This fix is to remove the extra "invalidate" operation when unaligned addressed are used. A kernel module is available that has a stress test to reproduce the issue and a unit test of the updated v7_dma_inv_range(). It can be downloaded from http://ftp.sageembedded.com/outgoing/linux/cache-test-20181107.tgz. v7_dma_inv_range() is call by dmac_[un]map_area(addr, len, direction) when the direction is DMA_FROM_DEVICE. One can (I believe) successfully argue that DMA from a device to main memory should use buffers aligned to cache line size, because the "clean & invalidate" might overwrite data that the device just wrote using DMA. But if a driver does use unaligned buffers, at least this fix will prevent memory corruption outside the buffer. Signed-off-by: Chris Cole Signed-off-by: Russell King Signed-off-by: Sasha Levin --- arch/arm/mm/cache-v7.S | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S index de78109d002db..50a70edbc8635 100644 --- a/arch/arm/mm/cache-v7.S +++ b/arch/arm/mm/cache-v7.S @@ -359,14 +359,16 @@ v7_dma_inv_range: ALT_UP(W(nop)) #endif mcrne p15, 0, r0, c7, c14, 1 @ clean & invalidate D / U line + addne r0, r0, r2 tst r1, r3 bic r1, r1, r3 mcrne p15, 0, r1, c7, c14, 1 @ clean & invalidate D / U line -1: - mcr p15, 0, r0, c7, c6, 1 @ invalidate D / U line - add r0, r0, r2 cmp r0, r1 +1: + mcrlo p15, 0, r0, c7, c6, 1 @ invalidate D / U line + addlo r0, r0, r2 + cmplo r0, r1 blo 1b dsb st ret lr -- GitLab From a2cc1b2d1954ec291ef61aa6976ae10467895f8d Mon Sep 17 00:00:00 2001 From: Vladimir Murzin Date: Fri, 23 Nov 2018 12:25:21 +0100 Subject: [PATCH 2256/2269] ARM: 8815/1: V7M: align v7m_dma_inv_range() with v7 counterpart [ Upstream commit 3d0358d0ba048c5afb1385787aaec8fa5ad78fcc ] Chris has discovered and reported that v7_dma_inv_range() may corrupt memory if address range is not aligned to cache line size. Since the whole cache-v7m.S was lifted form cache-v7.S the same observation applies to v7m_dma_inv_range(). So the fix just mirrors what has been done for v7 with a little specific of M-class. Cc: Chris Cole Signed-off-by: Vladimir Murzin Signed-off-by: Russell King Signed-off-by: Sasha Levin --- arch/arm/mm/cache-v7m.S | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/arch/arm/mm/cache-v7m.S b/arch/arm/mm/cache-v7m.S index 788486e830d3e..32aa2a2aa260c 100644 --- a/arch/arm/mm/cache-v7m.S +++ b/arch/arm/mm/cache-v7m.S @@ -73,9 +73,11 @@ /* * dcimvac: Invalidate data cache line by MVA to PoC */ -.macro dcimvac, rt, tmp - v7m_cacheop \rt, \tmp, V7M_SCB_DCIMVAC +.irp c,,eq,ne,cs,cc,mi,pl,vs,vc,hi,ls,ge,lt,gt,le,hs,lo +.macro dcimvac\c, rt, tmp + v7m_cacheop \rt, \tmp, V7M_SCB_DCIMVAC, \c .endm +.endr /* * dccmvau: Clean data cache line by MVA to PoU @@ -369,14 +371,16 @@ v7m_dma_inv_range: tst r0, r3 bic r0, r0, r3 dccimvacne r0, r3 + addne r0, r0, r2 subne r3, r2, #1 @ restore r3, corrupted by v7m's dccimvac tst r1, r3 bic r1, r1, r3 dccimvacne r1, r3 -1: - dcimvac r0, r3 - add r0, r0, r2 cmp r0, r1 +1: + dcimvaclo r0, r3 + addlo r0, r0, r2 + cmplo r0, r1 blo 1b dsb st ret lr -- GitLab From d126d4bf34807707445c8716d9d7c3584a478cfe Mon Sep 17 00:00:00 2001 From: Nicolas Saenz Julienne Date: Mon, 3 Dec 2018 13:21:01 +0100 Subject: [PATCH 2257/2269] ethernet: fman: fix wrong of_node_put() in probe function [ Upstream commit ecb239d96d369c23c33d41708646df646de669f4 ] After getting a reference to the platform device's of_node the probe function ends up calling of_find_matching_node() using the node as an argument. The function takes care of decreasing the refcount on it. We are then incorrectly decreasing the refcount on that node again. This patch removes the unwarranted call to of_node_put(). Fixes: 414fd46e7762 ("fsl/fman: Add FMan support") Signed-off-by: Nicolas Saenz Julienne Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/freescale/fman/fman.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/freescale/fman/fman.c b/drivers/net/ethernet/freescale/fman/fman.c index 9530405030a70..97425d94e280d 100644 --- a/drivers/net/ethernet/freescale/fman/fman.c +++ b/drivers/net/ethernet/freescale/fman/fman.c @@ -2786,7 +2786,7 @@ static struct fman *read_dts_node(struct platform_device *of_dev) if (!muram_node) { dev_err(&of_dev->dev, "%s: could not find MURAM node\n", __func__); - goto fman_node_put; + goto fman_free; } err = of_address_to_resource(muram_node, 0, @@ -2795,11 +2795,10 @@ static struct fman *read_dts_node(struct platform_device *of_dev) of_node_put(muram_node); dev_err(&of_dev->dev, "%s: of_address_to_resource() = %d\n", __func__, err); - goto fman_node_put; + goto fman_free; } of_node_put(muram_node); - of_node_put(fm_node); err = devm_request_irq(&of_dev->dev, irq, fman_irq, 0, "fman", fman); if (err < 0) { -- GitLab From 938199486ba9bc203fd312a6e02f7edb1640b2b4 Mon Sep 17 00:00:00 2001 From: Sam Bobroff Date: Mon, 3 Dec 2018 11:53:21 +1100 Subject: [PATCH 2258/2269] drm/ast: Fix connector leak during driver unload [ Upstream commit e594a5e349ddbfdaca1951bb3f8d72f3f1660d73 ] When unloading the ast driver, a warning message is printed by drm_mode_config_cleanup() because a reference is still held to one of the drm_connector structs. Correct this by calling drm_crtc_force_disable_all() in ast_fbdev_destroy(). Signed-off-by: Sam Bobroff Reviewed-by: Daniel Vetter Signed-off-by: Dave Airlie Link: https://patchwork.freedesktop.org/patch/msgid/1e613f3c630c7bbc72e04a44b178259b9164d2f6.1543798395.git.sbobroff@linux.ibm.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/ast/ast_fb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/ast/ast_fb.c b/drivers/gpu/drm/ast/ast_fb.c index 0cd827e11fa20..de26df0c6044d 100644 --- a/drivers/gpu/drm/ast/ast_fb.c +++ b/drivers/gpu/drm/ast/ast_fb.c @@ -263,6 +263,7 @@ static void ast_fbdev_destroy(struct drm_device *dev, { struct ast_framebuffer *afb = &afbdev->afb; + drm_crtc_force_disable_all(dev); drm_fb_helper_unregister_fbi(&afbdev->helper); if (afb->obj) { -- GitLab From ab9697222a1da17b6a4e909fab554b6ae5691789 Mon Sep 17 00:00:00 2001 From: Steve French Date: Sat, 3 Nov 2018 15:02:44 -0500 Subject: [PATCH 2259/2269] cifs: In Kconfig CONFIG_CIFS_POSIX needs depends on legacy (insecure cifs) [ Upstream commit 6e785302dad32228819d8066e5376acd15d0e6ba ] Missing a dependency. Shouldn't show cifs posix extensions in Kconfig if CONFIG_CIFS_ALLOW_INSECURE_DIALECTS (ie SMB1 protocol) is disabled. Signed-off-by: Steve French Reviewed-by: Pavel Shilovsky Signed-off-by: Sasha Levin --- fs/cifs/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig index cb0f1fbe836dc..7b95e7971d189 100644 --- a/fs/cifs/Kconfig +++ b/fs/cifs/Kconfig @@ -121,7 +121,7 @@ config CIFS_XATTR config CIFS_POSIX bool "CIFS POSIX Extensions" - depends on CIFS_XATTR + depends on CIFS && CIFS_ALLOW_INSECURE_LEGACY && CIFS_XATTR help Enabling this option will cause the cifs client to attempt to negotiate a newer dialect with servers, such as Samba 3.0.5 -- GitLab From 7e43eec4b4587716505fcf388879e4191331e6fa Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Thu, 6 Dec 2018 19:14:34 +0000 Subject: [PATCH 2260/2269] vhost/vsock: fix reset orphans race with close timeout [ Upstream commit c38f57da428b033f2721b611d84b1f40bde674a8 ] If a local process has closed a connected socket and hasn't received a RST packet yet, then the socket remains in the table until a timeout expires. When a vhost_vsock instance is released with the timeout still pending, the socket is never freed because vhost_vsock has already set the SOCK_DONE flag. Check if the close timer is pending and let it close the socket. This prevents the race which can leak sockets. Reported-by: Maximilian Riemensberger Cc: Graham Whaley Signed-off-by: Stefan Hajnoczi Signed-off-by: Michael S. Tsirkin Signed-off-by: Sasha Levin --- drivers/vhost/vsock.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c index b044a08008050..248533c0f9ac8 100644 --- a/drivers/vhost/vsock.c +++ b/drivers/vhost/vsock.c @@ -561,13 +561,21 @@ static void vhost_vsock_reset_orphans(struct sock *sk) * executing. */ - if (!vhost_vsock_get(vsk->remote_addr.svm_cid)) { - sock_set_flag(sk, SOCK_DONE); - vsk->peer_shutdown = SHUTDOWN_MASK; - sk->sk_state = SS_UNCONNECTED; - sk->sk_err = ECONNRESET; - sk->sk_error_report(sk); - } + /* If the peer is still valid, no need to reset connection */ + if (vhost_vsock_get(vsk->remote_addr.svm_cid)) + return; + + /* If the close timeout is pending, let it expire. This avoids races + * with the timeout callback. + */ + if (vsk->close_work_scheduled) + return; + + sock_set_flag(sk, SOCK_DONE); + vsk->peer_shutdown = SHUTDOWN_MASK; + sk->sk_state = SS_UNCONNECTED; + sk->sk_err = ECONNRESET; + sk->sk_error_report(sk); } static int vhost_vsock_dev_release(struct inode *inode, struct file *file) -- GitLab From 73c821e378e86d9152bb53d9e261ac128b05e582 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 6 Dec 2018 17:44:53 +0000 Subject: [PATCH 2261/2269] mlxsw: spectrum_switchdev: Fix VLAN device deletion via ioctl [ Upstream commit 993107fea5eefdfdfde1ca38d3f01f0bebf76e77 ] When deleting a VLAN device using an ioctl the netdev is unregistered before the VLAN filter is updated via ndo_vlan_rx_kill_vid(). It can lead to a use-after-free in mlxsw in case the VLAN device is deleted while being enslaved to a bridge. The reason for the above is that when mlxsw receives the CHANGEUPPER event, it wrongly assumes that the VLAN device is no longer its upper and thus destroys the internal representation of the bridge port despite the reference count being non-zero. Fix this by checking if the VLAN device is our upper using its real device. In net-next I'm going to remove this trick and instead make mlxsw completely agnostic to the order of the events. Fixes: c57529e1d5d8 ("mlxsw: spectrum: Replace vPorts with Port-VLAN") Signed-off-by: Ido Schimmel Reviewed-by: Petr Machata Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- .../net/ethernet/mellanox/mlxsw/spectrum_switchdev.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index 32c25772f7558..21611613f44c7 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -295,7 +295,13 @@ static bool mlxsw_sp_bridge_port_should_destroy(const struct mlxsw_sp_bridge_port * bridge_port) { - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(bridge_port->dev); + struct net_device *dev = bridge_port->dev; + struct mlxsw_sp *mlxsw_sp; + + if (is_vlan_dev(dev)) + mlxsw_sp = mlxsw_sp_lower_get(vlan_dev_real_dev(dev)); + else + mlxsw_sp = mlxsw_sp_lower_get(dev); /* In case ports were pulled from out of a bridged LAG, then * it's possible the reference count isn't zero, yet the bridge @@ -1646,7 +1652,7 @@ mlxsw_sp_bridge_8021d_port_leave(struct mlxsw_sp_bridge_device *bridge_device, u16 vid = vlan_dev_vlan_id(bridge_port->dev); mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, vid); - if (WARN_ON(!mlxsw_sp_port_vlan)) + if (!mlxsw_sp_port_vlan) return; mlxsw_sp_port_vlan_bridge_leave(mlxsw_sp_port_vlan); -- GitLab From 4bdfff5c4f913c50a419b1e28e5893ebc04a09ff Mon Sep 17 00:00:00 2001 From: "Adamski, Krzysztof (Nokia - PL/Wroclaw)" Date: Fri, 16 Nov 2018 13:24:41 +0000 Subject: [PATCH 2262/2269] i2c: axxia: properly handle master timeout [ Upstream commit 6c7f25cae54b840302e4f1b371dbf318fbf09ab2 ] According to Intel (R) Axxia TM Lionfish Communication Processor Peripheral Subsystem Hardware Reference Manual, the AXXIA I2C module have a programmable Master Wait Timer, which among others, checks the time between commands send in manual mode. When a timeout (25ms) passes, TSS bit is set in Master Interrupt Status register and a Stop command is issued by the hardware. The axxia_i2c_xfer(), does not properly handle this situation, however. For each message a separate axxia_i2c_xfer_msg() is called and this function incorrectly assumes that any interrupt might happen only when waiting for completion. This is mostly correct but there is one exception - a master timeout can trigger if enough time has passed between individual transfers. It will, by definition, happen between transfers when the interrupts are disabled by the code. If that happens, the hardware issues Stop command. The interrupt indicating timeout will not be triggered as soon as we enable them since the Master Interrupt Status is cleared when master mode is entered again (which happens before enabling irqs) meaning this error is lost and the transfer is continued even though the Stop was issued on the bus. The subsequent operations completes without error but a bogus value (0xFF in case of read) is read as the client device is confused because aborted transfer. No error is returned from master_xfer() making caller believe that a valid value was read. To fix the problem, the TSS bit (indicating timeout) in Master Interrupt Status register is checked before each transfer. If it is set, there was a timeout before this transfer and (as described above) the hardware already issued Stop command so the transaction should be aborted thus -ETIMEOUT is returned from the master_xfer() callback. In order to be sure no timeout was issued we can't just read the status just before starting new transaction as there will always be a small window of time (few CPU cycles at best) where this might still happen. For this reason we have to temporally disable the timer before checking for TSS bit. Disabling it will, however, clear the TSS bit so in order to preserve that information, we have to read it in ISR so we have to ensure that the TSS interrupt is not masked between transfers of one transaction. There is no need to call bus recovery or controller reinitialization if that happens so it's skipped. Signed-off-by: Krzysztof Adamski Reviewed-by: Alexander Sverdlin Signed-off-by: Wolfram Sang Signed-off-by: Sasha Levin --- drivers/i2c/busses/i2c-axxia.c | 40 ++++++++++++++++++++++++---------- 1 file changed, 29 insertions(+), 11 deletions(-) diff --git a/drivers/i2c/busses/i2c-axxia.c b/drivers/i2c/busses/i2c-axxia.c index 13f07482ec68c..deea13838648b 100644 --- a/drivers/i2c/busses/i2c-axxia.c +++ b/drivers/i2c/busses/i2c-axxia.c @@ -74,8 +74,7 @@ MST_STATUS_ND) #define MST_STATUS_ERR (MST_STATUS_NAK | \ MST_STATUS_AL | \ - MST_STATUS_IP | \ - MST_STATUS_TSS) + MST_STATUS_IP) #define MST_TX_BYTES_XFRD 0x50 #define MST_RX_BYTES_XFRD 0x54 #define SCL_HIGH_PERIOD 0x80 @@ -241,7 +240,7 @@ static int axxia_i2c_empty_rx_fifo(struct axxia_i2c_dev *idev) */ if (c <= 0 || c > I2C_SMBUS_BLOCK_MAX) { idev->msg_err = -EPROTO; - i2c_int_disable(idev, ~0); + i2c_int_disable(idev, ~MST_STATUS_TSS); complete(&idev->msg_complete); break; } @@ -299,14 +298,19 @@ static irqreturn_t axxia_i2c_isr(int irq, void *_dev) if (status & MST_STATUS_SCC) { /* Stop completed */ - i2c_int_disable(idev, ~0); + i2c_int_disable(idev, ~MST_STATUS_TSS); complete(&idev->msg_complete); } else if (status & MST_STATUS_SNS) { /* Transfer done */ - i2c_int_disable(idev, ~0); + i2c_int_disable(idev, ~MST_STATUS_TSS); if (i2c_m_rd(idev->msg) && idev->msg_xfrd < idev->msg->len) axxia_i2c_empty_rx_fifo(idev); complete(&idev->msg_complete); + } else if (status & MST_STATUS_TSS) { + /* Transfer timeout */ + idev->msg_err = -ETIMEDOUT; + i2c_int_disable(idev, ~MST_STATUS_TSS); + complete(&idev->msg_complete); } else if (unlikely(status & MST_STATUS_ERR)) { /* Transfer error */ i2c_int_disable(idev, ~0); @@ -339,10 +343,10 @@ static int axxia_i2c_xfer_msg(struct axxia_i2c_dev *idev, struct i2c_msg *msg) u32 rx_xfer, tx_xfer; u32 addr_1, addr_2; unsigned long time_left; + unsigned int wt_value; idev->msg = msg; idev->msg_xfrd = 0; - idev->msg_err = 0; reinit_completion(&idev->msg_complete); if (i2c_m_ten(msg)) { @@ -382,9 +386,18 @@ static int axxia_i2c_xfer_msg(struct axxia_i2c_dev *idev, struct i2c_msg *msg) else if (axxia_i2c_fill_tx_fifo(idev) != 0) int_mask |= MST_STATUS_TFL; + wt_value = WT_VALUE(readl(idev->base + WAIT_TIMER_CONTROL)); + /* Disable wait timer temporarly */ + writel(wt_value, idev->base + WAIT_TIMER_CONTROL); + /* Check if timeout error happened */ + if (idev->msg_err) + goto out; + /* Start manual mode */ writel(CMD_MANUAL, idev->base + MST_COMMAND); + writel(WT_EN | wt_value, idev->base + WAIT_TIMER_CONTROL); + i2c_int_enable(idev, int_mask); time_left = wait_for_completion_timeout(&idev->msg_complete, @@ -395,13 +408,15 @@ static int axxia_i2c_xfer_msg(struct axxia_i2c_dev *idev, struct i2c_msg *msg) if (readl(idev->base + MST_COMMAND) & CMD_BUSY) dev_warn(idev->dev, "busy after xfer\n"); - if (time_left == 0) + if (time_left == 0) { idev->msg_err = -ETIMEDOUT; - - if (idev->msg_err == -ETIMEDOUT) i2c_recover_bus(&idev->adapter); + axxia_i2c_init(idev); + } - if (unlikely(idev->msg_err) && idev->msg_err != -ENXIO) +out: + if (unlikely(idev->msg_err) && idev->msg_err != -ENXIO && + idev->msg_err != -ETIMEDOUT) axxia_i2c_init(idev); return idev->msg_err; @@ -409,7 +424,7 @@ static int axxia_i2c_xfer_msg(struct axxia_i2c_dev *idev, struct i2c_msg *msg) static int axxia_i2c_stop(struct axxia_i2c_dev *idev) { - u32 int_mask = MST_STATUS_ERR | MST_STATUS_SCC; + u32 int_mask = MST_STATUS_ERR | MST_STATUS_SCC | MST_STATUS_TSS; unsigned long time_left; reinit_completion(&idev->msg_complete); @@ -436,6 +451,9 @@ axxia_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg msgs[], int num) int i; int ret = 0; + idev->msg_err = 0; + i2c_int_enable(idev, MST_STATUS_TSS); + for (i = 0; ret == 0 && i < num; ++i) ret = axxia_i2c_xfer_msg(idev, &msgs[i]); -- GitLab From c39910b57c7f557d304f50775eab28b6c892a7fd Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 21 Nov 2018 10:19:55 +0100 Subject: [PATCH 2263/2269] i2c: scmi: Fix probe error on devices with an empty SMB0001 ACPI device node [ Upstream commit 0544ee4b1ad574aec3b6379af5f5cdee42840971 ] Some AMD based HP laptops have a SMB0001 ACPI device node which does not define any methods. This leads to the following error in dmesg: [ 5.222731] cmi: probe of SMB0001:00 failed with error -5 This commit makes acpi_smbus_cmi_add() return -ENODEV instead in this case silencing the error. In case of a failure of the i2c_add_adapter() call this commit now propagates the error from that call instead of -EIO. Signed-off-by: Hans de Goede Signed-off-by: Wolfram Sang Signed-off-by: Sasha Levin --- drivers/i2c/busses/i2c-scmi.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/i2c/busses/i2c-scmi.c b/drivers/i2c/busses/i2c-scmi.c index efefcfa24a4c0..d2178f701b418 100644 --- a/drivers/i2c/busses/i2c-scmi.c +++ b/drivers/i2c/busses/i2c-scmi.c @@ -364,6 +364,7 @@ static int acpi_smbus_cmi_add(struct acpi_device *device) { struct acpi_smbus_cmi *smbus_cmi; const struct acpi_device_id *id; + int ret; smbus_cmi = kzalloc(sizeof(struct acpi_smbus_cmi), GFP_KERNEL); if (!smbus_cmi) @@ -385,8 +386,10 @@ static int acpi_smbus_cmi_add(struct acpi_device *device) acpi_walk_namespace(ACPI_TYPE_METHOD, smbus_cmi->handle, 1, acpi_smbus_cmi_query_methods, NULL, smbus_cmi, NULL); - if (smbus_cmi->cap_info == 0) + if (smbus_cmi->cap_info == 0) { + ret = -ENODEV; goto err; + } snprintf(smbus_cmi->adapter.name, sizeof(smbus_cmi->adapter.name), "SMBus CMI adapter %s", @@ -397,7 +400,8 @@ static int acpi_smbus_cmi_add(struct acpi_device *device) smbus_cmi->adapter.class = I2C_CLASS_HWMON | I2C_CLASS_SPD; smbus_cmi->adapter.dev.parent = &device->dev; - if (i2c_add_adapter(&smbus_cmi->adapter)) { + ret = i2c_add_adapter(&smbus_cmi->adapter); + if (ret) { dev_err(&device->dev, "Couldn't register adapter!\n"); goto err; } @@ -407,7 +411,7 @@ static int acpi_smbus_cmi_add(struct acpi_device *device) err: kfree(smbus_cmi); device->driver_data = NULL; - return -EIO; + return ret; } static int acpi_smbus_cmi_remove(struct acpi_device *device) -- GitLab From 90265a60aff7ad21eed76a9e62159760d1d364b2 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 6 Dec 2018 12:55:27 +0900 Subject: [PATCH 2264/2269] i2c: uniphier: fix violation of tLOW requirement for Fast-mode [ Upstream commit 8469636ab5d8c77645b953746c10fda6983a8830 ] Currently, the clock duty is set as tLOW/tHIGH = 1/1. For Fast-mode, tLOW is set to 1.25 us while the I2C spec requires tLOW >= 1.3 us. tLOW/tHIGH = 5/4 would meet both Standard-mode and Fast-mode: Standard-mode: tLOW = 5.56 us, tHIGH = 4.44 us Fast-mode: tLOW = 1.39 us, tHIGH = 1.11 us Signed-off-by: Masahiro Yamada Signed-off-by: Wolfram Sang Signed-off-by: Sasha Levin --- drivers/i2c/busses/i2c-uniphier.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-uniphier.c b/drivers/i2c/busses/i2c-uniphier.c index 454f914ae66db..c488e558aef70 100644 --- a/drivers/i2c/busses/i2c-uniphier.c +++ b/drivers/i2c/busses/i2c-uniphier.c @@ -320,7 +320,13 @@ static void uniphier_i2c_hw_init(struct uniphier_i2c_priv *priv) uniphier_i2c_reset(priv, true); - writel((cyc / 2 << 16) | cyc, priv->membase + UNIPHIER_I2C_CLK); + /* + * Bit30-16: clock cycles of tLOW. + * Standard-mode: tLOW = 4.7 us, tHIGH = 4.0 us + * Fast-mode: tLOW = 1.3 us, tHIGH = 0.6 us + * "tLow/tHIGH = 5/4" meets both. + */ + writel((cyc * 5 / 9 << 16) | cyc, priv->membase + UNIPHIER_I2C_CLK); uniphier_i2c_reset(priv, false); } -- GitLab From 07d4f1c4cd57b6ba4b23d243bcb8d1a63316e784 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 6 Dec 2018 12:55:28 +0900 Subject: [PATCH 2265/2269] i2c: uniphier-f: fix violation of tLOW requirement for Fast-mode [ Upstream commit ece27a337d42a3197935711997f2880f0957ed7e ] Currently, the clock duty is set as tLOW/tHIGH = 1/1. For Fast-mode, tLOW is set to 1.25 us while the I2C spec requires tLOW >= 1.3 us. tLOW/tHIGH = 5/4 would meet both Standard-mode and Fast-mode: Standard-mode: tLOW = 5.56 us, tHIGH = 4.44 us Fast-mode: tLOW = 1.39 us, tHIGH = 1.11 us Signed-off-by: Masahiro Yamada Signed-off-by: Wolfram Sang Signed-off-by: Sasha Levin --- drivers/i2c/busses/i2c-uniphier-f.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-uniphier-f.c b/drivers/i2c/busses/i2c-uniphier-f.c index a403e8579b652..bc26ec822e268 100644 --- a/drivers/i2c/busses/i2c-uniphier-f.c +++ b/drivers/i2c/busses/i2c-uniphier-f.c @@ -470,9 +470,26 @@ static void uniphier_fi2c_hw_init(struct uniphier_fi2c_priv *priv) uniphier_fi2c_reset(priv); + /* + * Standard-mode: tLOW + tHIGH = 10 us + * Fast-mode: tLOW + tHIGH = 2.5 us + */ writel(cyc, priv->membase + UNIPHIER_FI2C_CYC); - writel(cyc / 2, priv->membase + UNIPHIER_FI2C_LCTL); + /* + * Standard-mode: tLOW = 4.7 us, tHIGH = 4.0 us, tBUF = 4.7 us + * Fast-mode: tLOW = 1.3 us, tHIGH = 0.6 us, tBUF = 1.3 us + * "tLow/tHIGH = 5/4" meets both. + */ + writel(cyc * 5 / 9, priv->membase + UNIPHIER_FI2C_LCTL); + /* + * Standard-mode: tHD;STA = 4.0 us, tSU;STA = 4.7 us, tSU;STO = 4.0 us + * Fast-mode: tHD;STA = 0.6 us, tSU;STA = 0.6 us, tSU;STO = 0.6 us + */ writel(cyc / 2, priv->membase + UNIPHIER_FI2C_SSUT); + /* + * Standard-mode: tSU;DAT = 250 ns + * Fast-mode: tSU;DAT = 100 ns + */ writel(cyc / 16, priv->membase + UNIPHIER_FI2C_DSUT); uniphier_fi2c_prepare_operation(priv); -- GitLab From 60e3480e0bbf00ffcec4b1409131cf01b69c7cf1 Mon Sep 17 00:00:00 2001 From: Israel Rukshin Date: Wed, 5 Dec 2018 16:54:57 +0000 Subject: [PATCH 2266/2269] nvmet-rdma: fix response use after free [ Upstream commit d7dcdf9d4e15189ecfda24cc87339a3425448d5c ] nvmet_rdma_release_rsp() may free the response before using it at error flow. Fixes: 8407879 ("nvmet-rdma: fix possible bogus dereference under heavy load") Signed-off-by: Israel Rukshin Reviewed-by: Sagi Grimberg Reviewed-by: Max Gurtovoy Signed-off-by: Christoph Hellwig Signed-off-by: Sasha Levin --- drivers/nvme/target/rdma.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index a70b3d24936d3..5d8140e58f6fc 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -524,6 +524,7 @@ static void nvmet_rdma_send_done(struct ib_cq *cq, struct ib_wc *wc) { struct nvmet_rdma_rsp *rsp = container_of(wc->wr_cqe, struct nvmet_rdma_rsp, send_cqe); + struct nvmet_rdma_queue *queue = cq->cq_context; nvmet_rdma_release_rsp(rsp); @@ -531,7 +532,7 @@ static void nvmet_rdma_send_done(struct ib_cq *cq, struct ib_wc *wc) wc->status != IB_WC_WR_FLUSH_ERR)) { pr_err("SEND for CQE 0x%p failed with status %s (%d).\n", wc->wr_cqe, ib_wc_status_msg(wc->status), wc->status); - nvmet_rdma_error_comp(rsp->queue); + nvmet_rdma_error_comp(queue); } } -- GitLab From f1e1eb5c90b6b6a359e584e01468ce0c05801f4d Mon Sep 17 00:00:00 2001 From: Trent Piepho Date: Wed, 16 May 2018 16:45:51 -0700 Subject: [PATCH 2267/2269] rtc: snvs: Add timeouts to avoid kernel lockups [ Upstream commit cd7f3a249dbed2858e6c2f30e5be7f1f7a709ee2 ] In order to read correctly from asynchronously updated RTC registers, it's necessary to read repeatedly until their values do not change from read to read. It's also necessary to wait for three RTC clock ticks for certain operations. There are no timeouts in this code and these operations could possibly loop forever. To avoid kernel hangs, put in timeouts. The iMX7d can be configured to stop the SRTC on a tamper event, which will lockup the kernel inside this driver as described above. These hangs can happen when running under qemu, which doesn't emulate the SNVS RTC, though currently the driver will refuse to load on qemu due to a timeout in the driver probe method. It could also happen if the SRTC block where somehow placed into reset or the slow speed clock that drives the SRTC counter (but not the CPU) were to stop. The symptoms on a two core iMX7d are a work queue hang on rtc_timer_do_work(), which eventually blocks a systemd fsnotify operation that triggers a work queue flush, causing systemd to hang and thus causing all services that should be started by systemd, like a console getty, to fail to start or stop. Also optimize the wait code to wait less. It only needs to wait for the clock to advance three ticks, not to see it change three times. Cc: Alexandre Belloni Cc: Alessandro Zummo Cc: Fabio Estevam Cc: Shawn Guo Cc: Bryan O'Donoghue Signed-off-by: Trent Piepho Signed-off-by: Alexandre Belloni Signed-off-by: Sasha Levin --- drivers/rtc/rtc-snvs.c | 105 +++++++++++++++++++++++++++-------------- 1 file changed, 70 insertions(+), 35 deletions(-) diff --git a/drivers/rtc/rtc-snvs.c b/drivers/rtc/rtc-snvs.c index 9af591d5223c3..71eee39520f0b 100644 --- a/drivers/rtc/rtc-snvs.c +++ b/drivers/rtc/rtc-snvs.c @@ -47,49 +47,83 @@ struct snvs_rtc_data { struct clk *clk; }; +/* Read 64 bit timer register, which could be in inconsistent state */ +static u64 rtc_read_lpsrt(struct snvs_rtc_data *data) +{ + u32 msb, lsb; + + regmap_read(data->regmap, data->offset + SNVS_LPSRTCMR, &msb); + regmap_read(data->regmap, data->offset + SNVS_LPSRTCLR, &lsb); + return (u64)msb << 32 | lsb; +} + +/* Read the secure real time counter, taking care to deal with the cases of the + * counter updating while being read. + */ static u32 rtc_read_lp_counter(struct snvs_rtc_data *data) { u64 read1, read2; - u32 val; + unsigned int timeout = 100; + /* As expected, the registers might update between the read of the LSB + * reg and the MSB reg. It's also possible that one register might be + * in partially modified state as well. + */ + read1 = rtc_read_lpsrt(data); do { - regmap_read(data->regmap, data->offset + SNVS_LPSRTCMR, &val); - read1 = val; - read1 <<= 32; - regmap_read(data->regmap, data->offset + SNVS_LPSRTCLR, &val); - read1 |= val; - - regmap_read(data->regmap, data->offset + SNVS_LPSRTCMR, &val); - read2 = val; - read2 <<= 32; - regmap_read(data->regmap, data->offset + SNVS_LPSRTCLR, &val); - read2 |= val; - } while (read1 != read2); + read2 = read1; + read1 = rtc_read_lpsrt(data); + } while (read1 != read2 && --timeout); + if (!timeout) + dev_err(&data->rtc->dev, "Timeout trying to get valid LPSRT Counter read\n"); /* Convert 47-bit counter to 32-bit raw second count */ return (u32) (read1 >> CNTR_TO_SECS_SH); } -static void rtc_write_sync_lp(struct snvs_rtc_data *data) +/* Just read the lsb from the counter, dealing with inconsistent state */ +static int rtc_read_lp_counter_lsb(struct snvs_rtc_data *data, u32 *lsb) { - u32 count1, count2, count3; - int i; - - /* Wait for 3 CKIL cycles */ - for (i = 0; i < 3; i++) { - do { - regmap_read(data->regmap, data->offset + SNVS_LPSRTCLR, &count1); - regmap_read(data->regmap, data->offset + SNVS_LPSRTCLR, &count2); - } while (count1 != count2); - - /* Now wait until counter value changes */ - do { - do { - regmap_read(data->regmap, data->offset + SNVS_LPSRTCLR, &count2); - regmap_read(data->regmap, data->offset + SNVS_LPSRTCLR, &count3); - } while (count2 != count3); - } while (count3 == count1); + u32 count1, count2; + unsigned int timeout = 100; + + regmap_read(data->regmap, data->offset + SNVS_LPSRTCLR, &count1); + do { + count2 = count1; + regmap_read(data->regmap, data->offset + SNVS_LPSRTCLR, &count1); + } while (count1 != count2 && --timeout); + if (!timeout) { + dev_err(&data->rtc->dev, "Timeout trying to get valid LPSRT Counter read\n"); + return -ETIMEDOUT; } + + *lsb = count1; + return 0; +} + +static int rtc_write_sync_lp(struct snvs_rtc_data *data) +{ + u32 count1, count2; + u32 elapsed; + unsigned int timeout = 1000; + int ret; + + ret = rtc_read_lp_counter_lsb(data, &count1); + if (ret) + return ret; + + /* Wait for 3 CKIL cycles, about 61.0-91.5 µs */ + do { + ret = rtc_read_lp_counter_lsb(data, &count2); + if (ret) + return ret; + elapsed = count2 - count1; /* wrap around _is_ handled! */ + } while (elapsed < 3 && --timeout); + if (!timeout) { + dev_err(&data->rtc->dev, "Timeout waiting for LPSRT Counter to change\n"); + return -ETIMEDOUT; + } + return 0; } static int snvs_rtc_enable(struct snvs_rtc_data *data, bool enable) @@ -173,9 +207,7 @@ static int snvs_rtc_alarm_irq_enable(struct device *dev, unsigned int enable) (SNVS_LPCR_LPTA_EN | SNVS_LPCR_LPWUI_EN), enable ? (SNVS_LPCR_LPTA_EN | SNVS_LPCR_LPWUI_EN) : 0); - rtc_write_sync_lp(data); - - return 0; + return rtc_write_sync_lp(data); } static int snvs_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm) @@ -183,11 +215,14 @@ static int snvs_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm) struct snvs_rtc_data *data = dev_get_drvdata(dev); struct rtc_time *alrm_tm = &alrm->time; unsigned long time; + int ret; rtc_tm_to_time(alrm_tm, &time); regmap_update_bits(data->regmap, data->offset + SNVS_LPCR, SNVS_LPCR_LPTA_EN, 0); - rtc_write_sync_lp(data); + ret = rtc_write_sync_lp(data); + if (ret) + return ret; regmap_write(data->regmap, data->offset + SNVS_LPTAR, time); /* Clear alarm interrupt status bit */ -- GitLab From ad962d20d4eb1e690ace8ca9e390e2b05a786999 Mon Sep 17 00:00:00 2001 From: Nicolas Schichan Date: Wed, 19 Dec 2018 22:40:42 +0100 Subject: [PATCH 2268/2269] bpf, arm: fix emit_ldx_r and emit_mov_i using TMP_REG_1 emit_ldx_r() and emit_a32_mov_i() were both using TMP_REG_1 and clashing with each other. Using TMP_REG_2 in emit_ldx_r() fixes the issue. Fixes: ec19e02b343 ("ARM: net: bpf: fix LDX instructions") Cc: Russell King Signed-off-by: Nicolas Schichan Signed-off-by: Daniel Borkmann Signed-off-by: Sasha Levin --- arch/arm/net/bpf_jit_32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index ece2d1d43724b..dafeb5f81353e 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -915,7 +915,7 @@ static inline void emit_str_r(const u8 dst, const u8 src, bool dstk, /* dst = *(size*)(src + off) */ static inline void emit_ldx_r(const u8 dst[], const u8 src, bool dstk, s32 off, struct jit_ctx *ctx, const u8 sz){ - const u8 *tmp = bpf2a32[TMP_REG_1]; + const u8 *tmp = bpf2a32[TMP_REG_2]; const u8 *rd = dstk ? tmp : dst; u8 rm = src; s32 off_max; -- GitLab From 592f5569e18471c07208f74540f4e0f646b226f7 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 21 Dec 2018 14:13:19 +0100 Subject: [PATCH 2269/2269] Linux 4.14.90 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index b83477be8d0c1..280c7193e246e 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 89 +SUBLEVEL = 90 EXTRAVERSION = NAME = Petit Gorille -- GitLab